From 6394eb83f7a095c46362606f7ebcb918db181eef Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Thu, 16 Oct 2025 09:28:05 -0700 Subject: [PATCH 01/18] add schema --- rust/sedona-raster/Cargo.toml | 0 rust/sedona-raster/src/lib.rs | 0 rust/sedona-raster/src/raster.rs | 132 +++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 rust/sedona-raster/Cargo.toml create mode 100644 rust/sedona-raster/src/lib.rs create mode 100644 rust/sedona-raster/src/raster.rs diff --git a/rust/sedona-raster/Cargo.toml b/rust/sedona-raster/Cargo.toml new file mode 100644 index 00000000..e69de29b diff --git a/rust/sedona-raster/src/lib.rs b/rust/sedona-raster/src/lib.rs new file mode 100644 index 00000000..e69de29b diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs new file mode 100644 index 00000000..8aee0be1 --- /dev/null +++ b/rust/sedona-raster/src/raster.rs @@ -0,0 +1,132 @@ +use crate::raster::column::{DATA, METADATA}; +use arrow::array::{ + Array, ArrayAccessor, ArrayBuilder, ArrayData, AsArray, BinaryBuilder, ListBuilder, + StructArray, StructBuilder, UInt32Builder, +}; +use arrow::array::{BinaryArray, ListArray, UInt32Array}; +use arrow::datatypes::{DataType, Field, FieldRef, Fields, ToByteSlice}; +use arrow::error::ArrowError; + +#[repr(u16)] +pub enum BandDataType { + UInt8 = 0, + UInt16 = 1, + Int16 = 2, + UInt32 = 3, + Int32 = 4, + Float32 = 5, + Float64 = 6, + // Add complex types if needed +} + +#[repr(u16)] +pub enum StorageType { + InDb = 0, + OutDbRef = 1, +} + +/// Raster schema definition utilities +pub struct RasterSchema; + +impl RasterSchema { + // Raster schema: + pub fn fields() -> Fields { + Fields::from(vec![ + Field::new(column::METADATA, Self::metadata_type(), false), + Field::new(column::BANDS, Self::bands_type(), true), + ]) + } + + /// Raster metadata schema (dimensions and geospatial transformation) + pub fn metadata_type() -> DataType { + 
DataType::Struct(Fields::from(vec![ + // Raster dimensions + Field::new(column::WIDTH, DataType::UInt64, false), + Field::new(column::HEIGHT, DataType::UInt64, false), + // Geospatial transformation parameters + Field::new(column::UPPERLEFT_X, DataType::Float64, false), + Field::new(column::UPPERLEFT_Y, DataType::Float64, false), + Field::new(column::SCALE_X, DataType::Float64, false), + Field::new(column::SCALE_Y, DataType::Float64, false), + Field::new(column::SKEW_X, DataType::Float64, false), + Field::new(column::SKEW_Y, DataType::Float64, false), + // Optional bounding box + Field::new(column::BOUNDING_BOX, Self::bounding_box_type(), true), + ])) + } + + /// Bounding box schema (min_x, min_y, max_x, max_y) + pub fn bounding_box_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::MIN_X, DataType::Float64, false), + Field::new(column::MIN_Y, DataType::Float64, false), + Field::new(column::MAX_X, DataType::Float64, false), + Field::new(column::MAX_Y, DataType::Float64, false), + ])) + } + + /// Bands list schema + pub fn bands_type() -> DataType { + DataType::List(FieldRef::new(Field::new( + column::BAND, + Self::band_type(), + false, + ))) + } + + /// Individual band schema (metadata + data) + pub fn band_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::METADATA, Self::band_metadata_type(), false), + Field::new(column::DATA, Self::band_data_type(), false), + ])) + } + + /// Band metadata schema (nodata, storage type, data type) + /// Con + pub fn band_metadata_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::NODATAVALUE, DataType::Binary, false), + Field::new(column::STORAGE_TYPE, DataType::UInt32, false), + Field::new(column::DATATYPE, DataType::UInt32, false), + ])) + } + + /// Band data schema (list of binary chunks) + pub fn band_data_type() -> DataType { + DataType::List(FieldRef::new(Field::new( + column::DATA, + DataType::Binary, + false, + ))) + } +} + +pub mod 
column { + pub const METADATA: &str = "metadata"; + pub const BANDS: &str = "bands"; + pub const BAND: &str = "band"; + pub const DATA: &str = "data"; + + // Raster metadata fields + pub const WIDTH: &str = "width"; + pub const HEIGHT: &str = "height"; + pub const UPPERLEFT_X: &str = "upperleft_x"; + pub const UPPERLEFT_Y: &str = "upperleft_y"; + pub const SCALE_X: &str = "scale_x"; + pub const SCALE_Y: &str = "scale_y"; + pub const SKEW_X: &str = "skew_x"; + pub const SKEW_Y: &str = "skew_y"; + pub const BOUNDING_BOX: &str = "bounding_box"; + + // Bounding box fields + pub const MIN_X: &str = "min_x"; + pub const MIN_Y: &str = "min_y"; + pub const MAX_X: &str = "max_x"; + pub const MAX_Y: &str = "max_y"; + + // Band metadata fields + pub const NODATAVALUE: &str = "nodata_value"; + pub const STORAGE_TYPE: &str = "storage_type"; + pub const DATATYPE: &str = "data_type"; +} \ No newline at end of file From 72d647fd2519f5daae8587948609db5deeff18f9 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Thu, 16 Oct 2025 12:04:29 -0700 Subject: [PATCH 02/18] add a builder --- rust/sedona-raster/Cargo.toml | 18 + rust/sedona-raster/src/lib.rs | 1 + rust/sedona-raster/src/raster.rs | 697 +++++++++++++++++++++++++++++-- 3 files changed, 692 insertions(+), 24 deletions(-) diff --git a/rust/sedona-raster/Cargo.toml b/rust/sedona-raster/Cargo.toml index e69de29b..05ff60c6 100644 --- a/rust/sedona-raster/Cargo.toml +++ b/rust/sedona-raster/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "sedona-raster" +version.workspace = true +homepage.workspace = true +repository.workspace = true +description.workspace = true +readme.workspace = true +edition.workspace = true +rust-version.workspace = true + +[lints.clippy] +result_large_err = "allow" + +[dev-dependencies] +rstest = { workspace = true } + +[dependencies] +arrow = { workspace = true } \ No newline at end of file diff --git a/rust/sedona-raster/src/lib.rs b/rust/sedona-raster/src/lib.rs index 
e69de29b..72d860b8 100644 --- a/rust/sedona-raster/src/lib.rs +++ b/rust/sedona-raster/src/lib.rs @@ -0,0 +1 @@ +pub mod raster; diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs index 8aee0be1..6b20b1f1 100644 --- a/rust/sedona-raster/src/raster.rs +++ b/rust/sedona-raster/src/raster.rs @@ -1,13 +1,11 @@ -use crate::raster::column::{DATA, METADATA}; -use arrow::array::{ - Array, ArrayAccessor, ArrayBuilder, ArrayData, AsArray, BinaryBuilder, ListBuilder, - StructArray, StructBuilder, UInt32Builder, -}; -use arrow::array::{BinaryArray, ListArray, UInt32Array}; -use arrow::datatypes::{DataType, Field, FieldRef, Fields, ToByteSlice}; +use arrow::array::{ArrayRef, BinaryBuilder, ListBuilder, StructArray, StructBuilder}; +use arrow::buffer::MutableBuffer; +use arrow::datatypes::{DataType, Field, FieldRef, Fields}; use arrow::error::ArrowError; +use std::sync::Arc; #[repr(u16)] +#[derive(Clone, Debug)] pub enum BandDataType { UInt8 = 0, UInt16 = 1, @@ -16,16 +14,31 @@ pub enum BandDataType { Int32 = 4, Float32 = 5, Float64 = 6, - // Add complex types if needed + // Consider support for complex types for scientific data } +/// Storage strategy for raster band data within Apache Arrow arrays. +/// +/// This enum defines how raster data is physically stored and accessed: +/// +/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. +/// - Pros: Self-contained, no external dependencies, fast access for small-medium rasters +/// - Cons: Increases Arrow array size, memory usage grows with raster size +/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) +/// +/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. 
+/// - Pros: Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading +/// - Cons: Requires external storage management, potential for broken references +/// - Best for: Large satellite imagery, time series data, cloud-native workflows +/// - Reference format: JSON with storage type, path/URL, credentials, metadata +/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints #[repr(u16)] +#[derive(Clone, Debug)] pub enum StorageType { InDb = 0, OutDbRef = 1, } -/// Raster schema definition utilities pub struct RasterSchema; impl RasterSchema { @@ -37,7 +50,7 @@ impl RasterSchema { ]) } - /// Raster metadata schema (dimensions and geospatial transformation) + /// Raster metadata schema pub fn metadata_type() -> DataType { DataType::Struct(Fields::from(vec![ // Raster dimensions @@ -55,7 +68,7 @@ impl RasterSchema { ])) } - /// Bounding box schema (min_x, min_y, max_x, max_y) + /// Bounding box schema pub fn bounding_box_type() -> DataType { DataType::Struct(Fields::from(vec![ Field::new(column::MIN_X, DataType::Float64, false), @@ -74,7 +87,7 @@ impl RasterSchema { ))) } - /// Individual band schema (metadata + data) + /// Individual band schema pub fn band_type() -> DataType { DataType::Struct(Fields::from(vec![ Field::new(column::METADATA, Self::band_metadata_type(), false), @@ -82,8 +95,7 @@ impl RasterSchema { ])) } - /// Band metadata schema (nodata, storage type, data type) - /// Con + /// Band metadata schema pub fn band_metadata_type() -> DataType { DataType::Struct(Fields::from(vec![ Field::new(column::NODATAVALUE, DataType::Binary, false), @@ -92,13 +104,9 @@ impl RasterSchema { ])) } - /// Band data schema (list of binary chunks) + /// Band data schema (single binary blob) pub fn band_data_type() -> DataType { - DataType::List(FieldRef::new(Field::new( - column::DATA, - DataType::Binary, - false, - ))) + DataType::Binary } } @@ -107,7 +115,7 @@ pub mod column { pub const BANDS: &str = "bands"; pub const BAND: &str 
= "band"; pub const DATA: &str = "data"; - + // Raster metadata fields pub const WIDTH: &str = "width"; pub const HEIGHT: &str = "height"; @@ -118,15 +126,656 @@ pub mod column { pub const SKEW_X: &str = "skew_x"; pub const SKEW_Y: &str = "skew_y"; pub const BOUNDING_BOX: &str = "bounding_box"; - + // Bounding box fields pub const MIN_X: &str = "min_x"; pub const MIN_Y: &str = "min_y"; pub const MAX_X: &str = "max_x"; pub const MAX_Y: &str = "max_y"; - + // Band metadata fields pub const NODATAVALUE: &str = "nodata_value"; pub const STORAGE_TYPE: &str = "storage_type"; pub const DATATYPE: &str = "data_type"; -} \ No newline at end of file +} + +/// Builder for constructing raster arrays with zero-copy band data writing +pub struct RasterBuilder { + metadata_builder: StructBuilder, + bands_builder: ListBuilder, +} + +impl RasterBuilder { + /// Create a new raster builder with the specified capacity + pub fn new(capacity: usize) -> Self { + let metadata_builder = StructBuilder::from_fields( + match RasterSchema::metadata_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for metadata"), + }, + capacity, + ); + + let bands_builder = ListBuilder::new(StructBuilder::from_fields( + match RasterSchema::band_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for band"), + }, + 0, // Initial capacity for bands + )); + + Self { + metadata_builder, + bands_builder, + } + } + + /// Start a new raster and write its metadata + pub fn start_raster(&mut self, metadata: RasterMetadata) -> Result<(), ArrowError> { + self.append_metadata(metadata) + } + + /// Get direct access to the BinaryBuilder for writing the current band's data + pub fn band_data_writer(&mut self) -> &mut BinaryBuilder { + let band_builder = self.bands_builder.values(); + band_builder.field_builder::(1).unwrap() + } + + /// Create a MutableBuffer that can be written to directly + pub fn create_band_buffer( + &mut self, + capacity: usize, + ) -> 
(MutableBuffer, impl FnOnce(MutableBuffer) + '_) { + let mut buffer = MutableBuffer::with_capacity(capacity); + + // Pre-allocate the buffer to the exact size + buffer.resize(capacity, 0); + + let commit = move |buffer: MutableBuffer| { + // Convert MutableBuffer to &[u8] and append to BinaryBuilder + let data = buffer.as_slice(); + self.band_data_writer().append_value(data); + }; + + (buffer, commit) + } + + /// Alternative: Get a mutable slice from a MutableBuffer for GDAL + /// This provides the most direct access for zero-copy operations + /// TODO: have this 3 different way.... pick one!! + pub fn get_band_buffer_slice(&mut self, size: usize) -> (MutableBuffer, &mut [u8]) { + let mut buffer = MutableBuffer::with_capacity(size); + buffer.resize(size, 0); + + // Get mutable slice that GDAL can write to + let slice = unsafe { + // This is safe because we just allocated the buffer with the exact size + std::slice::from_raw_parts_mut(buffer.as_mut_ptr(), size) + }; + + (buffer, slice) + } + + /// Commit a MutableBuffer to the band data + pub fn commit_band_buffer(&mut self, buffer: MutableBuffer) { + let data = buffer.as_slice(); + self.band_data_writer().append_value(data); + } + + /// Finish writing the current band with its metadata + /// TODO: The band_metadata is in the finish in the band call, but in the + /// start in the raster call. Make it consistent. 
+ pub fn finish_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> { + let band_builder = self.bands_builder.values(); + + let metadata_builder = band_builder.field_builder::(0).unwrap(); + + if let Some(nodata) = band_metadata.nodata_value { + metadata_builder + .field_builder::(0) + .unwrap() + .append_value(&nodata); + } else { + metadata_builder + .field_builder::(0) + .unwrap() + .append_null(); + } + + metadata_builder + .field_builder::(1) + .unwrap() + .append_value(band_metadata.storage_type as u32); + + metadata_builder + .field_builder::(2) + .unwrap() + .append_value(band_metadata.datatype as u32); + + metadata_builder.append(true); + + // Finish the band + band_builder.append(true); + Ok(()) + } + + /// Finish all bands for the current raster + pub fn finish_raster(&mut self) -> Result<(), ArrowError> { + self.bands_builder.append(true); + Ok(()) + } + + /// Append raster metadata + fn append_metadata(&mut self, metadata: RasterMetadata) -> Result<(), ArrowError> { + // Width + self.metadata_builder + .field_builder::(0) + .unwrap() + .append_value(metadata.width); + + // Height + self.metadata_builder + .field_builder::(1) + .unwrap() + .append_value(metadata.height); + + // Geotransform parameters + self.metadata_builder + .field_builder::(2) + .unwrap() + .append_value(metadata.upperleft_x); + + self.metadata_builder + .field_builder::(3) + .unwrap() + .append_value(metadata.upperleft_y); + + self.metadata_builder + .field_builder::(4) + .unwrap() + .append_value(metadata.scale_x); + + self.metadata_builder + .field_builder::(5) + .unwrap() + .append_value(metadata.scale_y); + + self.metadata_builder + .field_builder::(6) + .unwrap() + .append_value(metadata.skew_x); + + self.metadata_builder + .field_builder::(7) + .unwrap() + .append_value(metadata.skew_y); + + // Optional bounding box + if let Some(bbox) = metadata.bounding_box { + let bbox_builder = self + .metadata_builder + .field_builder::(8) + .unwrap(); + + bbox_builder 
+ .field_builder::(0) + .unwrap() + .append_value(bbox.min_x); + + bbox_builder + .field_builder::(1) + .unwrap() + .append_value(bbox.min_y); + + bbox_builder + .field_builder::(2) + .unwrap() + .append_value(bbox.max_x); + + bbox_builder + .field_builder::(3) + .unwrap() + .append_value(bbox.max_y); + + bbox_builder.append(true); + } + + self.metadata_builder.append(true); + + Ok(()) + } + + /// Append a null raster + pub fn append_null(&mut self) -> Result<(), ArrowError> { + self.metadata_builder.append(false); + self.bands_builder.append(false); + Ok(()) + } + + /// Finish building and return the constructed StructArray + pub fn finish(mut self) -> Result { + let metadata_array = self.metadata_builder.finish(); + let bands_array = self.bands_builder.finish(); + + let fields = RasterSchema::fields(); + let arrays: Vec = vec![Arc::new(metadata_array), Arc::new(bands_array)]; + + Ok(StructArray::new(fields, arrays, None)) + } +} + +/// Convenience wrapper for the zero-copy band writing approach +impl RasterBuilder { + /// High-level method that allows for zero-copy with a callback approach + pub fn append_raster_with_callback( + &mut self, + metadata: RasterMetadata, + band_count: usize, + mut write_bands: F, + ) -> Result<(), ArrowError> + where + F: FnMut(usize, &mut BinaryBuilder) -> Result, + { + self.start_raster(metadata)?; + + for band_index in 0..band_count { + let band_metadata = { + let binary_builder = self.band_data_writer(); + write_bands(band_index, binary_builder)? 
+ }; + self.finish_band(band_metadata)?; + } + + self.finish_raster()?; + Ok(()) + } +} + +/// Metadata for a raster +#[derive(Debug, Clone)] +pub struct RasterMetadata { + pub width: u64, + pub height: u64, + pub upperleft_x: f64, + pub upperleft_y: f64, + pub scale_x: f64, + pub scale_y: f64, + pub skew_x: f64, + pub skew_y: f64, + pub bounding_box: Option, +} + +/// Bounding box coordinates +#[derive(Debug, Clone)] +pub struct BoundingBox { + pub min_x: f64, + pub min_y: f64, + pub max_x: f64, + pub max_y: f64, +} + +/// Metadata for a single band +#[derive(Debug, Clone)] +pub struct BandMetadata { + pub nodata_value: Option>, + pub storage_type: StorageType, + pub datatype: BandDataType, +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::Array; + use arrow::datatypes::DataType; + + #[test] + fn test_raster_builder_basic() { + let mut builder = RasterBuilder::new(1); + + let metadata = RasterMetadata { + width: 100, + height: 100, + upperleft_x: -120.0, + upperleft_y: 40.0, + scale_x: 0.1, + scale_y: -0.1, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: Some(BoundingBox { + min_x: -120.0, + min_y: 30.0, + max_x: -110.0, + max_y: 40.0, + }), + }; + + // Start writing a raster + builder.start_raster(metadata).unwrap(); + + // Write band with direct BinaryBuilder access + { + let raster_data = vec![1u8; 10000]; // 100x100 raster + builder.band_data_writer().append_value(&raster_data); + + // Finish the band with metadata + builder + .finish_band(BandMetadata { + nodata_value: Some(vec![255]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }) + .unwrap(); + } + + builder.finish_raster().unwrap(); + + let result = builder.finish().unwrap(); + assert_eq!(result.len(), 1); + assert_eq!(result.num_columns(), 2); + } + + #[test] + fn test_mutable_buffer_gdal_integration() { + let mut builder = RasterBuilder::new(1); + + let metadata = RasterMetadata { + width: 256, + height: 256, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 
1.0, + scale_y: 1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(metadata).unwrap(); + + // GDAL integration pattern with MutableBuffer + { + let buffer_size = 256 * 256; // width * height for UInt8 data + let (mut buffer, commit) = builder.create_band_buffer(buffer_size); + + // Simulate GDAL reading directly into the MutableBuffer + // In real code: gdal_dataset.read_into_buffer(buffer.as_mut_slice())? + simulate_gdal_read_into_buffer(&mut buffer); + + // Commit the buffer to Arrow + commit(buffer); + } + + builder + .finish_band(BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }) + .unwrap(); + + builder.finish_raster().unwrap(); + let result = builder.finish().unwrap(); + + assert_eq!(result.len(), 1); + } + + #[test] + fn test_mutable_buffer_slice_pattern() { + let mut builder = RasterBuilder::new(1); + + let metadata = RasterMetadata { + width: 100, + height: 100, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: 1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(metadata).unwrap(); + + // Alternative pattern: get slice directly + { + let buffer_size = 10000; // 100x100 + let (buffer, slice) = builder.get_band_buffer_slice(buffer_size); + + // GDAL can write directly to this slice + // gdal_dataset.read_into_slice(slice)? 
+ for (i, byte) in slice.iter_mut().enumerate() { + *byte = (i % 256) as u8; + } + + // Commit the buffer + builder.commit_band_buffer(buffer); + } + + builder + .finish_band(BandMetadata { + nodata_value: Some(vec![255]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }) + .unwrap(); + + builder.finish_raster().unwrap(); + let result = builder.finish().unwrap(); + + assert_eq!(result.len(), 1); + } + + // Helper function to simulate GDAL reading into a MutableBuffer + fn simulate_gdal_read_into_buffer(buffer: &mut MutableBuffer) { + let slice = unsafe { std::slice::from_raw_parts_mut(buffer.as_mut_ptr(), buffer.len()) }; + for (i, byte) in slice.iter_mut().enumerate() { + *byte = (i % 256) as u8; + } + } + + #[test] + fn test_raster_builder_callback_approach() { + let mut builder = RasterBuilder::new(1); + + let metadata = RasterMetadata { + width: 50, + height: 50, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: 1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + // Use callback approach for cleaner API + builder + .append_raster_with_callback(metadata, 2, |band_index, binary_builder| { + match band_index { + 0 => { + // Write RGB band - direct access to BinaryBuilder + let rgb_data = vec![255u8; 2500]; // 50x50 RGB values + binary_builder.append_value(&rgb_data); + Ok(BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }) + } + 1 => { + // Write NIR band - direct access to BinaryBuilder + let nir_data = vec![128u8; 2500]; // 50x50 NIR values + binary_builder.append_value(&nir_data); + Ok(BandMetadata { + nodata_value: Some(vec![0]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }) + } + _ => unreachable!(), + } + }) + .unwrap(); + + let result = builder.finish().unwrap(); + assert_eq!(result.len(), 1); + } + + #[test] + fn test_large_raster_simulation() { + let mut builder = RasterBuilder::new(1); + + let metadata = 
RasterMetadata { + width: 10000, // Large raster + height: 10000, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: 1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(metadata).unwrap(); + + // Simulate writing a huge raster band directly + { + let mut band_writer = builder.start_band(); + + // Direct access to BinaryBuilder - can write massive amounts of data + let binary_builder = band_writer.data_writer(); + + // Reserve space for large raster (100MB) + let raster_size = 10000 * 10000; + binary_builder.reserve_exact(raster_size); + + // In real usage, this could be streaming from GDAL or reading from disk + // Write the entire raster in one operation + let large_raster_data = vec![42u8; raster_size]; + binary_builder.append_value(&large_raster_data); + + band_writer + .finish_band(BandMetadata { + nodata_value: None, + storage_type: StorageType::OutDbRef, + datatype: BandDataType::UInt8, + }) + .unwrap(); + } + + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let result = builder.finish().unwrap(); + assert_eq!(result.len(), 1); + } + + #[test] + fn test_raster_metadata_struct() { + let metadata = RasterMetadata { + width: 256, + height: 256, + upperleft_x: -180.0, + upperleft_y: 90.0, + scale_x: 0.5, + scale_y: -0.5, + skew_x: 0.1, + skew_y: 0.1, + bounding_box: Some(BoundingBox { + min_x: -180.0, + min_y: -90.0, + max_x: 180.0, + max_y: 90.0, + }), + }; + + assert_eq!(metadata.width, 256); + assert_eq!(metadata.height, 256); + assert!(metadata.bounding_box.is_some()); + } + + #[test] + fn test_band_metadata_struct() { + let band_metadata = BandMetadata { + nodata_value: Some(vec![0, 0]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt16, + }; + + assert!(band_metadata.nodata_value.is_some()); + assert_eq!(band_metadata.storage_type as u16, 0); + assert_eq!(band_metadata.datatype as u16, 1); + } + + #[test] + fn test_multiple_bands_zero_copy() { + let mut builder = 
RasterBuilder::new(1); + + let metadata = RasterMetadata { + width: 10, + height: 10, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: 1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(metadata).unwrap(); + + // First band + { + let mut band_writer = builder.start_band(); + band_writer.data_writer().append_value(&[1, 2, 3]); + band_writer + .finish_band(BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }) + .unwrap(); + } + builder.finish_band().unwrap(); + + // Second band + { + let mut band_writer = builder.start_band(); + band_writer.data_writer().append_value(&[4, 5, 6]); + band_writer + .finish_band(BandMetadata { + nodata_value: Some(vec![255]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }) + .unwrap(); + } + builder.finish_band().unwrap(); + + builder.finish_raster().unwrap(); + + let result = builder.finish().unwrap(); + assert_eq!(result.len(), 1); + } + + // Existing schema tests... 
+ #[test] + fn test_raster_schema_fields() { + let fields = RasterSchema::fields(); + assert_eq!(fields.len(), 2); + + // Check metadata field + let metadata_field = &fields[0]; + assert_eq!(metadata_field.name(), "metadata"); + assert!(!metadata_field.is_nullable()); + + // Check bands field + let bands_field = &fields[1]; + assert_eq!(bands_field.name(), "bands"); + assert!(bands_field.is_nullable()); + } +} From 8918929e6f4fbdf210c40ae02ca7a15892e93e2a Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:20:09 -0700 Subject: [PATCH 03/18] Add iterator --- rust/sedona-raster/src/raster.rs | 1130 +++++++++++++++++++++--------- 1 file changed, 799 insertions(+), 331 deletions(-) diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs index 6b20b1f1..268f6838 100644 --- a/rust/sedona-raster/src/raster.rs +++ b/rust/sedona-raster/src/raster.rs @@ -1,11 +1,11 @@ -use arrow::array::{ArrayRef, BinaryBuilder, ListBuilder, StructArray, StructBuilder}; +use arrow::array::{Array, ArrayRef, BinaryArray, BinaryBuilder, ListArray, ListBuilder, StructArray, StructBuilder, UInt32Array, UInt64Array}; use arrow::buffer::MutableBuffer; use arrow::datatypes::{DataType, Field, FieldRef, Fields}; use arrow::error::ArrowError; use std::sync::Arc; #[repr(u16)] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum BandDataType { UInt8 = 0, UInt16 = 1, @@ -22,18 +22,17 @@ pub enum BandDataType { /// This enum defines how raster data is physically stored and accessed: /// /// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. 
-/// - Pros: Self-contained, no external dependencies, fast access for small-medium rasters -/// - Cons: Increases Arrow array size, memory usage grows with raster size +/// - Self-contained, no external dependencies, fast access for small-medium rasters +/// - Increases Arrow array size, memory usage grows and copy times increase with raster size /// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) /// /// **OutDbRef**: Raster data is stored externally with references in the Arrow array. -/// - Pros: Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading -/// - Cons: Requires external storage management, potential for broken references +/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading +/// - Requires external storage management, potential for broken references /// - Best for: Large satellite imagery, time series data, cloud-native workflows -/// - Reference format: JSON with storage type, path/URL, credentials, metadata /// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints #[repr(u16)] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum StorageType { InDb = 0, OutDbRef = 1, @@ -98,7 +97,7 @@ impl RasterSchema { /// Band metadata schema pub fn band_metadata_type() -> DataType { DataType::Struct(Fields::from(vec![ - Field::new(column::NODATAVALUE, DataType::Binary, false), + Field::new(column::NODATAVALUE, DataType::Binary, true), // Allow null nodata values Field::new(column::STORAGE_TYPE, DataType::UInt32, false), Field::new(column::DATATYPE, DataType::UInt32, false), ])) @@ -156,13 +155,20 @@ impl RasterBuilder { capacity, ); - let bands_builder = ListBuilder::new(StructBuilder::from_fields( + let band_struct_builder = StructBuilder::from_fields( match RasterSchema::band_type() { DataType::Struct(fields) => fields, _ => panic!("Expected struct type for band"), }, 0, // Initial capacity for bands - )); + ); + + let 
bands_builder = ListBuilder::new(band_struct_builder) + .with_field(Field::new( + column::BAND, + RasterSchema::band_type(), + false, + )); Self { metadata_builder, @@ -171,8 +177,17 @@ impl RasterBuilder { } /// Start a new raster and write its metadata - pub fn start_raster(&mut self, metadata: RasterMetadata) -> Result<(), ArrowError> { - self.append_metadata(metadata) + /// + /// Accepts any type that implements MetadataRef, allowing you to pass: + /// - RasterMetadata structs directly + /// - MetadataRef trait objects from iterators + pub fn start_raster(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { + self.append_metadata_from_ref(metadata) + } + + /// Convenience method for starting a raster with owned RasterMetadata + pub fn start_raster_owned(&mut self, metadata: RasterMetadata) -> Result<(), ArrowError> { + self.start_raster(&metadata) } /// Get direct access to the BinaryBuilder for writing the current band's data @@ -265,53 +280,53 @@ impl RasterBuilder { Ok(()) } - /// Append raster metadata - fn append_metadata(&mut self, metadata: RasterMetadata) -> Result<(), ArrowError> { + /// Append raster metadata from a MetadataRef trait object + fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { // Width self.metadata_builder .field_builder::(0) .unwrap() - .append_value(metadata.width); + .append_value(metadata.width()); // Height self.metadata_builder .field_builder::(1) .unwrap() - .append_value(metadata.height); + .append_value(metadata.height()); // Geotransform parameters self.metadata_builder .field_builder::(2) .unwrap() - .append_value(metadata.upperleft_x); + .append_value(metadata.upper_left_x()); self.metadata_builder .field_builder::(3) .unwrap() - .append_value(metadata.upperleft_y); + .append_value(metadata.upper_left_y()); self.metadata_builder .field_builder::(4) .unwrap() - .append_value(metadata.scale_x); + .append_value(metadata.scale_x()); self.metadata_builder 
.field_builder::(5) .unwrap() - .append_value(metadata.scale_y); + .append_value(metadata.scale_y()); self.metadata_builder .field_builder::(6) .unwrap() - .append_value(metadata.skew_x); + .append_value(metadata.skew_x()); self.metadata_builder .field_builder::(7) .unwrap() - .append_value(metadata.skew_y); + .append_value(metadata.skew_y()); // Optional bounding box - if let Some(bbox) = metadata.bounding_box { + if let Some(bbox) = metadata.bounding_box() { let bbox_builder = self .metadata_builder .field_builder::(8) @@ -338,6 +353,34 @@ impl RasterBuilder { .append_value(bbox.max_y); bbox_builder.append(true); + } else { + // Append null bounding box - need to fill in null values for all fields + let bbox_builder = self + .metadata_builder + .field_builder::(8) + .unwrap(); + + bbox_builder + .field_builder::(0) + .unwrap() + .append_null(); + + bbox_builder + .field_builder::(1) + .unwrap() + .append_null(); + + bbox_builder + .field_builder::(2) + .unwrap() + .append_null(); + + bbox_builder + .field_builder::(3) + .unwrap() + .append_null(); + + bbox_builder.append(false); } self.metadata_builder.append(true); @@ -376,7 +419,7 @@ impl RasterBuilder { where F: FnMut(usize, &mut BinaryBuilder) -> Result, { - self.start_raster(metadata)?; + self.start_raster(&metadata)?; for band_index in 0..band_count { let band_metadata = { @@ -391,391 +434,816 @@ impl RasterBuilder { } } -/// Metadata for a raster -#[derive(Debug, Clone)] -pub struct RasterMetadata { - pub width: u64, - pub height: u64, - pub upperleft_x: f64, - pub upperleft_y: f64, - pub scale_x: f64, - pub scale_y: f64, - pub skew_x: f64, - pub skew_y: f64, - pub bounding_box: Option, + +/// Iterator and accessor traits for reading raster data from Arrow arrays. +/// +/// These traits provide a zero-copy interface for accessing raster metadata and band data +/// from the Arrow-based storage format. The implementation handles both InDb and OutDbRef +/// storage types seamlessly. 
+ +/// Trait for accessing raster metadata (dimensions, geotransform, bounding box, etc.) +pub trait MetadataRef { + /// Width of the raster in pixels (using u64 to match schema) + fn width(&self) -> u64; + /// Height of the raster in pixels (using u64 to match schema) + fn height(&self) -> u64; + /// X coordinate of the upper-left corner + fn upper_left_x(&self) -> f64; + /// Y coordinate of the upper-left corner + fn upper_left_y(&self) -> f64; + /// X-direction pixel size (scale) + fn scale_x(&self) -> f64; + /// Y-direction pixel size (scale) + fn scale_y(&self) -> f64; + /// X-direction skew/rotation + fn skew_x(&self) -> f64; + /// Y-direction skew/rotation + fn skew_y(&self) -> f64; + /// Optional bounding box (when available) + fn bounding_box(&self) -> Option; } -/// Bounding box coordinates -#[derive(Debug, Clone)] -pub struct BoundingBox { - pub min_x: f64, - pub min_y: f64, - pub max_x: f64, - pub max_y: f64, +/// Implement MetadataRef for RasterMetadata to allow direct use with builder +impl MetadataRef for RasterMetadata { + fn width(&self) -> u64 { self.width } + fn height(&self) -> u64 { self.height } + fn upper_left_x(&self) -> f64 { self.upperleft_x } + fn upper_left_y(&self) -> f64 { self.upperleft_y } + fn scale_x(&self) -> f64 { self.scale_x } + fn scale_y(&self) -> f64 { self.scale_y } + fn skew_x(&self) -> f64 { self.skew_x } + fn skew_y(&self) -> f64 { self.skew_y } + fn bounding_box(&self) -> Option { self.bounding_box.clone() } } -/// Metadata for a single band -#[derive(Debug, Clone)] -pub struct BandMetadata { - pub nodata_value: Option>, - pub storage_type: StorageType, - pub datatype: BandDataType, +/// Trait for accessing individual band metadata +pub trait BandMetadataRef { + /// No-data value as raw bytes (None if null) + fn nodata_value(&self) -> Option<&[u8]>; + /// Storage type (InDb or OutDbRef) + fn storage_type(&self) -> StorageType; + /// Band data type (Uint8, Float32, etc.) 
+ fn data_type(&self) -> BandDataType; } -#[cfg(test)] -mod tests { - use super::*; - use arrow::array::Array; - use arrow::datatypes::DataType; +/// Trait for accessing individual band data +pub trait BandRef { + /// Band metadata accessor + fn metadata(&self) -> &dyn BandMetadataRef; + /// Raw band data as bytes (zero-copy access) + fn data(&self) -> &[u8]; +} - #[test] - fn test_raster_builder_basic() { - let mut builder = RasterBuilder::new(1); +/// Trait for accessing all bands in a raster +pub trait BandsRef { + /// Number of bands in the raster + fn len(&self) -> usize; + /// Check if no bands are present + fn is_empty(&self) -> bool { self.len() == 0 } + /// Get a specific band by index (returns None if out of bounds) + fn band(&self, index: usize) -> Option>; + /// Iterator over all bands + fn iter(&self) -> BandIterator<'_>; +} - let metadata = RasterMetadata { - width: 100, - height: 100, - upperleft_x: -120.0, - upperleft_y: 40.0, - scale_x: 0.1, - scale_y: -0.1, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: Some(BoundingBox { - min_x: -120.0, - min_y: 30.0, - max_x: -110.0, - max_y: 40.0, - }), - }; +/// Trait for accessing complete raster data +pub trait RasterRef { + /// Raster metadata accessor + fn metadata(&self) -> &dyn MetadataRef; + /// Bands accessor + fn bands(&self) -> &dyn BandsRef; +} - // Start writing a raster - builder.start_raster(metadata).unwrap(); - - // Write band with direct BinaryBuilder access - { - let raster_data = vec![1u8; 10000]; // 100x100 raster - builder.band_data_writer().append_value(&raster_data); - - // Finish the band with metadata - builder - .finish_band(BandMetadata { - nodata_value: Some(vec![255]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }) - .unwrap(); - } +/// Implementation of MetadataRef for Arrow StructArray +struct MetadataRefImpl<'a> { + metadata_struct: &'a StructArray, + index: usize, +} - builder.finish_raster().unwrap(); +impl<'a> MetadataRef for MetadataRefImpl<'a> { + 
fn width(&self) -> u64 { + self.metadata_struct + .column_by_name(column::WIDTH) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } - let result = builder.finish().unwrap(); - assert_eq!(result.len(), 1); - assert_eq!(result.num_columns(), 2); + fn height(&self) -> u64 { + self.metadata_struct + .column_by_name(column::HEIGHT) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) } - #[test] - fn test_mutable_buffer_gdal_integration() { - let mut builder = RasterBuilder::new(1); + fn upper_left_x(&self) -> f64 { + self.metadata_struct + .column_by_name(column::UPPERLEFT_X) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } - let metadata = RasterMetadata { - width: 256, - height: 256, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: 1.0, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: None, - }; + fn upper_left_y(&self) -> f64 { + self.metadata_struct + .column_by_name(column::UPPERLEFT_Y) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } - builder.start_raster(metadata).unwrap(); + fn scale_x(&self) -> f64 { + self.metadata_struct + .column_by_name(column::SCALE_X) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } - // GDAL integration pattern with MutableBuffer - { - let buffer_size = 256 * 256; // width * height for UInt8 data - let (mut buffer, commit) = builder.create_band_buffer(buffer_size); + fn scale_y(&self) -> f64 { + self.metadata_struct + .column_by_name(column::SCALE_Y) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } - // Simulate GDAL reading directly into the MutableBuffer - // In real code: gdal_dataset.read_into_buffer(buffer.as_mut_slice())? 
- simulate_gdal_read_into_buffer(&mut buffer); + fn skew_x(&self) -> f64 { + self.metadata_struct + .column_by_name(column::SKEW_X) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } - // Commit the buffer to Arrow - commit(buffer); - } + fn skew_y(&self) -> f64 { + self.metadata_struct + .column_by_name(column::SKEW_Y) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } - builder - .finish_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, + fn bounding_box(&self) -> Option { + // Try to get bounding box if present in schema + if let Some(bbox_column) = self.metadata_struct.column_by_name(column::BOUNDING_BOX) { + let bbox_struct = bbox_column.as_any().downcast_ref::()?; + Some(BoundingBox { + min_x: bbox_struct + .column_by_name(column::MIN_X)? + .as_any() + .downcast_ref::()? + .value(self.index), + min_y: bbox_struct + .column_by_name(column::MIN_Y)? + .as_any() + .downcast_ref::()? + .value(self.index), + max_x: bbox_struct + .column_by_name(column::MAX_X)? + .as_any() + .downcast_ref::()? + .value(self.index), + max_y: bbox_struct + .column_by_name(column::MAX_Y)? + .as_any() + .downcast_ref::()? 
+ .value(self.index), }) - .unwrap(); + } else { + None + } + } +} - builder.finish_raster().unwrap(); - let result = builder.finish().unwrap(); +/// Implementation of BandMetadataRef for Arrow StructArray +struct BandMetadataRefImpl<'a> { + metadata_struct: &'a StructArray, + band_index: usize, +} - assert_eq!(result.len(), 1); +impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { + fn nodata_value(&self) -> Option<&[u8]> { + let nodata_array = self + .metadata_struct + .column_by_name(column::NODATAVALUE) + .unwrap() + .as_any() + .downcast_ref::() + .expect("Expected BinaryArray for nodata"); + + if nodata_array.is_null(self.band_index) { + None + } else { + Some(nodata_array.value(self.band_index)) + } } - #[test] - fn test_mutable_buffer_slice_pattern() { - let mut builder = RasterBuilder::new(1); + fn storage_type(&self) -> StorageType { + let storage_type_array = self + .metadata_struct + .column_by_name(column::STORAGE_TYPE) + .unwrap() + .as_any() + .downcast_ref::() + .expect("Expected UInt32Array for storage_type"); + + match storage_type_array.value(self.band_index) { + 0 => StorageType::InDb, + 1 => StorageType::OutDbRef, + _ => panic!( + "Unknown storage type: {}", + storage_type_array.value(self.band_index) + ), + } + } - let metadata = RasterMetadata { - width: 100, - height: 100, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: 1.0, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: None, - }; + fn data_type(&self) -> BandDataType { + let datatype_array = self + .metadata_struct + .column_by_name(column::DATATYPE) + .unwrap() + .as_any() + .downcast_ref::() + .expect("Expected UInt32Array for datatype"); + + match datatype_array.value(self.band_index) { + 0 => BandDataType::UInt8, + 1 => BandDataType::UInt16, + 2 => BandDataType::Int16, + 3 => BandDataType::UInt32, + 4 => BandDataType::Int32, + 5 => BandDataType::Float32, + 6 => BandDataType::Float64, + _ => panic!("Unknown band data type: {}", datatype_array.value(self.band_index)), 
+ } + } +} - builder.start_raster(metadata).unwrap(); +/// Implementation of BandRef for accessing individual band data +struct BandRefImpl<'a> { + band_metadata: BandMetadataRefImpl<'a>, + band_data: &'a [u8], +} + +impl<'a> BandRef for BandRefImpl<'a> { + fn metadata(&self) -> &dyn BandMetadataRef { + &self.band_metadata + } + + fn data(&self) -> &[u8] { + self.band_data + } +} - // Alternative pattern: get slice directly - { - let buffer_size = 10000; // 100x100 - let (buffer, slice) = builder.get_band_buffer_slice(buffer_size); +/// Implementation of BandsRef for accessing all bands in a raster +struct BandsRefImpl<'a> { + bands_list: &'a ListArray, + raster_index: usize, +} - // GDAL can write directly to this slice - // gdal_dataset.read_into_slice(slice)? - for (i, byte) in slice.iter_mut().enumerate() { - *byte = (i % 256) as u8; - } +impl<'a> BandsRef for BandsRefImpl<'a> { + fn len(&self) -> usize { + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let end = self.bands_list.value_offsets()[self.raster_index + 1] as usize; + end - start + } - // Commit the buffer - builder.commit_band_buffer(buffer); + fn band(&self, index: usize) -> Option> { + if index >= self.len() { + return None; } - builder - .finish_band(BandMetadata { - nodata_value: Some(vec![255]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }) - .unwrap(); + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + + let bands_struct = self + .bands_list + .values() + .as_any() + .downcast_ref::()?; + + // Get the metadata substructure from the band struct + let band_metadata_struct = bands_struct + .column_by_name(column::METADATA)? 
+ .as_any() + .downcast_ref::()?; + + let band_metadata = BandMetadataRefImpl { + metadata_struct: band_metadata_struct, + band_index: band_row, + }; + + // Get band data from the Binary column within the band struct + let band_data_array = bands_struct + .column_by_name(column::DATA)? + .as_any() + .downcast_ref::()?; - builder.finish_raster().unwrap(); - let result = builder.finish().unwrap(); + let band_data = band_data_array.value(band_row); - assert_eq!(result.len(), 1); + Some(Box::new(BandRefImpl { + band_metadata, + band_data, + })) } - // Helper function to simulate GDAL reading into a MutableBuffer - fn simulate_gdal_read_into_buffer(buffer: &mut MutableBuffer) { - let slice = unsafe { std::slice::from_raw_parts_mut(buffer.as_mut_ptr(), buffer.len()) }; - for (i, byte) in slice.iter_mut().enumerate() { - *byte = (i % 256) as u8; + fn iter(&self) -> BandIterator<'_> { + BandIterator { + bands: self, + current: 0, } } +} - #[test] - fn test_raster_builder_callback_approach() { - let mut builder = RasterBuilder::new(1); +/// Iterator for bands within a raster +pub struct BandIterator<'a> { + bands: &'a dyn BandsRef, + current: usize, +} - let metadata = RasterMetadata { - width: 50, - height: 50, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: 1.0, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: None, - }; +impl<'a> Iterator for BandIterator<'a> { + type Item = Box; - // Use callback approach for cleaner API - builder - .append_raster_with_callback(metadata, 2, |band_index, binary_builder| { - match band_index { - 0 => { - // Write RGB band - direct access to BinaryBuilder - let rgb_data = vec![255u8; 2500]; // 50x50 RGB values - binary_builder.append_value(&rgb_data); - Ok(BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }) - } - 1 => { - // Write NIR band - direct access to BinaryBuilder - let nir_data = vec![128u8; 2500]; // 50x50 NIR values - 
binary_builder.append_value(&nir_data); - Ok(BandMetadata { - nodata_value: Some(vec![0]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }) - } - _ => unreachable!(), - } - }) - .unwrap(); + fn next(&mut self) -> Option { + if self.current < self.bands.len() { + let band = self.bands.band(self.current); + self.current += 1; + band + } else { + None + } + } - let result = builder.finish().unwrap(); - assert_eq!(result.len(), 1); + fn size_hint(&self) -> (usize, Option) { + let remaining = self.bands.len().saturating_sub(self.current); + (remaining, Some(remaining)) } +} - #[test] - fn test_large_raster_simulation() { - let mut builder = RasterBuilder::new(1); +impl ExactSizeIterator for BandIterator<'_> {} - let metadata = RasterMetadata { - width: 10000, // Large raster - height: 10000, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: 1.0, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: None, +/// Implementation of RasterRef for complete raster access +pub struct RasterRefImpl<'a> { + metadata: MetadataRefImpl<'a>, + bands: BandsRefImpl<'a>, +} + +impl<'a> RasterRefImpl<'a> { + /// Create a new RasterRefImpl from a struct array and index + pub fn new(raster_struct: &'a StructArray, raster_index: usize) -> Self { + let metadata = MetadataRefImpl { + metadata_struct: raster_struct + .column_by_name(column::METADATA) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(), + index: raster_index, + }; + + let bands = BandsRefImpl { + bands_list: raster_struct + .column_by_name(column::BANDS) + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(), + raster_index, }; + + Self { metadata, bands } + } +} - builder.start_raster(metadata).unwrap(); +impl<'a> RasterRef for RasterRefImpl<'a> { + fn metadata(&self) -> &dyn MetadataRef { + &self.metadata + } + + fn bands(&self) -> &dyn BandsRef { + &self.bands + } +} - // Simulate writing a huge raster band directly - { - let mut band_writer = builder.start_band(); +/// Iterator over 
raster structs in an Arrow StructArray +/// +/// This provides efficient, zero-copy access to raster data stored in Arrow format. +/// Each iteration yields a `RasterRefImpl` that provides access to both metadata and band data. +pub struct RasterStructIterator<'a> { + raster_array: &'a StructArray, + current_row: usize, +} - // Direct access to BinaryBuilder - can write massive amounts of data - let binary_builder = band_writer.data_writer(); +impl<'a> RasterStructIterator<'a> { + /// Create a new iterator over the raster struct array + pub fn new(raster_array: &'a StructArray) -> Self { + Self { + raster_array, + current_row: 0, + } + } - // Reserve space for large raster (100MB) - let raster_size = 10000 * 10000; - binary_builder.reserve_exact(raster_size); + /// Get the total number of rasters in the array + pub fn len(&self) -> usize { + self.raster_array.len() + } - // In real usage, this could be streaming from GDAL or reading from disk - // Write the entire raster in one operation - let large_raster_data = vec![42u8; raster_size]; - binary_builder.append_value(&large_raster_data); + /// Check if the array is empty + pub fn is_empty(&self) -> bool { + self.raster_array.is_empty() + } - band_writer - .finish_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::OutDbRef, - datatype: BandDataType::UInt8, - }) - .unwrap(); + /// Get a specific raster by index without consuming the iterator + pub fn get(&self, index: usize) -> Option> { + if index >= self.raster_array.len() { + return None; } - builder.finish_band().unwrap(); - builder.finish_raster().unwrap(); + Some(RasterRefImpl::new(self.raster_array, index)) + } +} - let result = builder.finish().unwrap(); - assert_eq!(result.len(), 1); +impl<'a> Iterator for RasterStructIterator<'a> { + type Item = RasterRefImpl<'a>; + + fn next(&mut self) -> Option { + if self.current_row < self.raster_array.len() { + let result = self.get(self.current_row)?; + self.current_row += 1; + Some(result) + } 
else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.raster_array.len().saturating_sub(self.current_row); + (remaining, Some(remaining)) } +} + +impl ExactSizeIterator for RasterStructIterator<'_> {} + +/// Convenience constructor function for creating a raster iterator +pub fn raster_iterator(raster_struct: &StructArray) -> RasterStructIterator<'_> { + RasterStructIterator::new(raster_struct) +} +/// Example usage demonstrating zero-copy metadata transfer between raster arrays +/// +/// This shows how to use the iterator to read raster metadata and pass it directly +/// to a new builder without any data copying or conversion. +/// +/// ```rust,ignore +/// use sedona_raster::raster::*; +/// +/// // Build a source raster +/// let mut source_builder = RasterBuilder::new(10); +/// let metadata = RasterMetadata { /* ... */ }; +/// source_builder.start_raster(&metadata).unwrap(); +/// // ... add bands ... +/// let source_array = source_builder.finish().unwrap(); +/// +/// // Copy metadata to new raster (zero-copy!) +/// let mut target_builder = RasterBuilder::new(10); +/// let iterator = raster_iterator(&source_array); +/// let source_raster = iterator.get(0).unwrap(); +/// +/// // Pass metadata reference directly - no allocation or copying! +/// target_builder.start_raster(source_raster.metadata()).unwrap(); +/// // ... add new bands with same geospatial parameters ... 
+/// ``` +pub fn example_zero_copy_metadata_transfer() { + // This is a documentation function showing the API usage pattern +} + +#[cfg(test)] +mod iterator_tests { + use super::*; + #[test] - fn test_raster_metadata_struct() { + fn test_iterator_basic_functionality() { + // Create a simple raster for testing using the correct API + let mut builder = RasterBuilder::new(10); // capacity + let metadata = RasterMetadata { - width: 256, - height: 256, - upperleft_x: -180.0, - upperleft_y: 90.0, - scale_x: 0.5, - scale_y: -0.5, - skew_x: 0.1, - skew_y: 0.1, + width: 10, + height: 10, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, bounding_box: Some(BoundingBox { - min_x: -180.0, - min_y: -90.0, - max_x: 180.0, - max_y: 90.0, + min_x: 0.0, + min_y: -10.0, + max_x: 10.0, + max_y: 0.0, }), }; - - assert_eq!(metadata.width, 256); - assert_eq!(metadata.height, 256); - assert!(metadata.bounding_box.is_some()); - } - - #[test] - fn test_band_metadata_struct() { + + builder.start_raster(&metadata).unwrap(); + let band_metadata = BandMetadata { - nodata_value: Some(vec![0, 0]), + nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, - datatype: BandDataType::UInt16, + datatype: BandDataType::UInt8, }; - - assert!(band_metadata.nodata_value.is_some()); - assert_eq!(band_metadata.storage_type as u16, 0); - assert_eq!(band_metadata.datatype as u16, 1); + + // Add a single band with some test data using the correct API + let test_data = vec![1u8; 100]; // 10x10 raster with value 1 + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + builder.finish_raster(); + + let raster_array = builder.finish().unwrap(); + + // Test the iterator + let mut iterator = raster_iterator(&raster_array); + + assert_eq!(iterator.len(), 1); + assert!(!iterator.is_empty()); + + let raster = iterator.next().unwrap(); + let metadata = raster.metadata(); + + assert_eq!(metadata.width(), 10); 
+ assert_eq!(metadata.height(), 10); + assert_eq!(metadata.scale_x(), 1.0); + assert_eq!(metadata.scale_y(), -1.0); + + let bbox = metadata.bounding_box().unwrap(); + assert_eq!(bbox.min_x, 0.0); + assert_eq!(bbox.max_x, 10.0); + + let bands = raster.bands(); + assert_eq!(bands.len(), 1); + assert!(!bands.is_empty()); + + let band = bands.band(0).unwrap(); + assert_eq!(band.data().len(), 100); + assert_eq!(band.data()[0], 1u8); + + let band_meta = band.metadata(); + assert_eq!(band_meta.storage_type(), StorageType::InDb); + assert_eq!(band_meta.data_type(), BandDataType::UInt8); + + // Test iterator over bands + let band_iter: Vec<_> = bands.iter().collect(); + assert_eq!(band_iter.len(), 1); } - + #[test] - fn test_multiple_bands_zero_copy() { - let mut builder = RasterBuilder::new(1); - + fn test_multi_band_iterator() { + let mut builder = RasterBuilder::new(10); + let metadata = RasterMetadata { - width: 10, - height: 10, + width: 5, + height: 5, upperleft_x: 0.0, upperleft_y: 0.0, scale_x: 1.0, - scale_y: 1.0, + scale_y: -1.0, skew_x: 0.0, skew_y: 0.0, bounding_box: None, }; - - builder.start_raster(metadata).unwrap(); - - // First band - { - let mut band_writer = builder.start_band(); - band_writer.data_writer().append_value(&[1, 2, 3]); - band_writer - .finish_band(BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }) - .unwrap(); + + builder.start_raster(&metadata).unwrap(); + + // Add three bands using the correct API + for band_idx in 0..3 { + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }; + + let test_data = vec![band_idx as u8; 25]; // 5x5 raster + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); } - builder.finish_band().unwrap(); - - // Second band - { - let mut band_writer = builder.start_band(); - band_writer.data_writer().append_value(&[4, 5, 6]); - 
band_writer - .finish_band(BandMetadata { - nodata_value: Some(vec![255]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }) - .unwrap(); + + builder.finish_raster(); + let raster_array = builder.finish().unwrap(); + + let mut iterator = raster_iterator(&raster_array); + let raster = iterator.next().unwrap(); + let bands = raster.bands(); + + assert_eq!(bands.len(), 3); + + // Test each band has different data + for i in 0..3 { + let band = bands.band(i).unwrap(); + let expected_value = i as u8; + assert!(band.data().iter().all(|&x| x == expected_value)); } - builder.finish_band().unwrap(); - - builder.finish_raster().unwrap(); - - let result = builder.finish().unwrap(); - assert_eq!(result.len(), 1); + + // Test iterator + let band_values: Vec = bands + .iter() + .enumerate() + .map(|(i, band)| { + assert_eq!(band.data()[0], i as u8); + band.data()[0] + }) + .collect(); + + assert_eq!(band_values, vec![0, 1, 2]); + } + + #[test] + fn test_copy_metadata_from_iterator() { + // Create an original raster + let mut source_builder = RasterBuilder::new(10); + + let original_metadata = RasterMetadata { + width: 42, + height: 24, + upperleft_x: -122.0, + upperleft_y: 37.8, + scale_x: 0.1, + scale_y: -0.1, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: Some(BoundingBox { + min_x: -122.0, + min_y: 35.4, + max_x: -120.0, + max_y: 37.8, + }), + }; + + source_builder.start_raster(&original_metadata).unwrap(); + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }; + + let test_data = vec![42u8; 1008]; // 42x24 raster + source_builder.band_data_writer().append_value(&test_data); + source_builder.finish_band(band_metadata).unwrap(); + source_builder.finish_raster().unwrap(); + + let source_array = source_builder.finish().unwrap(); + + // Now create a new raster using metadata from the iterator - this is the key feature! 
+ let mut target_builder = RasterBuilder::new(10); + let iterator = raster_iterator(&source_array); + let source_raster = iterator.get(0).unwrap(); + + // Use metadata directly from the iterator (zero-copy!) + target_builder.start_raster(source_raster.metadata()).unwrap(); + + // Add new band data while preserving original metadata + let new_band_metadata = BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: BandDataType::UInt16, + }; + + let new_data = vec![100u16; 1008]; // Different data, same dimensions + let new_data_bytes: Vec = new_data.iter() + .flat_map(|&x| x.to_le_bytes()) + .collect(); + + target_builder.band_data_writer().append_value(&new_data_bytes); + target_builder.finish_band(new_band_metadata).unwrap(); + target_builder.finish_raster().unwrap(); + + let target_array = target_builder.finish().unwrap(); + + // Verify the metadata was copied correctly + let target_iterator = raster_iterator(&target_array); + let target_raster = target_iterator.get(0).unwrap(); + let target_metadata = target_raster.metadata(); + + // All metadata should match the original + assert_eq!(target_metadata.width(), 42); + assert_eq!(target_metadata.height(), 24); + assert_eq!(target_metadata.upper_left_x(), -122.0); + assert_eq!(target_metadata.upper_left_y(), 37.8); + assert_eq!(target_metadata.scale_x(), 0.1); + assert_eq!(target_metadata.scale_y(), -0.1); + + let target_bbox = target_metadata.bounding_box().unwrap(); + assert_eq!(target_bbox.min_x, -122.0); + assert_eq!(target_bbox.max_x, -120.0); + + // But band data and metadata should be different + let target_band = target_raster.bands().band(0).unwrap(); + let target_band_meta = target_band.metadata(); + assert_eq!(target_band_meta.data_type(), BandDataType::UInt16); + assert!(target_band_meta.nodata_value().is_none()); + assert_eq!(target_band.data().len(), 2016); // 1008 * 2 bytes per u16 } - // Existing schema tests... 
#[test] - fn test_raster_schema_fields() { - let fields = RasterSchema::fields(); - assert_eq!(fields.len(), 2); + fn test_random_access() { + let mut builder = RasterBuilder::new(10); + + // Add multiple rasters + for raster_idx in 0..3 { + let metadata = RasterMetadata { + width: raster_idx as u64 + 1, + height: raster_idx as u64 + 1, + upperleft_x: raster_idx as f64, + upperleft_y: raster_idx as f64, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata).unwrap(); + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }; + + let size = (raster_idx + 1) * (raster_idx + 1); + let test_data = vec![raster_idx as u8; size]; + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + builder.finish_raster(); + } + + let raster_array = builder.finish().unwrap(); + let iterator = raster_iterator(&raster_array); + + assert_eq!(iterator.len(), 3); + + // Test random access + let raster_2 = iterator.get(2).unwrap(); + assert_eq!(raster_2.metadata().width(), 3); + assert_eq!(raster_2.metadata().height(), 3); + assert_eq!(raster_2.metadata().upper_left_x(), 2.0); + + let band = raster_2.bands().band(0).unwrap(); + assert_eq!(band.data().len(), 9); + assert!(band.data().iter().all(|&x| x == 2u8)); + + // Test out of bounds + assert!(iterator.get(10).is_none()); + } +} - // Check metadata field - let metadata_field = &fields[0]; - assert_eq!(metadata_field.name(), "metadata"); - assert!(!metadata_field.is_nullable()); - // Check bands field - let bands_field = &fields[1]; - assert_eq!(bands_field.name(), "bands"); - assert!(bands_field.is_nullable()); - } + +/// Metadata for a raster +#[derive(Debug, Clone)] +pub struct RasterMetadata { + pub width: u64, + pub height: u64, + pub upperleft_x: f64, + pub upperleft_y: f64, + pub scale_x: f64, + pub scale_y: f64, + pub skew_x: 
f64, + pub skew_y: f64, + pub bounding_box: Option, +} + +/// Bounding box coordinates +#[derive(Debug, Clone)] +pub struct BoundingBox { + pub min_x: f64, + pub min_y: f64, + pub max_x: f64, + pub max_y: f64, +} + +/// Metadata for a single band +#[derive(Debug, Clone)] +pub struct BandMetadata { + pub nodata_value: Option>, + pub storage_type: StorageType, + pub datatype: BandDataType, } + + From 87e131b0d3fa01f3a11d98f690dc328e5317d01d Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:41:33 -0700 Subject: [PATCH 04/18] ran fmt --- rust/sedona-raster/src/raster.rs | 350 +++++++++++++++---------------- 1 file changed, 173 insertions(+), 177 deletions(-) diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs index 268f6838..1c26e8e0 100644 --- a/rust/sedona-raster/src/raster.rs +++ b/rust/sedona-raster/src/raster.rs @@ -1,43 +1,14 @@ -use arrow::array::{Array, ArrayRef, BinaryArray, BinaryBuilder, ListArray, ListBuilder, StructArray, StructBuilder, UInt32Array, UInt64Array}; +use arrow::array::{ + Array, ArrayRef, BinaryArray, BinaryBuilder, ListArray, ListBuilder, StructArray, + StructBuilder, UInt32Array, UInt64Array, +}; use arrow::buffer::MutableBuffer; use arrow::datatypes::{DataType, Field, FieldRef, Fields}; use arrow::error::ArrowError; use std::sync::Arc; -#[repr(u16)] -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum BandDataType { - UInt8 = 0, - UInt16 = 1, - Int16 = 2, - UInt32 = 3, - Int32 = 4, - Float32 = 5, - Float64 = 6, - // Consider support for complex types for scientific data -} - -/// Storage strategy for raster band data within Apache Arrow arrays. -/// -/// This enum defines how raster data is physically stored and accessed: -/// -/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. 
-/// - Self-contained, no external dependencies, fast access for small-medium rasters -/// - Increases Arrow array size, memory usage grows and copy times increase with raster size -/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) -/// -/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. -/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading -/// - Requires external storage management, potential for broken references -/// - Best for: Large satellite imagery, time series data, cloud-native workflows -/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints -#[repr(u16)] -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum StorageType { - InDb = 0, - OutDbRef = 1, -} - +/// Creates a schema for storing raster data in Apache Arrow format. +/// Utilizing nested structs and lists to represent raster metadata and bands. pub struct RasterSchema; impl RasterSchema { @@ -109,6 +80,40 @@ impl RasterSchema { } } +#[repr(u16)] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum BandDataType { + UInt8 = 0, + UInt16 = 1, + Int16 = 2, + UInt32 = 3, + Int32 = 4, + Float32 = 5, + Float64 = 6, + // Consider support for complex types for scientific data +} + +/// Storage strategy for raster band data within Apache Arrow arrays. +/// +/// This enum defines how raster data is physically stored and accessed: +/// +/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. +/// - Self-contained, no external dependencies, fast access for small-medium rasters +/// - Increases Arrow array size, memory usage grows and copy times increase with raster size +/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) +/// +/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. 
+/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading +/// - Requires external storage management, potential for broken references +/// - Best for: Large satellite imagery, time series data, cloud-native workflows +/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints +#[repr(u16)] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum StorageType { + InDb = 0, + OutDbRef = 1, +} + pub mod column { pub const METADATA: &str = "metadata"; pub const BANDS: &str = "bands"; @@ -162,13 +167,12 @@ impl RasterBuilder { }, 0, // Initial capacity for bands ); - - let bands_builder = ListBuilder::new(band_struct_builder) - .with_field(Field::new( - column::BAND, - RasterSchema::band_type(), - false, - )); + + let bands_builder = ListBuilder::new(band_struct_builder).with_field(Field::new( + column::BAND, + RasterSchema::band_type(), + false, + )); Self { metadata_builder, @@ -177,7 +181,7 @@ impl RasterBuilder { } /// Start a new raster and write its metadata - /// + /// /// Accepts any type that implements MetadataRef, allowing you to pass: /// - RasterMetadata structs directly /// - MetadataRef trait objects from iterators @@ -434,9 +438,8 @@ impl RasterBuilder { } } - /// Iterator and accessor traits for reading raster data from Arrow arrays. -/// +/// /// These traits provide a zero-copy interface for accessing raster metadata and band data /// from the Arrow-based storage format. The implementation handles both InDb and OutDbRef /// storage types seamlessly. 
@@ -465,15 +468,33 @@ pub trait MetadataRef { /// Implement MetadataRef for RasterMetadata to allow direct use with builder impl MetadataRef for RasterMetadata { - fn width(&self) -> u64 { self.width } - fn height(&self) -> u64 { self.height } - fn upper_left_x(&self) -> f64 { self.upperleft_x } - fn upper_left_y(&self) -> f64 { self.upperleft_y } - fn scale_x(&self) -> f64 { self.scale_x } - fn scale_y(&self) -> f64 { self.scale_y } - fn skew_x(&self) -> f64 { self.skew_x } - fn skew_y(&self) -> f64 { self.skew_y } - fn bounding_box(&self) -> Option { self.bounding_box.clone() } + fn width(&self) -> u64 { + self.width + } + fn height(&self) -> u64 { + self.height + } + fn upper_left_x(&self) -> f64 { + self.upperleft_x + } + fn upper_left_y(&self) -> f64 { + self.upperleft_y + } + fn scale_x(&self) -> f64 { + self.scale_x + } + fn scale_y(&self) -> f64 { + self.scale_y + } + fn skew_x(&self) -> f64 { + self.skew_x + } + fn skew_y(&self) -> f64 { + self.skew_y + } + fn bounding_box(&self) -> Option { + self.bounding_box.clone() + } } /// Trait for accessing individual band metadata @@ -499,7 +520,9 @@ pub trait BandsRef { /// Number of bands in the raster fn len(&self) -> usize; /// Check if no bands are present - fn is_empty(&self) -> bool { self.len() == 0 } + fn is_empty(&self) -> bool { + self.len() == 0 + } /// Get a specific band by index (returns None if out of bounds) fn band(&self, index: usize) -> Option>; /// Iterator over all bands @@ -683,7 +706,7 @@ impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { .as_any() .downcast_ref::() .expect("Expected UInt32Array for datatype"); - + match datatype_array.value(self.band_index) { 0 => BandDataType::UInt8, 1 => BandDataType::UInt16, @@ -692,7 +715,10 @@ impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { 4 => BandDataType::Int32, 5 => BandDataType::Float32, 6 => BandDataType::Float64, - _ => panic!("Unknown band data type: {}", datatype_array.value(self.band_index)), + _ => panic!( + "Unknown band data 
type: {}", + datatype_array.value(self.band_index) + ), } } } @@ -818,7 +844,7 @@ impl<'a> RasterRefImpl<'a> { .unwrap(), index: raster_index, }; - + let bands = BandsRefImpl { bands_list: raster_struct .column_by_name(column::BANDS) @@ -828,7 +854,7 @@ impl<'a> RasterRefImpl<'a> { .unwrap(), raster_index, }; - + Self { metadata, bands } } } @@ -844,7 +870,7 @@ impl<'a> RasterRef for RasterRefImpl<'a> { } /// Iterator over raster structs in an Arrow StructArray -/// +/// /// This provides efficient, zero-copy access to raster data stored in Arrow format. /// Each iteration yields a `RasterRefImpl` that provides access to both metadata and band data. pub struct RasterStructIterator<'a> { @@ -907,43 +933,46 @@ pub fn raster_iterator(raster_struct: &StructArray) -> RasterStructIterator<'_> RasterStructIterator::new(raster_struct) } -/// Example usage demonstrating zero-copy metadata transfer between raster arrays -/// -/// This shows how to use the iterator to read raster metadata and pass it directly -/// to a new builder without any data copying or conversion. -/// -/// ```rust,ignore -/// use sedona_raster::raster::*; -/// -/// // Build a source raster -/// let mut source_builder = RasterBuilder::new(10); -/// let metadata = RasterMetadata { /* ... */ }; -/// source_builder.start_raster(&metadata).unwrap(); -/// // ... add bands ... -/// let source_array = source_builder.finish().unwrap(); -/// -/// // Copy metadata to new raster (zero-copy!) -/// let mut target_builder = RasterBuilder::new(10); -/// let iterator = raster_iterator(&source_array); -/// let source_raster = iterator.get(0).unwrap(); -/// -/// // Pass metadata reference directly - no allocation or copying! -/// target_builder.start_raster(source_raster.metadata()).unwrap(); -/// // ... add new bands with same geospatial parameters ... 
-/// ``` -pub fn example_zero_copy_metadata_transfer() { - // This is a documentation function showing the API usage pattern +/// Metadata for a raster +#[derive(Debug, Clone)] +pub struct RasterMetadata { + pub width: u64, + pub height: u64, + pub upperleft_x: f64, + pub upperleft_y: f64, + pub scale_x: f64, + pub scale_y: f64, + pub skew_x: f64, + pub skew_y: f64, + pub bounding_box: Option, +} + +/// Bounding box coordinates +#[derive(Debug, Clone)] +pub struct BoundingBox { + pub min_x: f64, + pub min_y: f64, + pub max_x: f64, + pub max_y: f64, +} + +/// Metadata for a single band +#[derive(Debug, Clone)] +pub struct BandMetadata { + pub nodata_value: Option>, + pub storage_type: StorageType, + pub datatype: BandDataType, } #[cfg(test)] mod iterator_tests { use super::*; - + #[test] fn test_iterator_basic_functionality() { // Create a simple raster for testing using the correct API let mut builder = RasterBuilder::new(10); // capacity - + let metadata = RasterMetadata { width: 10, height: 10, @@ -960,62 +989,62 @@ mod iterator_tests { max_y: 0.0, }), }; - + builder.start_raster(&metadata).unwrap(); - + let band_metadata = BandMetadata { nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, datatype: BandDataType::UInt8, }; - + // Add a single band with some test data using the correct API let test_data = vec![1u8; 100]; // 10x10 raster with value 1 builder.band_data_writer().append_value(&test_data); builder.finish_band(band_metadata).unwrap(); builder.finish_raster(); - + let raster_array = builder.finish().unwrap(); - + // Test the iterator let mut iterator = raster_iterator(&raster_array); - + assert_eq!(iterator.len(), 1); assert!(!iterator.is_empty()); - + let raster = iterator.next().unwrap(); let metadata = raster.metadata(); - + assert_eq!(metadata.width(), 10); assert_eq!(metadata.height(), 10); assert_eq!(metadata.scale_x(), 1.0); assert_eq!(metadata.scale_y(), -1.0); - + let bbox = metadata.bounding_box().unwrap(); assert_eq!(bbox.min_x, 
0.0); assert_eq!(bbox.max_x, 10.0); - + let bands = raster.bands(); assert_eq!(bands.len(), 1); assert!(!bands.is_empty()); - + let band = bands.band(0).unwrap(); assert_eq!(band.data().len(), 100); assert_eq!(band.data()[0], 1u8); - + let band_meta = band.metadata(); assert_eq!(band_meta.storage_type(), StorageType::InDb); assert_eq!(band_meta.data_type(), BandDataType::UInt8); - + // Test iterator over bands let band_iter: Vec<_> = bands.iter().collect(); assert_eq!(band_iter.len(), 1); } - + #[test] fn test_multi_band_iterator() { let mut builder = RasterBuilder::new(10); - + let metadata = RasterMetadata { width: 5, height: 5, @@ -1027,9 +1056,9 @@ mod iterator_tests { skew_y: 0.0, bounding_box: None, }; - + builder.start_raster(&metadata).unwrap(); - + // Add three bands using the correct API for band_idx in 0..3 { let band_metadata = BandMetadata { @@ -1037,28 +1066,28 @@ mod iterator_tests { storage_type: StorageType::InDb, datatype: BandDataType::UInt8, }; - + let test_data = vec![band_idx as u8; 25]; // 5x5 raster builder.band_data_writer().append_value(&test_data); builder.finish_band(band_metadata).unwrap(); } - + builder.finish_raster(); let raster_array = builder.finish().unwrap(); - + let mut iterator = raster_iterator(&raster_array); let raster = iterator.next().unwrap(); let bands = raster.bands(); - + assert_eq!(bands.len(), 3); - + // Test each band has different data for i in 0..3 { let band = bands.band(i).unwrap(); let expected_value = i as u8; assert!(band.data().iter().all(|&x| x == expected_value)); } - + // Test iterator let band_values: Vec = bands .iter() @@ -1068,15 +1097,15 @@ mod iterator_tests { band.data()[0] }) .collect(); - + assert_eq!(band_values, vec![0, 1, 2]); } - + #[test] fn test_copy_metadata_from_iterator() { // Create an original raster let mut source_builder = RasterBuilder::new(10); - + let original_metadata = RasterMetadata { width: 42, height: 24, @@ -1093,53 +1122,55 @@ mod iterator_tests { max_y: 37.8, }), }; - + 
source_builder.start_raster(&original_metadata).unwrap(); - + let band_metadata = BandMetadata { nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, datatype: BandDataType::UInt8, }; - + let test_data = vec![42u8; 1008]; // 42x24 raster source_builder.band_data_writer().append_value(&test_data); source_builder.finish_band(band_metadata).unwrap(); source_builder.finish_raster().unwrap(); - + let source_array = source_builder.finish().unwrap(); - + // Now create a new raster using metadata from the iterator - this is the key feature! let mut target_builder = RasterBuilder::new(10); let iterator = raster_iterator(&source_array); let source_raster = iterator.get(0).unwrap(); - + // Use metadata directly from the iterator (zero-copy!) - target_builder.start_raster(source_raster.metadata()).unwrap(); - + target_builder + .start_raster(source_raster.metadata()) + .unwrap(); + // Add new band data while preserving original metadata let new_band_metadata = BandMetadata { nodata_value: None, storage_type: StorageType::InDb, datatype: BandDataType::UInt16, }; - + let new_data = vec![100u16; 1008]; // Different data, same dimensions - let new_data_bytes: Vec = new_data.iter() - .flat_map(|&x| x.to_le_bytes()) - .collect(); - - target_builder.band_data_writer().append_value(&new_data_bytes); + let new_data_bytes: Vec = new_data.iter().flat_map(|&x| x.to_le_bytes()).collect(); + + target_builder + .band_data_writer() + .append_value(&new_data_bytes); target_builder.finish_band(new_band_metadata).unwrap(); target_builder.finish_raster().unwrap(); - + let target_array = target_builder.finish().unwrap(); - + // Verify the metadata was copied correctly let target_iterator = raster_iterator(&target_array); let target_raster = target_iterator.get(0).unwrap(); let target_metadata = target_raster.metadata(); - + // All metadata should match the original assert_eq!(target_metadata.width(), 42); assert_eq!(target_metadata.height(), 24); @@ -1147,11 +1178,11 @@ mod 
iterator_tests { assert_eq!(target_metadata.upper_left_y(), 37.8); assert_eq!(target_metadata.scale_x(), 0.1); assert_eq!(target_metadata.scale_y(), -0.1); - + let target_bbox = target_metadata.bounding_box().unwrap(); assert_eq!(target_bbox.min_x, -122.0); assert_eq!(target_bbox.max_x, -120.0); - + // But band data and metadata should be different let target_band = target_raster.bands().band(0).unwrap(); let target_band_meta = target_band.metadata(); @@ -1163,7 +1194,7 @@ mod iterator_tests { #[test] fn test_random_access() { let mut builder = RasterBuilder::new(10); - + // Add multiple rasters for raster_idx in 0..3 { let metadata = RasterMetadata { @@ -1177,73 +1208,38 @@ mod iterator_tests { skew_y: 0.0, bounding_box: None, }; - + builder.start_raster(&metadata).unwrap(); - + let band_metadata = BandMetadata { nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, datatype: BandDataType::UInt8, }; - + let size = (raster_idx + 1) * (raster_idx + 1); let test_data = vec![raster_idx as u8; size]; builder.band_data_writer().append_value(&test_data); builder.finish_band(band_metadata).unwrap(); builder.finish_raster(); } - + let raster_array = builder.finish().unwrap(); let iterator = raster_iterator(&raster_array); - + assert_eq!(iterator.len(), 3); - + // Test random access let raster_2 = iterator.get(2).unwrap(); assert_eq!(raster_2.metadata().width(), 3); assert_eq!(raster_2.metadata().height(), 3); assert_eq!(raster_2.metadata().upper_left_x(), 2.0); - + let band = raster_2.bands().band(0).unwrap(); assert_eq!(band.data().len(), 9); assert!(band.data().iter().all(|&x| x == 2u8)); - + // Test out of bounds assert!(iterator.get(10).is_none()); } } - - - -/// Metadata for a raster -#[derive(Debug, Clone)] -pub struct RasterMetadata { - pub width: u64, - pub height: u64, - pub upperleft_x: f64, - pub upperleft_y: f64, - pub scale_x: f64, - pub scale_y: f64, - pub skew_x: f64, - pub skew_y: f64, - pub bounding_box: Option, -} - -/// Bounding box 
coordinates -#[derive(Debug, Clone)] -pub struct BoundingBox { - pub min_x: f64, - pub min_y: f64, - pub max_x: f64, - pub max_y: f64, -} - -/// Metadata for a single band -#[derive(Debug, Clone)] -pub struct BandMetadata { - pub nodata_value: Option>, - pub storage_type: StorageType, - pub datatype: BandDataType, -} - - From 510182935dd11a62ab34e50000a9963b67e2fbf1 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Thu, 16 Oct 2025 14:21:14 -0700 Subject: [PATCH 05/18] switch to indexes for column refs --- rust/sedona-raster/src/raster.rs | 276 ++++++++++++++++++++++++++----- 1 file changed, 232 insertions(+), 44 deletions(-) diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs index 1c26e8e0..74fc7438 100644 --- a/rust/sedona-raster/src/raster.rs +++ b/rust/sedona-raster/src/raster.rs @@ -114,7 +114,7 @@ pub enum StorageType { OutDbRef = 1, } -pub mod column { +mod column { pub const METADATA: &str = "metadata"; pub const BANDS: &str = "bands"; pub const BAND: &str = "band"; @@ -143,6 +143,43 @@ pub mod column { pub const DATATYPE: &str = "data_type"; } +/// Hard-coded column indices for maximum performance +/// These must match the exact order defined in RasterSchema::metadata_type() +mod metadata_indices { + pub const WIDTH: usize = 0; + pub const HEIGHT: usize = 1; + pub const UPPERLEFT_X: usize = 2; + pub const UPPERLEFT_Y: usize = 3; + pub const SCALE_X: usize = 4; + pub const SCALE_Y: usize = 5; + pub const SKEW_X: usize = 6; + pub const SKEW_Y: usize = 7; + pub const BOUNDING_BOX: usize = 8; +} + +mod bounding_box_indices { + pub const MIN_X: usize = 0; + pub const MIN_Y: usize = 1; + pub const MAX_X: usize = 2; + pub const MAX_Y: usize = 3; +} + +mod band_metadata_indices { + pub const NODATAVALUE: usize = 0; + pub const STORAGE_TYPE: usize = 1; + pub const DATATYPE: usize = 2; +} + +mod band_indices { + pub const METADATA: usize = 0; + pub const DATA: usize = 1; +} + +mod raster_indices { 
+ pub const METADATA: usize = 0; + pub const BANDS: usize = 1; +} + /// Builder for constructing raster arrays with zero-copy band data writing pub struct RasterBuilder { metadata_builder: StructBuilder, @@ -546,8 +583,7 @@ struct MetadataRefImpl<'a> { impl<'a> MetadataRef for MetadataRefImpl<'a> { fn width(&self) -> u64 { self.metadata_struct - .column_by_name(column::WIDTH) - .unwrap() + .column(metadata_indices::WIDTH) .as_any() .downcast_ref::() .unwrap() @@ -556,8 +592,7 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { fn height(&self) -> u64 { self.metadata_struct - .column_by_name(column::HEIGHT) - .unwrap() + .column(metadata_indices::HEIGHT) .as_any() .downcast_ref::() .unwrap() @@ -566,8 +601,7 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { fn upper_left_x(&self) -> f64 { self.metadata_struct - .column_by_name(column::UPPERLEFT_X) - .unwrap() + .column(metadata_indices::UPPERLEFT_X) .as_any() .downcast_ref::() .unwrap() @@ -576,8 +610,7 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { fn upper_left_y(&self) -> f64 { self.metadata_struct - .column_by_name(column::UPPERLEFT_Y) - .unwrap() + .column(metadata_indices::UPPERLEFT_Y) .as_any() .downcast_ref::() .unwrap() @@ -586,8 +619,7 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { fn scale_x(&self) -> f64 { self.metadata_struct - .column_by_name(column::SCALE_X) - .unwrap() + .column(metadata_indices::SCALE_X) .as_any() .downcast_ref::() .unwrap() @@ -596,8 +628,7 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { fn scale_y(&self) -> f64 { self.metadata_struct - .column_by_name(column::SCALE_Y) - .unwrap() + .column(metadata_indices::SCALE_Y) .as_any() .downcast_ref::() .unwrap() @@ -606,8 +637,7 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { fn skew_x(&self) -> f64 { self.metadata_struct - .column_by_name(column::SKEW_X) - .unwrap() + .column(metadata_indices::SKEW_X) .as_any() .downcast_ref::() .unwrap() @@ -616,8 +646,7 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { fn skew_y(&self) -> f64 { 
self.metadata_struct - .column_by_name(column::SKEW_Y) - .unwrap() + .column(metadata_indices::SKEW_Y) .as_any() .downcast_ref::() .unwrap() @@ -626,26 +655,30 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { fn bounding_box(&self) -> Option { // Try to get bounding box if present in schema - if let Some(bbox_column) = self.metadata_struct.column_by_name(column::BOUNDING_BOX) { - let bbox_struct = bbox_column.as_any().downcast_ref::()?; + if let Some(bbox_struct) = self + .metadata_struct + .column(metadata_indices::BOUNDING_BOX) + .as_any() + .downcast_ref::() + { Some(BoundingBox { min_x: bbox_struct - .column_by_name(column::MIN_X)? + .column(bounding_box_indices::MIN_X) .as_any() .downcast_ref::()? .value(self.index), min_y: bbox_struct - .column_by_name(column::MIN_Y)? + .column(bounding_box_indices::MIN_Y) .as_any() .downcast_ref::()? .value(self.index), max_x: bbox_struct - .column_by_name(column::MAX_X)? + .column(bounding_box_indices::MAX_X) .as_any() .downcast_ref::()? .value(self.index), max_y: bbox_struct - .column_by_name(column::MAX_Y)? + .column(bounding_box_indices::MAX_Y) .as_any() .downcast_ref::()? 
.value(self.index), @@ -666,8 +699,7 @@ impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { fn nodata_value(&self) -> Option<&[u8]> { let nodata_array = self .metadata_struct - .column_by_name(column::NODATAVALUE) - .unwrap() + .column(band_metadata_indices::NODATAVALUE) .as_any() .downcast_ref::() .expect("Expected BinaryArray for nodata"); @@ -682,8 +714,7 @@ impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { fn storage_type(&self) -> StorageType { let storage_type_array = self .metadata_struct - .column_by_name(column::STORAGE_TYPE) - .unwrap() + .column(band_metadata_indices::STORAGE_TYPE) .as_any() .downcast_ref::() .expect("Expected UInt32Array for storage_type"); @@ -701,8 +732,7 @@ impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { fn data_type(&self) -> BandDataType { let datatype_array = self .metadata_struct - .column_by_name(column::DATATYPE) - .unwrap() + .column(band_metadata_indices::DATATYPE) .as_any() .downcast_ref::() .expect("Expected UInt32Array for datatype"); @@ -768,7 +798,7 @@ impl<'a> BandsRef for BandsRefImpl<'a> { // Get the metadata substructure from the band struct let band_metadata_struct = bands_struct - .column_by_name(column::METADATA)? + .column(band_indices::METADATA) .as_any() .downcast_ref::()?; @@ -779,7 +809,7 @@ impl<'a> BandsRef for BandsRefImpl<'a> { // Get band data from the Binary column within the band struct let band_data_array = bands_struct - .column_by_name(column::DATA)? 
+ .column(band_indices::DATA) .as_any() .downcast_ref::()?; @@ -833,25 +863,27 @@ pub struct RasterRefImpl<'a> { } impl<'a> RasterRefImpl<'a> { - /// Create a new RasterRefImpl from a struct array and index + /// Create a new RasterRefImpl from a struct array and index using hard-coded indices pub fn new(raster_struct: &'a StructArray, raster_index: usize) -> Self { + let metadata_struct = raster_struct + .column(raster_indices::METADATA) + .as_any() + .downcast_ref::() + .unwrap(); + + let bands_list = raster_struct + .column(raster_indices::BANDS) + .as_any() + .downcast_ref::() + .unwrap(); + let metadata = MetadataRefImpl { - metadata_struct: raster_struct - .column_by_name(column::METADATA) - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(), + metadata_struct, index: raster_index, }; let bands = BandsRefImpl { - bands_list: raster_struct - .column_by_name(column::BANDS) - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(), + bands_list, raster_index, }; @@ -1242,4 +1274,160 @@ mod iterator_tests { // Test out of bounds assert!(iterator.get(10).is_none()); } + + /// Comprehensive test to verify all hard-coded indices match the actual schema + /// This is critical - if this test fails, performance optimizations are broken! 
+ #[test] + fn test_hardcoded_indices_match_schema() { + // Test raster-level indices + let raster_fields = RasterSchema::fields(); + assert_eq!(raster_fields.len(), 2, "Expected exactly 2 raster fields"); + assert_eq!( + raster_fields[raster_indices::METADATA].name(), + column::METADATA, + "Raster metadata index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::BANDS].name(), + column::BANDS, + "Raster bands index mismatch" + ); + + // Test metadata indices + let metadata_type = RasterSchema::metadata_type(); + if let DataType::Struct(metadata_fields) = metadata_type { + assert_eq!( + metadata_fields.len(), + 9, + "Expected exactly 9 metadata fields" + ); + assert_eq!( + metadata_fields[metadata_indices::WIDTH].name(), + column::WIDTH, + "Metadata width index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::HEIGHT].name(), + column::HEIGHT, + "Metadata height index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::UPPERLEFT_X].name(), + column::UPPERLEFT_X, + "Metadata upperleft_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::UPPERLEFT_Y].name(), + column::UPPERLEFT_Y, + "Metadata upperleft_y index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SCALE_X].name(), + column::SCALE_X, + "Metadata scale_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SCALE_Y].name(), + column::SCALE_Y, + "Metadata scale_y index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SKEW_X].name(), + column::SKEW_X, + "Metadata skew_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SKEW_Y].name(), + column::SKEW_Y, + "Metadata skew_y index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::BOUNDING_BOX].name(), + column::BOUNDING_BOX, + "Metadata bounding_box index mismatch" + ); + } else { + panic!("Expected Struct type for metadata"); + } + + // Test bounding box indices + let bbox_type = RasterSchema::bounding_box_type(); + if let 
DataType::Struct(bbox_fields) = bbox_type { + assert_eq!( + bbox_fields.len(), + 4, + "Expected exactly 4 bounding box fields" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MIN_X].name(), + column::MIN_X, + "Bounding box min_x index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MIN_Y].name(), + column::MIN_Y, + "Bounding box min_y index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MAX_X].name(), + column::MAX_X, + "Bounding box max_x index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MAX_Y].name(), + column::MAX_Y, + "Bounding box max_y index mismatch" + ); + } else { + panic!("Expected Struct type for bounding box"); + } + + // Test band metadata indices + let band_metadata_type = RasterSchema::band_metadata_type(); + if let DataType::Struct(band_metadata_fields) = band_metadata_type { + assert_eq!( + band_metadata_fields.len(), + 3, + "Expected exactly 3 band metadata fields" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::NODATAVALUE].name(), + column::NODATAVALUE, + "Band metadata nodatavalue index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::STORAGE_TYPE].name(), + column::STORAGE_TYPE, + "Band metadata storage_type index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::DATATYPE].name(), + column::DATATYPE, + "Band metadata datatype index mismatch" + ); + } else { + panic!("Expected Struct type for band metadata"); + } + + // Test band indices + let band_type = RasterSchema::band_type(); + if let DataType::Struct(band_fields) = band_type { + assert_eq!(band_fields.len(), 2, "Expected exactly 2 band fields"); + assert_eq!( + band_fields[band_indices::METADATA].name(), + column::METADATA, + "Band metadata index mismatch" + ); + assert_eq!( + band_fields[band_indices::DATA].name(), + column::DATA, + "Band data index mismatch" + ); + } else { + panic!("Expected Struct type for band"); + } + + println!("✅ All hard-coded 
indices verified to match schema!"); + println!("🚀 Performance optimizations are correctly configured!"); + } } From a0df6830cfc1d150067251ecf764153dbfe7f3f8 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:18:49 -0700 Subject: [PATCH 06/18] add crs to schema --- rust/sedona-raster/src/raster.rs | 230 ++++++++++++++++++------------- 1 file changed, 133 insertions(+), 97 deletions(-) diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs index 74fc7438..423a3055 100644 --- a/rust/sedona-raster/src/raster.rs +++ b/rust/sedona-raster/src/raster.rs @@ -1,6 +1,6 @@ use arrow::array::{ - Array, ArrayRef, BinaryArray, BinaryBuilder, ListArray, ListBuilder, StructArray, - StructBuilder, UInt32Array, UInt64Array, + Array, ArrayRef, BinaryArray, BinaryBuilder, StringViewArray, ListArray, ListBuilder, + StructArray, StructBuilder, UInt32Array, UInt64Array, }; use arrow::buffer::MutableBuffer; use arrow::datatypes::{DataType, Field, FieldRef, Fields}; @@ -35,6 +35,8 @@ impl RasterSchema { Field::new(column::SKEW_Y, DataType::Float64, false), // Optional bounding box Field::new(column::BOUNDING_BOX, Self::bounding_box_type(), true), + // Optional coordinate reference system (CRS) as json + Field::new(column::CRS, DataType::Utf8View, true), ])) } @@ -114,71 +116,6 @@ pub enum StorageType { OutDbRef = 1, } -mod column { - pub const METADATA: &str = "metadata"; - pub const BANDS: &str = "bands"; - pub const BAND: &str = "band"; - pub const DATA: &str = "data"; - - // Raster metadata fields - pub const WIDTH: &str = "width"; - pub const HEIGHT: &str = "height"; - pub const UPPERLEFT_X: &str = "upperleft_x"; - pub const UPPERLEFT_Y: &str = "upperleft_y"; - pub const SCALE_X: &str = "scale_x"; - pub const SCALE_Y: &str = "scale_y"; - pub const SKEW_X: &str = "skew_x"; - pub const SKEW_Y: &str = "skew_y"; - pub const BOUNDING_BOX: &str = "bounding_box"; - - // Bounding box fields - pub const MIN_X: 
&str = "min_x"; - pub const MIN_Y: &str = "min_y"; - pub const MAX_X: &str = "max_x"; - pub const MAX_Y: &str = "max_y"; - - // Band metadata fields - pub const NODATAVALUE: &str = "nodata_value"; - pub const STORAGE_TYPE: &str = "storage_type"; - pub const DATATYPE: &str = "data_type"; -} - -/// Hard-coded column indices for maximum performance -/// These must match the exact order defined in RasterSchema::metadata_type() -mod metadata_indices { - pub const WIDTH: usize = 0; - pub const HEIGHT: usize = 1; - pub const UPPERLEFT_X: usize = 2; - pub const UPPERLEFT_Y: usize = 3; - pub const SCALE_X: usize = 4; - pub const SCALE_Y: usize = 5; - pub const SKEW_X: usize = 6; - pub const SKEW_Y: usize = 7; - pub const BOUNDING_BOX: usize = 8; -} - -mod bounding_box_indices { - pub const MIN_X: usize = 0; - pub const MIN_Y: usize = 1; - pub const MAX_X: usize = 2; - pub const MAX_Y: usize = 3; -} - -mod band_metadata_indices { - pub const NODATAVALUE: usize = 0; - pub const STORAGE_TYPE: usize = 1; - pub const DATATYPE: usize = 2; -} - -mod band_indices { - pub const METADATA: usize = 0; - pub const DATA: usize = 1; -} - -mod raster_indices { - pub const METADATA: usize = 0; - pub const BANDS: usize = 1; -} /// Builder for constructing raster arrays with zero-copy band data writing pub struct RasterBuilder { @@ -288,23 +225,23 @@ impl RasterBuilder { if let Some(nodata) = band_metadata.nodata_value { metadata_builder - .field_builder::(0) + .field_builder::(band_metadata_indices::NODATAVALUE) .unwrap() .append_value(&nodata); } else { metadata_builder - .field_builder::(0) + .field_builder::(band_metadata_indices::NODATAVALUE) .unwrap() .append_null(); } metadata_builder - .field_builder::(1) + .field_builder::(band_metadata_indices::STORAGE_TYPE) .unwrap() .append_value(band_metadata.storage_type as u32); metadata_builder - .field_builder::(2) + .field_builder::(band_metadata_indices::DATATYPE) .unwrap() .append_value(band_metadata.datatype as u32); @@ -325,44 +262,44 @@ 
impl RasterBuilder { fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { // Width self.metadata_builder - .field_builder::(0) + .field_builder::(metadata_indices::WIDTH) .unwrap() .append_value(metadata.width()); // Height self.metadata_builder - .field_builder::(1) + .field_builder::(metadata_indices::HEIGHT) .unwrap() .append_value(metadata.height()); // Geotransform parameters self.metadata_builder - .field_builder::(2) + .field_builder::(metadata_indices::UPPERLEFT_X) .unwrap() .append_value(metadata.upper_left_x()); self.metadata_builder - .field_builder::(3) + .field_builder::(metadata_indices::UPPERLEFT_Y) .unwrap() .append_value(metadata.upper_left_y()); self.metadata_builder - .field_builder::(4) + .field_builder::(metadata_indices::SCALE_X) .unwrap() .append_value(metadata.scale_x()); self.metadata_builder - .field_builder::(5) + .field_builder::(metadata_indices::SCALE_Y) .unwrap() .append_value(metadata.scale_y()); self.metadata_builder - .field_builder::(6) + .field_builder::(metadata_indices::SKEW_X) .unwrap() .append_value(metadata.skew_x()); self.metadata_builder - .field_builder::(7) + .field_builder::(metadata_indices::SKEW_Y) .unwrap() .append_value(metadata.skew_y()); @@ -370,26 +307,26 @@ impl RasterBuilder { if let Some(bbox) = metadata.bounding_box() { let bbox_builder = self .metadata_builder - .field_builder::(8) + .field_builder::(metadata_indices::BOUNDING_BOX) .unwrap(); bbox_builder - .field_builder::(0) + .field_builder::(bounding_box_indices::MIN_X) .unwrap() .append_value(bbox.min_x); bbox_builder - .field_builder::(1) + .field_builder::(bounding_box_indices::MIN_Y) .unwrap() .append_value(bbox.min_y); bbox_builder - .field_builder::(2) + .field_builder::(bounding_box_indices::MAX_X) .unwrap() .append_value(bbox.max_x); bbox_builder - .field_builder::(3) + .field_builder::(bounding_box_indices::MAX_Y) .unwrap() .append_value(bbox.max_y); @@ -398,26 +335,26 @@ impl RasterBuilder { // Append 
null bounding box - need to fill in null values for all fields let bbox_builder = self .metadata_builder - .field_builder::(8) + .field_builder::(metadata_indices::BOUNDING_BOX) .unwrap(); bbox_builder - .field_builder::(0) + .field_builder::(bounding_box_indices::MIN_X) .unwrap() .append_null(); bbox_builder - .field_builder::(1) + .field_builder::(bounding_box_indices::MIN_Y) .unwrap() .append_null(); bbox_builder - .field_builder::(2) + .field_builder::(bounding_box_indices::MAX_X) .unwrap() .append_null(); bbox_builder - .field_builder::(3) + .field_builder::(bounding_box_indices::MAX_Y) .unwrap() .append_null(); @@ -501,6 +438,8 @@ pub trait MetadataRef { fn skew_y(&self) -> f64; /// Optional bounding box (when available) fn bounding_box(&self) -> Option; + /// Optional coordinate reference system as binary data + fn crs(&self) -> Option<&str>; } /// Implement MetadataRef for RasterMetadata to allow direct use with builder @@ -532,15 +471,18 @@ impl MetadataRef for RasterMetadata { fn bounding_box(&self) -> Option { self.bounding_box.clone() } + fn crs(&self) -> Option<&str> { + self.crs.as_deref() + } } /// Trait for accessing individual band metadata pub trait BandMetadataRef { /// No-data value as raw bytes (None if null) fn nodata_value(&self) -> Option<&[u8]>; - /// Storage type (InDb or OutDbRef) + /// Storage type (InDb, OutDbRef, etc) fn storage_type(&self) -> StorageType; - /// Band data type (Uint8, Float32, etc.) + /// Band data type (UInt8, Float32, etc.) 
fn data_type(&self) -> BandDataType; } @@ -687,6 +629,20 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { None } } + + fn crs(&self) -> Option<&str> { + let crs_array = self + .metadata_struct + .column(metadata_indices::CRS) + .as_any() + .downcast_ref::()?; + + if crs_array.is_null(self.index) { + None + } else { + Some(crs_array.value(self.index)) + } + } } /// Implementation of BandMetadataRef for Arrow StructArray @@ -782,6 +738,10 @@ impl<'a> BandsRef for BandsRefImpl<'a> { end - start } + /// Get a specific band by index + /// IMPORTANT: This function is utilizing zero based band indexing. + /// We may want to consider one-based indexing to match + /// raster standard band conventions. fn band(&self, index: usize) -> Option> { if index >= self.len() { return None; @@ -977,6 +937,7 @@ pub struct RasterMetadata { pub skew_x: f64, pub skew_y: f64, pub bounding_box: Option, + pub crs: Option<&str>, } /// Bounding box coordinates @@ -996,6 +957,76 @@ pub struct BandMetadata { pub datatype: BandDataType, } +// Private field column name and index constants +// used across schema, builders and iterators +mod column { + pub const METADATA: &str = "metadata"; + pub const BANDS: &str = "bands"; + pub const BAND: &str = "band"; + pub const DATA: &str = "data"; + + // Raster metadata fields + pub const WIDTH: &str = "width"; + pub const HEIGHT: &str = "height"; + pub const UPPERLEFT_X: &str = "upperleft_x"; + pub const UPPERLEFT_Y: &str = "upperleft_y"; + pub const SCALE_X: &str = "scale_x"; + pub const SCALE_Y: &str = "scale_y"; + pub const SKEW_X: &str = "skew_x"; + pub const SKEW_Y: &str = "skew_y"; + pub const BOUNDING_BOX: &str = "bounding_box"; + pub const CRS: &str = "crs"; + + // Bounding box fields + pub const MIN_X: &str = "min_x"; + pub const MIN_Y: &str = "min_y"; + pub const MAX_X: &str = "max_x"; + pub const MAX_Y: &str = "max_y"; + + // Band metadata fields + pub const NODATAVALUE: &str = "nodata_value"; + pub const STORAGE_TYPE: &str = "storage_type"; + 
pub const DATATYPE: &str = "data_type"; +} + +/// Hard-coded column indices for maximum performance +/// These must match the exact order defined in RasterSchema::metadata_type() +mod metadata_indices { + pub const WIDTH: usize = 0; + pub const HEIGHT: usize = 1; + pub const UPPERLEFT_X: usize = 2; + pub const UPPERLEFT_Y: usize = 3; + pub const SCALE_X: usize = 4; + pub const SCALE_Y: usize = 5; + pub const SKEW_X: usize = 6; + pub const SKEW_Y: usize = 7; + pub const BOUNDING_BOX: usize = 8; + pub const CRS: usize = 9; +} + +mod bounding_box_indices { + pub const MIN_X: usize = 0; + pub const MIN_Y: usize = 1; + pub const MAX_X: usize = 2; + pub const MAX_Y: usize = 3; +} + +mod band_metadata_indices { + pub const NODATAVALUE: usize = 0; + pub const STORAGE_TYPE: usize = 1; + pub const DATATYPE: usize = 2; +} + +mod band_indices { + pub const METADATA: usize = 0; + pub const DATA: usize = 1; +} + +mod raster_indices { + pub const METADATA: usize = 0; + pub const BANDS: usize = 1; +} + #[cfg(test)] mod iterator_tests { use super::*; @@ -1020,6 +1051,7 @@ mod iterator_tests { max_x: 10.0, max_y: 0.0, }), + crs: None, }; builder.start_raster(&metadata).unwrap(); @@ -1087,6 +1119,7 @@ mod iterator_tests { skew_x: 0.0, skew_y: 0.0, bounding_box: None, + crs: None, }; builder.start_raster(&metadata).unwrap(); @@ -1153,6 +1186,7 @@ mod iterator_tests { max_x: -120.0, max_y: 37.8, }), + crs: Some(b"EPSG:4326".to_vec()), }; source_builder.start_raster(&original_metadata).unwrap(); @@ -1239,6 +1273,7 @@ mod iterator_tests { skew_x: 0.0, skew_y: 0.0, bounding_box: None, + crs: None, }; builder.start_raster(&metadata).unwrap(); @@ -1276,7 +1311,6 @@ mod iterator_tests { } /// Comprehensive test to verify all hard-coded indices match the actual schema - /// This is critical - if this test fails, performance optimizations are broken! 
#[test] fn test_hardcoded_indices_match_schema() { // Test raster-level indices @@ -1298,8 +1332,8 @@ mod iterator_tests { if let DataType::Struct(metadata_fields) = metadata_type { assert_eq!( metadata_fields.len(), - 9, - "Expected exactly 9 metadata fields" + 10, + "Expected exactly 10 metadata fields" ); assert_eq!( metadata_fields[metadata_indices::WIDTH].name(), @@ -1346,6 +1380,11 @@ mod iterator_tests { column::BOUNDING_BOX, "Metadata bounding_box index mismatch" ); + assert_eq!( + metadata_fields[metadata_indices::CRS].name(), + column::CRS, + "Metadata crs index mismatch" + ); } else { panic!("Expected Struct type for metadata"); } @@ -1426,8 +1465,5 @@ mod iterator_tests { } else { panic!("Expected Struct type for band"); } - - println!("✅ All hard-coded indices verified to match schema!"); - println!("🚀 Performance optimizations are correctly configured!"); } } From 07d2d707045254c34cf2aa50bdb6cb38b18fe7d3 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Fri, 17 Oct 2025 10:19:42 -0700 Subject: [PATCH 07/18] add crs --- rust/sedona-raster/src/raster.rs | 122 ++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 44 deletions(-) diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs index 423a3055..df4b1a33 100644 --- a/rust/sedona-raster/src/raster.rs +++ b/rust/sedona-raster/src/raster.rs @@ -1,6 +1,6 @@ use arrow::array::{ - Array, ArrayRef, BinaryArray, BinaryBuilder, StringViewArray, ListArray, ListBuilder, - StructArray, StructBuilder, UInt32Array, UInt64Array, + Array, ArrayRef, BinaryArray, BinaryBuilder, ListArray, ListBuilder, StringViewArray, StringViewBuilder, + StructArray, StructBuilder, UInt32Array, UInt64Array }; use arrow::buffer::MutableBuffer; use arrow::datatypes::{DataType, Field, FieldRef, Fields}; @@ -16,6 +16,7 @@ impl RasterSchema { pub fn fields() -> Fields { Fields::from(vec![ Field::new(column::METADATA, Self::metadata_type(), false), + 
Field::new(column::CRS, Self::crs_type(), true), Field::new(column::BANDS, Self::bands_type(), true), ]) } @@ -35,8 +36,6 @@ impl RasterSchema { Field::new(column::SKEW_Y, DataType::Float64, false), // Optional bounding box Field::new(column::BOUNDING_BOX, Self::bounding_box_type(), true), - // Optional coordinate reference system (CRS) as json - Field::new(column::CRS, DataType::Utf8View, true), ])) } @@ -80,6 +79,11 @@ impl RasterSchema { pub fn band_data_type() -> DataType { DataType::Binary } + + /// CRS schema to store json representation + pub fn crs_type() -> DataType { + DataType::Utf8View + } } #[repr(u16)] @@ -120,6 +124,7 @@ pub enum StorageType { /// Builder for constructing raster arrays with zero-copy band data writing pub struct RasterBuilder { metadata_builder: StructBuilder, + crs_builder: StringViewBuilder, bands_builder: ListBuilder, } @@ -150,6 +155,7 @@ impl RasterBuilder { Self { metadata_builder, + crs_builder: StringViewBuilder::new(), bands_builder, } } @@ -160,7 +166,17 @@ impl RasterBuilder { /// - RasterMetadata structs directly /// - MetadataRef trait objects from iterators pub fn start_raster(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { - self.append_metadata_from_ref(metadata) + self.append_metadata_from_ref(metadata)?; + // Default to null CRS - user can set it separately with set_crs() + self.crs_builder.append_null(); + Ok(()) + } + + /// Start a new raster with metadata and optional CRS + pub fn start_raster_with_crs(&mut self, metadata: &dyn MetadataRef, crs: Option<&str>) -> Result<(), ArrowError> { + self.append_metadata_from_ref(metadata)?; + self.set_crs(crs)?; + Ok(()) } /// Convenience method for starting a raster with owned RasterMetadata @@ -366,9 +382,19 @@ impl RasterBuilder { Ok(()) } + /// Set the CRS for the current raster + pub fn set_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { + match crs { + Some(crs_data) => self.crs_builder.append_value(crs_data), + None => 
self.crs_builder.append_null(), + } + Ok(()) + } + /// Append a null raster pub fn append_null(&mut self) -> Result<(), ArrowError> { self.metadata_builder.append(false); + self.crs_builder.append_null(); self.bands_builder.append(false); Ok(()) } @@ -376,10 +402,15 @@ impl RasterBuilder { /// Finish building and return the constructed StructArray pub fn finish(mut self) -> Result { let metadata_array = self.metadata_builder.finish(); + let crs_array = self.crs_builder.finish(); let bands_array = self.bands_builder.finish(); let fields = RasterSchema::fields(); - let arrays: Vec = vec![Arc::new(metadata_array), Arc::new(bands_array)]; + let arrays: Vec = vec![ + Arc::new(metadata_array), + Arc::new(crs_array), + Arc::new(bands_array) + ]; Ok(StructArray::new(fields, arrays, None)) } @@ -438,8 +469,6 @@ pub trait MetadataRef { fn skew_y(&self) -> f64; /// Optional bounding box (when available) fn bounding_box(&self) -> Option; - /// Optional coordinate reference system as binary data - fn crs(&self) -> Option<&str>; } /// Implement MetadataRef for RasterMetadata to allow direct use with builder @@ -471,9 +500,6 @@ impl MetadataRef for RasterMetadata { fn bounding_box(&self) -> Option { self.bounding_box.clone() } - fn crs(&self) -> Option<&str> { - self.crs.as_deref() - } } /// Trait for accessing individual band metadata @@ -512,6 +538,8 @@ pub trait BandsRef { pub trait RasterRef { /// Raster metadata accessor fn metadata(&self) -> &dyn MetadataRef; + /// CRS accessor + fn crs(&self) -> Option<&str>; /// Bands accessor fn bands(&self) -> &dyn BandsRef; } @@ -630,19 +658,6 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { } } - fn crs(&self) -> Option<&str> { - let crs_array = self - .metadata_struct - .column(metadata_indices::CRS) - .as_any() - .downcast_ref::()?; - - if crs_array.is_null(self.index) { - None - } else { - Some(crs_array.value(self.index)) - } - } } /// Implementation of BandMetadataRef for Arrow StructArray @@ -819,6 +834,7 @@ impl 
ExactSizeIterator for BandIterator<'_> {} /// Implementation of RasterRef for complete raster access pub struct RasterRefImpl<'a> { metadata: MetadataRefImpl<'a>, + crs: &'a StringViewArray, bands: BandsRefImpl<'a>, } @@ -830,6 +846,12 @@ impl<'a> RasterRefImpl<'a> { .as_any() .downcast_ref::() .unwrap(); + + let crs = raster_struct + .column(raster_indices::CRS) + .as_any() + .downcast_ref::() + .unwrap(); let bands_list = raster_struct .column(raster_indices::BANDS) @@ -847,7 +869,7 @@ impl<'a> RasterRefImpl<'a> { raster_index, }; - Self { metadata, bands } + Self { metadata, crs, bands } } } @@ -856,6 +878,14 @@ impl<'a> RasterRef for RasterRefImpl<'a> { &self.metadata } + fn crs(&self) -> Option<&str> { + if self.crs.is_null(self.bands.raster_index) { + None + } else { + Some(&self.crs.value(self.bands.raster_index)) + } + } + fn bands(&self) -> &dyn BandsRef { &self.bands } @@ -937,7 +967,6 @@ pub struct RasterMetadata { pub skew_x: f64, pub skew_y: f64, pub bounding_box: Option, - pub crs: Option<&str>, } /// Bounding box coordinates @@ -1001,7 +1030,6 @@ mod metadata_indices { pub const SKEW_X: usize = 6; pub const SKEW_Y: usize = 7; pub const BOUNDING_BOX: usize = 8; - pub const CRS: usize = 9; } mod bounding_box_indices { @@ -1024,7 +1052,8 @@ mod band_indices { mod raster_indices { pub const METADATA: usize = 0; - pub const BANDS: usize = 1; + pub const CRS: usize = 1; + pub const BANDS: usize = 2; } #[cfg(test)] @@ -1051,10 +1080,10 @@ mod iterator_tests { max_x: 10.0, max_y: 0.0, }), - crs: None, }; - builder.start_raster(&metadata).unwrap(); + let epsg4326 = "EPSG:4326"; + builder.start_raster_with_crs(&metadata, Some(&epsg4326)).unwrap(); let band_metadata = BandMetadata { nodata_value: Some(vec![255u8]), @@ -1066,7 +1095,8 @@ mod iterator_tests { let test_data = vec![1u8; 100]; // 10x10 raster with value 1 builder.band_data_writer().append_value(&test_data); builder.finish_band(band_metadata).unwrap(); - builder.finish_raster(); + let result = 
builder.finish_raster(); + assert!(result.is_ok()); let raster_array = builder.finish().unwrap(); @@ -1100,6 +1130,9 @@ mod iterator_tests { assert_eq!(band_meta.storage_type(), StorageType::InDb); assert_eq!(band_meta.data_type(), BandDataType::UInt8); + let crs = raster.crs().unwrap(); + assert_eq!(crs, epsg4326); + // Test iterator over bands let band_iter: Vec<_> = bands.iter().collect(); assert_eq!(band_iter.len(), 1); @@ -1119,7 +1152,6 @@ mod iterator_tests { skew_x: 0.0, skew_y: 0.0, bounding_box: None, - crs: None, }; builder.start_raster(&metadata).unwrap(); @@ -1137,7 +1169,9 @@ mod iterator_tests { builder.finish_band(band_metadata).unwrap(); } - builder.finish_raster(); + let result = builder.finish_raster(); + assert!(result.is_ok()); + let raster_array = builder.finish().unwrap(); let mut iterator = raster_iterator(&raster_array); @@ -1186,7 +1220,6 @@ mod iterator_tests { max_x: -120.0, max_y: 37.8, }), - crs: Some(b"EPSG:4326".to_vec()), }; source_builder.start_raster(&original_metadata).unwrap(); @@ -1273,7 +1306,6 @@ mod iterator_tests { skew_x: 0.0, skew_y: 0.0, bounding_box: None, - crs: None, }; builder.start_raster(&metadata).unwrap(); @@ -1288,7 +1320,8 @@ mod iterator_tests { let test_data = vec![raster_idx as u8; size]; builder.band_data_writer().append_value(&test_data); builder.finish_band(band_metadata).unwrap(); - builder.finish_raster(); + let result = builder.finish_raster(); + assert!(result.is_ok()); } let raster_array = builder.finish().unwrap(); @@ -1315,12 +1348,17 @@ mod iterator_tests { fn test_hardcoded_indices_match_schema() { // Test raster-level indices let raster_fields = RasterSchema::fields(); - assert_eq!(raster_fields.len(), 2, "Expected exactly 2 raster fields"); + assert_eq!(raster_fields.len(), 3, "Expected exactly 2 raster fields"); assert_eq!( raster_fields[raster_indices::METADATA].name(), column::METADATA, "Raster metadata index mismatch" ); + assert_eq!( + raster_fields[raster_indices::CRS].name(), + 
column::CRS, + "CRS bands index mismatch" + ); assert_eq!( raster_fields[raster_indices::BANDS].name(), column::BANDS, @@ -1332,8 +1370,8 @@ mod iterator_tests { if let DataType::Struct(metadata_fields) = metadata_type { assert_eq!( metadata_fields.len(), - 10, - "Expected exactly 10 metadata fields" + 9, + "Expected exactly 9 metadata fields" ); assert_eq!( metadata_fields[metadata_indices::WIDTH].name(), @@ -1380,11 +1418,7 @@ mod iterator_tests { column::BOUNDING_BOX, "Metadata bounding_box index mismatch" ); - assert_eq!( - metadata_fields[metadata_indices::CRS].name(), - column::CRS, - "Metadata crs index mismatch" - ); + } else { panic!("Expected Struct type for metadata"); } From 4bed58d359a1cfac79884abce64485ad64282e14 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Fri, 17 Oct 2025 10:59:20 -0700 Subject: [PATCH 08/18] pull out bounding box --- rust/sedona-raster/src/raster.rs | 242 ++++++++++++++++--------------- 1 file changed, 129 insertions(+), 113 deletions(-) diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs index df4b1a33..0ae044a1 100644 --- a/rust/sedona-raster/src/raster.rs +++ b/rust/sedona-raster/src/raster.rs @@ -17,6 +17,7 @@ impl RasterSchema { Fields::from(vec![ Field::new(column::METADATA, Self::metadata_type(), false), Field::new(column::CRS, Self::crs_type(), true), + Field::new(column::BBOX, Self::bounding_box_type(), true), Field::new(column::BANDS, Self::bands_type(), true), ]) } @@ -34,8 +35,6 @@ impl RasterSchema { Field::new(column::SCALE_Y, DataType::Float64, false), Field::new(column::SKEW_X, DataType::Float64, false), Field::new(column::SKEW_Y, DataType::Float64, false), - // Optional bounding box - Field::new(column::BOUNDING_BOX, Self::bounding_box_type(), true), ])) } @@ -125,6 +124,7 @@ pub enum StorageType { pub struct RasterBuilder { metadata_builder: StructBuilder, crs_builder: StringViewBuilder, + bbox_builder: StructBuilder, bands_builder: 
ListBuilder, } @@ -153,37 +153,54 @@ impl RasterBuilder { false, )); + let bbox_builder = StructBuilder::from_fields( + match RasterSchema::bounding_box_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for bounding box"), + }, + capacity, + ); + Self { metadata_builder, crs_builder: StringViewBuilder::new(), + bbox_builder, bands_builder, } } - /// Start a new raster and write its metadata + /// Start a new raster with metadata, optional CRS, and optional bounding box /// - /// Accepts any type that implements MetadataRef, allowing you to pass: - /// - RasterMetadata structs directly - /// - MetadataRef trait objects from iterators - pub fn start_raster(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { - self.append_metadata_from_ref(metadata)?; - // Default to null CRS - user can set it separately with set_crs() - self.crs_builder.append_null(); - Ok(()) - } - - /// Start a new raster with metadata and optional CRS - pub fn start_raster_with_crs(&mut self, metadata: &dyn MetadataRef, crs: Option<&str>) -> Result<(), ArrowError> { + /// This is the unified method for starting a raster with all optional parameters. 
+ /// + /// # Arguments + /// * `metadata` - Raster metadata (dimensions, geotransform parameters) + /// * `crs` - Optional coordinate reference system as string + /// * `bbox` - Optional bounding box coordinates + /// + /// # Examples + /// ```ignore + /// // From iterator - copy all fields from existing raster + /// builder.start_raster(raster.metadata(), raster.crs(), raster.bounding_box(0).as_ref())?; + /// + /// // From RasterMetadata struct with all fields + /// builder.start_raster(&metadata, Some("EPSG:4326"), metadata.bounding_box.as_ref())?; + /// + /// // Minimal - just metadata + /// builder.start_raster(&metadata, None, None)?; + /// ``` + pub fn start_raster( + &mut self, + metadata: &dyn MetadataRef, + crs: Option<&str>, + bbox: Option<&BoundingBox> + ) -> Result<(), ArrowError> { self.append_metadata_from_ref(metadata)?; self.set_crs(crs)?; + self.append_bounding_box(bbox)?; Ok(()) } - /// Convenience method for starting a raster with owned RasterMetadata - pub fn start_raster_owned(&mut self, metadata: RasterMetadata) -> Result<(), ArrowError> { - self.start_raster(&metadata) - } - /// Get direct access to the BinaryBuilder for writing the current band's data pub fn band_data_writer(&mut self) -> &mut BinaryBuilder { let band_builder = self.bands_builder.values(); @@ -319,74 +336,67 @@ impl RasterBuilder { .unwrap() .append_value(metadata.skew_y()); - // Optional bounding box - if let Some(bbox) = metadata.bounding_box() { - let bbox_builder = self - .metadata_builder - .field_builder::(metadata_indices::BOUNDING_BOX) - .unwrap(); + self.metadata_builder.append(true); + + Ok(()) + } - bbox_builder + /// Set the CRS for the current raster + pub fn set_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { + match crs { + Some(crs_data) => self.crs_builder.append_value(crs_data), + None => self.crs_builder.append_null(), + } + Ok(()) + } + + /// Append a bounding box to the current raster + pub fn append_bounding_box(&mut self, bbox: 
Option<&BoundingBox>) -> Result<(), ArrowError> { + if let Some(bbox) = bbox { + self.bbox_builder .field_builder::(bounding_box_indices::MIN_X) .unwrap() .append_value(bbox.min_x); - bbox_builder + self.bbox_builder .field_builder::(bounding_box_indices::MIN_Y) .unwrap() .append_value(bbox.min_y); - bbox_builder + self.bbox_builder .field_builder::(bounding_box_indices::MAX_X) .unwrap() .append_value(bbox.max_x); - bbox_builder + self.bbox_builder .field_builder::(bounding_box_indices::MAX_Y) .unwrap() .append_value(bbox.max_y); - bbox_builder.append(true); + self.bbox_builder.append(true); } else { // Append null bounding box - need to fill in null values for all fields - let bbox_builder = self - .metadata_builder - .field_builder::(metadata_indices::BOUNDING_BOX) - .unwrap(); - - bbox_builder + self.bbox_builder .field_builder::(bounding_box_indices::MIN_X) .unwrap() .append_null(); - bbox_builder + self.bbox_builder .field_builder::(bounding_box_indices::MIN_Y) .unwrap() .append_null(); - bbox_builder + self.bbox_builder .field_builder::(bounding_box_indices::MAX_X) .unwrap() .append_null(); - bbox_builder + self.bbox_builder .field_builder::(bounding_box_indices::MAX_Y) .unwrap() .append_null(); - bbox_builder.append(false); - } - - self.metadata_builder.append(true); - - Ok(()) - } - - /// Set the CRS for the current raster - pub fn set_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { - match crs { - Some(crs_data) => self.crs_builder.append_value(crs_data), - None => self.crs_builder.append_null(), + self.bbox_builder.append(false); } Ok(()) } @@ -395,6 +405,7 @@ impl RasterBuilder { pub fn append_null(&mut self) -> Result<(), ArrowError> { self.metadata_builder.append(false); self.crs_builder.append_null(); + self.bbox_builder.append(false); self.bands_builder.append(false); Ok(()) } @@ -403,12 +414,14 @@ impl RasterBuilder { pub fn finish(mut self) -> Result { let metadata_array = self.metadata_builder.finish(); let crs_array = 
self.crs_builder.finish(); + let bbox_array = self.bbox_builder.finish(); let bands_array = self.bands_builder.finish(); let fields = RasterSchema::fields(); let arrays: Vec = vec![ Arc::new(metadata_array), Arc::new(crs_array), + Arc::new(bbox_array), Arc::new(bands_array) ]; @@ -428,7 +441,7 @@ impl RasterBuilder { where F: FnMut(usize, &mut BinaryBuilder) -> Result, { - self.start_raster(&metadata)?; + self.start_raster(&metadata, None, metadata.bounding_box.as_ref())?; for band_index in 0..band_count { let band_metadata = { @@ -467,8 +480,6 @@ pub trait MetadataRef { fn skew_x(&self) -> f64; /// Y-direction skew/rotation fn skew_y(&self) -> f64; - /// Optional bounding box (when available) - fn bounding_box(&self) -> Option; } /// Implement MetadataRef for RasterMetadata to allow direct use with builder @@ -497,9 +508,6 @@ impl MetadataRef for RasterMetadata { fn skew_y(&self) -> f64 { self.skew_y } - fn bounding_box(&self) -> Option { - self.bounding_box.clone() - } } /// Trait for accessing individual band metadata @@ -623,40 +631,7 @@ impl<'a> MetadataRef for MetadataRefImpl<'a> { .value(self.index) } - fn bounding_box(&self) -> Option { - // Try to get bounding box if present in schema - if let Some(bbox_struct) = self - .metadata_struct - .column(metadata_indices::BOUNDING_BOX) - .as_any() - .downcast_ref::() - { - Some(BoundingBox { - min_x: bbox_struct - .column(bounding_box_indices::MIN_X) - .as_any() - .downcast_ref::()? - .value(self.index), - min_y: bbox_struct - .column(bounding_box_indices::MIN_Y) - .as_any() - .downcast_ref::()? - .value(self.index), - max_x: bbox_struct - .column(bounding_box_indices::MAX_X) - .as_any() - .downcast_ref::()? - .value(self.index), - max_y: bbox_struct - .column(bounding_box_indices::MAX_Y) - .as_any() - .downcast_ref::()? 
- .value(self.index), - }) - } else { - None - } - } + } @@ -835,6 +810,7 @@ impl ExactSizeIterator for BandIterator<'_> {} pub struct RasterRefImpl<'a> { metadata: MetadataRefImpl<'a>, crs: &'a StringViewArray, + bbox: &'a StructArray, bands: BandsRefImpl<'a>, } @@ -853,6 +829,12 @@ impl<'a> RasterRefImpl<'a> { .downcast_ref::() .unwrap(); + let bbox = raster_struct + .column(raster_indices::BBOX) + .as_any() + .downcast_ref::() + .unwrap(); + let bands_list = raster_struct .column(raster_indices::BANDS) .as_any() @@ -869,7 +851,41 @@ impl<'a> RasterRefImpl<'a> { raster_index, }; - Self { metadata, crs, bands } + Self { metadata, crs, bbox, bands } + } + + /// Access the bounding box for this raster + pub fn bounding_box(&self, raster_index: usize) -> Option { + if self.bbox.is_null(raster_index) { + None + } else { + Some(BoundingBox { + min_x: self.bbox + .column(bounding_box_indices::MIN_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + min_y: self.bbox + .column(bounding_box_indices::MIN_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + max_x: self.bbox + .column(bounding_box_indices::MAX_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + max_y: self.bbox + .column(bounding_box_indices::MAX_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + }) + } } } @@ -1003,7 +1019,7 @@ mod column { pub const SCALE_Y: &str = "scale_y"; pub const SKEW_X: &str = "skew_x"; pub const SKEW_Y: &str = "skew_y"; - pub const BOUNDING_BOX: &str = "bounding_box"; + pub const BBOX: &str = "bbox"; pub const CRS: &str = "crs"; // Bounding box fields @@ -1029,7 +1045,6 @@ mod metadata_indices { pub const SCALE_Y: usize = 5; pub const SKEW_X: usize = 6; pub const SKEW_Y: usize = 7; - pub const BOUNDING_BOX: usize = 8; } mod bounding_box_indices { @@ -1053,7 +1068,8 @@ mod band_indices { mod raster_indices { pub const METADATA: usize = 0; pub const CRS: usize = 1; - pub const BANDS: usize = 2; + pub 
const BBOX: usize = 2; + pub const BANDS: usize = 3; } #[cfg(test)] @@ -1083,7 +1099,7 @@ mod iterator_tests { }; let epsg4326 = "EPSG:4326"; - builder.start_raster_with_crs(&metadata, Some(&epsg4326)).unwrap(); + builder.start_raster(&metadata, Some(&epsg4326), metadata.bounding_box.as_ref()).unwrap(); let band_metadata = BandMetadata { nodata_value: Some(vec![255u8]), @@ -1114,7 +1130,7 @@ mod iterator_tests { assert_eq!(metadata.scale_x(), 1.0); assert_eq!(metadata.scale_y(), -1.0); - let bbox = metadata.bounding_box().unwrap(); + let bbox = raster.bounding_box(0).unwrap(); assert_eq!(bbox.min_x, 0.0); assert_eq!(bbox.max_x, 10.0); @@ -1154,7 +1170,7 @@ mod iterator_tests { bounding_box: None, }; - builder.start_raster(&metadata).unwrap(); + builder.start_raster(&metadata, None, None).unwrap(); // Add three bands using the correct API for band_idx in 0..3 { @@ -1222,7 +1238,7 @@ mod iterator_tests { }), }; - source_builder.start_raster(&original_metadata).unwrap(); + source_builder.start_raster(&original_metadata, None, original_metadata.bounding_box.as_ref()).unwrap(); let band_metadata = BandMetadata { nodata_value: Some(vec![255u8]), @@ -1244,7 +1260,7 @@ mod iterator_tests { // Use metadata directly from the iterator (zero-copy!) 
target_builder - .start_raster(source_raster.metadata()) + .start_raster(source_raster.metadata(), source_raster.crs(), source_raster.bounding_box(0).as_ref()) .unwrap(); // Add new band data while preserving original metadata @@ -1278,7 +1294,7 @@ mod iterator_tests { assert_eq!(target_metadata.scale_x(), 0.1); assert_eq!(target_metadata.scale_y(), -0.1); - let target_bbox = target_metadata.bounding_box().unwrap(); + let target_bbox = target_raster.bounding_box(0).unwrap(); assert_eq!(target_bbox.min_x, -122.0); assert_eq!(target_bbox.max_x, -120.0); @@ -1308,7 +1324,7 @@ mod iterator_tests { bounding_box: None, }; - builder.start_raster(&metadata).unwrap(); + builder.start_raster(&metadata, None, None).unwrap(); let band_metadata = BandMetadata { nodata_value: Some(vec![255u8]), @@ -1348,7 +1364,7 @@ mod iterator_tests { fn test_hardcoded_indices_match_schema() { // Test raster-level indices let raster_fields = RasterSchema::fields(); - assert_eq!(raster_fields.len(), 3, "Expected exactly 2 raster fields"); + assert_eq!(raster_fields.len(), 4, "Expected exactly 4 raster fields"); assert_eq!( raster_fields[raster_indices::METADATA].name(), column::METADATA, @@ -1357,7 +1373,12 @@ mod iterator_tests { assert_eq!( raster_fields[raster_indices::CRS].name(), column::CRS, - "CRS bands index mismatch" + "Raster CRS index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::BBOX].name(), + column::BBOX, + "Raster BBOX index mismatch" ); assert_eq!( raster_fields[raster_indices::BANDS].name(), @@ -1370,8 +1391,8 @@ mod iterator_tests { if let DataType::Struct(metadata_fields) = metadata_type { assert_eq!( metadata_fields.len(), - 9, - "Expected exactly 9 metadata fields" + 8, + "Expected exactly 8 metadata fields" ); assert_eq!( metadata_fields[metadata_indices::WIDTH].name(), @@ -1413,11 +1434,6 @@ mod iterator_tests { column::SKEW_Y, "Metadata skew_y index mismatch" ); - assert_eq!( - metadata_fields[metadata_indices::BOUNDING_BOX].name(), - 
column::BOUNDING_BOX, - "Metadata bounding_box index mismatch" - ); } else { panic!("Expected Struct type for metadata"); From b0a933d4769c475c4f2fdc5e114b0437cef08816 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Mon, 20 Oct 2025 10:15:57 -0700 Subject: [PATCH 09/18] move raster datatype to sedona-schema --- rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/rs_width.rs | 143 +++ rust/sedona-functions/src/sd_format.rs | 4 + rust/sedona-raster/src/lib.rs | 2 +- rust/sedona-raster/src/raster.rs | 1519 ---------------------- rust/sedona-schema/Cargo.toml | 1 + rust/sedona-schema/src/datatypes.rs | 1630 +++++++++++++++++++++++- rust/sedona-schema/src/matchers.rs | 12 +- 8 files changed, 1789 insertions(+), 1523 deletions(-) create mode 100644 rust/sedona-functions/src/rs_width.rs delete mode 100644 rust/sedona-raster/src/raster.rs diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 6c144f7c..9fffbd40 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -21,6 +21,7 @@ mod overlay; mod predicates; mod referencing; pub mod register; +mod rs_width; mod sd_format; pub mod st_analyze_aggr; mod st_area; diff --git a/rust/sedona-functions/src/rs_width.rs b/rust/sedona-functions/src/rs_width.rs new file mode 100644 index 00000000..59dce8d8 --- /dev/null +++ b/rust/sedona-functions/src/rs_width.rs @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::{sync::Arc, vec}; + +use crate::executor::WkbExecutor; +use arrow_array::builder::UInt64Builder; +use arrow_schema::DataType; +use datafusion_common::error::{DataFusionError, Result}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_common::sedona_internal_err; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// RS_Width() scalar UDF implementation +/// +/// Extract the width of the raster +pub fn rs_width_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_width", + vec![Arc::new(RS_Width {})], + Volatility::Immutable, + Some(rs_width_doc()), + ) +} + +fn rs_width_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + format!("Return the width component of a raster",), + format!("RS_Width(raster: Raster)"), + ) + .with_argument("raster", "Raster: Input raster") + .with_sql_example(format!("SELECT RS_Width(raster)",)) + .build() +} + +#[derive(Debug)] +struct RS_Width {} + +impl SedonaScalarKernel for RS_Width { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(DataType::UInt64), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let rasters = args[0].to_array(); + let mut builder = UInt64Builder::with_capacity(args[0].len()); + + for raster in rasters.iter() { + match 
raster { + Some(raster) => { + builder.append_value(raster.metadata().width()); + } + None => builder.append_null(), + } + } + + Ok(ColumnarValue::from(builder.finish())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{create_array, ArrayRef}; + use datafusion_common::ScalarValue; + use datafusion_expr::ScalarUDF; + use rstest::rstest; + use sedona_testing::{create::create_array, testers::ScalarUdfTester}; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = rs_width_udf().into(); + assert_eq!(udf.name(), "rs_width"); + assert!(udf.documentation().is_some()); + } + + #[rstest] + fn udf_invoke() { + let raster_array = create_array( + &[gen_raster(10, 12), None, gen_raster(30, 15)], + &WKB_GEOMETRY, + ); + let expected: ArrayRef = create_array!(UInt64, [Some(10), None, Some(30),]); + assert_eq!( + &x_tester.invoke_array(wkb_array.clone()).unwrap(), + &expected_x + ); + } + + /// Generate a raster with the specified width, height, and value. + /// This should be improved and moved into sedona-testing + fn gen_raster(width: usize, height: usize) -> StructArray { + let mut builder = Raster::builder(); + + let metadata = RasterMetadata { + width, + height, + ..Default::default() + }; + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + let size = width * height * 8; + let test_data = vec![value as u8; size]; + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + builder.finish_raster(); + + builder.finish().unwrap() + } +} diff --git a/rust/sedona-functions/src/sd_format.rs b/rust/sedona-functions/src/sd_format.rs index b1bb33ad..e060c7f7 100644 --- a/rust/sedona-functions/src/sd_format.rs +++ b/rust/sedona-functions/src/sd_format.rs @@ -127,6 +127,7 @@ impl SedonaScalarKernel for SDFormatDefault { fn 
sedona_type_to_formatted_type(sedona_type: &SedonaType) -> Result { match sedona_type { SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => Ok(SedonaType::Arrow(DataType::Utf8)), + SedonaType::Raster(_) => Ok(SedonaType::Arrow(DataType::Utf8)), SedonaType::Arrow(arrow_type) => { // dive into the arrow type and translate geospatial types into Utf8 match arrow_type { @@ -166,6 +167,9 @@ fn columnar_value_to_formatted_value( SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => { geospatial_value_to_formatted_value(sedona_type, columnar_value, maybe_width_hint) } + SedonaType::Raster(_) => { + geospatial_value_to_formatted_value(sedona_type, columnar_value, maybe_width_hint) + } SedonaType::Arrow(arrow_type) => match arrow_type { DataType::Struct(fields) => match columnar_value { ColumnarValue::Array(array) => { diff --git a/rust/sedona-raster/src/lib.rs b/rust/sedona-raster/src/lib.rs index 72d860b8..8b137891 100644 --- a/rust/sedona-raster/src/lib.rs +++ b/rust/sedona-raster/src/lib.rs @@ -1 +1 @@ -pub mod raster; + diff --git a/rust/sedona-raster/src/raster.rs b/rust/sedona-raster/src/raster.rs deleted file mode 100644 index 0ae044a1..00000000 --- a/rust/sedona-raster/src/raster.rs +++ /dev/null @@ -1,1519 +0,0 @@ -use arrow::array::{ - Array, ArrayRef, BinaryArray, BinaryBuilder, ListArray, ListBuilder, StringViewArray, StringViewBuilder, - StructArray, StructBuilder, UInt32Array, UInt64Array -}; -use arrow::buffer::MutableBuffer; -use arrow::datatypes::{DataType, Field, FieldRef, Fields}; -use arrow::error::ArrowError; -use std::sync::Arc; - -/// Creates a schema for storing raster data in Apache Arrow format. -/// Utilizing nested structs and lists to represent raster metadata and bands. 
-pub struct RasterSchema; - -impl RasterSchema { - // Raster schema: - pub fn fields() -> Fields { - Fields::from(vec![ - Field::new(column::METADATA, Self::metadata_type(), false), - Field::new(column::CRS, Self::crs_type(), true), - Field::new(column::BBOX, Self::bounding_box_type(), true), - Field::new(column::BANDS, Self::bands_type(), true), - ]) - } - - /// Raster metadata schema - pub fn metadata_type() -> DataType { - DataType::Struct(Fields::from(vec![ - // Raster dimensions - Field::new(column::WIDTH, DataType::UInt64, false), - Field::new(column::HEIGHT, DataType::UInt64, false), - // Geospatial transformation parameters - Field::new(column::UPPERLEFT_X, DataType::Float64, false), - Field::new(column::UPPERLEFT_Y, DataType::Float64, false), - Field::new(column::SCALE_X, DataType::Float64, false), - Field::new(column::SCALE_Y, DataType::Float64, false), - Field::new(column::SKEW_X, DataType::Float64, false), - Field::new(column::SKEW_Y, DataType::Float64, false), - ])) - } - - /// Bounding box schema - pub fn bounding_box_type() -> DataType { - DataType::Struct(Fields::from(vec![ - Field::new(column::MIN_X, DataType::Float64, false), - Field::new(column::MIN_Y, DataType::Float64, false), - Field::new(column::MAX_X, DataType::Float64, false), - Field::new(column::MAX_Y, DataType::Float64, false), - ])) - } - - /// Bands list schema - pub fn bands_type() -> DataType { - DataType::List(FieldRef::new(Field::new( - column::BAND, - Self::band_type(), - false, - ))) - } - - /// Individual band schema - pub fn band_type() -> DataType { - DataType::Struct(Fields::from(vec![ - Field::new(column::METADATA, Self::band_metadata_type(), false), - Field::new(column::DATA, Self::band_data_type(), false), - ])) - } - - /// Band metadata schema - pub fn band_metadata_type() -> DataType { - DataType::Struct(Fields::from(vec![ - Field::new(column::NODATAVALUE, DataType::Binary, true), // Allow null nodata values - Field::new(column::STORAGE_TYPE, DataType::UInt32, false), - 
Field::new(column::DATATYPE, DataType::UInt32, false), - ])) - } - - /// Band data schema (single binary blob) - pub fn band_data_type() -> DataType { - DataType::Binary - } - - /// CRS schema to store json representation - pub fn crs_type() -> DataType { - DataType::Utf8View - } -} - -#[repr(u16)] -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum BandDataType { - UInt8 = 0, - UInt16 = 1, - Int16 = 2, - UInt32 = 3, - Int32 = 4, - Float32 = 5, - Float64 = 6, - // Consider support for complex types for scientific data -} - -/// Storage strategy for raster band data within Apache Arrow arrays. -/// -/// This enum defines how raster data is physically stored and accessed: -/// -/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. -/// - Self-contained, no external dependencies, fast access for small-medium rasters -/// - Increases Arrow array size, memory usage grows and copy times increase with raster size -/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) -/// -/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. 
-/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading -/// - Requires external storage management, potential for broken references -/// - Best for: Large satellite imagery, time series data, cloud-native workflows -/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints -#[repr(u16)] -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum StorageType { - InDb = 0, - OutDbRef = 1, -} - - -/// Builder for constructing raster arrays with zero-copy band data writing -pub struct RasterBuilder { - metadata_builder: StructBuilder, - crs_builder: StringViewBuilder, - bbox_builder: StructBuilder, - bands_builder: ListBuilder, -} - -impl RasterBuilder { - /// Create a new raster builder with the specified capacity - pub fn new(capacity: usize) -> Self { - let metadata_builder = StructBuilder::from_fields( - match RasterSchema::metadata_type() { - DataType::Struct(fields) => fields, - _ => panic!("Expected struct type for metadata"), - }, - capacity, - ); - - let band_struct_builder = StructBuilder::from_fields( - match RasterSchema::band_type() { - DataType::Struct(fields) => fields, - _ => panic!("Expected struct type for band"), - }, - 0, // Initial capacity for bands - ); - - let bands_builder = ListBuilder::new(band_struct_builder).with_field(Field::new( - column::BAND, - RasterSchema::band_type(), - false, - )); - - let bbox_builder = StructBuilder::from_fields( - match RasterSchema::bounding_box_type() { - DataType::Struct(fields) => fields, - _ => panic!("Expected struct type for bounding box"), - }, - capacity, - ); - - Self { - metadata_builder, - crs_builder: StringViewBuilder::new(), - bbox_builder, - bands_builder, - } - } - - /// Start a new raster with metadata, optional CRS, and optional bounding box - /// - /// This is the unified method for starting a raster with all optional parameters. 
- /// - /// # Arguments - /// * `metadata` - Raster metadata (dimensions, geotransform parameters) - /// * `crs` - Optional coordinate reference system as string - /// * `bbox` - Optional bounding box coordinates - /// - /// # Examples - /// ```ignore - /// // From iterator - copy all fields from existing raster - /// builder.start_raster(raster.metadata(), raster.crs(), raster.bounding_box(0).as_ref())?; - /// - /// // From RasterMetadata struct with all fields - /// builder.start_raster(&metadata, Some("EPSG:4326"), metadata.bounding_box.as_ref())?; - /// - /// // Minimal - just metadata - /// builder.start_raster(&metadata, None, None)?; - /// ``` - pub fn start_raster( - &mut self, - metadata: &dyn MetadataRef, - crs: Option<&str>, - bbox: Option<&BoundingBox> - ) -> Result<(), ArrowError> { - self.append_metadata_from_ref(metadata)?; - self.set_crs(crs)?; - self.append_bounding_box(bbox)?; - Ok(()) - } - - /// Get direct access to the BinaryBuilder for writing the current band's data - pub fn band_data_writer(&mut self) -> &mut BinaryBuilder { - let band_builder = self.bands_builder.values(); - band_builder.field_builder::(1).unwrap() - } - - /// Create a MutableBuffer that can be written to directly - pub fn create_band_buffer( - &mut self, - capacity: usize, - ) -> (MutableBuffer, impl FnOnce(MutableBuffer) + '_) { - let mut buffer = MutableBuffer::with_capacity(capacity); - - // Pre-allocate the buffer to the exact size - buffer.resize(capacity, 0); - - let commit = move |buffer: MutableBuffer| { - // Convert MutableBuffer to &[u8] and append to BinaryBuilder - let data = buffer.as_slice(); - self.band_data_writer().append_value(data); - }; - - (buffer, commit) - } - - /// Alternative: Get a mutable slice from a MutableBuffer for GDAL - /// This provides the most direct access for zero-copy operations - /// TODO: have this 3 different way.... pick one!! 
- pub fn get_band_buffer_slice(&mut self, size: usize) -> (MutableBuffer, &mut [u8]) { - let mut buffer = MutableBuffer::with_capacity(size); - buffer.resize(size, 0); - - // Get mutable slice that GDAL can write to - let slice = unsafe { - // This is safe because we just allocated the buffer with the exact size - std::slice::from_raw_parts_mut(buffer.as_mut_ptr(), size) - }; - - (buffer, slice) - } - - /// Commit a MutableBuffer to the band data - pub fn commit_band_buffer(&mut self, buffer: MutableBuffer) { - let data = buffer.as_slice(); - self.band_data_writer().append_value(data); - } - - /// Finish writing the current band with its metadata - /// TODO: The band_metadata is in the finish in the band call, but in the - /// start in the raster call. Make it consistent. - pub fn finish_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> { - let band_builder = self.bands_builder.values(); - - let metadata_builder = band_builder.field_builder::(0).unwrap(); - - if let Some(nodata) = band_metadata.nodata_value { - metadata_builder - .field_builder::(band_metadata_indices::NODATAVALUE) - .unwrap() - .append_value(&nodata); - } else { - metadata_builder - .field_builder::(band_metadata_indices::NODATAVALUE) - .unwrap() - .append_null(); - } - - metadata_builder - .field_builder::(band_metadata_indices::STORAGE_TYPE) - .unwrap() - .append_value(band_metadata.storage_type as u32); - - metadata_builder - .field_builder::(band_metadata_indices::DATATYPE) - .unwrap() - .append_value(band_metadata.datatype as u32); - - metadata_builder.append(true); - - // Finish the band - band_builder.append(true); - Ok(()) - } - - /// Finish all bands for the current raster - pub fn finish_raster(&mut self) -> Result<(), ArrowError> { - self.bands_builder.append(true); - Ok(()) - } - - /// Append raster metadata from a MetadataRef trait object - fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { - // Width - 
self.metadata_builder - .field_builder::(metadata_indices::WIDTH) - .unwrap() - .append_value(metadata.width()); - - // Height - self.metadata_builder - .field_builder::(metadata_indices::HEIGHT) - .unwrap() - .append_value(metadata.height()); - - // Geotransform parameters - self.metadata_builder - .field_builder::(metadata_indices::UPPERLEFT_X) - .unwrap() - .append_value(metadata.upper_left_x()); - - self.metadata_builder - .field_builder::(metadata_indices::UPPERLEFT_Y) - .unwrap() - .append_value(metadata.upper_left_y()); - - self.metadata_builder - .field_builder::(metadata_indices::SCALE_X) - .unwrap() - .append_value(metadata.scale_x()); - - self.metadata_builder - .field_builder::(metadata_indices::SCALE_Y) - .unwrap() - .append_value(metadata.scale_y()); - - self.metadata_builder - .field_builder::(metadata_indices::SKEW_X) - .unwrap() - .append_value(metadata.skew_x()); - - self.metadata_builder - .field_builder::(metadata_indices::SKEW_Y) - .unwrap() - .append_value(metadata.skew_y()); - - self.metadata_builder.append(true); - - Ok(()) - } - - /// Set the CRS for the current raster - pub fn set_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { - match crs { - Some(crs_data) => self.crs_builder.append_value(crs_data), - None => self.crs_builder.append_null(), - } - Ok(()) - } - - /// Append a bounding box to the current raster - pub fn append_bounding_box(&mut self, bbox: Option<&BoundingBox>) -> Result<(), ArrowError> { - if let Some(bbox) = bbox { - self.bbox_builder - .field_builder::(bounding_box_indices::MIN_X) - .unwrap() - .append_value(bbox.min_x); - - self.bbox_builder - .field_builder::(bounding_box_indices::MIN_Y) - .unwrap() - .append_value(bbox.min_y); - - self.bbox_builder - .field_builder::(bounding_box_indices::MAX_X) - .unwrap() - .append_value(bbox.max_x); - - self.bbox_builder - .field_builder::(bounding_box_indices::MAX_Y) - .unwrap() - .append_value(bbox.max_y); - - self.bbox_builder.append(true); - } else { - // Append 
null bounding box - need to fill in null values for all fields - self.bbox_builder - .field_builder::(bounding_box_indices::MIN_X) - .unwrap() - .append_null(); - - self.bbox_builder - .field_builder::(bounding_box_indices::MIN_Y) - .unwrap() - .append_null(); - - self.bbox_builder - .field_builder::(bounding_box_indices::MAX_X) - .unwrap() - .append_null(); - - self.bbox_builder - .field_builder::(bounding_box_indices::MAX_Y) - .unwrap() - .append_null(); - - self.bbox_builder.append(false); - } - Ok(()) - } - - /// Append a null raster - pub fn append_null(&mut self) -> Result<(), ArrowError> { - self.metadata_builder.append(false); - self.crs_builder.append_null(); - self.bbox_builder.append(false); - self.bands_builder.append(false); - Ok(()) - } - - /// Finish building and return the constructed StructArray - pub fn finish(mut self) -> Result { - let metadata_array = self.metadata_builder.finish(); - let crs_array = self.crs_builder.finish(); - let bbox_array = self.bbox_builder.finish(); - let bands_array = self.bands_builder.finish(); - - let fields = RasterSchema::fields(); - let arrays: Vec = vec![ - Arc::new(metadata_array), - Arc::new(crs_array), - Arc::new(bbox_array), - Arc::new(bands_array) - ]; - - Ok(StructArray::new(fields, arrays, None)) - } -} - -/// Convenience wrapper for the zero-copy band writing approach -impl RasterBuilder { - /// High-level method that allows for zero-copy with a callback approach - pub fn append_raster_with_callback( - &mut self, - metadata: RasterMetadata, - band_count: usize, - mut write_bands: F, - ) -> Result<(), ArrowError> - where - F: FnMut(usize, &mut BinaryBuilder) -> Result, - { - self.start_raster(&metadata, None, metadata.bounding_box.as_ref())?; - - for band_index in 0..band_count { - let band_metadata = { - let binary_builder = self.band_data_writer(); - write_bands(band_index, binary_builder)? 
- }; - self.finish_band(band_metadata)?; - } - - self.finish_raster()?; - Ok(()) - } -} - -/// Iterator and accessor traits for reading raster data from Arrow arrays. -/// -/// These traits provide a zero-copy interface for accessing raster metadata and band data -/// from the Arrow-based storage format. The implementation handles both InDb and OutDbRef -/// storage types seamlessly. - -/// Trait for accessing raster metadata (dimensions, geotransform, bounding box, etc.) -pub trait MetadataRef { - /// Width of the raster in pixels (using u64 to match schema) - fn width(&self) -> u64; - /// Height of the raster in pixels (using u64 to match schema) - fn height(&self) -> u64; - /// X coordinate of the upper-left corner - fn upper_left_x(&self) -> f64; - /// Y coordinate of the upper-left corner - fn upper_left_y(&self) -> f64; - /// X-direction pixel size (scale) - fn scale_x(&self) -> f64; - /// Y-direction pixel size (scale) - fn scale_y(&self) -> f64; - /// X-direction skew/rotation - fn skew_x(&self) -> f64; - /// Y-direction skew/rotation - fn skew_y(&self) -> f64; -} - -/// Implement MetadataRef for RasterMetadata to allow direct use with builder -impl MetadataRef for RasterMetadata { - fn width(&self) -> u64 { - self.width - } - fn height(&self) -> u64 { - self.height - } - fn upper_left_x(&self) -> f64 { - self.upperleft_x - } - fn upper_left_y(&self) -> f64 { - self.upperleft_y - } - fn scale_x(&self) -> f64 { - self.scale_x - } - fn scale_y(&self) -> f64 { - self.scale_y - } - fn skew_x(&self) -> f64 { - self.skew_x - } - fn skew_y(&self) -> f64 { - self.skew_y - } -} - -/// Trait for accessing individual band metadata -pub trait BandMetadataRef { - /// No-data value as raw bytes (None if null) - fn nodata_value(&self) -> Option<&[u8]>; - /// Storage type (InDb, OutDbRef, etc) - fn storage_type(&self) -> StorageType; - /// Band data type (UInt8, Float32, etc.) 
- fn data_type(&self) -> BandDataType; -} - -/// Trait for accessing individual band data -pub trait BandRef { - /// Band metadata accessor - fn metadata(&self) -> &dyn BandMetadataRef; - /// Raw band data as bytes (zero-copy access) - fn data(&self) -> &[u8]; -} - -/// Trait for accessing all bands in a raster -pub trait BandsRef { - /// Number of bands in the raster - fn len(&self) -> usize; - /// Check if no bands are present - fn is_empty(&self) -> bool { - self.len() == 0 - } - /// Get a specific band by index (returns None if out of bounds) - fn band(&self, index: usize) -> Option>; - /// Iterator over all bands - fn iter(&self) -> BandIterator<'_>; -} - -/// Trait for accessing complete raster data -pub trait RasterRef { - /// Raster metadata accessor - fn metadata(&self) -> &dyn MetadataRef; - /// CRS accessor - fn crs(&self) -> Option<&str>; - /// Bands accessor - fn bands(&self) -> &dyn BandsRef; -} - -/// Implementation of MetadataRef for Arrow StructArray -struct MetadataRefImpl<'a> { - metadata_struct: &'a StructArray, - index: usize, -} - -impl<'a> MetadataRef for MetadataRefImpl<'a> { - fn width(&self) -> u64 { - self.metadata_struct - .column(metadata_indices::WIDTH) - .as_any() - .downcast_ref::() - .unwrap() - .value(self.index) - } - - fn height(&self) -> u64 { - self.metadata_struct - .column(metadata_indices::HEIGHT) - .as_any() - .downcast_ref::() - .unwrap() - .value(self.index) - } - - fn upper_left_x(&self) -> f64 { - self.metadata_struct - .column(metadata_indices::UPPERLEFT_X) - .as_any() - .downcast_ref::() - .unwrap() - .value(self.index) - } - - fn upper_left_y(&self) -> f64 { - self.metadata_struct - .column(metadata_indices::UPPERLEFT_Y) - .as_any() - .downcast_ref::() - .unwrap() - .value(self.index) - } - - fn scale_x(&self) -> f64 { - self.metadata_struct - .column(metadata_indices::SCALE_X) - .as_any() - .downcast_ref::() - .unwrap() - .value(self.index) - } - - fn scale_y(&self) -> f64 { - self.metadata_struct - 
.column(metadata_indices::SCALE_Y) - .as_any() - .downcast_ref::() - .unwrap() - .value(self.index) - } - - fn skew_x(&self) -> f64 { - self.metadata_struct - .column(metadata_indices::SKEW_X) - .as_any() - .downcast_ref::() - .unwrap() - .value(self.index) - } - - fn skew_y(&self) -> f64 { - self.metadata_struct - .column(metadata_indices::SKEW_Y) - .as_any() - .downcast_ref::() - .unwrap() - .value(self.index) - } - - - -} - -/// Implementation of BandMetadataRef for Arrow StructArray -struct BandMetadataRefImpl<'a> { - metadata_struct: &'a StructArray, - band_index: usize, -} - -impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { - fn nodata_value(&self) -> Option<&[u8]> { - let nodata_array = self - .metadata_struct - .column(band_metadata_indices::NODATAVALUE) - .as_any() - .downcast_ref::() - .expect("Expected BinaryArray for nodata"); - - if nodata_array.is_null(self.band_index) { - None - } else { - Some(nodata_array.value(self.band_index)) - } - } - - fn storage_type(&self) -> StorageType { - let storage_type_array = self - .metadata_struct - .column(band_metadata_indices::STORAGE_TYPE) - .as_any() - .downcast_ref::() - .expect("Expected UInt32Array for storage_type"); - - match storage_type_array.value(self.band_index) { - 0 => StorageType::InDb, - 1 => StorageType::OutDbRef, - _ => panic!( - "Unknown storage type: {}", - storage_type_array.value(self.band_index) - ), - } - } - - fn data_type(&self) -> BandDataType { - let datatype_array = self - .metadata_struct - .column(band_metadata_indices::DATATYPE) - .as_any() - .downcast_ref::() - .expect("Expected UInt32Array for datatype"); - - match datatype_array.value(self.band_index) { - 0 => BandDataType::UInt8, - 1 => BandDataType::UInt16, - 2 => BandDataType::Int16, - 3 => BandDataType::UInt32, - 4 => BandDataType::Int32, - 5 => BandDataType::Float32, - 6 => BandDataType::Float64, - _ => panic!( - "Unknown band data type: {}", - datatype_array.value(self.band_index) - ), - } - } -} - -/// 
Implementation of BandRef for accessing individual band data -struct BandRefImpl<'a> { - band_metadata: BandMetadataRefImpl<'a>, - band_data: &'a [u8], -} - -impl<'a> BandRef for BandRefImpl<'a> { - fn metadata(&self) -> &dyn BandMetadataRef { - &self.band_metadata - } - - fn data(&self) -> &[u8] { - self.band_data - } -} - -/// Implementation of BandsRef for accessing all bands in a raster -struct BandsRefImpl<'a> { - bands_list: &'a ListArray, - raster_index: usize, -} - -impl<'a> BandsRef for BandsRefImpl<'a> { - fn len(&self) -> usize { - let start = self.bands_list.value_offsets()[self.raster_index] as usize; - let end = self.bands_list.value_offsets()[self.raster_index + 1] as usize; - end - start - } - - /// Get a specific band by index - /// IMPORTANT: This function is utilizing zero based band indexing. - /// We may want to consider one-based indexing to match - /// raster standard band conventions. - fn band(&self, index: usize) -> Option> { - if index >= self.len() { - return None; - } - - let start = self.bands_list.value_offsets()[self.raster_index] as usize; - let band_row = start + index; - - let bands_struct = self - .bands_list - .values() - .as_any() - .downcast_ref::()?; - - // Get the metadata substructure from the band struct - let band_metadata_struct = bands_struct - .column(band_indices::METADATA) - .as_any() - .downcast_ref::()?; - - let band_metadata = BandMetadataRefImpl { - metadata_struct: band_metadata_struct, - band_index: band_row, - }; - - // Get band data from the Binary column within the band struct - let band_data_array = bands_struct - .column(band_indices::DATA) - .as_any() - .downcast_ref::()?; - - let band_data = band_data_array.value(band_row); - - Some(Box::new(BandRefImpl { - band_metadata, - band_data, - })) - } - - fn iter(&self) -> BandIterator<'_> { - BandIterator { - bands: self, - current: 0, - } - } -} - -/// Iterator for bands within a raster -pub struct BandIterator<'a> { - bands: &'a dyn BandsRef, - current: 
usize, -} - -impl<'a> Iterator for BandIterator<'a> { - type Item = Box; - - fn next(&mut self) -> Option { - if self.current < self.bands.len() { - let band = self.bands.band(self.current); - self.current += 1; - band - } else { - None - } - } - - fn size_hint(&self) -> (usize, Option) { - let remaining = self.bands.len().saturating_sub(self.current); - (remaining, Some(remaining)) - } -} - -impl ExactSizeIterator for BandIterator<'_> {} - -/// Implementation of RasterRef for complete raster access -pub struct RasterRefImpl<'a> { - metadata: MetadataRefImpl<'a>, - crs: &'a StringViewArray, - bbox: &'a StructArray, - bands: BandsRefImpl<'a>, -} - -impl<'a> RasterRefImpl<'a> { - /// Create a new RasterRefImpl from a struct array and index using hard-coded indices - pub fn new(raster_struct: &'a StructArray, raster_index: usize) -> Self { - let metadata_struct = raster_struct - .column(raster_indices::METADATA) - .as_any() - .downcast_ref::() - .unwrap(); - - let crs = raster_struct - .column(raster_indices::CRS) - .as_any() - .downcast_ref::() - .unwrap(); - - let bbox = raster_struct - .column(raster_indices::BBOX) - .as_any() - .downcast_ref::() - .unwrap(); - - let bands_list = raster_struct - .column(raster_indices::BANDS) - .as_any() - .downcast_ref::() - .unwrap(); - - let metadata = MetadataRefImpl { - metadata_struct, - index: raster_index, - }; - - let bands = BandsRefImpl { - bands_list, - raster_index, - }; - - Self { metadata, crs, bbox, bands } - } - - /// Access the bounding box for this raster - pub fn bounding_box(&self, raster_index: usize) -> Option { - if self.bbox.is_null(raster_index) { - None - } else { - Some(BoundingBox { - min_x: self.bbox - .column(bounding_box_indices::MIN_X) - .as_any() - .downcast_ref::() - .unwrap() - .value(raster_index), - min_y: self.bbox - .column(bounding_box_indices::MIN_Y) - .as_any() - .downcast_ref::() - .unwrap() - .value(raster_index), - max_x: self.bbox - .column(bounding_box_indices::MAX_X) - .as_any() - 
.downcast_ref::() - .unwrap() - .value(raster_index), - max_y: self.bbox - .column(bounding_box_indices::MAX_Y) - .as_any() - .downcast_ref::() - .unwrap() - .value(raster_index), - }) - } - } -} - -impl<'a> RasterRef for RasterRefImpl<'a> { - fn metadata(&self) -> &dyn MetadataRef { - &self.metadata - } - - fn crs(&self) -> Option<&str> { - if self.crs.is_null(self.bands.raster_index) { - None - } else { - Some(&self.crs.value(self.bands.raster_index)) - } - } - - fn bands(&self) -> &dyn BandsRef { - &self.bands - } -} - -/// Iterator over raster structs in an Arrow StructArray -/// -/// This provides efficient, zero-copy access to raster data stored in Arrow format. -/// Each iteration yields a `RasterRefImpl` that provides access to both metadata and band data. -pub struct RasterStructIterator<'a> { - raster_array: &'a StructArray, - current_row: usize, -} - -impl<'a> RasterStructIterator<'a> { - /// Create a new iterator over the raster struct array - pub fn new(raster_array: &'a StructArray) -> Self { - Self { - raster_array, - current_row: 0, - } - } - - /// Get the total number of rasters in the array - pub fn len(&self) -> usize { - self.raster_array.len() - } - - /// Check if the array is empty - pub fn is_empty(&self) -> bool { - self.raster_array.is_empty() - } - - /// Get a specific raster by index without consuming the iterator - pub fn get(&self, index: usize) -> Option> { - if index >= self.raster_array.len() { - return None; - } - - Some(RasterRefImpl::new(self.raster_array, index)) - } -} - -impl<'a> Iterator for RasterStructIterator<'a> { - type Item = RasterRefImpl<'a>; - - fn next(&mut self) -> Option { - if self.current_row < self.raster_array.len() { - let result = self.get(self.current_row)?; - self.current_row += 1; - Some(result) - } else { - None - } - } - - fn size_hint(&self) -> (usize, Option) { - let remaining = self.raster_array.len().saturating_sub(self.current_row); - (remaining, Some(remaining)) - } -} - -impl ExactSizeIterator for 
RasterStructIterator<'_> {} - -/// Convenience constructor function for creating a raster iterator -pub fn raster_iterator(raster_struct: &StructArray) -> RasterStructIterator<'_> { - RasterStructIterator::new(raster_struct) -} - -/// Metadata for a raster -#[derive(Debug, Clone)] -pub struct RasterMetadata { - pub width: u64, - pub height: u64, - pub upperleft_x: f64, - pub upperleft_y: f64, - pub scale_x: f64, - pub scale_y: f64, - pub skew_x: f64, - pub skew_y: f64, - pub bounding_box: Option, -} - -/// Bounding box coordinates -#[derive(Debug, Clone)] -pub struct BoundingBox { - pub min_x: f64, - pub min_y: f64, - pub max_x: f64, - pub max_y: f64, -} - -/// Metadata for a single band -#[derive(Debug, Clone)] -pub struct BandMetadata { - pub nodata_value: Option>, - pub storage_type: StorageType, - pub datatype: BandDataType, -} - -// Private field column name and index constants -// used across schema, builders and iterators -mod column { - pub const METADATA: &str = "metadata"; - pub const BANDS: &str = "bands"; - pub const BAND: &str = "band"; - pub const DATA: &str = "data"; - - // Raster metadata fields - pub const WIDTH: &str = "width"; - pub const HEIGHT: &str = "height"; - pub const UPPERLEFT_X: &str = "upperleft_x"; - pub const UPPERLEFT_Y: &str = "upperleft_y"; - pub const SCALE_X: &str = "scale_x"; - pub const SCALE_Y: &str = "scale_y"; - pub const SKEW_X: &str = "skew_x"; - pub const SKEW_Y: &str = "skew_y"; - pub const BBOX: &str = "bbox"; - pub const CRS: &str = "crs"; - - // Bounding box fields - pub const MIN_X: &str = "min_x"; - pub const MIN_Y: &str = "min_y"; - pub const MAX_X: &str = "max_x"; - pub const MAX_Y: &str = "max_y"; - - // Band metadata fields - pub const NODATAVALUE: &str = "nodata_value"; - pub const STORAGE_TYPE: &str = "storage_type"; - pub const DATATYPE: &str = "data_type"; -} - -/// Hard-coded column indices for maximum performance -/// These must match the exact order defined in RasterSchema::metadata_type() -mod 
metadata_indices { - pub const WIDTH: usize = 0; - pub const HEIGHT: usize = 1; - pub const UPPERLEFT_X: usize = 2; - pub const UPPERLEFT_Y: usize = 3; - pub const SCALE_X: usize = 4; - pub const SCALE_Y: usize = 5; - pub const SKEW_X: usize = 6; - pub const SKEW_Y: usize = 7; -} - -mod bounding_box_indices { - pub const MIN_X: usize = 0; - pub const MIN_Y: usize = 1; - pub const MAX_X: usize = 2; - pub const MAX_Y: usize = 3; -} - -mod band_metadata_indices { - pub const NODATAVALUE: usize = 0; - pub const STORAGE_TYPE: usize = 1; - pub const DATATYPE: usize = 2; -} - -mod band_indices { - pub const METADATA: usize = 0; - pub const DATA: usize = 1; -} - -mod raster_indices { - pub const METADATA: usize = 0; - pub const CRS: usize = 1; - pub const BBOX: usize = 2; - pub const BANDS: usize = 3; -} - -#[cfg(test)] -mod iterator_tests { - use super::*; - - #[test] - fn test_iterator_basic_functionality() { - // Create a simple raster for testing using the correct API - let mut builder = RasterBuilder::new(10); // capacity - - let metadata = RasterMetadata { - width: 10, - height: 10, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: Some(BoundingBox { - min_x: 0.0, - min_y: -10.0, - max_x: 10.0, - max_y: 0.0, - }), - }; - - let epsg4326 = "EPSG:4326"; - builder.start_raster(&metadata, Some(&epsg4326), metadata.bounding_box.as_ref()).unwrap(); - - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }; - - // Add a single band with some test data using the correct API - let test_data = vec![1u8; 100]; // 10x10 raster with value 1 - builder.band_data_writer().append_value(&test_data); - builder.finish_band(band_metadata).unwrap(); - let result = builder.finish_raster(); - assert!(result.is_ok()); - - let raster_array = builder.finish().unwrap(); - - // Test the iterator - let mut iterator = 
raster_iterator(&raster_array); - - assert_eq!(iterator.len(), 1); - assert!(!iterator.is_empty()); - - let raster = iterator.next().unwrap(); - let metadata = raster.metadata(); - - assert_eq!(metadata.width(), 10); - assert_eq!(metadata.height(), 10); - assert_eq!(metadata.scale_x(), 1.0); - assert_eq!(metadata.scale_y(), -1.0); - - let bbox = raster.bounding_box(0).unwrap(); - assert_eq!(bbox.min_x, 0.0); - assert_eq!(bbox.max_x, 10.0); - - let bands = raster.bands(); - assert_eq!(bands.len(), 1); - assert!(!bands.is_empty()); - - let band = bands.band(0).unwrap(); - assert_eq!(band.data().len(), 100); - assert_eq!(band.data()[0], 1u8); - - let band_meta = band.metadata(); - assert_eq!(band_meta.storage_type(), StorageType::InDb); - assert_eq!(band_meta.data_type(), BandDataType::UInt8); - - let crs = raster.crs().unwrap(); - assert_eq!(crs, epsg4326); - - // Test iterator over bands - let band_iter: Vec<_> = bands.iter().collect(); - assert_eq!(band_iter.len(), 1); - } - - #[test] - fn test_multi_band_iterator() { - let mut builder = RasterBuilder::new(10); - - let metadata = RasterMetadata { - width: 5, - height: 5, - upperleft_x: 0.0, - upperleft_y: 0.0, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: None, - }; - - builder.start_raster(&metadata, None, None).unwrap(); - - // Add three bands using the correct API - for band_idx in 0..3 { - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }; - - let test_data = vec![band_idx as u8; 25]; // 5x5 raster - builder.band_data_writer().append_value(&test_data); - builder.finish_band(band_metadata).unwrap(); - } - - let result = builder.finish_raster(); - assert!(result.is_ok()); - - let raster_array = builder.finish().unwrap(); - - let mut iterator = raster_iterator(&raster_array); - let raster = iterator.next().unwrap(); - let bands = raster.bands(); - - assert_eq!(bands.len(), 3); - - // Test 
each band has different data - for i in 0..3 { - let band = bands.band(i).unwrap(); - let expected_value = i as u8; - assert!(band.data().iter().all(|&x| x == expected_value)); - } - - // Test iterator - let band_values: Vec = bands - .iter() - .enumerate() - .map(|(i, band)| { - assert_eq!(band.data()[0], i as u8); - band.data()[0] - }) - .collect(); - - assert_eq!(band_values, vec![0, 1, 2]); - } - - #[test] - fn test_copy_metadata_from_iterator() { - // Create an original raster - let mut source_builder = RasterBuilder::new(10); - - let original_metadata = RasterMetadata { - width: 42, - height: 24, - upperleft_x: -122.0, - upperleft_y: 37.8, - scale_x: 0.1, - scale_y: -0.1, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: Some(BoundingBox { - min_x: -122.0, - min_y: 35.4, - max_x: -120.0, - max_y: 37.8, - }), - }; - - source_builder.start_raster(&original_metadata, None, original_metadata.bounding_box.as_ref()).unwrap(); - - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }; - - let test_data = vec![42u8; 1008]; // 42x24 raster - source_builder.band_data_writer().append_value(&test_data); - source_builder.finish_band(band_metadata).unwrap(); - source_builder.finish_raster().unwrap(); - - let source_array = source_builder.finish().unwrap(); - - // Now create a new raster using metadata from the iterator - this is the key feature! - let mut target_builder = RasterBuilder::new(10); - let iterator = raster_iterator(&source_array); - let source_raster = iterator.get(0).unwrap(); - - // Use metadata directly from the iterator (zero-copy!) 
- target_builder - .start_raster(source_raster.metadata(), source_raster.crs(), source_raster.bounding_box(0).as_ref()) - .unwrap(); - - // Add new band data while preserving original metadata - let new_band_metadata = BandMetadata { - nodata_value: None, - storage_type: StorageType::InDb, - datatype: BandDataType::UInt16, - }; - - let new_data = vec![100u16; 1008]; // Different data, same dimensions - let new_data_bytes: Vec = new_data.iter().flat_map(|&x| x.to_le_bytes()).collect(); - - target_builder - .band_data_writer() - .append_value(&new_data_bytes); - target_builder.finish_band(new_band_metadata).unwrap(); - target_builder.finish_raster().unwrap(); - - let target_array = target_builder.finish().unwrap(); - - // Verify the metadata was copied correctly - let target_iterator = raster_iterator(&target_array); - let target_raster = target_iterator.get(0).unwrap(); - let target_metadata = target_raster.metadata(); - - // All metadata should match the original - assert_eq!(target_metadata.width(), 42); - assert_eq!(target_metadata.height(), 24); - assert_eq!(target_metadata.upper_left_x(), -122.0); - assert_eq!(target_metadata.upper_left_y(), 37.8); - assert_eq!(target_metadata.scale_x(), 0.1); - assert_eq!(target_metadata.scale_y(), -0.1); - - let target_bbox = target_raster.bounding_box(0).unwrap(); - assert_eq!(target_bbox.min_x, -122.0); - assert_eq!(target_bbox.max_x, -120.0); - - // But band data and metadata should be different - let target_band = target_raster.bands().band(0).unwrap(); - let target_band_meta = target_band.metadata(); - assert_eq!(target_band_meta.data_type(), BandDataType::UInt16); - assert!(target_band_meta.nodata_value().is_none()); - assert_eq!(target_band.data().len(), 2016); // 1008 * 2 bytes per u16 - } - - #[test] - fn test_random_access() { - let mut builder = RasterBuilder::new(10); - - // Add multiple rasters - for raster_idx in 0..3 { - let metadata = RasterMetadata { - width: raster_idx as u64 + 1, - height: raster_idx as u64 
+ 1, - upperleft_x: raster_idx as f64, - upperleft_y: raster_idx as f64, - scale_x: 1.0, - scale_y: -1.0, - skew_x: 0.0, - skew_y: 0.0, - bounding_box: None, - }; - - builder.start_raster(&metadata, None, None).unwrap(); - - let band_metadata = BandMetadata { - nodata_value: Some(vec![255u8]), - storage_type: StorageType::InDb, - datatype: BandDataType::UInt8, - }; - - let size = (raster_idx + 1) * (raster_idx + 1); - let test_data = vec![raster_idx as u8; size]; - builder.band_data_writer().append_value(&test_data); - builder.finish_band(band_metadata).unwrap(); - let result = builder.finish_raster(); - assert!(result.is_ok()); - } - - let raster_array = builder.finish().unwrap(); - let iterator = raster_iterator(&raster_array); - - assert_eq!(iterator.len(), 3); - - // Test random access - let raster_2 = iterator.get(2).unwrap(); - assert_eq!(raster_2.metadata().width(), 3); - assert_eq!(raster_2.metadata().height(), 3); - assert_eq!(raster_2.metadata().upper_left_x(), 2.0); - - let band = raster_2.bands().band(0).unwrap(); - assert_eq!(band.data().len(), 9); - assert!(band.data().iter().all(|&x| x == 2u8)); - - // Test out of bounds - assert!(iterator.get(10).is_none()); - } - - /// Comprehensive test to verify all hard-coded indices match the actual schema - #[test] - fn test_hardcoded_indices_match_schema() { - // Test raster-level indices - let raster_fields = RasterSchema::fields(); - assert_eq!(raster_fields.len(), 4, "Expected exactly 4 raster fields"); - assert_eq!( - raster_fields[raster_indices::METADATA].name(), - column::METADATA, - "Raster metadata index mismatch" - ); - assert_eq!( - raster_fields[raster_indices::CRS].name(), - column::CRS, - "Raster CRS index mismatch" - ); - assert_eq!( - raster_fields[raster_indices::BBOX].name(), - column::BBOX, - "Raster BBOX index mismatch" - ); - assert_eq!( - raster_fields[raster_indices::BANDS].name(), - column::BANDS, - "Raster bands index mismatch" - ); - - // Test metadata indices - let metadata_type = 
RasterSchema::metadata_type(); - if let DataType::Struct(metadata_fields) = metadata_type { - assert_eq!( - metadata_fields.len(), - 8, - "Expected exactly 8 metadata fields" - ); - assert_eq!( - metadata_fields[metadata_indices::WIDTH].name(), - column::WIDTH, - "Metadata width index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::HEIGHT].name(), - column::HEIGHT, - "Metadata height index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::UPPERLEFT_X].name(), - column::UPPERLEFT_X, - "Metadata upperleft_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::UPPERLEFT_Y].name(), - column::UPPERLEFT_Y, - "Metadata upperleft_y index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SCALE_X].name(), - column::SCALE_X, - "Metadata scale_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SCALE_Y].name(), - column::SCALE_Y, - "Metadata scale_y index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SKEW_X].name(), - column::SKEW_X, - "Metadata skew_x index mismatch" - ); - assert_eq!( - metadata_fields[metadata_indices::SKEW_Y].name(), - column::SKEW_Y, - "Metadata skew_y index mismatch" - ); - - } else { - panic!("Expected Struct type for metadata"); - } - - // Test bounding box indices - let bbox_type = RasterSchema::bounding_box_type(); - if let DataType::Struct(bbox_fields) = bbox_type { - assert_eq!( - bbox_fields.len(), - 4, - "Expected exactly 4 bounding box fields" - ); - assert_eq!( - bbox_fields[bounding_box_indices::MIN_X].name(), - column::MIN_X, - "Bounding box min_x index mismatch" - ); - assert_eq!( - bbox_fields[bounding_box_indices::MIN_Y].name(), - column::MIN_Y, - "Bounding box min_y index mismatch" - ); - assert_eq!( - bbox_fields[bounding_box_indices::MAX_X].name(), - column::MAX_X, - "Bounding box max_x index mismatch" - ); - assert_eq!( - bbox_fields[bounding_box_indices::MAX_Y].name(), - column::MAX_Y, - "Bounding box max_y index mismatch" - ); - } 
else { - panic!("Expected Struct type for bounding box"); - } - - // Test band metadata indices - let band_metadata_type = RasterSchema::band_metadata_type(); - if let DataType::Struct(band_metadata_fields) = band_metadata_type { - assert_eq!( - band_metadata_fields.len(), - 3, - "Expected exactly 3 band metadata fields" - ); - assert_eq!( - band_metadata_fields[band_metadata_indices::NODATAVALUE].name(), - column::NODATAVALUE, - "Band metadata nodatavalue index mismatch" - ); - assert_eq!( - band_metadata_fields[band_metadata_indices::STORAGE_TYPE].name(), - column::STORAGE_TYPE, - "Band metadata storage_type index mismatch" - ); - assert_eq!( - band_metadata_fields[band_metadata_indices::DATATYPE].name(), - column::DATATYPE, - "Band metadata datatype index mismatch" - ); - } else { - panic!("Expected Struct type for band metadata"); - } - - // Test band indices - let band_type = RasterSchema::band_type(); - if let DataType::Struct(band_fields) = band_type { - assert_eq!(band_fields.len(), 2, "Expected exactly 2 band fields"); - assert_eq!( - band_fields[band_indices::METADATA].name(), - column::METADATA, - "Band metadata index mismatch" - ); - assert_eq!( - band_fields[band_indices::DATA].name(), - column::DATA, - "Band data index mismatch" - ); - } else { - panic!("Expected Struct type for band"); - } - } -} diff --git a/rust/sedona-schema/Cargo.toml b/rust/sedona-schema/Cargo.toml index 223989df..11d26f38 100644 --- a/rust/sedona-schema/Cargo.toml +++ b/rust/sedona-schema/Cargo.toml @@ -28,6 +28,7 @@ rust-version.workspace = true result_large_err = "allow" [dependencies] +arrow = { workspace = true } arrow-schema = { workspace = true } arrow-array = { workspace = true } datafusion-common = { workspace = true } diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs index 254ca254..9613ef3e 100644 --- a/rust/sedona-schema/src/datatypes.rs +++ b/rust/sedona-schema/src/datatypes.rs @@ -14,11 +14,18 @@ // KIND, either express or 
implied. See the License for the // specific language governing permissions and limitations // under the License. -use arrow_schema::{DataType, Field}; +use arrow_array::{ + builder::{BinaryBuilder, Float64Builder, UInt32Builder, UInt64Builder, StructBuilder, StringViewBuilder, ListBuilder}, + BinaryArray, ListArray, StringViewArray, StructArray, UInt32Array, + UInt64Array, Float64Array, ArrayRef, Array +}; +use arrow::{buffer::MutableBuffer}; +use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use datafusion_common::error::{DataFusionError, Result}; use sedona_common::sedona_internal_err; use serde_json::Value; use std::fmt::{Debug, Display}; +use std::sync::{Arc, LazyLock}; use crate::crs::{deserialize_crs, Crs}; use crate::extension_type::ExtensionType; @@ -29,6 +36,7 @@ pub enum SedonaType { Arrow(DataType), Wkb(Edges, Crs), WkbView(Edges, Crs), + Raster(RasterSchema), } impl From for SedonaType { @@ -72,7 +80,14 @@ pub const WKB_GEOGRAPHY: SedonaType = SedonaType::Wkb(Edges::Spherical, Crs::Non /// See [`WKB_GEOGRAPHY`] pub const WKB_VIEW_GEOGRAPHY: SedonaType = SedonaType::WkbView(Edges::Spherical, Crs::None); -// Implementation details +/// Sentinel for [`Sedona::RasterSchema`] +/// +/// The CRS is stored within the raster schema. 
+pub const RASTER: SedonaType = SedonaType::Raster(RasterSchema); + +/// Create a static value for the [`SedonaType::Raster`] that's initialized exactly once, +/// on first access +static RASTER_DATATYPE: LazyLock = LazyLock::new(|| DataType::Struct(RasterSchema::fields())); impl SedonaType { /// Given a field as it would appear in an external Schema return the appropriate SedonaType @@ -111,6 +126,7 @@ impl SedonaType { SedonaType::Arrow(data_type) => data_type, SedonaType::Wkb(_, _) => &DataType::Binary, SedonaType::WkbView(_, _) => &DataType::BinaryView, + SedonaType::Raster(_) => &RASTER_DATATYPE, } } @@ -119,6 +135,7 @@ impl SedonaType { match self { SedonaType::Arrow(_) => None, SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => Some("geoarrow.wkb"), + SedonaType::Raster(_) => Some("sedona.raster"), } } @@ -132,6 +149,13 @@ impl SedonaType { Some(serialize_edges_and_crs(edges, crs)), )) } + SedonaType::Raster(_) => { + Some(ExtensionType::new( + self.extension_name().unwrap(), + self.storage_type().clone(), + None, + )) + } _ => None, } } @@ -179,6 +203,7 @@ impl SedonaType { } } }, + SedonaType::Raster(_) => "raster".to_string(), } } @@ -195,6 +220,7 @@ impl SedonaType { (SedonaType::WkbView(edges, _), SedonaType::WkbView(other_edges, _)) => { edges == other_edges } + (SedonaType::Raster(_), SedonaType::Raster(_)) => true, _ => false, } } @@ -208,6 +234,7 @@ impl Display for SedonaType { SedonaType::Arrow(data_type) => Display::fmt(data_type, f), SedonaType::Wkb(edges, crs) => display_geometry("Wkb", edges, crs, f), SedonaType::WkbView(edges, crs) => display_geometry("WkbView", edges, crs, f), + SedonaType::Raster(_) => write!(f, "Raster"), } } } @@ -333,6 +360,1078 @@ fn deserialize_edges(edges: &Value) -> Result { } } +/// Schema for storing raster data in Apache Arrow format. +/// Utilizing nested structs and lists to represent raster metadata and bands. 
+#[derive(Debug, PartialEq, Clone)] +pub struct RasterSchema; +impl RasterSchema { + // Raster schema: + pub fn fields() -> Fields { + Fields::from(vec![ + Field::new(column::METADATA, Self::metadata_type(), false), + Field::new(column::CRS, Self::crs_type(), true), + Field::new(column::BBOX, Self::bounding_box_type(), true), + Field::new(column::BANDS, Self::bands_type(), true), + ]) + } + + /// Raster metadata schema + pub fn metadata_type() -> DataType { + DataType::Struct(Fields::from(vec![ + // Raster dimensions + Field::new(column::WIDTH, DataType::UInt64, false), + Field::new(column::HEIGHT, DataType::UInt64, false), + // Geospatial transformation parameters + Field::new(column::UPPERLEFT_X, DataType::Float64, false), + Field::new(column::UPPERLEFT_Y, DataType::Float64, false), + Field::new(column::SCALE_X, DataType::Float64, false), + Field::new(column::SCALE_Y, DataType::Float64, false), + Field::new(column::SKEW_X, DataType::Float64, false), + Field::new(column::SKEW_Y, DataType::Float64, false), + ])) + } + + /// Bounding box schema + pub fn bounding_box_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::MIN_X, DataType::Float64, false), + Field::new(column::MIN_Y, DataType::Float64, false), + Field::new(column::MAX_X, DataType::Float64, false), + Field::new(column::MAX_Y, DataType::Float64, false), + ])) + } + + /// Bands list schema + pub fn bands_type() -> DataType { + DataType::List(FieldRef::new(Field::new( + column::BAND, + Self::band_type(), + false, + ))) + } + + /// Individual band schema + pub fn band_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::METADATA, Self::band_metadata_type(), false), + Field::new(column::DATA, Self::band_data_type(), false), + ])) + } + + /// Band metadata schema + pub fn band_metadata_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::NODATAVALUE, DataType::Binary, true), // Allow null nodata values + 
Field::new(column::STORAGE_TYPE, DataType::UInt32, false), + Field::new(column::DATATYPE, DataType::UInt32, false), + ])) + } + + /// Band data schema (single binary blob) + pub fn band_data_type() -> DataType { + DataType::Binary // consider switching to BinaryView + } + + /// CRS schema to store json representation + pub fn crs_type() -> DataType { + DataType::Utf8View + } +} + +#[repr(u16)] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum BandDataType { + UInt8 = 0, + UInt16 = 1, + Int16 = 2, + UInt32 = 3, + Int32 = 4, + Float32 = 5, + Float64 = 6, + // Consider support for complex types for scientific data +} + +/// Storage strategy for raster band data within Apache Arrow arrays. +/// +/// This enum defines how raster data is physically stored and accessed: +/// +/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. +/// - Self-contained, no external dependencies, fast access for small-medium rasters +/// - Increases Arrow array size, memory usage grows and copy times increase with raster size +/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) +/// +/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. 
+/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading +/// - Requires external storage management, potential for broken references +/// - Best for: Large satellite imagery, time series data, cloud-native workflows +/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints +#[repr(u16)] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum StorageType { + InDb = 0, + OutDbRef = 1, +} + +/// Builder for constructing raster arrays with zero-copy band data writing +pub struct RasterBuilder { + metadata_builder: StructBuilder, + crs_builder: StringViewBuilder, + bbox_builder: StructBuilder, + bands_builder: ListBuilder, +} + +impl RasterBuilder { + /// Create a new raster builder with the specified capacity + pub fn new(capacity: usize) -> Self { + let metadata_builder = StructBuilder::from_fields( + match RasterSchema::metadata_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for metadata"), + }, + capacity, + ); + + let band_struct_builder = StructBuilder::from_fields( + match RasterSchema::band_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for band"), + }, + 0, // Initial capacity for bands + ); + + let bands_builder = ListBuilder::new(band_struct_builder).with_field(Field::new( + column::BAND, + RasterSchema::band_type(), + false, + )); + + let bbox_builder = StructBuilder::from_fields( + match RasterSchema::bounding_box_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for bounding box"), + }, + capacity, + ); + + Self { + metadata_builder, + crs_builder: StringViewBuilder::new(), + bbox_builder, + bands_builder, + } + } + + /// Start a new raster with metadata, optional CRS, and optional bounding box + /// + /// This is the unified method for starting a raster with all optional parameters. 
+ /// + /// # Arguments + /// * `metadata` - Raster metadata (dimensions, geotransform parameters) + /// * `crs` - Optional coordinate reference system as string + /// * `bbox` - Optional bounding box coordinates + /// + /// # Examples + /// ```ignore + /// // From iterator - copy all fields from existing raster + /// builder.start_raster(raster.metadata(), raster.crs(), raster.bounding_box(0).as_ref())?; + /// + /// // From RasterMetadata struct with all fields + /// builder.start_raster(&metadata, Some("EPSG:4326"), metadata.bounding_box.as_ref())?; + /// + /// // Minimal - just metadata + /// builder.start_raster(&metadata, None, None)?; + /// ``` + pub fn start_raster( + &mut self, + metadata: &dyn MetadataRef, + crs: Option<&str>, + bbox: Option<&BoundingBox>, + ) -> Result<(), ArrowError> { + self.append_metadata_from_ref(metadata)?; + self.set_crs(crs)?; + self.append_bounding_box(bbox)?; + Ok(()) + } + + /// Get direct access to the BinaryBuilder for writing the current band's data + pub fn band_data_writer(&mut self) -> &mut BinaryBuilder { + let band_builder = self.bands_builder.values(); + band_builder.field_builder::(1).unwrap() + } + + /// Create a MutableBuffer that can be written to directly + pub fn create_band_buffer( + &mut self, + capacity: usize, + ) -> (MutableBuffer, impl FnOnce(MutableBuffer) + '_) { + let mut buffer = MutableBuffer::with_capacity(capacity); + + // Pre-allocate the buffer to the exact size + buffer.resize(capacity, 0); + + let commit = move |buffer: MutableBuffer| { + // Convert MutableBuffer to &[u8] and append to BinaryBuilder + let data = buffer.as_slice(); + self.band_data_writer().append_value(data); + }; + + (buffer, commit) + } + + /// Alternative: Get a mutable slice from a MutableBuffer for GDAL + /// This provides the most direct access for zero-copy operations + /// TODO: have this 3 different way.... pick one!! 
+ pub fn get_band_buffer_slice(&mut self, size: usize) -> (MutableBuffer, &mut [u8]) { + let mut buffer = MutableBuffer::with_capacity(size); + buffer.resize(size, 0); + + // Get mutable slice that GDAL can write to + let slice = unsafe { + // This is safe because we just allocated the buffer with the exact size + std::slice::from_raw_parts_mut(buffer.as_mut_ptr(), size) + }; + + (buffer, slice) + } + + /// Commit a MutableBuffer to the band data + pub fn commit_band_buffer(&mut self, buffer: MutableBuffer) { + let data = buffer.as_slice(); + self.band_data_writer().append_value(data); + } + + /// Finish writing the current band with its metadata + /// TODO: The band_metadata is in the finish in the band call, but in the + /// start in the raster call. Make it consistent. + pub fn finish_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> { + let band_builder = self.bands_builder.values(); + + let metadata_builder = band_builder.field_builder::(0).unwrap(); + + if let Some(nodata) = band_metadata.nodata_value { + metadata_builder + .field_builder::(band_metadata_indices::NODATAVALUE) + .unwrap() + .append_value(&nodata); + } else { + metadata_builder + .field_builder::(band_metadata_indices::NODATAVALUE) + .unwrap() + .append_null(); + } + + metadata_builder + .field_builder::(band_metadata_indices::STORAGE_TYPE) + .unwrap() + .append_value(band_metadata.storage_type as u32); + + metadata_builder + .field_builder::(band_metadata_indices::DATATYPE) + .unwrap() + .append_value(band_metadata.datatype as u32); + + metadata_builder.append(true); + + // Finish the band + band_builder.append(true); + Ok(()) + } + + /// Finish all bands for the current raster + pub fn finish_raster(&mut self) -> Result<(), ArrowError> { + self.bands_builder.append(true); + Ok(()) + } + + /// Append raster metadata from a MetadataRef trait object + fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { + // Width + 
self.metadata_builder + .field_builder::(metadata_indices::WIDTH) + .unwrap() + .append_value(metadata.width()); + + // Height + self.metadata_builder + .field_builder::(metadata_indices::HEIGHT) + .unwrap() + .append_value(metadata.height()); + + // Geotransform parameters + self.metadata_builder + .field_builder::(metadata_indices::UPPERLEFT_X) + .unwrap() + .append_value(metadata.upper_left_x()); + + self.metadata_builder + .field_builder::(metadata_indices::UPPERLEFT_Y) + .unwrap() + .append_value(metadata.upper_left_y()); + + self.metadata_builder + .field_builder::(metadata_indices::SCALE_X) + .unwrap() + .append_value(metadata.scale_x()); + + self.metadata_builder + .field_builder::(metadata_indices::SCALE_Y) + .unwrap() + .append_value(metadata.scale_y()); + + self.metadata_builder + .field_builder::(metadata_indices::SKEW_X) + .unwrap() + .append_value(metadata.skew_x()); + + self.metadata_builder + .field_builder::(metadata_indices::SKEW_Y) + .unwrap() + .append_value(metadata.skew_y()); + + self.metadata_builder.append(true); + + Ok(()) + } + + /// Set the CRS for the current raster + pub fn set_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { + match crs { + Some(crs_data) => self.crs_builder.append_value(crs_data), + None => self.crs_builder.append_null(), + } + Ok(()) + } + + /// Append a bounding box to the current raster + pub fn append_bounding_box(&mut self, bbox: Option<&BoundingBox>) -> Result<(), ArrowError> { + if let Some(bbox) = bbox { + self.bbox_builder + .field_builder::(bounding_box_indices::MIN_X) + .unwrap() + .append_value(bbox.min_x); + + self.bbox_builder + .field_builder::(bounding_box_indices::MIN_Y) + .unwrap() + .append_value(bbox.min_y); + + self.bbox_builder + .field_builder::(bounding_box_indices::MAX_X) + .unwrap() + .append_value(bbox.max_x); + + self.bbox_builder + .field_builder::(bounding_box_indices::MAX_Y) + .unwrap() + .append_value(bbox.max_y); + + self.bbox_builder.append(true); + } else { + // Append 
null bounding box - need to fill in null values for all fields + self.bbox_builder + .field_builder::(bounding_box_indices::MIN_X) + .unwrap() + .append_null(); + + self.bbox_builder + .field_builder::(bounding_box_indices::MIN_Y) + .unwrap() + .append_null(); + + self.bbox_builder + .field_builder::(bounding_box_indices::MAX_X) + .unwrap() + .append_null(); + + self.bbox_builder + .field_builder::(bounding_box_indices::MAX_Y) + .unwrap() + .append_null(); + + self.bbox_builder.append(false); + } + Ok(()) + } + + /// Append a null raster + pub fn append_null(&mut self) -> Result<(), ArrowError> { + self.metadata_builder.append(false); + self.crs_builder.append_null(); + self.bbox_builder.append(false); + self.bands_builder.append(false); + Ok(()) + } + + /// Finish building and return the constructed StructArray + pub fn finish(mut self) -> Result { + let metadata_array = self.metadata_builder.finish(); + let crs_array = self.crs_builder.finish(); + let bbox_array = self.bbox_builder.finish(); + let bands_array = self.bands_builder.finish(); + + let fields = RasterSchema::fields(); + let arrays: Vec = vec![ + Arc::new(metadata_array), + Arc::new(crs_array), + Arc::new(bbox_array), + Arc::new(bands_array), + ]; + + Ok(StructArray::new(fields, arrays, None)) + } +} + +/// Convenience wrapper for the zero-copy band writing approach +impl RasterBuilder { + /// High-level method that allows for zero-copy with a callback approach + pub fn append_raster_with_callback( + &mut self, + metadata: RasterMetadata, + band_count: usize, + mut write_bands: F, + ) -> Result<(), ArrowError> + where + F: FnMut(usize, &mut BinaryBuilder) -> Result, + { + self.start_raster(&metadata, None, metadata.bounding_box.as_ref())?; + + for band_index in 0..band_count { + let band_metadata = { + let binary_builder = self.band_data_writer(); + write_bands(band_index, binary_builder)? 
+ }; + self.finish_band(band_metadata)?; + } + + self.finish_raster()?; + Ok(()) + } +} + +/// Iterator and accessor traits for reading raster data from Arrow arrays. +/// +/// These traits provide a zero-copy interface for accessing raster metadata and band data +/// from the Arrow-based storage format. The implementation handles both InDb and OutDbRef +/// storage types seamlessly. + +/// Trait for accessing raster metadata (dimensions, geotransform, bounding box, etc.) +pub trait MetadataRef { + /// Width of the raster in pixels (using u64 to match schema) + fn width(&self) -> u64; + /// Height of the raster in pixels (using u64 to match schema) + fn height(&self) -> u64; + /// X coordinate of the upper-left corner + fn upper_left_x(&self) -> f64; + /// Y coordinate of the upper-left corner + fn upper_left_y(&self) -> f64; + /// X-direction pixel size (scale) + fn scale_x(&self) -> f64; + /// Y-direction pixel size (scale) + fn scale_y(&self) -> f64; + /// X-direction skew/rotation + fn skew_x(&self) -> f64; + /// Y-direction skew/rotation + fn skew_y(&self) -> f64; +} + +/// Implement MetadataRef for RasterMetadata to allow direct use with builder +impl MetadataRef for RasterMetadata { + fn width(&self) -> u64 { + self.width + } + fn height(&self) -> u64 { + self.height + } + fn upper_left_x(&self) -> f64 { + self.upperleft_x + } + fn upper_left_y(&self) -> f64 { + self.upperleft_y + } + fn scale_x(&self) -> f64 { + self.scale_x + } + fn scale_y(&self) -> f64 { + self.scale_y + } + fn skew_x(&self) -> f64 { + self.skew_x + } + fn skew_y(&self) -> f64 { + self.skew_y + } +} + +/// Trait for accessing individual band metadata +pub trait BandMetadataRef { + /// No-data value as raw bytes (None if null) + fn nodata_value(&self) -> Option<&[u8]>; + /// Storage type (InDb, OutDbRef, etc) + fn storage_type(&self) -> StorageType; + /// Band data type (UInt8, Float32, etc.) 
+ fn data_type(&self) -> BandDataType; +} + +/// Trait for accessing individual band data +pub trait BandRef { + /// Band metadata accessor + fn metadata(&self) -> &dyn BandMetadataRef; + /// Raw band data as bytes (zero-copy access) + fn data(&self) -> &[u8]; +} + +/// Trait for accessing all bands in a raster +pub trait BandsRef { + /// Number of bands in the raster + fn len(&self) -> usize; + /// Check if no bands are present + fn is_empty(&self) -> bool { + self.len() == 0 + } + /// Get a specific band by index (returns None if out of bounds) + fn band(&self, index: usize) -> Option>; + /// Iterator over all bands + fn iter(&self) -> BandIterator<'_>; +} + +/// Trait for accessing complete raster data +pub trait RasterRef { + /// Raster metadata accessor + fn metadata(&self) -> &dyn MetadataRef; + /// CRS accessor + fn crs(&self) -> Option<&str>; + /// Bands accessor + fn bands(&self) -> &dyn BandsRef; +} + +/// Implementation of MetadataRef for Arrow StructArray +struct MetadataRefImpl<'a> { + metadata_struct: &'a StructArray, + index: usize, +} + +impl<'a> MetadataRef for MetadataRefImpl<'a> { + fn width(&self) -> u64 { + self.metadata_struct + .column(metadata_indices::WIDTH) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn height(&self) -> u64 { + self.metadata_struct + .column(metadata_indices::HEIGHT) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn upper_left_x(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::UPPERLEFT_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn upper_left_y(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::UPPERLEFT_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn scale_x(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::SCALE_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn scale_y(&self) -> f64 { + self.metadata_struct + 
.column(metadata_indices::SCALE_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn skew_x(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::SKEW_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn skew_y(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::SKEW_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } +} + +/// Implementation of BandMetadataRef for Arrow StructArray +struct BandMetadataRefImpl<'a> { + metadata_struct: &'a StructArray, + band_index: usize, +} + +impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { + fn nodata_value(&self) -> Option<&[u8]> { + let nodata_array = self + .metadata_struct + .column(band_metadata_indices::NODATAVALUE) + .as_any() + .downcast_ref::() + .expect("Expected BinaryArray for nodata"); + + if nodata_array.is_null(self.band_index) { + None + } else { + Some(nodata_array.value(self.band_index)) + } + } + + fn storage_type(&self) -> StorageType { + let storage_type_array = self + .metadata_struct + .column(band_metadata_indices::STORAGE_TYPE) + .as_any() + .downcast_ref::() + .expect("Expected UInt32Array for storage_type"); + + match storage_type_array.value(self.band_index) { + 0 => StorageType::InDb, + 1 => StorageType::OutDbRef, + _ => panic!( + "Unknown storage type: {}", + storage_type_array.value(self.band_index) + ), + } + } + + fn data_type(&self) -> BandDataType { + let datatype_array = self + .metadata_struct + .column(band_metadata_indices::DATATYPE) + .as_any() + .downcast_ref::() + .expect("Expected UInt32Array for datatype"); + + match datatype_array.value(self.band_index) { + 0 => BandDataType::UInt8, + 1 => BandDataType::UInt16, + 2 => BandDataType::Int16, + 3 => BandDataType::UInt32, + 4 => BandDataType::Int32, + 5 => BandDataType::Float32, + 6 => BandDataType::Float64, + _ => panic!( + "Unknown band data type: {}", + datatype_array.value(self.band_index) + ), + } + } +} + +/// Implementation of 
BandRef for accessing individual band data +struct BandRefImpl<'a> { + band_metadata: BandMetadataRefImpl<'a>, + band_data: &'a [u8], +} + +impl<'a> BandRef for BandRefImpl<'a> { + fn metadata(&self) -> &dyn BandMetadataRef { + &self.band_metadata + } + + fn data(&self) -> &[u8] { + self.band_data + } +} + +/// Implementation of BandsRef for accessing all bands in a raster +struct BandsRefImpl<'a> { + bands_list: &'a ListArray, + raster_index: usize, +} + +impl<'a> BandsRef for BandsRefImpl<'a> { + fn len(&self) -> usize { + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let end = self.bands_list.value_offsets()[self.raster_index + 1] as usize; + end - start + } + + /// Get a specific band by index + /// IMPORTANT: This function is utilizing zero based band indexing. + /// We may want to consider one-based indexing to match + /// raster standard band conventions. + fn band(&self, index: usize) -> Option> { + if index >= self.len() { + return None; + } + + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + + let bands_struct = self + .bands_list + .values() + .as_any() + .downcast_ref::()?; + + // Get the metadata substructure from the band struct + let band_metadata_struct = bands_struct + .column(band_indices::METADATA) + .as_any() + .downcast_ref::()?; + + let band_metadata = BandMetadataRefImpl { + metadata_struct: band_metadata_struct, + band_index: band_row, + }; + + // Get band data from the Binary column within the band struct + let band_data_array = bands_struct + .column(band_indices::DATA) + .as_any() + .downcast_ref::()?; + + let band_data = band_data_array.value(band_row); + + Some(Box::new(BandRefImpl { + band_metadata, + band_data, + })) + } + + fn iter(&self) -> BandIterator<'_> { + BandIterator { + bands: self, + current: 0, + } + } +} + +/// Iterator for bands within a raster +pub struct BandIterator<'a> { + bands: &'a dyn BandsRef, + current: usize, +} + +impl<'a> 
Iterator for BandIterator<'a> { + type Item = Box; + + fn next(&mut self) -> Option { + if self.current < self.bands.len() { + let band = self.bands.band(self.current); + self.current += 1; + band + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.bands.len().saturating_sub(self.current); + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for BandIterator<'_> {} + +/// Implementation of RasterRef for complete raster access +pub struct RasterRefImpl<'a> { + metadata: MetadataRefImpl<'a>, + crs: &'a StringViewArray, + bbox: &'a StructArray, + bands: BandsRefImpl<'a>, +} + +impl<'a> RasterRefImpl<'a> { + /// Create a new RasterRefImpl from a struct array and index using hard-coded indices + pub fn new(raster_struct: &'a StructArray, raster_index: usize) -> Self { + let metadata_struct = raster_struct + .column(raster_indices::METADATA) + .as_any() + .downcast_ref::() + .unwrap(); + + let crs = raster_struct + .column(raster_indices::CRS) + .as_any() + .downcast_ref::() + .unwrap(); + + let bbox = raster_struct + .column(raster_indices::BBOX) + .as_any() + .downcast_ref::() + .unwrap(); + + let bands_list = raster_struct + .column(raster_indices::BANDS) + .as_any() + .downcast_ref::() + .unwrap(); + + let metadata = MetadataRefImpl { + metadata_struct, + index: raster_index, + }; + + let bands = BandsRefImpl { + bands_list, + raster_index, + }; + + Self { + metadata, + crs, + bbox, + bands, + } + } + + /// Access the bounding box for this raster + pub fn bounding_box(&self, raster_index: usize) -> Option { + if self.bbox.is_null(raster_index) { + None + } else { + Some(BoundingBox { + min_x: self + .bbox + .column(bounding_box_indices::MIN_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + min_y: self + .bbox + .column(bounding_box_indices::MIN_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + max_x: self + .bbox + .column(bounding_box_indices::MAX_X) + .as_any() + 
.downcast_ref::() + .unwrap() + .value(raster_index), + max_y: self + .bbox + .column(bounding_box_indices::MAX_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + }) + } + } +} + +impl<'a> RasterRef for RasterRefImpl<'a> { + fn metadata(&self) -> &dyn MetadataRef { + &self.metadata + } + + fn crs(&self) -> Option<&str> { + if self.crs.is_null(self.bands.raster_index) { + None + } else { + Some(&self.crs.value(self.bands.raster_index)) + } + } + + fn bands(&self) -> &dyn BandsRef { + &self.bands + } +} + +/// Iterator over raster structs in an Arrow StructArray +/// +/// This provides efficient, zero-copy access to raster data stored in Arrow format. +/// Each iteration yields a `RasterRefImpl` that provides access to both metadata and band data. +pub struct RasterStructIterator<'a> { + raster_array: &'a StructArray, + current_row: usize, +} + +impl<'a> RasterStructIterator<'a> { + /// Create a new iterator over the raster struct array + pub fn new(raster_array: &'a StructArray) -> Self { + Self { + raster_array, + current_row: 0, + } + } + + /// Get the total number of rasters in the array + pub fn len(&self) -> usize { + self.raster_array.len() + } + + /// Check if the array is empty + pub fn is_empty(&self) -> bool { + self.raster_array.is_empty() + } + + /// Get a specific raster by index without consuming the iterator + pub fn get(&self, index: usize) -> Option> { + if index >= self.raster_array.len() { + return None; + } + + Some(RasterRefImpl::new(self.raster_array, index)) + } +} + +impl<'a> Iterator for RasterStructIterator<'a> { + type Item = RasterRefImpl<'a>; + + fn next(&mut self) -> Option { + if self.current_row < self.raster_array.len() { + let result = self.get(self.current_row)?; + self.current_row += 1; + Some(result) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.raster_array.len().saturating_sub(self.current_row); + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator 
for RasterStructIterator<'_> {} + +/// Convenience constructor function for creating a raster iterator +pub fn raster_iterator(raster_struct: &StructArray) -> RasterStructIterator<'_> { + RasterStructIterator::new(raster_struct) +} + +/// Metadata for a raster +#[derive(Debug, Clone)] +pub struct RasterMetadata { + pub width: u64, + pub height: u64, + pub upperleft_x: f64, + pub upperleft_y: f64, + pub scale_x: f64, + pub scale_y: f64, + pub skew_x: f64, + pub skew_y: f64, + pub bounding_box: Option, +} + +/// Bounding box coordinates +#[derive(Debug, Clone)] +pub struct BoundingBox { + pub min_x: f64, + pub min_y: f64, + pub max_x: f64, + pub max_y: f64, +} + +/// Metadata for a single band +#[derive(Debug, Clone)] +pub struct BandMetadata { + pub nodata_value: Option>, + pub storage_type: StorageType, + pub datatype: BandDataType, +} + +// Private field column name and index constants +// used across schema, builders and iterators +mod column { + pub const METADATA: &str = "metadata"; + pub const BANDS: &str = "bands"; + pub const BAND: &str = "band"; + pub const DATA: &str = "data"; + + // Raster metadata fields + pub const WIDTH: &str = "width"; + pub const HEIGHT: &str = "height"; + pub const UPPERLEFT_X: &str = "upperleft_x"; + pub const UPPERLEFT_Y: &str = "upperleft_y"; + pub const SCALE_X: &str = "scale_x"; + pub const SCALE_Y: &str = "scale_y"; + pub const SKEW_X: &str = "skew_x"; + pub const SKEW_Y: &str = "skew_y"; + pub const BBOX: &str = "bbox"; + pub const CRS: &str = "crs"; + + // Bounding box fields + pub const MIN_X: &str = "min_x"; + pub const MIN_Y: &str = "min_y"; + pub const MAX_X: &str = "max_x"; + pub const MAX_Y: &str = "max_y"; + + // Band metadata fields + pub const NODATAVALUE: &str = "nodata_value"; + pub const STORAGE_TYPE: &str = "storage_type"; + pub const DATATYPE: &str = "data_type"; +} + +/// Hard-coded column indices for maximum performance +/// These must match the exact order defined in RasterSchema::metadata_type() +mod 
metadata_indices { + pub const WIDTH: usize = 0; + pub const HEIGHT: usize = 1; + pub const UPPERLEFT_X: usize = 2; + pub const UPPERLEFT_Y: usize = 3; + pub const SCALE_X: usize = 4; + pub const SCALE_Y: usize = 5; + pub const SKEW_X: usize = 6; + pub const SKEW_Y: usize = 7; +} + +mod bounding_box_indices { + pub const MIN_X: usize = 0; + pub const MIN_Y: usize = 1; + pub const MAX_X: usize = 2; + pub const MAX_Y: usize = 3; +} + +mod band_metadata_indices { + pub const NODATAVALUE: usize = 0; + pub const STORAGE_TYPE: usize = 1; + pub const DATATYPE: usize = 2; +} + +mod band_indices { + pub const METADATA: usize = 0; + pub const DATA: usize = 1; +} + +mod raster_indices { + pub const METADATA: usize = 0; + pub const CRS: usize = 1; + pub const BBOX: usize = 2; + pub const BANDS: usize = 3; +} + + + #[cfg(test)] mod tests { use crate::crs::lnglat; @@ -549,4 +1648,531 @@ mod tests { .message() .contains("Unsupported edges value")); } + + #[test] + fn test_iterator_basic_functionality() { + // Create a simple raster for testing using the correct API + let mut builder = RasterBuilder::new(10); // capacity + + let metadata = RasterMetadata { + width: 10, + height: 10, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: Some(BoundingBox { + min_x: 0.0, + min_y: -10.0, + max_x: 10.0, + max_y: 0.0, + }), + }; + + let epsg4326 = "EPSG:4326"; + builder + .start_raster(&metadata, Some(&epsg4326), metadata.bounding_box.as_ref()) + .unwrap(); + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }; + + // Add a single band with some test data using the correct API + let test_data = vec![1u8; 100]; // 10x10 raster with value 1 + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + let result = builder.finish_raster(); + assert!(result.is_ok()); + + let raster_array = 
builder.finish().unwrap(); + + // Test the iterator + let mut iterator = raster_iterator(&raster_array); + + assert_eq!(iterator.len(), 1); + assert!(!iterator.is_empty()); + + let raster = iterator.next().unwrap(); + let metadata = raster.metadata(); + + assert_eq!(metadata.width(), 10); + assert_eq!(metadata.height(), 10); + assert_eq!(metadata.scale_x(), 1.0); + assert_eq!(metadata.scale_y(), -1.0); + + let bbox = raster.bounding_box(0).unwrap(); + assert_eq!(bbox.min_x, 0.0); + assert_eq!(bbox.max_x, 10.0); + + let bands = raster.bands(); + assert_eq!(bands.len(), 1); + assert!(!bands.is_empty()); + + let band = bands.band(0).unwrap(); + assert_eq!(band.data().len(), 100); + assert_eq!(band.data()[0], 1u8); + + let band_meta = band.metadata(); + assert_eq!(band_meta.storage_type(), StorageType::InDb); + assert_eq!(band_meta.data_type(), BandDataType::UInt8); + + let crs = raster.crs().unwrap(); + assert_eq!(crs, epsg4326); + + // Test iterator over bands + let band_iter: Vec<_> = bands.iter().collect(); + assert_eq!(band_iter.len(), 1); + } + + #[test] + fn test_multi_band_iterator() { + let mut builder = RasterBuilder::new(10); + + let metadata = RasterMetadata { + width: 5, + height: 5, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + // Add three bands using the correct API + for band_idx in 0..3 { + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }; + + let test_data = vec![band_idx as u8; 25]; // 5x5 raster + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + } + + let result = builder.finish_raster(); + assert!(result.is_ok()); + + let raster_array = builder.finish().unwrap(); + + let mut iterator = raster_iterator(&raster_array); + let raster = iterator.next().unwrap(); + 
let bands = raster.bands(); + + assert_eq!(bands.len(), 3); + + // Test each band has different data + for i in 0..3 { + let band = bands.band(i).unwrap(); + let expected_value = i as u8; + assert!(band.data().iter().all(|&x| x == expected_value)); + } + + // Test iterator + let band_values: Vec = bands + .iter() + .enumerate() + .map(|(i, band)| { + assert_eq!(band.data()[0], i as u8); + band.data()[0] + }) + .collect(); + + assert_eq!(band_values, vec![0, 1, 2]); + } + + #[test] + fn test_copy_metadata_from_iterator() { + // Create an original raster + let mut source_builder = RasterBuilder::new(10); + + let original_metadata = RasterMetadata { + width: 42, + height: 24, + upperleft_x: -122.0, + upperleft_y: 37.8, + scale_x: 0.1, + scale_y: -0.1, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: Some(BoundingBox { + min_x: -122.0, + min_y: 35.4, + max_x: -120.0, + max_y: 37.8, + }), + }; + + source_builder + .start_raster( + &original_metadata, + None, + original_metadata.bounding_box.as_ref(), + ) + .unwrap(); + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }; + + let test_data = vec![42u8; 1008]; // 42x24 raster + source_builder.band_data_writer().append_value(&test_data); + source_builder.finish_band(band_metadata).unwrap(); + source_builder.finish_raster().unwrap(); + + let source_array = source_builder.finish().unwrap(); + + // Now create a new raster using metadata from the iterator - this is the key feature! + let mut target_builder = RasterBuilder::new(10); + let iterator = raster_iterator(&source_array); + let source_raster = iterator.get(0).unwrap(); + + // Use metadata directly from the iterator (zero-copy!) 
+ target_builder + .start_raster( + source_raster.metadata(), + source_raster.crs(), + source_raster.bounding_box(0).as_ref(), + ) + .unwrap(); + + // Add new band data while preserving original metadata + let new_band_metadata = BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: BandDataType::UInt16, + }; + + let new_data = vec![100u16; 1008]; // Different data, same dimensions + let new_data_bytes: Vec = new_data.iter().flat_map(|&x| x.to_le_bytes()).collect(); + + target_builder + .band_data_writer() + .append_value(&new_data_bytes); + target_builder.finish_band(new_band_metadata).unwrap(); + target_builder.finish_raster().unwrap(); + + let target_array = target_builder.finish().unwrap(); + + // Verify the metadata was copied correctly + let target_iterator = raster_iterator(&target_array); + let target_raster = target_iterator.get(0).unwrap(); + let target_metadata = target_raster.metadata(); + + // All metadata should match the original + assert_eq!(target_metadata.width(), 42); + assert_eq!(target_metadata.height(), 24); + assert_eq!(target_metadata.upper_left_x(), -122.0); + assert_eq!(target_metadata.upper_left_y(), 37.8); + assert_eq!(target_metadata.scale_x(), 0.1); + assert_eq!(target_metadata.scale_y(), -0.1); + + let target_bbox = target_raster.bounding_box(0).unwrap(); + assert_eq!(target_bbox.min_x, -122.0); + assert_eq!(target_bbox.max_x, -120.0); + + // But band data and metadata should be different + let target_band = target_raster.bands().band(0).unwrap(); + let target_band_meta = target_band.metadata(); + assert_eq!(target_band_meta.data_type(), BandDataType::UInt16); + assert!(target_band_meta.nodata_value().is_none()); + assert_eq!(target_band.data().len(), 2016); // 1008 * 2 bytes per u16 + } + + #[test] + fn test_random_access() { + let mut builder = RasterBuilder::new(10); + + // Add multiple rasters + for raster_idx in 0..3 { + let metadata = RasterMetadata { + width: raster_idx as u64 + 1, + height: 
raster_idx as u64 + 1, + upperleft_x: raster_idx as f64, + upperleft_y: raster_idx as f64, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + }; + + let size = (raster_idx + 1) * (raster_idx + 1); + let test_data = vec![raster_idx as u8; size]; + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + let result = builder.finish_raster(); + assert!(result.is_ok()); + } + + let raster_array = builder.finish().unwrap(); + let iterator = raster_iterator(&raster_array); + + assert_eq!(iterator.len(), 3); + + // Test random access + let raster_2 = iterator.get(2).unwrap(); + assert_eq!(raster_2.metadata().width(), 3); + assert_eq!(raster_2.metadata().height(), 3); + assert_eq!(raster_2.metadata().upper_left_x(), 2.0); + + let band = raster_2.bands().band(0).unwrap(); + assert_eq!(band.data().len(), 9); + assert!(band.data().iter().all(|&x| x == 2u8)); + + // Test out of bounds + assert!(iterator.get(10).is_none()); + } + + /// Comprehensive test to verify all hard-coded indices match the actual schema + #[test] + fn test_hardcoded_indices_match_schema() { + // Test raster-level indices + let raster_fields = RasterSchema::fields(); + assert_eq!(raster_fields.len(), 4, "Expected exactly 4 raster fields"); + assert_eq!( + raster_fields[raster_indices::METADATA].name(), + column::METADATA, + "Raster metadata index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::CRS].name(), + column::CRS, + "Raster CRS index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::BBOX].name(), + column::BBOX, + "Raster BBOX index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::BANDS].name(), + column::BANDS, + "Raster bands index mismatch" + ); + + // Test metadata indices + let 
metadata_type = RasterSchema::metadata_type(); + if let DataType::Struct(metadata_fields) = metadata_type { + assert_eq!( + metadata_fields.len(), + 8, + "Expected exactly 8 metadata fields" + ); + assert_eq!( + metadata_fields[metadata_indices::WIDTH].name(), + column::WIDTH, + "Metadata width index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::HEIGHT].name(), + column::HEIGHT, + "Metadata height index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::UPPERLEFT_X].name(), + column::UPPERLEFT_X, + "Metadata upperleft_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::UPPERLEFT_Y].name(), + column::UPPERLEFT_Y, + "Metadata upperleft_y index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SCALE_X].name(), + column::SCALE_X, + "Metadata scale_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SCALE_Y].name(), + column::SCALE_Y, + "Metadata scale_y index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SKEW_X].name(), + column::SKEW_X, + "Metadata skew_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SKEW_Y].name(), + column::SKEW_Y, + "Metadata skew_y index mismatch" + ); + } else { + panic!("Expected Struct type for metadata"); + } + + // Test bounding box indices + let bbox_type = RasterSchema::bounding_box_type(); + if let DataType::Struct(bbox_fields) = bbox_type { + assert_eq!( + bbox_fields.len(), + 4, + "Expected exactly 4 bounding box fields" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MIN_X].name(), + column::MIN_X, + "Bounding box min_x index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MIN_Y].name(), + column::MIN_Y, + "Bounding box min_y index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MAX_X].name(), + column::MAX_X, + "Bounding box max_x index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MAX_Y].name(), + column::MAX_Y, + "Bounding box max_y index 
mismatch" + ); + } else { + panic!("Expected Struct type for bounding box"); + } + + // Test band metadata indices + let band_metadata_type = RasterSchema::band_metadata_type(); + if let DataType::Struct(band_metadata_fields) = band_metadata_type { + assert_eq!( + band_metadata_fields.len(), + 3, + "Expected exactly 3 band metadata fields" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::NODATAVALUE].name(), + column::NODATAVALUE, + "Band metadata nodatavalue index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::STORAGE_TYPE].name(), + column::STORAGE_TYPE, + "Band metadata storage_type index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::DATATYPE].name(), + column::DATATYPE, + "Band metadata datatype index mismatch" + ); + } else { + panic!("Expected Struct type for band metadata"); + } + + // Test band indices + let band_type = RasterSchema::band_type(); + if let DataType::Struct(band_fields) = band_type { + assert_eq!(band_fields.len(), 2, "Expected exactly 2 band fields"); + assert_eq!( + band_fields[band_indices::METADATA].name(), + column::METADATA, + "Band metadata index mismatch" + ); + assert_eq!( + band_fields[band_indices::DATA].name(), + column::DATA, + "Band data index mismatch" + ); + } else { + panic!("Expected Struct type for band"); + } + } + + #[test] + fn test_band_data_type_conversion() { + // Create a test raster with bands of different data types + let mut builder = RasterBuilder::new(10); + + let metadata = RasterMetadata { + width: 2, + height: 2, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + // Test all BandDataType variants + let test_cases = vec![ + (BandDataType::UInt8, vec![1u8, 2u8, 3u8, 4u8]), + (BandDataType::UInt16, vec![1u8, 0u8, 2u8, 0u8, 3u8, 0u8, 4u8, 0u8]), // little-endian u16 + (BandDataType::Int16, vec![255u8, 
255u8, 254u8, 255u8, 253u8, 255u8, 252u8, 255u8]), // little-endian i16 + (BandDataType::UInt32, vec![1u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, 0u8, 3u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8]), // little-endian u32 + (BandDataType::Int32, vec![255u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8, 253u8, 255u8, 255u8, 255u8, 252u8, 255u8, 255u8, 255u8]), // little-endian i32 + (BandDataType::Float32, vec![0u8, 0u8, 128u8, 63u8, 0u8, 0u8, 0u8, 64u8, 0u8, 0u8, 64u8, 64u8, 0u8, 0u8, 128u8, 64u8]), // little-endian f32: 1.0, 2.0, 3.0, 4.0 + (BandDataType::Float64, vec![0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 240u8, 63u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 8u8, 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 16u8, 64u8]), // little-endian f64: 1.0, 2.0, 3.0, 4.0 + ]; + + for (expected_data_type, test_data) in test_cases { + let band_metadata = BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: expected_data_type.clone(), + }; + + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + } + + builder.finish_raster().unwrap(); + let raster_array = builder.finish().unwrap(); + + // Test the data type conversion for each band + let iterator = raster_iterator(&raster_array); + let raster = iterator.get(0).unwrap(); + let bands = raster.bands(); + + assert_eq!(bands.len(), 7, "Expected 7 bands for all data types"); + + // Verify each band returns the correct data type + let expected_types = vec![ + BandDataType::UInt8, + BandDataType::UInt16, + BandDataType::Int16, + BandDataType::UInt32, + BandDataType::Int32, + BandDataType::Float32, + BandDataType::Float64, + ]; + + for (i, expected_type) in expected_types.iter().enumerate() { + let band = bands.band(i).unwrap(); + let band_metadata = band.metadata(); + let actual_type = band_metadata.data_type(); + + assert_eq!( + actual_type, *expected_type, + "Band {} expected data type {:?}, got {:?}", + i, expected_type, actual_type + ); + } + } } diff 
--git a/rust/sedona-schema/src/matchers.rs b/rust/sedona-schema/src/matchers.rs index 57a74ddc..f6545e13 100644 --- a/rust/sedona-schema/src/matchers.rs +++ b/rust/sedona-schema/src/matchers.rs @@ -21,7 +21,7 @@ use arrow_schema::DataType; use datafusion_common::{plan_err, Result}; use sedona_common::sedona_internal_err; -use crate::datatypes::{Edges, SedonaType, WKB_GEOGRAPHY, WKB_GEOMETRY}; +use crate::datatypes::{Edges, SedonaType, WKB_GEOGRAPHY, WKB_GEOMETRY, RASTER}; /// Helper to match arguments and compute return types #[derive(Debug)] @@ -170,6 +170,12 @@ impl ArgMatcher { Arc::new(IsGeography {}) } + pub fn is_raster() -> Arc { + Arc::new(IsExact { + exact_type: RASTER, + }) + } + /// Matches a null argument pub fn is_null() -> Arc { Arc::new(IsNull {}) @@ -478,6 +484,10 @@ mod tests { ArgMatcher::is_boolean().type_if_null(), Some(SedonaType::Arrow(DataType::Boolean)) ); + + assert!(ArgMatcher::is_raster().match_type(&RASTER)); + assert!(!ArgMatcher::is_raster().match_type(&SedonaType::Arrow(DataType::Int32))); + assert!(!ArgMatcher::is_raster().match_type(&WKB_GEOMETRY)); } #[test] From f7c465960781f5b52859b768aec9edf00fcbad95 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Mon, 20 Oct 2025 14:01:31 -0700 Subject: [PATCH 10/18] move outdb url into metadata and add an executor function --- c/sedona-geoarrow-c/src/geoarrow_c.rs | 5 + rust/sedona-functions/src/executor.rs | 116 +++++++++++- rust/sedona-functions/src/rs_value.rs | 83 +++++++++ rust/sedona-functions/src/rs_width.rs | 130 +++++++++----- rust/sedona-schema/src/datatypes.rs | 248 +++++++++++++++++++++++--- rust/sedona-schema/src/matchers.rs | 6 +- 6 files changed, 515 insertions(+), 73 deletions(-) create mode 100644 rust/sedona-functions/src/rs_value.rs diff --git a/c/sedona-geoarrow-c/src/geoarrow_c.rs b/c/sedona-geoarrow-c/src/geoarrow_c.rs index 67b2608b..a0d3006f 100644 --- a/c/sedona-geoarrow-c/src/geoarrow_c.rs +++ 
b/c/sedona-geoarrow-c/src/geoarrow_c.rs @@ -281,6 +281,11 @@ fn geoarrow_type_id(sedona_type: &SedonaType) -> Result { + return Err(GeoArrowCError::Invalid( + "GeoArrow type ID not defined for Raster types".to_string(), + )); + } }; Ok(type_id) diff --git a/rust/sedona-functions/src/executor.rs b/rust/sedona-functions/src/executor.rs index 0a98e71d..04b4a020 100644 --- a/rust/sedona-functions/src/executor.rs +++ b/rust/sedona-functions/src/executor.rs @@ -16,14 +16,14 @@ // under the License. use std::iter::zip; -use arrow_array::ArrayRef; +use arrow_array::{Array, ArrayRef, StructArray}; use arrow_schema::DataType; use datafusion_common::cast::{as_binary_array, as_binary_view_array}; use datafusion_common::error::Result; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::ColumnarValue; use sedona_common::sedona_internal_err; -use sedona_schema::datatypes::SedonaType; +use sedona_schema::datatypes::{SedonaType, raster_iterator, RasterRefImpl}; use wkb::reader::Wkb; /// Helper for writing general kernel implementations with geometry @@ -75,6 +75,118 @@ pub struct GenericExecutor<'a, 'b, Factory0, Factory1> { /// Alias for an executor that iterates over geometries as [Wkb] pub type WkbExecutor<'a, 'b> = GenericExecutor<'a, 'b, WkbGeometryFactory, WkbGeometryFactory>; +/// Helper for writing raster kernel implementations +/// +/// The [RasterExecutor] provides a simplified interface for executing functions +/// on raster arrays, handling the common pattern of downcasting to StructArray, +/// creating raster iterators, and handling null values. 
+pub struct RasterExecutor<'a, 'b> { + pub arg_types: &'a [SedonaType], + pub args: &'b [ColumnarValue], + num_iterations: usize, +} + +impl<'a, 'b> RasterExecutor<'a, 'b> { + /// Create a new [RasterExecutor] + pub fn new(arg_types: &'a [SedonaType], args: &'b [ColumnarValue]) -> Self { + Self { + arg_types, + args, + num_iterations: Self::calc_num_iterations(args), + } + } + + /// Return the number of iterations that will be performed + pub fn num_iterations(&self) -> usize { + self.num_iterations + } + + /// Execute a function by iterating over rasters in the first argument + /// + /// This handles the common pattern of: + /// 1. Downcasting array to StructArray + /// 2. Creating raster iterator + /// 3. Iterating with null checks + /// 4. Calling the provided function with each raster + pub fn execute_raster_void(&self, mut func: F) -> Result<()> + where + F: FnMut(usize, Option>) -> Result<()>, + { + let raster_array = match &self.args[0] { + ColumnarValue::Array(array) => array, + ColumnarValue::Scalar(_) => { + return Err(DataFusionError::NotImplemented( + "Scalar raster input not yet supported".to_string() + )); + } + }; + + // Downcast to StructArray (rasters are stored as structs) + let raster_struct = raster_array + .as_any() + .downcast_ref::() + .ok_or_else(|| DataFusionError::Internal( + "Expected StructArray for raster data".to_string() + ))?; + + // Create raster iterator + let iterator = raster_iterator(raster_struct); + + // Iterate through each raster in the array + for i in 0..self.num_iterations { + if raster_struct.is_null(i) { + func(i, None)?; + } else { + // Get the raster at this index + let raster = iterator.get(i).ok_or_else(|| { + DataFusionError::Internal( + format!("Failed to get raster at index {}", i) + ) + })?; + func(i, Some(raster))?; + } + } + + Ok(()) + } + + /// Finish an [ArrayRef] output as the appropriate [ColumnarValue] + /// + /// Converts the output into a [ColumnarValue::Scalar] if all arguments were scalars, + /// or a 
[ColumnarValue::Array] otherwise. + pub fn finish(&self, out: ArrayRef) -> Result { + for arg in self.args { + match arg { + // If any argument was an array, we return an array + ColumnarValue::Array(_) => { + return Ok(ColumnarValue::Array(out)); + } + ColumnarValue::Scalar(_) => {} + } + } + + // For all scalar arguments, we return a scalar + Ok(ColumnarValue::Scalar(ScalarValue::try_from_array(&out, 0)?)) + } + + /// Calculates the number of iterations that should happen based on the + /// argument ColumnarValue types + fn calc_num_iterations(args: &[ColumnarValue]) -> usize { + for arg in args { + match arg { + // If any argument is an array, we have to iterate array.len() times + ColumnarValue::Array(array) => { + return array.len(); + } + ColumnarValue::Scalar(_) => {} + } + } + + // All scalars: we iterate once + 1 + } +} + impl<'a, 'b, Factory0: GeometryFactory, Factory1: GeometryFactory> GenericExecutor<'a, 'b, Factory0, Factory1> { diff --git a/rust/sedona-functions/src/rs_value.rs b/rust/sedona-functions/src/rs_value.rs new file mode 100644 index 00000000..55b9b949 --- /dev/null +++ b/rust/sedona-functions/src/rs_value.rs @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+use std::{sync::Arc, vec}; + +use crate::executor::WkbExecutor; +use arrow_array::builder::UInt64Builder; +use arrow_schema::DataType; +use datafusion_common::error::{DataFusionError, Result}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_common::sedona_internal_err; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// RS_Value() scalar UDF implementation +/// +/// Extract the value at a given point from the raster +/// TODO: Implement support for point geometry input +pub fn rs_value_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new_stub( + "rs_value", + ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_numeric(), + ArgMatcher::is_numeric(), + ArgMatcher::is_numeric(), + ], + SedonaType::Arrow(DataType::Boolean), + ), + Volatility::Immutable, + Some(rs_value_doc()), + ) +} +fn rs_value_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + format!( + "Returns the value at the given point in the raster.", + ), + format!("RS_Value (raster: Raster, colX: Integer, colY: Integer, band: Integer)"), + ) + .with_argument("raster", "Raster: Input raster") + .with_optional_argument("x", "Integer: X coordinate") + .with_optional_argument("y", "Integer: Y coordinate") + .with_optional_argument("point", "Point: Point geometry") + .with_argument("band_id", "Integer: Band number (1-based index)") + .with_sql_example(format!( + "SELECT RS_Value(raster, x, y, band_id)", + )) + .with_sql_example(format!( + "SELECT RS_Value(raster, point, band_id)", + )) + .build() +} + +#[cfg(test)] +mod tests { + use super::*; + use datafusion_expr::ScalarUDF; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = rs_value_udf().into(); + assert_eq!(udf.name(), "rs_value"); + assert!(udf.documentation().is_some()); + } +} \ No newline at end of file diff --git 
a/rust/sedona-functions/src/rs_width.rs b/rust/sedona-functions/src/rs_width.rs index 59dce8d8..fd8b2e66 100644 --- a/rust/sedona-functions/src/rs_width.rs +++ b/rust/sedona-functions/src/rs_width.rs @@ -16,16 +16,18 @@ // under the License. use std::{sync::Arc, vec}; -use crate::executor::WkbExecutor; +use crate::executor::RasterExecutor; use arrow_array::builder::UInt64Builder; use arrow_schema::DataType; -use datafusion_common::error::{DataFusionError, Result}; +use datafusion_common::error::Result; use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; -use sedona_common::sedona_internal_err; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; -use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; +use sedona_schema::{ + datatypes::{RasterRef, SedonaType}, + matchers::ArgMatcher +}; /// RS_Width() scalar UDF implementation /// @@ -42,11 +44,15 @@ pub fn rs_width_udf() -> SedonaScalarUDF { fn rs_width_doc() -> Documentation { Documentation::builder( DOC_SECTION_OTHER, - format!("Return the width component of a raster",), + format!( + "Return the width component of a raster", + ), format!("RS_Width(raster: Raster)"), ) .with_argument("raster", "Raster: Input raster") - .with_sql_example(format!("SELECT RS_Width(raster)",)) + .with_sql_example(format!( + "SELECT RS_Width(raster)", + )) .build() } @@ -68,30 +74,32 @@ impl SedonaScalarKernel for RS_Width { arg_types: &[SedonaType], args: &[ColumnarValue], ) -> Result { - let rasters = args[0].to_array(); - let mut builder = UInt64Builder::with_capacity(args[0].len()); + let executor = RasterExecutor::new(arg_types, args); + let mut builder = UInt64Builder::with_capacity(executor.num_iterations()); - for raster in rasters.iter() { - match raster { + executor.execute_raster_void(|_i, raster_opt| { + match raster_opt { + None => builder.append_null(), Some(raster) => { - builder.append_value(raster.metadata().width()); + let width = 
raster.metadata().width(); + builder.append_value(width); } - None => builder.append_null(), } - } + Ok(()) + })?; - Ok(ColumnarValue::from(builder.finish())) + executor.finish(Arc::new(builder.finish())) } } #[cfg(test)] mod tests { use super::*; - use arrow_array::{create_array, ArrayRef}; - use datafusion_common::ScalarValue; + use arrow_array::{Array, ArrayRef, UInt64Array}; use datafusion_expr::ScalarUDF; - use rstest::rstest; - use sedona_testing::{create::create_array, testers::ScalarUdfTester}; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER, + }; #[test] fn udf_metadata() { @@ -100,44 +108,80 @@ mod tests { assert!(udf.documentation().is_some()); } - #[rstest] + #[test] fn udf_invoke() { - let raster_array = create_array( - &[gen_raster(10, 12), None, gen_raster(30, 15)], - &WKB_GEOMETRY, - ); - let expected: ArrayRef = create_array!(UInt64, [Some(10), None, Some(30),]); - assert_eq!( - &x_tester.invoke_array(wkb_array.clone()).unwrap(), - &expected_x - ); + // Create test rasters with different widths + let raster_array = create_test_raster_array(); + + // Create the UDF and invoke it + let kernel = RS_Width {}; + let args = vec![ColumnarValue::Array(raster_array)]; + let arg_types = vec![RASTER]; + + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + // Check the result + if let ColumnarValue::Array(result_array) = result { + let width_array = result_array.as_any().downcast_ref::().unwrap(); + + assert_eq!(width_array.len(), 2); + assert_eq!(width_array.value(0), 10); // First raster width + assert_eq!(width_array.value(1), 30); // Second raster width + } else { + panic!("Expected array result"); + } } - /// Generate a raster with the specified width, height, and value. 
- /// This should be improved and moved into sedona-testing - fn gen_raster(width: usize, height: usize) -> StructArray { - let mut builder = Raster::builder(); - - let metadata = RasterMetadata { - width, - height, - ..Default::default() + /// Create a test raster array with different widths for testing + fn create_test_raster_array() -> ArrayRef { + let mut builder = RasterBuilder::new(10); + + // First raster: 10x12 + let metadata1 = RasterMetadata { + width: 10, + height: 12, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, }; let band_metadata = BandMetadata { nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, }; - builder.start_raster(&metadata, None, None).unwrap(); + builder.start_raster(&metadata1, None, None).unwrap(); + let test_data1 = vec![1u8; 10 * 12]; // width * height + builder.band_data_writer().append_value(&test_data1); + builder.finish_band(band_metadata.clone()).unwrap(); + builder.finish_raster().unwrap(); + + // Second raster: 30x15 + let metadata2 = RasterMetadata { + width: 30, + height: 15, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; - let size = width * height * 8; - let test_data = vec![value as u8; size]; - builder.band_data_writer().append_value(&test_data); + builder.start_raster(&metadata2, None, None).unwrap(); + let test_data2 = vec![1u8; 30 * 15]; // width * height + builder.band_data_writer().append_value(&test_data2); builder.finish_band(band_metadata).unwrap(); - builder.finish_raster(); + builder.finish_raster().unwrap(); - builder.finish().unwrap() + Arc::new(builder.finish().unwrap()) } } diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs index 9613ef3e..37a5ba46 100644 --- a/rust/sedona-schema/src/datatypes.rs +++ 
b/rust/sedona-schema/src/datatypes.rs @@ -14,12 +14,15 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +use arrow::buffer::MutableBuffer; use arrow_array::{ - builder::{BinaryBuilder, Float64Builder, UInt32Builder, UInt64Builder, StructBuilder, StringViewBuilder, ListBuilder}, - BinaryArray, ListArray, StringViewArray, StructArray, UInt32Array, - UInt64Array, Float64Array, ArrayRef, Array + builder::{ + BinaryBuilder, Float64Builder, ListBuilder, StringBuilder, StringViewBuilder, StructBuilder, + UInt32Builder, UInt64Builder, + }, + Array, ArrayRef, BinaryArray, Float64Array, ListArray, StringArray, StringViewArray, StructArray, + UInt32Array, UInt64Array, }; -use arrow::{buffer::MutableBuffer}; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use datafusion_common::error::{DataFusionError, Result}; use sedona_common::sedona_internal_err; @@ -87,7 +90,8 @@ pub const RASTER: SedonaType = SedonaType::Raster(RasterSchema); /// Create a static value for the [`SedonaType::Raster`] that's initialized exactly once, /// on first access -static RASTER_DATATYPE: LazyLock = LazyLock::new(|| DataType::Struct(RasterSchema::fields())); +static RASTER_DATATYPE: LazyLock = + LazyLock::new(|| DataType::Struct(RasterSchema::fields())); impl SedonaType { /// Given a field as it would appear in an external Schema return the appropriate SedonaType @@ -149,13 +153,11 @@ impl SedonaType { Some(serialize_edges_and_crs(edges, crs)), )) } - SedonaType::Raster(_) => { - Some(ExtensionType::new( - self.extension_name().unwrap(), - self.storage_type().clone(), - None, - )) - } + SedonaType::Raster(_) => Some(ExtensionType::new( + self.extension_name().unwrap(), + self.storage_type().clone(), + None, + )), _ => None, } } @@ -424,6 +426,9 @@ impl RasterSchema { Field::new(column::NODATAVALUE, DataType::Binary, true), // Allow null nodata values Field::new(column::STORAGE_TYPE, 
DataType::UInt32, false), Field::new(column::DATATYPE, DataType::UInt32, false), + // OutDb reference fields - only used when storage_type == OutDbRef + Field::new(column::OUTDB_URL, DataType::Utf8, true), + Field::new(column::OUTDB_BAND_ID, DataType::UInt32, true), ])) } @@ -630,6 +635,32 @@ impl RasterBuilder { .unwrap() .append_value(band_metadata.datatype as u32); + // Handle OutDb URL + if let Some(url) = band_metadata.outdb_url { + metadata_builder + .field_builder::(band_metadata_indices::OUTDB_URL) + .unwrap() + .append_value(&url); + } else { + metadata_builder + .field_builder::(band_metadata_indices::OUTDB_URL) + .unwrap() + .append_null(); + } + + // Handle OutDb band ID + if let Some(band_id) = band_metadata.outdb_band_id { + metadata_builder + .field_builder::(band_metadata_indices::OUTDB_BAND_ID) + .unwrap() + .append_value(band_id); + } else { + metadata_builder + .field_builder::(band_metadata_indices::OUTDB_BAND_ID) + .unwrap() + .append_null(); + } + metadata_builder.append(true); // Finish the band @@ -870,6 +901,10 @@ pub trait BandMetadataRef { fn storage_type(&self) -> StorageType; /// Band data type (UInt8, Float32, etc.) 
fn data_type(&self) -> BandDataType; + /// OutDb URL (only used when storage_type == OutDbRef) + fn outdb_url(&self) -> Option<&str>; + /// OutDb band ID (only used when storage_type == OutDbRef) + fn outdb_band_id(&self) -> Option; } /// Trait for accessing individual band data @@ -1046,6 +1081,36 @@ impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { ), } } + + fn outdb_url(&self) -> Option<&str> { + let url_array = self + .metadata_struct + .column(band_metadata_indices::OUTDB_URL) + .as_any() + .downcast_ref::() + .expect("Expected StringArray for outdb_url"); + + if url_array.is_null(self.band_index) { + None + } else { + Some(url_array.value(self.band_index)) + } + } + + fn outdb_band_id(&self) -> Option { + let band_id_array = self + .metadata_struct + .column(band_metadata_indices::OUTDB_BAND_ID) + .as_any() + .downcast_ref::() + .expect("Expected UInt32Array for outdb_band_id"); + + if band_id_array.is_null(self.band_index) { + None + } else { + Some(band_id_array.value(self.band_index)) + } + } } /// Implementation of BandRef for accessing individual band data @@ -1358,6 +1423,10 @@ pub struct BandMetadata { pub nodata_value: Option>, pub storage_type: StorageType, pub datatype: BandDataType, + /// URL for OutDb reference (only used when storage_type == OutDbRef) + pub outdb_url: Option, + /// Band ID within the OutDb resource (only used when storage_type == OutDbRef) + pub outdb_band_id: Option, } // Private field column name and index constants @@ -1390,6 +1459,8 @@ mod column { pub const NODATAVALUE: &str = "nodata_value"; pub const STORAGE_TYPE: &str = "storage_type"; pub const DATATYPE: &str = "data_type"; + pub const OUTDB_URL: &str = "outdb_url"; + pub const OUTDB_BAND_ID: &str = "outdb_band_id"; } /// Hard-coded column indices for maximum performance @@ -1416,6 +1487,8 @@ mod band_metadata_indices { pub const NODATAVALUE: usize = 0; pub const STORAGE_TYPE: usize = 1; pub const DATATYPE: usize = 2; + pub const OUTDB_URL: usize = 3; + pub const 
OUTDB_BAND_ID: usize = 4; } mod band_indices { @@ -1430,8 +1503,6 @@ mod raster_indices { pub const BANDS: usize = 3; } - - #[cfg(test)] mod tests { use crate::crs::lnglat; @@ -1649,7 +1720,7 @@ mod tests { .contains("Unsupported edges value")); } - #[test] + #[test] fn test_iterator_basic_functionality() { // Create a simple raster for testing using the correct API let mut builder = RasterBuilder::new(10); // capacity @@ -1680,6 +1751,8 @@ mod tests { nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, }; // Add a single band with some test data using the correct API @@ -1753,6 +1826,8 @@ mod tests { nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, }; let test_data = vec![band_idx as u8; 25]; // 5x5 raster @@ -1825,6 +1900,8 @@ mod tests { nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, }; let test_data = vec![42u8; 1008]; // 42x24 raster @@ -1853,6 +1930,8 @@ mod tests { nodata_value: None, storage_type: StorageType::InDb, datatype: BandDataType::UInt16, + outdb_url: None, + outdb_band_id: None, }; let new_data = vec![100u16; 1008]; // Different data, same dimensions @@ -1915,6 +1994,8 @@ mod tests { nodata_value: Some(vec![255u8]), storage_type: StorageType::InDb, datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, }; let size = (raster_idx + 1) * (raster_idx + 1); @@ -2060,8 +2141,8 @@ mod tests { if let DataType::Struct(band_metadata_fields) = band_metadata_type { assert_eq!( band_metadata_fields.len(), - 3, - "Expected exactly 3 band metadata fields" + 5, + "Expected exactly 5 band metadata fields" ); assert_eq!( band_metadata_fields[band_metadata_indices::NODATAVALUE].name(), @@ -2078,6 +2159,16 @@ mod tests { column::DATATYPE, "Band metadata datatype index mismatch" ); + 
assert_eq!( + band_metadata_fields[band_metadata_indices::OUTDB_URL].name(), + column::OUTDB_URL, + "Band metadata outdb_url index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::OUTDB_BAND_ID].name(), + column::OUTDB_BAND_ID, + "Band metadata outdb_band_id index mismatch" + ); } else { panic!("Expected Struct type for band metadata"); } @@ -2123,12 +2214,42 @@ mod tests { // Test all BandDataType variants let test_cases = vec![ (BandDataType::UInt8, vec![1u8, 2u8, 3u8, 4u8]), - (BandDataType::UInt16, vec![1u8, 0u8, 2u8, 0u8, 3u8, 0u8, 4u8, 0u8]), // little-endian u16 - (BandDataType::Int16, vec![255u8, 255u8, 254u8, 255u8, 253u8, 255u8, 252u8, 255u8]), // little-endian i16 - (BandDataType::UInt32, vec![1u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, 0u8, 3u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8]), // little-endian u32 - (BandDataType::Int32, vec![255u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8, 253u8, 255u8, 255u8, 255u8, 252u8, 255u8, 255u8, 255u8]), // little-endian i32 - (BandDataType::Float32, vec![0u8, 0u8, 128u8, 63u8, 0u8, 0u8, 0u8, 64u8, 0u8, 0u8, 64u8, 64u8, 0u8, 0u8, 128u8, 64u8]), // little-endian f32: 1.0, 2.0, 3.0, 4.0 - (BandDataType::Float64, vec![0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 240u8, 63u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 8u8, 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 16u8, 64u8]), // little-endian f64: 1.0, 2.0, 3.0, 4.0 + ( + BandDataType::UInt16, + vec![1u8, 0u8, 2u8, 0u8, 3u8, 0u8, 4u8, 0u8], + ), // little-endian u16 + ( + BandDataType::Int16, + vec![255u8, 255u8, 254u8, 255u8, 253u8, 255u8, 252u8, 255u8], + ), // little-endian i16 + ( + BandDataType::UInt32, + vec![ + 1u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, 0u8, 3u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, + ], + ), // little-endian u32 + ( + BandDataType::Int32, + vec![ + 255u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8, 253u8, 255u8, 255u8, + 255u8, 252u8, 255u8, 255u8, 255u8, + ], + ), // little-endian i32 + ( + BandDataType::Float32, + vec![ + 
0u8, 0u8, 128u8, 63u8, 0u8, 0u8, 0u8, 64u8, 0u8, 0u8, 64u8, 64u8, 0u8, 0u8, + 128u8, 64u8, + ], + ), // little-endian f32: 1.0, 2.0, 3.0, 4.0 + ( + BandDataType::Float64, + vec![ + 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 240u8, 63u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, + 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 8u8, 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, + 16u8, 64u8, + ], + ), // little-endian f64: 1.0, 2.0, 3.0, 4.0 ]; for (expected_data_type, test_data) in test_cases { @@ -2136,6 +2257,8 @@ mod tests { nodata_value: None, storage_type: StorageType::InDb, datatype: expected_data_type.clone(), + outdb_url: None, + outdb_band_id: None, }; builder.band_data_writer().append_value(&test_data); @@ -2167,7 +2290,7 @@ mod tests { let band = bands.band(i).unwrap(); let band_metadata = band.metadata(); let actual_type = band_metadata.data_type(); - + assert_eq!( actual_type, *expected_type, "Band {} expected data type {:?}, got {:?}", @@ -2175,4 +2298,81 @@ mod tests { ); } } + + #[test] + fn test_outdb_metadata_fields() { + // Test creating raster with OutDb reference metadata + let mut builder = RasterBuilder::new(10); + + let metadata = RasterMetadata { + width: 1024, + height: 1024, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + // Test InDb band (should have null OutDb fields) + let indb_band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + let test_data = vec![1u8; 100]; + builder.band_data_writer().append_value(&test_data); + builder.finish_band(indb_band_metadata).unwrap(); + + // Test OutDbRef band (should have OutDb fields populated) + let outdb_band_metadata = BandMetadata { + nodata_value: None, + storage_type: StorageType::OutDbRef, + datatype: BandDataType::Float32, + outdb_url: 
Some("s3://mybucket/satellite_image.tif".to_string()), + outdb_band_id: Some(2), + }; + + // For OutDbRef, data field could be empty or contain metadata/thumbnail + builder.band_data_writer().append_value(&[]); + builder.finish_band(outdb_band_metadata).unwrap(); + + builder.finish_raster().unwrap(); + let raster_array = builder.finish().unwrap(); + + // Verify the band metadata + let iterator = raster_iterator(&raster_array); + let raster = iterator.get(0).unwrap(); + let bands = raster.bands(); + + assert_eq!(bands.len(), 2); + + // Test InDb band + let indb_band = bands.band(0).unwrap(); + let indb_metadata = indb_band.metadata(); + assert_eq!(indb_metadata.storage_type(), StorageType::InDb); + assert_eq!(indb_metadata.data_type(), BandDataType::UInt8); + assert!(indb_metadata.outdb_url().is_none()); + assert!(indb_metadata.outdb_band_id().is_none()); + assert_eq!(indb_band.data().len(), 100); + + // Test OutDbRef band + let outdb_band = bands.band(1).unwrap(); + let outdb_metadata = outdb_band.metadata(); + assert_eq!(outdb_metadata.storage_type(), StorageType::OutDbRef); + assert_eq!(outdb_metadata.data_type(), BandDataType::Float32); + assert_eq!( + outdb_metadata.outdb_url().unwrap(), + "s3://mybucket/satellite_image.tif" + ); + assert_eq!(outdb_metadata.outdb_band_id().unwrap(), 2); + assert_eq!(outdb_band.data().len(), 0); // Empty data for OutDbRef + } } diff --git a/rust/sedona-schema/src/matchers.rs b/rust/sedona-schema/src/matchers.rs index f6545e13..faa39827 100644 --- a/rust/sedona-schema/src/matchers.rs +++ b/rust/sedona-schema/src/matchers.rs @@ -21,7 +21,7 @@ use arrow_schema::DataType; use datafusion_common::{plan_err, Result}; use sedona_common::sedona_internal_err; -use crate::datatypes::{Edges, SedonaType, WKB_GEOGRAPHY, WKB_GEOMETRY, RASTER}; +use crate::datatypes::{Edges, SedonaType, RASTER, WKB_GEOGRAPHY, WKB_GEOMETRY}; /// Helper to match arguments and compute return types #[derive(Debug)] @@ -171,9 +171,7 @@ impl ArgMatcher { } pub fn 
is_raster() -> Arc { - Arc::new(IsExact { - exact_type: RASTER, - }) + Arc::new(IsExact { exact_type: RASTER }) } /// Matches a null argument From aafa3c56edd24792f806dac4bdecda385da7c275 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Mon, 20 Oct 2025 15:03:07 -0700 Subject: [PATCH 11/18] fix builder to handle null rasters --- rust/sedona-functions/src/rs_width.rs | 18 ++- rust/sedona-schema/src/datatypes.rs | 206 ++++++++++++++++++-------- 2 files changed, 154 insertions(+), 70 deletions(-) diff --git a/rust/sedona-functions/src/rs_width.rs b/rust/sedona-functions/src/rs_width.rs index fd8b2e66..38d3dec9 100644 --- a/rust/sedona-functions/src/rs_width.rs +++ b/rust/sedona-functions/src/rs_width.rs @@ -124,9 +124,10 @@ mod tests { if let ColumnarValue::Array(result_array) = result { let width_array = result_array.as_any().downcast_ref::().unwrap(); - assert_eq!(width_array.len(), 2); + assert_eq!(width_array.len(), 3); assert_eq!(width_array.value(0), 10); // First raster width - assert_eq!(width_array.value(1), 30); // Second raster width + assert!(width_array.is_null(1)); // Second raster is null + assert_eq!(width_array.value(2), 30); // Third raster width } else { panic!("Expected array result"); } @@ -163,8 +164,11 @@ mod tests { builder.finish_band(band_metadata.clone()).unwrap(); builder.finish_raster().unwrap(); - // Second raster: 30x15 - let metadata2 = RasterMetadata { + // Second raster: null + builder.append_null().unwrap(); + + // Third raster: 30x15 + let metadata3 = RasterMetadata { width: 30, height: 15, upperleft_x: 0.0, @@ -176,9 +180,9 @@ mod tests { bounding_box: None, }; - builder.start_raster(&metadata2, None, None).unwrap(); - let test_data2 = vec![1u8; 30 * 15]; // width * height - builder.band_data_writer().append_value(&test_data2); + builder.start_raster(&metadata3, None, None).unwrap(); + let test_data3 = vec![3u8; 30 * 15]; // width * height + 
builder.band_data_writer().append_value(&test_data3); builder.finish_band(band_metadata).unwrap(); builder.finish_raster().unwrap(); diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs index 37a5ba46..cd7efe06 100644 --- a/rust/sedona-schema/src/datatypes.rs +++ b/rust/sedona-schema/src/datatypes.rs @@ -14,10 +14,10 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use arrow::buffer::MutableBuffer; +use arrow::buffer::{MutableBuffer, BooleanBuffer, NullBuffer}; use arrow_array::{ builder::{ - BinaryBuilder, Float64Builder, ListBuilder, StringBuilder, StringViewBuilder, StructBuilder, + BinaryBuilder, Float64Builder, ListBuilder, StringBuilder, StructBuilder, UInt32Builder, UInt64Builder, }, Array, ArrayRef, BinaryArray, Float64Array, ListArray, StringArray, StringViewArray, StructArray, @@ -439,7 +439,7 @@ impl RasterSchema { /// CRS schema to store json representation pub fn crs_type() -> DataType { - DataType::Utf8View + DataType::Utf8 } } @@ -479,15 +479,13 @@ pub enum StorageType { /// Builder for constructing raster arrays with zero-copy band data writing pub struct RasterBuilder { - metadata_builder: StructBuilder, - crs_builder: StringViewBuilder, - bbox_builder: StructBuilder, - bands_builder: ListBuilder, + main_builder: StructBuilder, } impl RasterBuilder { /// Create a new raster builder with the specified capacity pub fn new(capacity: usize) -> Self { + // Create individual builders that we know work let metadata_builder = StructBuilder::from_fields( match RasterSchema::metadata_type() { DataType::Struct(fields) => fields, @@ -496,12 +494,22 @@ impl RasterBuilder { capacity, ); + let crs_builder = StringBuilder::new(); + + let bbox_builder = StructBuilder::from_fields( + match RasterSchema::bounding_box_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for bounding box"), + }, + capacity, + ); 
+ let band_struct_builder = StructBuilder::from_fields( match RasterSchema::band_type() { DataType::Struct(fields) => fields, _ => panic!("Expected struct type for band"), }, - 0, // Initial capacity for bands + 0, ); let bands_builder = ListBuilder::new(band_struct_builder).with_field(Field::new( @@ -510,19 +518,19 @@ impl RasterBuilder { false, )); - let bbox_builder = StructBuilder::from_fields( - match RasterSchema::bounding_box_type() { - DataType::Struct(fields) => fields, - _ => panic!("Expected struct type for bounding box"), - }, - capacity, + // Now create the main builder with pre-built components + let mut main_builder = StructBuilder::new( + RasterSchema::fields(), + vec![ + Box::new(metadata_builder), + Box::new(crs_builder), + Box::new(bbox_builder), + Box::new(bands_builder), + ], ); Self { - metadata_builder, - crs_builder: StringViewBuilder::new(), - bbox_builder, - bands_builder, + main_builder, } } @@ -536,7 +544,6 @@ impl RasterBuilder { /// * `bbox` - Optional bounding box coordinates /// /// # Examples - /// ```ignore /// // From iterator - copy all fields from existing raster /// builder.start_raster(raster.metadata(), raster.crs(), raster.bounding_box(0).as_ref())?; /// @@ -560,7 +567,12 @@ impl RasterBuilder { /// Get direct access to the BinaryBuilder for writing the current band's data pub fn band_data_writer(&mut self) -> &mut BinaryBuilder { - let band_builder = self.bands_builder.values(); + let bands_builder = self.main_builder + .field_builder::>(raster_indices::BANDS) + .unwrap(); + let band_builder = bands_builder.values(); + // Ensure we have at least one field (band metadata and data) + // Field 0 = metadata (StructBuilder), Field 1 = data (BinaryBuilder) band_builder.field_builder::(1).unwrap() } @@ -609,7 +621,10 @@ impl RasterBuilder { /// TODO: The band_metadata is in the finish in the band call, but in the /// start in the raster call. Make it consistent. 
pub fn finish_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> { - let band_builder = self.bands_builder.values(); + let bands_builder = self.main_builder + .field_builder::>(raster_indices::BANDS) + .unwrap(); + let band_builder = bands_builder.values(); let metadata_builder = band_builder.field_builder::(0).unwrap(); @@ -670,145 +685,210 @@ impl RasterBuilder { /// Finish all bands for the current raster pub fn finish_raster(&mut self) -> Result<(), ArrowError> { - self.bands_builder.append(true); + let bands_builder = self.main_builder + .field_builder::>(raster_indices::BANDS) + .unwrap(); + bands_builder.append(true); + // Mark this raster as valid (not null) in the main struct + self.main_builder.append(true); Ok(()) } /// Append raster metadata from a MetadataRef trait object fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { + let metadata_builder = self.main_builder + .field_builder::(raster_indices::METADATA) + .unwrap(); + // Width - self.metadata_builder + metadata_builder .field_builder::(metadata_indices::WIDTH) .unwrap() .append_value(metadata.width()); // Height - self.metadata_builder + metadata_builder .field_builder::(metadata_indices::HEIGHT) .unwrap() .append_value(metadata.height()); // Geotransform parameters - self.metadata_builder + metadata_builder .field_builder::(metadata_indices::UPPERLEFT_X) .unwrap() .append_value(metadata.upper_left_x()); - self.metadata_builder + metadata_builder .field_builder::(metadata_indices::UPPERLEFT_Y) .unwrap() .append_value(metadata.upper_left_y()); - self.metadata_builder + metadata_builder .field_builder::(metadata_indices::SCALE_X) .unwrap() .append_value(metadata.scale_x()); - self.metadata_builder + metadata_builder .field_builder::(metadata_indices::SCALE_Y) .unwrap() .append_value(metadata.scale_y()); - self.metadata_builder + metadata_builder .field_builder::(metadata_indices::SKEW_X) .unwrap() .append_value(metadata.skew_x()); - 
self.metadata_builder + metadata_builder .field_builder::(metadata_indices::SKEW_Y) .unwrap() .append_value(metadata.skew_y()); - self.metadata_builder.append(true); + metadata_builder.append(true); Ok(()) } /// Set the CRS for the current raster pub fn set_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { + let crs_builder = self.main_builder + .field_builder::(raster_indices::CRS) + .unwrap(); match crs { - Some(crs_data) => self.crs_builder.append_value(crs_data), - None => self.crs_builder.append_null(), + Some(crs_data) => crs_builder.append_value(crs_data), + None => crs_builder.append_null(), } Ok(()) } /// Append a bounding box to the current raster pub fn append_bounding_box(&mut self, bbox: Option<&BoundingBox>) -> Result<(), ArrowError> { + let bbox_builder = self.main_builder + .field_builder::(raster_indices::BBOX) + .unwrap(); + if let Some(bbox) = bbox { - self.bbox_builder + bbox_builder .field_builder::(bounding_box_indices::MIN_X) .unwrap() .append_value(bbox.min_x); - self.bbox_builder + bbox_builder .field_builder::(bounding_box_indices::MIN_Y) .unwrap() .append_value(bbox.min_y); - self.bbox_builder + bbox_builder .field_builder::(bounding_box_indices::MAX_X) .unwrap() .append_value(bbox.max_x); - self.bbox_builder + bbox_builder .field_builder::(bounding_box_indices::MAX_Y) .unwrap() .append_value(bbox.max_y); - self.bbox_builder.append(true); + bbox_builder.append(true); } else { // Append null bounding box - need to fill in null values for all fields - self.bbox_builder + bbox_builder .field_builder::(bounding_box_indices::MIN_X) .unwrap() .append_null(); - self.bbox_builder + bbox_builder .field_builder::(bounding_box_indices::MIN_Y) .unwrap() .append_null(); - self.bbox_builder + bbox_builder .field_builder::(bounding_box_indices::MAX_X) .unwrap() .append_null(); - self.bbox_builder + bbox_builder .field_builder::(bounding_box_indices::MAX_Y) .unwrap() .append_null(); - self.bbox_builder.append(false); + 
bbox_builder.append(false); } Ok(()) } /// Append a null raster pub fn append_null(&mut self) -> Result<(), ArrowError> { - self.metadata_builder.append(false); - self.crs_builder.append_null(); - self.bbox_builder.append(false); - self.bands_builder.append(false); + // Since metadata fields are non-nullable, provide default values + let metadata_builder = self.main_builder + .field_builder::(raster_indices::METADATA) + .unwrap(); + + metadata_builder + .field_builder::(metadata_indices::WIDTH) + .unwrap() + .append_value(0u64); + + metadata_builder + .field_builder::(metadata_indices::HEIGHT) + .unwrap() + .append_value(0u64); + + metadata_builder + .field_builder::(metadata_indices::UPPERLEFT_X) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::UPPERLEFT_Y) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::SCALE_X) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::SCALE_Y) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::SKEW_X) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::SKEW_Y) + .unwrap() + .append_value(0.0f64); + + // Mark the metadata struct as valid since it has valid values + metadata_builder.append(true); + + // Append null CRS (now using StringBuilder instead of StringViewBuilder) + let crs_builder = self.main_builder + .field_builder::(raster_indices::CRS) + .unwrap(); + crs_builder.append_null(); + + // Append null bounding box + self.append_bounding_box(None)?; + + // Append null bands + let bands_builder = self.main_builder + .field_builder::>(raster_indices::BANDS) + .unwrap(); + bands_builder.append(false); + + // Mark this raster as null in the main struct + self.main_builder.append(false); + Ok(()) } /// Finish building and return the constructed StructArray pub fn finish(mut self) -> Result { - let metadata_array = 
self.metadata_builder.finish(); - let crs_array = self.crs_builder.finish(); - let bbox_array = self.bbox_builder.finish(); - let bands_array = self.bands_builder.finish(); - - let fields = RasterSchema::fields(); - let arrays: Vec = vec![ - Arc::new(metadata_array), - Arc::new(crs_array), - Arc::new(bbox_array), - Arc::new(bands_array), - ]; - - Ok(StructArray::new(fields, arrays, None)) + Ok(self.main_builder.finish()) } } @@ -1223,7 +1303,7 @@ impl ExactSizeIterator for BandIterator<'_> {} /// Implementation of RasterRef for complete raster access pub struct RasterRefImpl<'a> { metadata: MetadataRefImpl<'a>, - crs: &'a StringViewArray, + crs: &'a StringArray, bbox: &'a StructArray, bands: BandsRefImpl<'a>, } @@ -1240,7 +1320,7 @@ impl<'a> RasterRefImpl<'a> { let crs = raster_struct .column(raster_indices::CRS) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let bbox = raster_struct From 809923de8611f00ef59813b2ca0e4ce4d25421b2 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Tue, 21 Oct 2025 09:08:52 -0700 Subject: [PATCH 12/18] adding gdal --- Cargo.lock | 126 +- Cargo.toml | 5 + rust/sedona-functions/src/executor.rs | 14 +- rust/sedona-functions/src/rs_value.rs | 32 +- rust/sedona-functions/src/rs_width.rs | 26 +- rust/sedona-gdal/Cargo.lock | 2994 ++++++++++++++++++ rust/sedona-gdal/Cargo.toml | 28 + rust/sedona-gdal/src/dataset.rs | 32 + rust/sedona-gdal/src/lib.rs | 2 + rust/sedona-gdal/src/rs_value.rs | 338 ++ rust/sedona-raster/Cargo.toml | 5 +- rust/sedona-raster/src/datatype_functions.rs | 72 + rust/sedona-raster/src/lib.rs | 1 + 13 files changed, 3631 insertions(+), 44 deletions(-) create mode 100644 rust/sedona-gdal/Cargo.lock create mode 100644 rust/sedona-gdal/Cargo.toml create mode 100644 rust/sedona-gdal/src/dataset.rs create mode 100644 rust/sedona-gdal/src/lib.rs create mode 100644 rust/sedona-gdal/src/rs_value.rs create mode 100644 rust/sedona-raster/src/datatype_functions.rs diff --git 
a/Cargo.lock b/Cargo.lock index 7eca687c..3b4dfbd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -950,22 +950,25 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.71.1" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.13.0", + "itertools 0.11.0", + "lazy_static", + "lazycell", "log", "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn 2.0.106", + "which", ] [[package]] @@ -983,7 +986,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.1", "shlex", "syn 2.0.106", ] @@ -1471,7 +1474,7 @@ dependencies = [ "crossterm_winapi", "document-features", "parking_lot", - "rustix", + "rustix 1.1.2", "winapi", ] @@ -2404,7 +2407,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", - "rustix", + "rustix 1.1.2", "windows-sys 0.59.0", ] @@ -2581,6 +2584,34 @@ dependencies = [ "slab", ] +[[package]] +name = "gdal" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82ab834e8be6b54fee3d0141fce5e776ad405add1f9d0da054281926e0d35a9f" +dependencies = [ + "bitflags", + "chrono", + "gdal-sys", + "geo-types", + "libc", + "once_cell", + "semver", + "thiserror 1.0.69", +] + +[[package]] +name = "gdal-sys" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18ad5d608ee6726efcf6e1d91261eb6dec7da3ee7db6bda984cdfb8a7d65ebf9" +dependencies = [ + "bindgen 0.69.5", + "libc", + "pkg-config", + "semver", +] + [[package]] name = "generational-arena" version = "0.2.9" @@ -3321,6 +3352,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] 
+name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "lexical-core" version = "1.0.6" @@ -3489,6 +3532,12 @@ dependencies = [ "cc", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -4209,7 +4258,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror 2.0.17", @@ -4229,7 +4278,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", @@ -4572,6 +4621,12 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -4587,6 +4642,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.1.2" @@ -4596,7 +4664,7 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 
0.11.0", "windows-sys 0.61.2", ] @@ -4907,6 +4975,17 @@ dependencies = [ "wkt 0.14.0", ] +[[package]] +name = "sedona-gdal" +version = "0.2.0" +dependencies = [ + "arrow", + "gdal", + "gdal-sys", + "rstest", + "sedona-raster", +] + [[package]] name = "sedona-geo" version = "0.2.0" @@ -5087,6 +5166,16 @@ dependencies = [ "wkb", ] +[[package]] +name = "sedona-raster" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-schema", + "rstest", + "sedona-schema", +] + [[package]] name = "sedona-s2geography" version = "0.2.0" @@ -5113,6 +5202,7 @@ dependencies = [ name = "sedona-schema" version = "0.2.0" dependencies = [ + "arrow", "arrow-array", "arrow-schema", "datafusion-common", @@ -5584,7 +5674,7 @@ dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix", + "rustix 1.1.2", "windows-sys 0.61.2", ] @@ -6185,6 +6275,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + [[package]] name = "winapi" version = "0.3.9" @@ -6504,7 +6606,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix", + "rustix 1.1.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f314e539..00c9a898 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,9 +27,11 @@ members = [ "rust/sedona-adbc", "rust/sedona-expr", "rust/sedona-functions", + "rust/sedona-gdal", "rust/sedona-geo", "rust/sedona-geometry", "rust/sedona-geoparquet", + "rust/sedona-raster", "rust/sedona-schema", "rust/sedona-spatial-join", "rust/sedona-testing", @@ -90,6 +92,9 @@ mimalloc = { version = "0.1", default-features = false } libmimalloc-sys = { version = "0.1", default-features = false } once_cell = "1.20" +gdal = { version 
= "0.17", features = ["bindgen"] } +gdal-sys = { version = "0.10", features = ["bindgen"] } + geos = { version = "10.0.0", features = ["geo", "v3_10_0"] } geo-types = "0.7.17" diff --git a/rust/sedona-functions/src/executor.rs b/rust/sedona-functions/src/executor.rs index 04b4a020..8eda7e74 100644 --- a/rust/sedona-functions/src/executor.rs +++ b/rust/sedona-functions/src/executor.rs @@ -23,7 +23,7 @@ use datafusion_common::error::Result; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::ColumnarValue; use sedona_common::sedona_internal_err; -use sedona_schema::datatypes::{SedonaType, raster_iterator, RasterRefImpl}; +use sedona_schema::datatypes::{raster_iterator, RasterRefImpl, SedonaType}; use wkb::reader::Wkb; /// Helper for writing general kernel implementations with geometry @@ -116,7 +116,7 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { ColumnarValue::Array(array) => array, ColumnarValue::Scalar(_) => { return Err(DataFusionError::NotImplemented( - "Scalar raster input not yet supported".to_string() + "Scalar raster input not yet supported".to_string(), )); } }; @@ -125,9 +125,9 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { let raster_struct = raster_array .as_any() .downcast_ref::() - .ok_or_else(|| DataFusionError::Internal( - "Expected StructArray for raster data".to_string() - ))?; + .ok_or_else(|| { + DataFusionError::Internal("Expected StructArray for raster data".to_string()) + })?; // Create raster iterator let iterator = raster_iterator(raster_struct); @@ -139,9 +139,7 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { } else { // Get the raster at this index let raster = iterator.get(i).ok_or_else(|| { - DataFusionError::Internal( - format!("Failed to get raster at index {}", i) - ) + DataFusionError::Internal(format!("Failed to get raster at index {}", i)) })?; func(i, Some(raster))?; } diff --git a/rust/sedona-functions/src/rs_value.rs b/rust/sedona-functions/src/rs_value.rs index 55b9b949..c4f7d86b 100644 --- 
a/rust/sedona-functions/src/rs_value.rs +++ b/rust/sedona-functions/src/rs_value.rs @@ -29,8 +29,25 @@ use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; /// RS_Value() scalar UDF implementation /// -/// Extract the value at a given point from the raster -/// TODO: Implement support for point geometry input +/// Extracts the pixel value at a specified location from a raster band. +/// +/// This function samples a raster at the given column and row coordinates (colX, colY) +/// within the specified band. The coordinates are 0-based pixel indices where: +/// - colX: column index (0 to raster width - 1) +/// - colY: row index (0 to raster height - 1) +/// - band: band number (1-based index, where 1 is the first band) +/// +/// Returns Float64 to provide a unified return type that can represent values from +/// different raster data types (UInt8, UInt16, Float32, etc.). Returns null if: +/// - The input raster is null +/// +/// Throws an exception if: +/// - The coordinates are outside the raster bounds +/// - The specified band does not exist +/// +/// TODO: should we return null if the pixel value is nodata? +/// +/// Future versions may support point geometry input for coordinate specification. 
pub fn rs_value_udf() -> SedonaScalarUDF { SedonaScalarUDF::new_stub( "rs_value", @@ -41,12 +58,13 @@ pub fn rs_value_udf() -> SedonaScalarUDF { ArgMatcher::is_numeric(), ArgMatcher::is_numeric(), ], - SedonaType::Arrow(DataType::Boolean), + SedonaType::Arrow(DataType::Float64), ), Volatility::Immutable, Some(rs_value_doc()), ) } + fn rs_value_doc() -> Documentation { Documentation::builder( DOC_SECTION_OTHER, @@ -56,16 +74,12 @@ fn rs_value_doc() -> Documentation { format!("RS_Value (raster: Raster, colX: Integer, colY: Integer, band: Integer)"), ) .with_argument("raster", "Raster: Input raster") - .with_optional_argument("x", "Integer: X coordinate") - .with_optional_argument("y", "Integer: Y coordinate") - .with_optional_argument("point", "Point: Point geometry") + .with_optional_argument("x", "coordinate") + .with_optional_argument("y", "Y coordinate") .with_argument("band_id", "Integer: Band number (1-based index)") .with_sql_example(format!( "SELECT RS_Value(raster, x, y, band_id)", )) - .with_sql_example(format!( - "SELECT RS_Value(raster, point, band_id)", - )) .build() } diff --git a/rust/sedona-functions/src/rs_width.rs b/rust/sedona-functions/src/rs_width.rs index 38d3dec9..62b30ab5 100644 --- a/rust/sedona-functions/src/rs_width.rs +++ b/rust/sedona-functions/src/rs_width.rs @@ -25,8 +25,8 @@ use datafusion_expr::{ }; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_schema::{ - datatypes::{RasterRef, SedonaType}, - matchers::ArgMatcher + datatypes::{RasterRef, SedonaType}, + matchers::ArgMatcher, }; /// RS_Width() scalar UDF implementation @@ -44,15 +44,11 @@ pub fn rs_width_udf() -> SedonaScalarUDF { fn rs_width_doc() -> Documentation { Documentation::builder( DOC_SECTION_OTHER, - format!( - "Return the width component of a raster", - ), + format!("Return the width component of a raster",), format!("RS_Width(raster: Raster)"), ) .with_argument("raster", "Raster: Input raster") - .with_sql_example(format!( - "SELECT 
RS_Width(raster)", - )) + .with_sql_example(format!("SELECT RS_Width(raster)",)) .build() } @@ -112,18 +108,18 @@ mod tests { fn udf_invoke() { // Create test rasters with different widths let raster_array = create_test_raster_array(); - + // Create the UDF and invoke it let kernel = RS_Width {}; let args = vec![ColumnarValue::Array(raster_array)]; let arg_types = vec![RASTER]; - + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); - + // Check the result if let ColumnarValue::Array(result_array) = result { let width_array = result_array.as_any().downcast_ref::().unwrap(); - + assert_eq!(width_array.len(), 3); assert_eq!(width_array.value(0), 10); // First raster width assert!(width_array.is_null(1)); // Second raster is null @@ -134,9 +130,11 @@ mod tests { } /// Create a test raster array with different widths for testing + // TODO: Parameterize the creation of rasters and move the + // function to sedona-testing fn create_test_raster_array() -> ArrayRef { - let mut builder = RasterBuilder::new(10); - + let mut builder = RasterBuilder::new(3); + // First raster: 10x12 let metadata1 = RasterMetadata { width: 10, diff --git a/rust/sedona-gdal/Cargo.lock b/rust/sedona-gdal/Cargo.lock new file mode 100644 index 00000000..8271b820 --- /dev/null +++ b/rust/sedona-gdal/Cargo.lock @@ -0,0 +1,2994 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "arrow" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", +] + 
+[[package]] +name = "arrow-array" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" +dependencies = [ + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", + "zstd", +] + +[[package]] +name = "arrow-json" +version = "55.2.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-ord" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-row" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" +dependencies = [ + "bitflags", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-select" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bigdecimal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.107", +] + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "num-traits", + "windows-link", +] + +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "comfy-table" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" 
+dependencies = [ + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" 
+version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69dfeda1633bf8ec75b068d9f6c27cdc392ffcf5ff83128d5dbab65b73c1fd02" +dependencies = [ + "arrow", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "rand", + "regex", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", +] + +[[package]] +name = "datafusion-catalog" +version = "49.0.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2848fd1e85e2953116dab9cc2eb109214b0888d7bbd2230e30c07f1794f642c0" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051a1634628c2d1296d4e326823e7536640d87a118966cdaff069b68821ad53b" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765e4ad4ef7a4500e389a3f1e738791b71ff4c29fd00912c2f541d62b25da096" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc", + "base64", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "libc", + "log", + "object_store", + "paste", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40a2ae8393051ce25d232a6065c4558ab5a535c9637d5373bacfd464ac88ea12" +dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90cd841a77f378bc1a5c4a1c37345e1885a9203b008203f9f4b3a769729bf330" 
+dependencies = [ + "arrow", + "async-trait", + "bytes", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand", + "tokio", + "url", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77f4a2c64939c6f0dd15b246723a699fa30d59d0133eb36a86e8ff8c6e2a8dc6" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11387aaf931b2993ad9273c63ddca33f05aef7d02df9b70fb757429b4b71cdae" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ff336d1d755399753a9e4fbab001180e346fc8bfa063a97f1214b82274c00f8" + +[[package]] +name = "datafusion-execution" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"042ea192757d1b2d7dcf71643e7ff33f6542c7704f00228d8b85b40003fd8e0f" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025222545d6d7fab71e2ae2b356526a1df67a2872222cbae7535e557a42abd2e" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c620d105aa208fcee45c588765483314eb415f5571cfd6c1bae3a59c5b4d15bb" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools 0.14.0", + "log", + "rand", + "regex", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35f61d5198a35ed368bf3aacac74f0d0fa33de7a7cb0c57e9f68ab1346d2f952" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + 
"datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-table" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecf156589cc21ef59fe39c7a9a841b4a97394549643bbfa88cc44e8588cf8fe5" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edcb25e3e369f1366ec9a261456e45b5aad6ea1c0c8b4ce546587207c501ed9e" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8996a8e11174d0bd7c62dc2f316485affc6ae5ffd5b8a68b508137ace2310294" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ee8d1be549eb7316f437035f2cec7ec42aba8374096d807c4de006a3b5d78a" +dependencies = [ + "datafusion-expr", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "datafusion-optimizer" +version = "49.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9fa98671458254928af854e5f6c915e66b860a8bde505baea0ff2892deab74d" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "indexmap", + "itertools 0.14.0", + "log", + "regex", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3515d51531cca5f7b5a6f3ea22742b71bb36fc378b465df124ff9a2fa349b002" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.14.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9da411a0a64702f941a12af2b979434d14ec5d36c6f49296966b2c7639cbb3a" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-pruning", + "itertools 0.14.0", + "log", +] + +[[package]] +name = "datafusion-physical-plan" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6d168282bb7b54880bb3159f89b51c047db4287f5014d60c3ef4c6e1468212b" +dependencies = [ + "ahash", + "arrow", + "arrow-ord", 
+ "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-pruning" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", +] + +[[package]] +name = "datafusion-session" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053201c2bb729c7938f85879034df2b5a52cfaba16f1b3b66ab8505c81b2aad3" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9082779be8ce4882189b229c0cff4393bd0808282a7194130c9f32159f185e25" +dependencies = [ + "arrow", + "bigdecimal", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + 
"syn 2.0.107", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "flatbuffers" +version = "25.9.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = 
"0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gdal" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e721cea67b420fd4b5cb15ba8145f2f1d3a6931a27fdbfadb46cff02015e1cde" +dependencies = [ + "bitflags", + "chrono", + "gdal-sys", + "geo-types", + "semver", + "thiserror 2.0.17", +] + +[[package]] +name = "gdal-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "febef67dc08a956a9ecb04de2b40dbd15ad56be49421aad9ae0cdcbe9a24166c" +dependencies = [ + "bindgen", + "pkg-config", + "semver", +] + +[[package]] +name = "geo-traits" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + +[[package]] +name = "geo-types" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99" +dependencies = [ + "approx", + "num-traits", + "serde", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + 
"itoa", +] + +[[package]] +name = "humantime" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" + +[[package]] +name = 
"icu_properties" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "potential_utf", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" + +[[package]] +name = "icu_provider" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +dependencies = [ + "displaydoc", + "icu_locale_core", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +dependencies = [ + "equivalent", + "hashbrown 0.16.0", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" +dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = 
"lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "litemap" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] 
+name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "object_store" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "http", + "humantime", + "itertools 0.14.0", + 
"parking_lot", + "percent-encoding", + "thiserror 2.0.17", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap", + "serde", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "potential_utf" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.107", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + +[[package]] +name = "rstest" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03e905296805ab93e13c1ec3a03f4b6c4f35e9498a3d5fa96dc626d22c03cd89" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", + "rustc_version", +] + +[[package]] +name = "rstest_macros" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef0053bbffce09062bee4bcc499b0fbe7a57b879f1efe088d6d8d4c7adcdef9b" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.107", + "unicode-ident", +] + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sedona-common" +version = "0.2.0" +dependencies = [ + "datafusion", + "datafusion-common", + "regex", +] + +[[package]] +name = "sedona-expr" +version = "0.2.0" +dependencies = [ + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "geo-traits", + "sedona-common", + "sedona-geometry", + "sedona-schema", + "serde", + "serde_json", +] + +[[package]] +name = "sedona-functions" +version = "0.2.0" +dependencies = [ + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "geo-traits", + "sedona-common", + "sedona-expr", + "sedona-geometry", + "sedona-schema", + "serde_json", + "wkb", + "wkt", +] + +[[package]] +name = "sedona-gdal" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "gdal", + "rstest", + "sedona-expr", + "sedona-functions", + "sedona-raster", + "sedona-schema", +] + +[[package]] +name = "sedona-geometry" +version = "0.2.0" +dependencies = [ + "geo-traits", + "lru", + "serde", + "serde_with", + "thiserror 2.0.17", + "wkb", +] + +[[package]] +name = "sedona-raster" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-schema", + "sedona-schema", +] + +[[package]] +name = "sedona-schema" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "sedona-common", + 
"serde_json", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "serde_with" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" +dependencies = [ + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + 
+[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "sqlparser" +version = "0.55.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] 
+name = "syn" +version = "2.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +dependencies = [ + "bytes", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "toml_datetime" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "url" +version = "2.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +dependencies = [ + "getrandom 0.3.4", + "js-sys", + "wasm-bindgen", 
+] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn 2.0.107", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "wkb" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "908e92c755a5f5ff8781c1c7ebcedb60ce5443879d20f4a0b6a1ee8fb3e6dfb6" +dependencies = [ + "byteorder", + 
"geo-traits", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +dependencies = [ + "geo-traits", + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + +[[package]] +name = "writeable" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" + +[[package]] +name = "yoke" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/rust/sedona-gdal/Cargo.toml b/rust/sedona-gdal/Cargo.toml new file mode 100644 index 00000000..52730d55 --- /dev/null +++ b/rust/sedona-gdal/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "sedona-gdal" +version.workspace = true +homepage.workspace = true +repository.workspace = true +description.workspace = true 
+readme.workspace = true +edition.workspace = true +rust-version.workspace = true + +[lints.clippy] +result_large_err = "allow" + +[dev-dependencies] +rstest = { workspace = true } + +[dependencies] +arrow = { workspace = true } +arrow-array = { workspace = true } +arrow-schema = { workspace = true } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +gdal = {workspace = true} +gdal-sys = {workspace = true} +sedona-expr = { path = "../sedona-expr" } +sedona-functions = { path = "../sedona-functions" } +sedona-raster = { path = "../sedona-raster" } +sedona-schema = { path = "../sedona-schema" } diff --git a/rust/sedona-gdal/src/dataset.rs b/rust/sedona-gdal/src/dataset.rs new file mode 100644 index 00000000..916bdf2d --- /dev/null +++ b/rust/sedona-gdal/src/dataset.rs @@ -0,0 +1,32 @@ +use arrow_schema::ArrowError; +use gdal::{Dataset}; +use sedona_schema::datatypes::{BandMetadataRef, StorageType}; + +/// Get the out-db dataset reference from a raster band. +pub fn get_outdb_dataset(metadata: &dyn BandMetadataRef) -> Result { + if metadata.storage_type() != StorageType::OutDbRef { + return Err(ArrowError::ParseError( + "Raster band is not stored out-of-db".to_string(), + )); + } + + let url = match metadata.outdb_url() { + Some(url) => url, + None => { + return Err(ArrowError::ParseError( + "Raster band does not have an out-db URL".to_string(), + )) + } + }; + + + // These datasets may appear in multiple rasters and called repeatedly. + // Adding a caching layer here would improve performance. 
+ open_outdb_band(&url) +} + +fn open_outdb_band(url: &str) -> Result { + let full_url = format!("/vsicurl/{}", url); + let ds = Dataset::open(full_url).map_err(|e| ArrowError::ParseError(e.to_string()))?; + Ok(ds) +} \ No newline at end of file diff --git a/rust/sedona-gdal/src/lib.rs b/rust/sedona-gdal/src/lib.rs new file mode 100644 index 00000000..912e3e24 --- /dev/null +++ b/rust/sedona-gdal/src/lib.rs @@ -0,0 +1,2 @@ +pub mod rs_value; +mod dataset; \ No newline at end of file diff --git a/rust/sedona-gdal/src/rs_value.rs b/rust/sedona-gdal/src/rs_value.rs new file mode 100644 index 00000000..ecd2b314 --- /dev/null +++ b/rust/sedona-gdal/src/rs_value.rs @@ -0,0 +1,338 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+use std::sync::Arc; + +use arrow_array::builder::Float64Builder; +use arrow_schema::{ArrowError, DataType}; +use datafusion_common::{error::Result, scalar::ScalarValue}; +use datafusion_expr::ColumnarValue; +use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; +use sedona_functions::executor::RasterExecutor; +use sedona_schema::datatypes::{BandMetadataRef, BandRef, RasterRef, SedonaType}; +use sedona_raster::datatype_functions::{bytes_per_pixel, read_pixel_value}; + +/// RS_Value() implementation using [DistanceExt] +pub fn rs_value_impl() -> ScalarKernelRef { + Arc::new(RSValue {}) +} + +#[derive(Debug)] +struct RSValue {} + +impl SedonaScalarKernel for RSValue { + fn return_type(&self, _arg_types: &[SedonaType]) -> Result, datafusion_common::DataFusionError> { + Ok(Some(SedonaType::Arrow(DataType::Float64))) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + + // Extract coordinate and band arguments as scalars + let x = match &args[1] { + ColumnarValue::Scalar(scalar) => { + let val = scalar.cast_to(&DataType::Int64).map_err(|e| datafusion_common::DataFusionError::Execution(format!("Failed to cast x coordinate: {}", e)))?; + match val { + ScalarValue::Int64(Some(v)) => v as usize, + _ => return Err(datafusion_common::DataFusionError::NotImplemented("Invalid x coordinate".to_string())), + } + }, + _ => return Err(datafusion_common::DataFusionError::NotImplemented("Array x coordinates not supported".to_string())), + }; + let y = match &args[2] { + ColumnarValue::Scalar(scalar) => { + let val = scalar.cast_to(&DataType::Int64).map_err(|e| datafusion_common::DataFusionError::Execution(format!("Failed to cast y coordinate: {}", e)))?; + match val { + ScalarValue::Int64(Some(v)) => v as usize, + _ => return Err(datafusion_common::DataFusionError::NotImplemented("Invalid y coordinate".to_string())), + } + }, + _ => return 
Err(datafusion_common::DataFusionError::NotImplemented("Array y coordinates not supported".to_string())), + }; + let band_index = match &args[3] { + ColumnarValue::Scalar(scalar) => { + let val = scalar.cast_to(&DataType::Int64).map_err(|e| datafusion_common::DataFusionError::Execution(format!("Failed to cast band index: {}", e)))?; + match val { + ScalarValue::Int64(Some(v)) => (v as usize).saturating_sub(1), + _ => return Err(datafusion_common::DataFusionError::NotImplemented("Invalid band index".to_string())), + } + }, + _ => return Err(datafusion_common::DataFusionError::NotImplemented("Array band numbers not supported".to_string())), + }; + + let mut builder = Float64Builder::with_capacity(executor.num_iterations()); + + executor.execute_raster_void(|_i, raster_opt| { + match raster_opt { + None => builder.append_null(), + Some(raster) => { + match invoke_scalar(&raster, x, y, band_index) { + Ok(value) => builder.append_value(value), + Err(_) => builder.append_null(), // Handle errors by appending null + } + } + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +fn invoke_scalar(raster: &dyn RasterRef, x: usize, y: usize, band_index: usize) -> Result { + // Extract metadata from the raster + let metadata = raster.metadata(); + let width = metadata.width() as usize; + let height = metadata.height() as usize; + + // Check that x,y are within width/height + if x >= width || y >= height { + return Err(ArrowError::InvalidArgumentError( + "Coordinates are outside raster bounds".to_string(), + )); + } + + // Get the band + let bands = raster.bands(); + if band_index >= bands.len() { + return Err(ArrowError::InvalidArgumentError( + "Specified band does not exist".to_string(), + )); + } + let band = bands.band(band_index).ok_or_else(|| ArrowError::InvalidArgumentError( + "Failed to get band at index".to_string(), + ))?; + let band_metadata = band.metadata(); + + match band_metadata.storage_type() { + sedona_schema::datatypes::StorageType::InDb 
=> get_indb_pixel(band_metadata, &*band, x, y, width, height), + sedona_schema::datatypes::StorageType::OutDbRef => get_outdb_pixel(band_metadata, x, y, width, height), + } +} + +fn get_indb_pixel(metadata: &dyn BandMetadataRef, band: &dyn BandRef, x: usize, y: usize, width: usize, _height: usize) -> Result { + if let Some(_nodata_bytes) = metadata.nodata_value() { + // TODO: Compare pixel value against nodata value + } + + let data_type = metadata.data_type(); + let bytes_per_px = bytes_per_pixel(data_type.clone())?; + let offset = (y * width + x) * bytes_per_px; + + let band_data = band.data(); + if offset + bytes_per_px > band_data.len() { + return Err(ArrowError::InvalidArgumentError( + "Pixel offset exceeds band data length".to_string(), + )); + } + + let pixel_bytes = &band_data[offset..offset + bytes_per_px]; + read_pixel_value(pixel_bytes, data_type) +} + +fn get_outdb_pixel(metadata: &dyn BandMetadataRef, x: usize, y: usize, _width: usize, _height: usize) -> Result { + use crate::dataset::get_outdb_dataset; + + let dataset = get_outdb_dataset(metadata)?; + + let band_index = match metadata.outdb_band_id() { + Some(index) => index, + None => { + return Err(ArrowError::ParseError( + "Raster band does not have a band index".to_string(), + )) + } + }; + + let band = dataset.rasterband(band_index as usize).map_err(|_| { + ArrowError::ParseError("Failed to get raster band from dataset".to_string()) + })?; + + // Read a single pixel at the specified coordinates + let pixel_data = band.read_as::((x as isize, y as isize), (1, 1), (1, 1), None) + .map_err(|_| ArrowError::ParseError("Failed to read pixel data from GDAL".to_string()))?; + + Ok(pixel_data.data()[0]) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Array, ArrayRef, Float64Array}; + use sedona_schema::datatypes::{BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER}; + + #[test] + fn udf_invoke() { + // Test with different band data types + let band_types = 
vec![ + BandDataType::UInt8, + BandDataType::Int16, + BandDataType::UInt16, + BandDataType::Int32, + BandDataType::UInt32, + BandDataType::Float32, + BandDataType::Float64, + ]; + + for band_data_type in band_types { + println!("Testing with band data type: {:?}", band_data_type); + + // Create test rasters with the current band data type + let raster_array = create_indb_test_raster_array(band_data_type.clone()); + + // Create the UDF and invoke it + let kernel = RSValue {}; + /// Get pixel at (2,3) in band 1 + let args = vec![ + ColumnarValue::Array(raster_array), + ColumnarValue::Scalar(ScalarValue::from(2i64)), + ColumnarValue::Scalar(ScalarValue::from(3i64)), + ColumnarValue::Scalar(ScalarValue::from(1i64)) + ]; + let arg_types = vec![ + RASTER, + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + ]; + + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + // Check the result + if let ColumnarValue::Array(result_array) = result { + let pixel_array = result_array.as_any().downcast_ref::().unwrap(); + + assert_eq!(pixel_array.len(), 3); + + // Expected pixel value at (2,3) for 10x12 raster: row 3 * width 10 + col 2 = 32 + let expected_first = 32.0; + assert_eq!(pixel_array.value(0), expected_first, "Failed for band type {:?}", band_data_type); + assert!(pixel_array.is_null(1), "Second raster should be null for band type {:?}", band_data_type); + + // Expected pixel value at (2,3) for 30x15 raster: row 3 * width 30 + col 2 = 92 + let expected_third = 92.0; + assert_eq!(pixel_array.value(2), expected_third, "Failed for band type {:?}", band_data_type); + } else { + panic!("Expected array result for band type {:?}", band_data_type); + } + } + } + + /// Create a test raster array with different widths for testing + // TODO: Parameterize the creation of rasters and move the + // function to sedona-testing + fn 
create_indb_test_raster_array(band_data_type: BandDataType) -> ArrayRef { + let mut builder = RasterBuilder::new(3); + + // First raster: 10x12 + let metadata1 = RasterMetadata { + width: 10, + height: 12, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: band_data_type.clone(), + outdb_url: None, + outdb_band_id: None, + }; + + builder.start_raster(&metadata1, None, None).unwrap(); + let test_data1 = gen_sequential(10 * 12, band_data_type.clone()); + builder.band_data_writer().append_value(&test_data1); + builder.finish_band(band_metadata.clone()).unwrap(); + builder.finish_raster().unwrap(); + + // Second raster: null + builder.append_null().unwrap(); + + // Third raster: 30x15 + let metadata3 = RasterMetadata { + width: 30, + height: 5, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata3, None, None).unwrap(); + let test_data3 = gen_sequential(30 * 15, band_data_type.clone()); + builder.band_data_writer().append_value(&test_data3); + builder.finish_band(band_metadata).unwrap(); + builder.finish_raster().unwrap(); + + Arc::new(builder.finish().unwrap()) + } + + /// Generates sequential pixel values of BandDataType for testing + /// TODO: Add no-data values for testing + fn gen_sequential(num_pixels: usize, band_data_type: BandDataType) -> Vec { + let bytes_per_px = bytes_per_pixel(band_data_type.clone()).unwrap(); + let total_bytes = num_pixels * bytes_per_px; + let mut data = Vec::with_capacity(total_bytes); + + for i in 0..num_pixels { + match band_data_type { + BandDataType::UInt8 => { + data.push(i as u8); + } + BandDataType::Int16 => { + let bytes = (i as i16).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::UInt16 => { + let 
bytes = (i as u16).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::Int32 => { + let bytes = (i as i32).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::UInt32 => { + let bytes = (i as u32).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::Float32 => { + let bytes = (i as f32).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::Float64 => { + let bytes = (i as f64).to_le_bytes(); + data.extend_from_slice(&bytes); + } + } + } + + data + } + +} diff --git a/rust/sedona-raster/Cargo.toml b/rust/sedona-raster/Cargo.toml index 05ff60c6..965ced8f 100644 --- a/rust/sedona-raster/Cargo.toml +++ b/rust/sedona-raster/Cargo.toml @@ -14,5 +14,8 @@ result_large_err = "allow" [dev-dependencies] rstest = { workspace = true } + [dependencies] -arrow = { workspace = true } \ No newline at end of file +arrow = { workspace = true } +arrow-schema = { workspace = true } +sedona-schema = { path = "../sedona-schema" } \ No newline at end of file diff --git a/rust/sedona-raster/src/datatype_functions.rs b/rust/sedona-raster/src/datatype_functions.rs new file mode 100644 index 00000000..7fbd25be --- /dev/null +++ b/rust/sedona-raster/src/datatype_functions.rs @@ -0,0 +1,72 @@ + +use arrow_schema::ArrowError; +use sedona_schema::datatypes::BandDataType; + +pub fn bytes_per_pixel(data_type: BandDataType) -> Result { + match data_type { + BandDataType::UInt8 => Ok(1), + BandDataType::Int16 => Ok(2), + BandDataType::UInt16 => Ok(2), + BandDataType::Int32 => Ok(4), + BandDataType::UInt32 => Ok(4), + BandDataType::Float32 => Ok(4), + BandDataType::Float64 => Ok(8), + } +} + +/// Extract a pixel value from raw bytes and convert to f64 +pub fn read_pixel_value(bytes: &[u8], data_type: BandDataType) -> Result { + let expected_bytes = bytes_per_pixel(data_type.clone())?; + if bytes.len() != expected_bytes { + return Err(ArrowError::InvalidArgumentError("Invalid byte length for specified data type".to_string())); + } + + 
match data_type { + BandDataType::UInt8 => { + Ok(bytes[0] as f64) + } + BandDataType::Int16 => { + let value = i16::from_le_bytes([bytes[0], bytes[1]]); + Ok(value as f64) + } + BandDataType::UInt16 => { + let value = u16::from_le_bytes([bytes[0], bytes[1]]); + Ok(value as f64) + } + BandDataType::Int32 => { + let value = i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + Ok(value as f64) + } + BandDataType::UInt32 => { + let value = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + Ok(value as f64) + } + BandDataType::Float32 => { + let value = f32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + Ok(value as f64) + } + BandDataType::Float64 => { + let value = f64::from_le_bytes([ + bytes[0], bytes[1], bytes[2], bytes[3], + bytes[4], bytes[5], bytes[6], bytes[7] + ]); + Ok(value) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn basic_bytes_per_pixel_tests() { + assert_eq!(bytes_per_pixel(BandDataType::UInt8).unwrap(), 1); + assert_eq!(bytes_per_pixel(BandDataType::Int16).unwrap(), 2); + assert_eq!(bytes_per_pixel(BandDataType::UInt16).unwrap(), 2); + assert_eq!(bytes_per_pixel(BandDataType::Int32).unwrap(), 4); + assert_eq!(bytes_per_pixel(BandDataType::UInt32).unwrap(), 4); + assert_eq!(bytes_per_pixel(BandDataType::Float32).unwrap(), 4); + assert_eq!(bytes_per_pixel(BandDataType::Float64).unwrap(), 8); + } +} \ No newline at end of file diff --git a/rust/sedona-raster/src/lib.rs b/rust/sedona-raster/src/lib.rs index 8b137891..08b2aa5c 100644 --- a/rust/sedona-raster/src/lib.rs +++ b/rust/sedona-raster/src/lib.rs @@ -1 +1,2 @@ +pub mod datatype_functions; \ No newline at end of file From eabeb52a440a843519f1e1b6c980a72568d3086a Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Tue, 21 Oct 2025 11:51:19 -0700 Subject: [PATCH 13/18] add a temp test for outdb --- rust/sedona-functions/src/executor.rs | 2 +- rust/sedona-functions/src/rs_width.rs | 6 +- 
rust/sedona-gdal/src/dataset.rs | 5 +- rust/sedona-gdal/src/lib.rs | 2 +- rust/sedona-gdal/src/rs_value.rs | 246 +++++++++++++++---- rust/sedona-raster/src/datatype_functions.rs | 16 +- rust/sedona-raster/src/lib.rs | 3 +- rust/sedona-schema/src/datatypes.rs | 71 +++--- 8 files changed, 253 insertions(+), 98 deletions(-) diff --git a/rust/sedona-functions/src/executor.rs b/rust/sedona-functions/src/executor.rs index 8eda7e74..200364cd 100644 --- a/rust/sedona-functions/src/executor.rs +++ b/rust/sedona-functions/src/executor.rs @@ -23,7 +23,7 @@ use datafusion_common::error::Result; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::ColumnarValue; use sedona_common::sedona_internal_err; -use sedona_schema::datatypes::{raster_iterator, RasterRefImpl, SedonaType}; +use sedona_schema::datatypes::{raster_iterator, SedonaType}; use wkb::reader::Wkb; /// Helper for writing general kernel implementations with geometry diff --git a/rust/sedona-functions/src/rs_width.rs b/rust/sedona-functions/src/rs_width.rs index 62b30ab5..482faada 100644 --- a/rust/sedona-functions/src/rs_width.rs +++ b/rust/sedona-functions/src/rs_width.rs @@ -35,7 +35,7 @@ use sedona_schema::{ pub fn rs_width_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( "rs_width", - vec![Arc::new(RS_Width {})], + vec![Arc::new(RsWidth {})], Volatility::Immutable, Some(rs_width_doc()), ) @@ -53,9 +53,9 @@ fn rs_width_doc() -> Documentation { } #[derive(Debug)] -struct RS_Width {} +struct RsWidth {} -impl SedonaScalarKernel for RS_Width { +impl SedonaScalarKernel for RsWidth { fn return_type(&self, args: &[SedonaType]) -> Result> { let matcher = ArgMatcher::new( vec![ArgMatcher::is_raster()], diff --git a/rust/sedona-gdal/src/dataset.rs b/rust/sedona-gdal/src/dataset.rs index 916bdf2d..f60a7615 100644 --- a/rust/sedona-gdal/src/dataset.rs +++ b/rust/sedona-gdal/src/dataset.rs @@ -1,5 +1,5 @@ use arrow_schema::ArrowError; -use gdal::{Dataset}; +use gdal::Dataset; use 
sedona_schema::datatypes::{BandMetadataRef, StorageType}; /// Get the out-db dataset reference from a raster band. @@ -19,7 +19,6 @@ pub fn get_outdb_dataset(metadata: &dyn BandMetadataRef) -> Result Result { let full_url = format!("/vsicurl/{}", url); let ds = Dataset::open(full_url).map_err(|e| ArrowError::ParseError(e.to_string()))?; Ok(ds) -} \ No newline at end of file +} diff --git a/rust/sedona-gdal/src/lib.rs b/rust/sedona-gdal/src/lib.rs index 912e3e24..bb827615 100644 --- a/rust/sedona-gdal/src/lib.rs +++ b/rust/sedona-gdal/src/lib.rs @@ -1,2 +1,2 @@ +mod dataset; pub mod rs_value; -mod dataset; \ No newline at end of file diff --git a/rust/sedona-gdal/src/rs_value.rs b/rust/sedona-gdal/src/rs_value.rs index ecd2b314..03f6c95c 100644 --- a/rust/sedona-gdal/src/rs_value.rs +++ b/rust/sedona-gdal/src/rs_value.rs @@ -22,8 +22,8 @@ use datafusion_common::{error::Result, scalar::ScalarValue}; use datafusion_expr::ColumnarValue; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::RasterExecutor; -use sedona_schema::datatypes::{BandMetadataRef, BandRef, RasterRef, SedonaType}; use sedona_raster::datatype_functions::{bytes_per_pixel, read_pixel_value}; +use sedona_schema::datatypes::{BandMetadataRef, BandRef, RasterRef, SedonaType}; /// RS_Value() implementation using [DistanceExt] pub fn rs_value_impl() -> ScalarKernelRef { @@ -34,7 +34,10 @@ pub fn rs_value_impl() -> ScalarKernelRef { struct RSValue {} impl SedonaScalarKernel for RSValue { - fn return_type(&self, _arg_types: &[SedonaType]) -> Result, datafusion_common::DataFusionError> { + fn return_type( + &self, + _arg_types: &[SedonaType], + ) -> Result, datafusion_common::DataFusionError> { Ok(Some(SedonaType::Arrow(DataType::Float64))) } @@ -48,33 +51,72 @@ impl SedonaScalarKernel for RSValue { // Extract coordinate and band arguments as scalars let x = match &args[1] { ColumnarValue::Scalar(scalar) => { - let val = 
scalar.cast_to(&DataType::Int64).map_err(|e| datafusion_common::DataFusionError::Execution(format!("Failed to cast x coordinate: {}", e)))?; + let val = scalar.cast_to(&DataType::Int64).map_err(|e| { + datafusion_common::DataFusionError::Execution(format!( + "Failed to cast x coordinate: {}", + e + )) + })?; match val { ScalarValue::Int64(Some(v)) => v as usize, - _ => return Err(datafusion_common::DataFusionError::NotImplemented("Invalid x coordinate".to_string())), + _ => { + return Err(datafusion_common::DataFusionError::NotImplemented( + "Invalid x coordinate".to_string(), + )) + } } - }, - _ => return Err(datafusion_common::DataFusionError::NotImplemented("Array x coordinates not supported".to_string())), + } + _ => { + return Err(datafusion_common::DataFusionError::NotImplemented( + "Array x coordinates not supported".to_string(), + )) + } }; let y = match &args[2] { ColumnarValue::Scalar(scalar) => { - let val = scalar.cast_to(&DataType::Int64).map_err(|e| datafusion_common::DataFusionError::Execution(format!("Failed to cast y coordinate: {}", e)))?; + let val = scalar.cast_to(&DataType::Int64).map_err(|e| { + datafusion_common::DataFusionError::Execution(format!( + "Failed to cast y coordinate: {}", + e + )) + })?; match val { ScalarValue::Int64(Some(v)) => v as usize, - _ => return Err(datafusion_common::DataFusionError::NotImplemented("Invalid y coordinate".to_string())), + _ => { + return Err(datafusion_common::DataFusionError::NotImplemented( + "Invalid y coordinate".to_string(), + )) + } } - }, - _ => return Err(datafusion_common::DataFusionError::NotImplemented("Array y coordinates not supported".to_string())), + } + _ => { + return Err(datafusion_common::DataFusionError::NotImplemented( + "Array y coordinates not supported".to_string(), + )) + } }; let band_index = match &args[3] { ColumnarValue::Scalar(scalar) => { - let val = scalar.cast_to(&DataType::Int64).map_err(|e| datafusion_common::DataFusionError::Execution(format!("Failed to cast band 
index: {}", e)))?; + let val = scalar.cast_to(&DataType::Int64).map_err(|e| { + datafusion_common::DataFusionError::Execution(format!( + "Failed to cast band index: {}", + e + )) + })?; match val { ScalarValue::Int64(Some(v)) => (v as usize).saturating_sub(1), - _ => return Err(datafusion_common::DataFusionError::NotImplemented("Invalid band index".to_string())), + _ => { + return Err(datafusion_common::DataFusionError::NotImplemented( + "Invalid band index".to_string(), + )) + } } - }, - _ => return Err(datafusion_common::DataFusionError::NotImplemented("Array band numbers not supported".to_string())), + } + _ => { + return Err(datafusion_common::DataFusionError::NotImplemented( + "Array band numbers not supported".to_string(), + )) + } }; let mut builder = Float64Builder::with_capacity(executor.num_iterations()); @@ -96,19 +138,24 @@ impl SedonaScalarKernel for RSValue { } } -fn invoke_scalar(raster: &dyn RasterRef, x: usize, y: usize, band_index: usize) -> Result { +fn invoke_scalar( + raster: &dyn RasterRef, + x: usize, + y: usize, + band_index: usize, +) -> Result { // Extract metadata from the raster let metadata = raster.metadata(); let width = metadata.width() as usize; let height = metadata.height() as usize; - + // Check that x,y are within width/height if x >= width || y >= height { return Err(ArrowError::InvalidArgumentError( "Coordinates are outside raster bounds".to_string(), )); } - + // Get the band let bands = raster.bands(); if band_index >= bands.len() { @@ -116,18 +163,29 @@ fn invoke_scalar(raster: &dyn RasterRef, x: usize, y: usize, band_index: usize) "Specified band does not exist".to_string(), )); } - let band = bands.band(band_index).ok_or_else(|| ArrowError::InvalidArgumentError( - "Failed to get band at index".to_string(), - ))?; + let band = bands.band(band_index).ok_or_else(|| { + ArrowError::InvalidArgumentError("Failed to get band at index".to_string()) + })?; let band_metadata = band.metadata(); match band_metadata.storage_type() { - 
sedona_schema::datatypes::StorageType::InDb => get_indb_pixel(band_metadata, &*band, x, y, width, height), - sedona_schema::datatypes::StorageType::OutDbRef => get_outdb_pixel(band_metadata, x, y, width, height), + sedona_schema::datatypes::StorageType::InDb => { + get_indb_pixel(band_metadata, &*band, x, y, width, height) + } + sedona_schema::datatypes::StorageType::OutDbRef => { + get_outdb_pixel(band_metadata, x, y, width, height) + } } } -fn get_indb_pixel(metadata: &dyn BandMetadataRef, band: &dyn BandRef, x: usize, y: usize, width: usize, _height: usize) -> Result { +fn get_indb_pixel( + metadata: &dyn BandMetadataRef, + band: &dyn BandRef, + x: usize, + y: usize, + width: usize, + _height: usize, +) -> Result { if let Some(_nodata_bytes) = metadata.nodata_value() { // TODO: Compare pixel value against nodata value } @@ -135,21 +193,27 @@ fn get_indb_pixel(metadata: &dyn BandMetadataRef, band: &dyn BandRef, x: usize, let data_type = metadata.data_type(); let bytes_per_px = bytes_per_pixel(data_type.clone())?; let offset = (y * width + x) * bytes_per_px; - + let band_data = band.data(); if offset + bytes_per_px > band_data.len() { return Err(ArrowError::InvalidArgumentError( "Pixel offset exceeds band data length".to_string(), )); } - + let pixel_bytes = &band_data[offset..offset + bytes_per_px]; read_pixel_value(pixel_bytes, data_type) } -fn get_outdb_pixel(metadata: &dyn BandMetadataRef, x: usize, y: usize, _width: usize, _height: usize) -> Result { +fn get_outdb_pixel( + metadata: &dyn BandMetadataRef, + x: usize, + y: usize, + _width: usize, + _height: usize, +) -> Result { use crate::dataset::get_outdb_dataset; - + let dataset = get_outdb_dataset(metadata)?; let band_index = match metadata.outdb_band_id() { @@ -160,15 +224,16 @@ fn get_outdb_pixel(metadata: &dyn BandMetadataRef, x: usize, y: usize, _width: u )) } }; - + let band = dataset.rasterband(band_index as usize).map_err(|_| { ArrowError::ParseError("Failed to get raster band from 
dataset".to_string()) })?; - + // Read a single pixel at the specified coordinates - let pixel_data = band.read_as::((x as isize, y as isize), (1, 1), (1, 1), None) + let pixel_data = band + .read_as::((x as isize, y as isize), (1, 1), (1, 1), None) .map_err(|_| ArrowError::ParseError("Failed to read pixel data from GDAL".to_string()))?; - + Ok(pixel_data.data()[0]) } @@ -176,10 +241,49 @@ fn get_outdb_pixel(metadata: &dyn BandMetadataRef, x: usize, y: usize, _width: u mod tests { use super::*; use arrow_array::{Array, ArrayRef, Float64Array}; - use sedona_schema::datatypes::{BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER}; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER, + }; #[test] - fn udf_invoke() { + fn udf_invoke_outdb() { + let raster_array = create_outdb_test_raster_array(); + let kernel = RSValue {}; + // Get pixel at (2,3) in band 1 + let args = vec![ + ColumnarValue::Array(raster_array), + ColumnarValue::Scalar(ScalarValue::from(2i64)), + ColumnarValue::Scalar(ScalarValue::from(3i64)), + ColumnarValue::Scalar(ScalarValue::from(1i64)), + ]; + let arg_types = vec![ + RASTER, + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + ]; + + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + // Check the result + if let ColumnarValue::Array(result_array) = result { + let pixel_array = result_array + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(pixel_array.len(), 1); + + // Expected pixel value at (2,3) for 10x12 raster: row 3 * width 10 + col 2 = 32 + let expected_first = 201.0; + assert_eq!(pixel_array.value(0), expected_first,); + } else { + panic!("Expected array result for outdb"); + } + } + + #[test] + fn udf_invoke_indb_all_band_types() { // Test with different band data types let band_types = 
vec![ BandDataType::UInt8, @@ -192,19 +296,15 @@ mod tests { ]; for band_data_type in band_types { - println!("Testing with band data type: {:?}", band_data_type); - - // Create test rasters with the current band data type let raster_array = create_indb_test_raster_array(band_data_type.clone()); - // Create the UDF and invoke it let kernel = RSValue {}; - /// Get pixel at (2,3) in band 1 + // Get pixel at (2,3) in band 1 let args = vec![ ColumnarValue::Array(raster_array), ColumnarValue::Scalar(ScalarValue::from(2i64)), ColumnarValue::Scalar(ScalarValue::from(3i64)), - ColumnarValue::Scalar(ScalarValue::from(1i64)) + ColumnarValue::Scalar(ScalarValue::from(1i64)), ]; let arg_types = vec![ RASTER, @@ -217,18 +317,35 @@ mod tests { // Check the result if let ColumnarValue::Array(result_array) = result { - let pixel_array = result_array.as_any().downcast_ref::().unwrap(); + let pixel_array = result_array + .as_any() + .downcast_ref::() + .unwrap(); assert_eq!(pixel_array.len(), 3); - + // Expected pixel value at (2,3) for 10x12 raster: row 3 * width 10 + col 2 = 32 let expected_first = 32.0; - assert_eq!(pixel_array.value(0), expected_first, "Failed for band type {:?}", band_data_type); - assert!(pixel_array.is_null(1), "Second raster should be null for band type {:?}", band_data_type); - - // Expected pixel value at (2,3) for 30x15 raster: row 3 * width 30 + col 2 = 92 + assert_eq!( + pixel_array.value(0), + expected_first, + "Failed for band type {:?}", + band_data_type + ); + assert!( + pixel_array.is_null(1), + "Second raster should be null for band type {:?}", + band_data_type + ); + + // Expected pixel value at (2,3) for 30x15 raster: row 3 * width 30 + col 2 = 92 let expected_third = 92.0; - assert_eq!(pixel_array.value(2), expected_third, "Failed for band type {:?}", band_data_type); + assert_eq!( + pixel_array.value(2), + expected_third, + "Failed for band type {:?}", + band_data_type + ); } else { panic!("Expected array result for band type {:?}", 
band_data_type); } @@ -303,7 +420,7 @@ mod tests { for i in 0..num_pixels { match band_data_type { BandDataType::UInt8 => { - data.push(i as u8); + data.push(i as u8); } BandDataType::Int16 => { let bytes = (i as i16).to_le_bytes(); @@ -335,4 +452,39 @@ mod tests { data } + fn create_outdb_test_raster_array() -> ArrayRef { + // TODO: Unit tests should not query external resources. + // This function is for proof-of-concept purposes only. + let url = "https://sentinel-cogs.s3.amazonaws.com/sentinel-s2-l2a-cogs/1/C/CV/2018/10/S2B_1CCV_20181004_0_L2A/AOT.tif"; + let mut builder = RasterBuilder::new(3); + + // First raster: 10x12 + let metadata1 = RasterMetadata { + width: 10, + height: 12, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::OutDbRef, + datatype: BandDataType::UInt16, + outdb_url: Some(url.to_string()), + outdb_band_id: Some(1), + }; + + builder.start_raster(&metadata1, None, None).unwrap(); + let test_data1 = vec![0u8]; + builder.band_data_writer().append_value(&test_data1); + builder.finish_band(band_metadata.clone()).unwrap(); + builder.finish_raster().unwrap(); + + Arc::new(builder.finish().unwrap()) + } } diff --git a/rust/sedona-raster/src/datatype_functions.rs b/rust/sedona-raster/src/datatype_functions.rs index 7fbd25be..d85aeab1 100644 --- a/rust/sedona-raster/src/datatype_functions.rs +++ b/rust/sedona-raster/src/datatype_functions.rs @@ -1,4 +1,3 @@ - use arrow_schema::ArrowError; use sedona_schema::datatypes::BandDataType; @@ -18,13 +17,13 @@ pub fn bytes_per_pixel(data_type: BandDataType) -> Result { pub fn read_pixel_value(bytes: &[u8], data_type: BandDataType) -> Result { let expected_bytes = bytes_per_pixel(data_type.clone())?; if bytes.len() != expected_bytes { - return Err(ArrowError::InvalidArgumentError("Invalid byte length for specified data 
type".to_string())); + return Err(ArrowError::InvalidArgumentError( + "Invalid byte length for specified data type".to_string(), + )); } match data_type { - BandDataType::UInt8 => { - Ok(bytes[0] as f64) - } + BandDataType::UInt8 => Ok(bytes[0] as f64), BandDataType::Int16 => { let value = i16::from_le_bytes([bytes[0], bytes[1]]); Ok(value as f64) @@ -47,8 +46,7 @@ pub fn read_pixel_value(bytes: &[u8], data_type: BandDataType) -> Result { let value = f64::from_le_bytes([ - bytes[0], bytes[1], bytes[2], bytes[3], - bytes[4], bytes[5], bytes[6], bytes[7] + bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], ]); Ok(value) } @@ -58,7 +56,7 @@ pub fn read_pixel_value(bytes: &[u8], data_type: BandDataType) -> Result &mut BinaryBuilder { - let bands_builder = self.main_builder + let bands_builder = self + .main_builder .field_builder::>(raster_indices::BANDS) .unwrap(); let band_builder = bands_builder.values(); // Ensure we have at least one field (band metadata and data) - // Field 0 = metadata (StructBuilder), Field 1 = data (BinaryBuilder) + // Field 0 = metadata (StructBuilder), Field 1 = data (BinaryBuilder) band_builder.field_builder::(1).unwrap() } @@ -621,7 +620,8 @@ impl RasterBuilder { /// TODO: The band_metadata is in the finish in the band call, but in the /// start in the raster call. Make it consistent. 
pub fn finish_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> { - let bands_builder = self.main_builder + let bands_builder = self + .main_builder .field_builder::>(raster_indices::BANDS) .unwrap(); let band_builder = bands_builder.values(); @@ -685,7 +685,8 @@ impl RasterBuilder { /// Finish all bands for the current raster pub fn finish_raster(&mut self) -> Result<(), ArrowError> { - let bands_builder = self.main_builder + let bands_builder = self + .main_builder .field_builder::>(raster_indices::BANDS) .unwrap(); bands_builder.append(true); @@ -696,10 +697,11 @@ impl RasterBuilder { /// Append raster metadata from a MetadataRef trait object fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { - let metadata_builder = self.main_builder + let metadata_builder = self + .main_builder .field_builder::(raster_indices::METADATA) .unwrap(); - + // Width metadata_builder .field_builder::(metadata_indices::WIDTH) @@ -750,7 +752,8 @@ impl RasterBuilder { /// Set the CRS for the current raster pub fn set_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { - let crs_builder = self.main_builder + let crs_builder = self + .main_builder .field_builder::(raster_indices::CRS) .unwrap(); match crs { @@ -762,10 +765,11 @@ impl RasterBuilder { /// Append a bounding box to the current raster pub fn append_bounding_box(&mut self, bbox: Option<&BoundingBox>) -> Result<(), ArrowError> { - let bbox_builder = self.main_builder + let bbox_builder = self + .main_builder .field_builder::(raster_indices::BBOX) .unwrap(); - + if let Some(bbox) = bbox { bbox_builder .field_builder::(bounding_box_indices::MIN_X) @@ -818,45 +822,46 @@ impl RasterBuilder { /// Append a null raster pub fn append_null(&mut self) -> Result<(), ArrowError> { // Since metadata fields are non-nullable, provide default values - let metadata_builder = self.main_builder + let metadata_builder = self + .main_builder 
.field_builder::(raster_indices::METADATA) .unwrap(); - + metadata_builder .field_builder::(metadata_indices::WIDTH) .unwrap() .append_value(0u64); - + metadata_builder .field_builder::(metadata_indices::HEIGHT) .unwrap() .append_value(0u64); - + metadata_builder .field_builder::(metadata_indices::UPPERLEFT_X) .unwrap() .append_value(0.0f64); - + metadata_builder .field_builder::(metadata_indices::UPPERLEFT_Y) .unwrap() .append_value(0.0f64); - + metadata_builder .field_builder::(metadata_indices::SCALE_X) .unwrap() .append_value(0.0f64); - + metadata_builder .field_builder::(metadata_indices::SCALE_Y) .unwrap() .append_value(0.0f64); - + metadata_builder .field_builder::(metadata_indices::SKEW_X) .unwrap() .append_value(0.0f64); - + metadata_builder .field_builder::(metadata_indices::SKEW_Y) .unwrap() @@ -864,18 +869,20 @@ impl RasterBuilder { // Mark the metadata struct as valid since it has valid values metadata_builder.append(true); - + // Append null CRS (now using StringBuilder instead of StringViewBuilder) - let crs_builder = self.main_builder + let crs_builder = self + .main_builder .field_builder::(raster_indices::CRS) .unwrap(); crs_builder.append_null(); - + // Append null bounding box self.append_bounding_box(None)?; - + // Append null bands - let bands_builder = self.main_builder + let bands_builder = self + .main_builder .field_builder::>(raster_indices::BANDS) .unwrap(); bands_builder.append(false); From 898b88029c3c0572094e91939edba0b01ce1c6d9 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Tue, 21 Oct 2025 11:52:21 -0700 Subject: [PATCH 14/18] cargo lock --- Cargo.lock | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 3b4dfbd4..958f1e1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4980,10 +4980,17 @@ name = "sedona-gdal" version = "0.2.0" dependencies = [ "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", "gdal", "gdal-sys", "rstest", 
+ "sedona-expr", + "sedona-functions", "sedona-raster", + "sedona-schema", ] [[package]] From 64c9a3a4f2f802b042a495bd3c58b5cd2b2d67e0 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Tue, 21 Oct 2025 12:54:19 -0700 Subject: [PATCH 15/18] fix warnings --- rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 2 ++ rust/sedona-functions/src/rs_value.rs | 28 ++++++++--------------- rust/sedona-functions/src/rs_width.rs | 2 +- rust/sedona-gdal/src/dataset.rs | 16 +++++++++++++ rust/sedona-gdal/src/lib.rs | 17 ++++++++++++++ rust/sedona-gdal/src/register.rs | 33 +++++++++++++++++++++++++++ rust/sedona-schema/src/datatypes.rs | 12 +++++----- 8 files changed, 85 insertions(+), 26 deletions(-) create mode 100644 rust/sedona-gdal/src/register.rs diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 9fffbd40..c5d4a6a6 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -21,6 +21,7 @@ mod overlay; mod predicates; mod referencing; pub mod register; +mod rs_value; mod rs_width; mod sd_format; pub mod st_analyze_aggr; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index b08338ca..7e1b71fa 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -60,6 +60,7 @@ pub fn default_function_set() -> FunctionSet { crate::predicates::st_within_udf, crate::referencing::st_line_interpolate_point_udf, crate::referencing::st_line_locate_point_udf, + crate::rs_width::rs_width_udf, crate::sd_format::sd_format_udf, crate::st_area::st_area_udf, crate::st_asbinary::st_asbinary_udf, @@ -127,6 +128,7 @@ pub mod stubs { pub use crate::overlay::*; pub use crate::predicates::*; pub use crate::referencing::*; + pub use crate::rs_value::rs_value_udf; pub use crate::st_area::st_area_udf; pub use crate::st_azimuth::st_azimuth_udf; pub use crate::st_centroid::st_centroid_udf; diff --git 
a/rust/sedona-functions/src/rs_value.rs b/rust/sedona-functions/src/rs_value.rs index c4f7d86b..17b33bcf 100644 --- a/rust/sedona-functions/src/rs_value.rs +++ b/rust/sedona-functions/src/rs_value.rs @@ -14,17 +14,11 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use std::{sync::Arc, vec}; +use std::vec; -use crate::executor::WkbExecutor; -use arrow_array::builder::UInt64Builder; use arrow_schema::DataType; -use datafusion_common::error::{DataFusionError, Result}; -use datafusion_expr::{ - scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, -}; -use sedona_common::sedona_internal_err; -use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility}; +use sedona_expr::scalar_udf::SedonaScalarUDF; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; /// RS_Value() scalar UDF implementation @@ -54,7 +48,7 @@ pub fn rs_value_udf() -> SedonaScalarUDF { ArgMatcher::new( vec![ ArgMatcher::is_raster(), - ArgMatcher::is_numeric(), + ArgMatcher::is_numeric(), ArgMatcher::is_numeric(), ArgMatcher::is_numeric(), ], @@ -68,18 +62,14 @@ pub fn rs_value_udf() -> SedonaScalarUDF { fn rs_value_doc() -> Documentation { Documentation::builder( DOC_SECTION_OTHER, - format!( - "Returns the value at the given point in the raster.", - ), + format!("Returns the value at the given point in the raster.",), format!("RS_Value (raster: Raster, colX: Integer, colY: Integer, band: Integer)"), ) .with_argument("raster", "Raster: Input raster") - .with_optional_argument("x", "coordinate") - .with_optional_argument("y", "Y coordinate") + .with_argument("x", "Integer: X coordinate") + .with_argument("y", "Integer: Y coordinate") .with_argument("band_id", "Integer: Band number (1-based index)") - .with_sql_example(format!( - "SELECT RS_Value(raster, x, y, band_id)", - 
)) + .with_sql_example(format!("SELECT RS_Value(raster, x, y, band_id)",)) .build() } @@ -94,4 +84,4 @@ mod tests { assert_eq!(udf.name(), "rs_value"); assert!(udf.documentation().is_some()); } -} \ No newline at end of file +} diff --git a/rust/sedona-functions/src/rs_width.rs b/rust/sedona-functions/src/rs_width.rs index 482faada..7ee3ab47 100644 --- a/rust/sedona-functions/src/rs_width.rs +++ b/rust/sedona-functions/src/rs_width.rs @@ -110,7 +110,7 @@ mod tests { let raster_array = create_test_raster_array(); // Create the UDF and invoke it - let kernel = RS_Width {}; + let kernel = RsWidth {}; let args = vec![ColumnarValue::Array(raster_array)]; let arg_types = vec![RASTER]; diff --git a/rust/sedona-gdal/src/dataset.rs b/rust/sedona-gdal/src/dataset.rs index f60a7615..7917b1c4 100644 --- a/rust/sedona-gdal/src/dataset.rs +++ b/rust/sedona-gdal/src/dataset.rs @@ -1,3 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
use arrow_schema::ArrowError; use gdal::Dataset; use sedona_schema::datatypes::{BandMetadataRef, StorageType}; diff --git a/rust/sedona-gdal/src/lib.rs b/rust/sedona-gdal/src/lib.rs index bb827615..d3a19357 100644 --- a/rust/sedona-gdal/src/lib.rs +++ b/rust/sedona-gdal/src/lib.rs @@ -1,2 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + mod dataset; pub mod rs_value; diff --git a/rust/sedona-gdal/src/register.rs b/rust/sedona-gdal/src/register.rs new file mode 100644 index 00000000..ef8087e4 --- /dev/null +++ b/rust/sedona-gdal/src/register.rs @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use sedona_expr::aggregate_udf::SedonaAccumulatorRef; +use sedona_expr::scalar_udf::ScalarKernelRef; + +use crate::{ + rs_value::rs_value_impl, +}; + +pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { + vec![ + ("rs_value", rs_value_impl()), + ] +} + +pub fn aggregate_kernels() -> Vec<(&'static str, SedonaAccumulatorRef)> { + vec![ + ] +} \ No newline at end of file diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs index 996550d7..72c027fc 100644 --- a/rust/sedona-schema/src/datatypes.rs +++ b/rust/sedona-schema/src/datatypes.rs @@ -14,21 +14,21 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
-use arrow::buffer::{BooleanBuffer, MutableBuffer, NullBuffer}; +use arrow::buffer::MutableBuffer; use arrow_array::{ builder::{ BinaryBuilder, Float64Builder, ListBuilder, StringBuilder, StructBuilder, UInt32Builder, UInt64Builder, }, - Array, ArrayRef, BinaryArray, Float64Array, ListArray, StringArray, StringViewArray, - StructArray, UInt32Array, UInt64Array, + Array, BinaryArray, Float64Array, ListArray, StringArray, StructArray, UInt32Array, + UInt64Array, }; use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use datafusion_common::error::{DataFusionError, Result}; use sedona_common::sedona_internal_err; use serde_json::Value; use std::fmt::{Debug, Display}; -use std::sync::{Arc, LazyLock}; +use std::sync::LazyLock; use crate::crs::{deserialize_crs, Crs}; use crate::extension_type::ExtensionType; @@ -439,7 +439,7 @@ impl RasterSchema { /// CRS schema to store json representation pub fn crs_type() -> DataType { - DataType::Utf8 + DataType::Utf8 // TODO: Consider Utf8View } } @@ -519,7 +519,7 @@ impl RasterBuilder { )); // Now create the main builder with pre-built components - let mut main_builder = StructBuilder::new( + let main_builder = StructBuilder::new( RasterSchema::fields(), vec![ Box::new(metadata_builder), From 1ef03dd8c9488f6f1cbfce24c5d16f0f17e1d100 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Tue, 21 Oct 2025 17:08:45 -0700 Subject: [PATCH 16/18] adding a gdal loader --- rust/sedona-gdal/src/dataset.rs | 2 + rust/sedona-gdal/src/lib.rs | 1 + rust/sedona-gdal/src/loaders.rs | 253 +++++++++++++++++++ rust/sedona-raster/src/datatype_functions.rs | 83 ++++++ 4 files changed, 339 insertions(+) create mode 100644 rust/sedona-gdal/src/loaders.rs diff --git a/rust/sedona-gdal/src/dataset.rs b/rust/sedona-gdal/src/dataset.rs index 7917b1c4..7f979277 100644 --- a/rust/sedona-gdal/src/dataset.rs +++ b/rust/sedona-gdal/src/dataset.rs @@ -37,6 +37,8 @@ pub fn get_outdb_dataset(metadata: &dyn 
BandMetadataRef) -> Result, // 1-based index (GDAL convention) + tile_width: Option, // Optional override, will use GDAL's block size if None + tile_height: Option, // Optional override, will use GDAL's block size if None +) -> Result, ArrowError> { + + let dataset = Dataset::open(filepath.to_string()) + .map_err(|err| ArrowError::ParseError(err.to_string()))?; + + // Determine tile size - use GDAL's natural block size if not specified + let (tile_width, tile_height) = if tile_width.is_some() && tile_height.is_some() { + (tile_width.unwrap(), tile_height.unwrap()) + } else { + // Get the natural block size from the first band + let first_band = dataset.rasterband(band_indexes[0]).map_err(|e| { + ArrowError::InvalidArgumentError(format!("Failed to read band {}: {e}", band_indexes[0])) + })?; + + let (block_width, block_height) = first_band.block_size(); + (block_width, block_height) + }; + + println!("Using tile size: {}×{}", tile_width, tile_height); + + // Get the geotransform which contains scale, skew, and origin information + let geotransform = dataset.geo_transform() + .map_err(|e| ArrowError::ParseError(format!("Failed to get geotransform: {e}")))?; + + // Extract geotransform components + // geotransform = [upperleft_x, scale_x, skew_x, upperleft_y, skew_y, scale_y] + let (origin_x, pixel_width, rotation_x, origin_y, rotation_y, pixel_height) = ( + geotransform[0], // Upper-left X coordinate + geotransform[1], // Pixel width (scale_x) + geotransform[2], // X-direction skew + geotransform[3], // Upper-left Y coordinate + geotransform[4], // Y-direction skew + geotransform[5], // Pixel height (scale_y, usually negative) + ); + + + let (raster_width, raster_height) = dataset.raster_size(); + let x_count = (raster_width + tile_width - 1) / tile_width; + let y_count = (raster_height + tile_height - 1) / tile_height; + + let mut raster_builder = RasterBuilder::new(x_count * y_count); + + // FIXME: deal with the edge tiles when things don't divide evenly + for 
tile_y in 0..y_count { + for tile_x in 0..x_count { + let x_offset = tile_x * tile_width; + let y_offset = tile_y * tile_height; + + // Calculate the actual geographic coordinates for this tile + // using the geotransform from the original raster + let tile_origin_x = origin_x + (x_offset as f64) * pixel_width + (y_offset as f64) * rotation_x; + let tile_origin_y = origin_y + (x_offset as f64) * rotation_y + (y_offset as f64) * pixel_height; + + // Create raster metadata for this tile with actual geotransform values + let tile_metadata = RasterMetadata { + width: tile_width as u64, + height: tile_height as u64, + upperleft_x: tile_origin_x, + upperleft_y: tile_origin_y, + scale_x: pixel_width, + scale_y: pixel_height, + skew_x: rotation_x, + skew_y: rotation_y, + bounding_box: None, + }; + + // Start the raster + raster_builder.start_raster(&tile_metadata, None, None)?; + + for band_index in band_indexes { + let band: RasterBand = dataset.rasterband(*band_index).map_err(|e| { + ArrowError::InvalidArgumentError(format!("Failed to read file: {filepath} {e}")) + })?; + + let x_size = if x_offset + tile_width > raster_width { + raster_width - x_offset + } else { + tile_width + }; + let y_size = if y_offset + tile_height > raster_height { + raster_height - y_offset + } else { + tile_height + }; + + let data_type = gdaldatatype_to_banddatatype(band.band_type())?; + let data_type_bytes = bytes_per_pixel(data_type.clone())?; + let buffer_size_bytes = x_size * y_size * data_type_bytes.clone(); + + // Get a mutable buffer slice for GDAL to write directly into + let (buffer, slice) = raster_builder.get_band_buffer_slice(buffer_size_bytes); + + // Note: If you want resampling, buffer_size can be different from window_size + band.read_into_slice( + (x_offset as isize, y_offset as isize), // window_origin + (x_size, y_size), // window_size + (x_size, y_size), // buffer_size (no resampling) + slice, // buffer + Some(ResampleAlg::Average), // TODO: consider other algorithms + ) + 
.map_err(|e| ArrowError::ParseError(format!("Failed to read file: {filepath} {e}")))?; + + // Commit the buffer to the band data + raster_builder.commit_band_buffer(buffer); + + let nodata_value = match band.no_data_value() { + Some(val) => Some(f64_to_bandtype_bytes(val, data_type.clone())?), + None => None, + }; + + // Create band metadata + let band_metadata = BandMetadata { + nodata_value: nodata_value, + storage_type: StorageType::InDb, + datatype: data_type, // Updated to use the correct variable + outdb_url: None, + outdb_band_id: None, + }; + + // Finish the band + raster_builder.finish_band(band_metadata)?; + } + + // Complete the raster + raster_builder.finish_raster()?; + } + } + + let raster_struct = raster_builder.finish()?; + Ok(Arc::new(raster_struct)) +} + +fn gdaldatatype_to_banddatatype( + gdal_data_type: GdalDataType, +) -> Result { + match gdal_data_type { + GdalDataType::UInt8 => Ok(BandDataType::UInt8), + GdalDataType::UInt16 => Ok(BandDataType::UInt16), + GdalDataType::Int16 => Ok(BandDataType::Int16), + GdalDataType::UInt32 => Ok(BandDataType::UInt32), + GdalDataType::Int32 => Ok(BandDataType::Int32), + GdalDataType::Float32 => Ok(BandDataType::Float32), + GdalDataType::Float64 => Ok(BandDataType::Float64), + _ => Err(ArrowError::InvalidArgumentError(format!( + "Unsupported GDAL data type: {:?}", + gdal_data_type + ))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use gdal::Metadata; + + #[test] + fn test_load_raster() { + let filename = "/Users/jess/code/data/rasters/test1.tiff"; + let bands = vec![1]; + + // Test with auto-detected tile size + let _rasters = load_raster(filename, &bands, None, None).unwrap(); + + // Test with custom tile size + let _rasters = load_raster(filename, &bands, Some(128), Some(128)).unwrap(); + } + + #[test] + fn test_gdal_tile_properties() { + let filename = "/Users/jess/code/data/rasters/test1.tiff"; + + let dataset = Dataset::open(filename).unwrap(); + + println!("Dataset properties:"); + println!("- 
Raster size: {:?}", dataset.raster_size()); + println!("- Raster count: {}", dataset.raster_count()); + println!("- Driver: {:?}", dataset.driver().short_name()); + + // Check for tile/block size on the first band + if let Ok(band) = dataset.rasterband(1) { + println!("\nBand 1 properties:"); + println!("- Band type: {:?}", band.band_type()); + println!("- Size: {:?}", band.size()); + + // Check for block size (this is the natural tile/block size for the format) + println!("- Block size: {:?}", band.block_size()); + + // Check for overviews (pyramid levels) + println!("- Overview count: {:?}", band.overview_count()); + + // Check for no data value + if let Some(nodata) = band.no_data_value() { + println!("- No data value: {}", nodata); + } + } + + // Check dataset metadata for tiling information + println!("\nDataset metadata:"); + for entry in dataset.metadata() { + println!("- {}: {}", entry.key, entry.value); + } + + // Check for specific metadata items related to tiling + if let Some(tile_width) = dataset.metadata_item("TILEWIDTH", "") { + println!("- TILEWIDTH: {}", tile_width); + } + if let Some(tile_height) = dataset.metadata_item("TILEHEIGHT", "") { + println!("- TILEHEIGHT: {}", tile_height); + } + if let Some(block_x) = dataset.metadata_item("BLOCK_X_SIZE", "") { + println!("- BLOCK_X_SIZE: {}", block_x); + } + if let Some(block_y) = dataset.metadata_item("BLOCK_Y_SIZE", "") { + println!("- BLOCK_Y_SIZE: {}", block_y); + } + } +} \ No newline at end of file diff --git a/rust/sedona-raster/src/datatype_functions.rs b/rust/sedona-raster/src/datatype_functions.rs index d85aeab1..d83a108f 100644 --- a/rust/sedona-raster/src/datatype_functions.rs +++ b/rust/sedona-raster/src/datatype_functions.rs @@ -53,6 +53,53 @@ pub fn read_pixel_value(bytes: &[u8], data_type: BandDataType) -> Result &mut [u8] { + slice +} + +pub fn cast_slice_to_u16(slice: &mut [u8]) -> &mut [u16] { + let len = slice.len() / 2; + unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() 
as *mut u16, len) } +} + +pub fn cast_slice_to_i16(slice: &mut [u8]) -> &mut [i16] { + let len = slice.len() / 2; + unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut i16, len) } +} + +pub fn cast_slice_to_u32(slice: &mut [u8]) -> &mut [u32] { + let len = slice.len() / 4; + unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut u32, len) } +} + +pub fn cast_slice_to_i32(slice: &mut [u8]) -> &mut [i32] { + let len = slice.len() / 4; + unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut i32, len) } +} + +pub fn cast_slice_to_f32(slice: &mut [u8]) -> &mut [f32] { + let len = slice.len() / 4; + unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut f32, len) } +} + +pub fn cast_slice_to_f64(slice: &mut [u8]) -> &mut [f64] { + let len = slice.len() / 8; + unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut f64, len) } +} + +pub fn f64_to_bandtype_bytes(value: f64, data_type: BandDataType) -> Result, ArrowError> { + match data_type { + BandDataType::UInt8 => Ok(vec![value as u8]), + BandDataType::Int16 => Ok((value as i16).to_le_bytes().to_vec()), + BandDataType::UInt16 => Ok((value as u16).to_le_bytes().to_vec()), + BandDataType::Int32 => Ok((value as i32).to_le_bytes().to_vec()), + BandDataType::UInt32 => Ok((value as u32).to_le_bytes().to_vec()), + BandDataType::Float32 => Ok((value as f32).to_le_bytes().to_vec()), + BandDataType::Float64 => Ok((value as f64).to_le_bytes().to_vec()), + } +} + #[cfg(test)] mod tests { use super::*; @@ -67,4 +114,40 @@ mod tests { assert_eq!(bytes_per_pixel(BandDataType::Float32).unwrap(), 4); assert_eq!(bytes_per_pixel(BandDataType::Float64).unwrap(), 8); } + + #[test] + fn basic_read_pixel_value_tests() { + let nodataval = 17.0 as f64; + assert_eq!( + read_pixel_value(&[17u8], BandDataType::UInt8).unwrap(), + nodataval + ); + assert_eq!( + read_pixel_value(&17i16.to_le_bytes(), BandDataType::Int16).unwrap(), + nodataval + ); + assert_eq!( + 
read_pixel_value(&17u16.to_le_bytes(), BandDataType::UInt16).unwrap(), + nodataval + ); + assert_eq!( + read_pixel_value(&17i32.to_le_bytes(), BandDataType::Int32).unwrap(), + nodataval + ); + assert_eq!( + read_pixel_value(&17u32.to_le_bytes(), BandDataType::UInt32).unwrap(), + nodataval + ); + assert_eq!( + read_pixel_value(&17f32.to_le_bytes(), BandDataType::Float32).unwrap(), + nodataval + ); + assert_eq!( + read_pixel_value(&17f64.to_le_bytes(), BandDataType::Float64).unwrap(), + nodataval + ); + + let invalid = -300.0 as f64; + assert!(read_pixel_value(&[invalid as u8], BandDataType::UInt8).is_err()); + } } From 4bd161803bebe9c62f047671fc362cc5669fb495 Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Thu, 23 Oct 2025 09:09:59 -0700 Subject: [PATCH 17/18] use 1-based band_number consistently --- rust/sedona-functions/src/rs_asmatrix.rs | 206 +++++++++++++++ rust/sedona-gdal/src/dataset.rs | 43 +++- rust/sedona-gdal/src/lib.rs | 4 +- rust/sedona-gdal/src/loaders.rs | 253 ------------------- rust/sedona-gdal/src/readers.rs | 175 +++++++++++++ rust/sedona-gdal/src/rs_value.rs | 151 ++++------- rust/sedona-raster/src/datatype_functions.rs | 55 ++-- rust/sedona-raster/src/display_functions.rs | 109 ++++++++ rust/sedona-raster/src/lib.rs | 1 + rust/sedona-schema/src/datatypes.rs | 47 ++-- 10 files changed, 634 insertions(+), 410 deletions(-) create mode 100644 rust/sedona-functions/src/rs_asmatrix.rs delete mode 100644 rust/sedona-gdal/src/loaders.rs create mode 100644 rust/sedona-gdal/src/readers.rs create mode 100644 rust/sedona-raster/src/display_functions.rs diff --git a/rust/sedona-functions/src/rs_asmatrix.rs b/rust/sedona-functions/src/rs_asmatrix.rs new file mode 100644 index 00000000..da13de29 --- /dev/null +++ b/rust/sedona-functions/src/rs_asmatrix.rs @@ -0,0 +1,206 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::{sync::Arc, vec}; + +use crate::executor::RasterExecutor; +use arrow_array::builder::UInt64Builder; +use arrow_schema::DataType; +use datafusion_common::error::Result; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_schema::{ + datatypes::{RasterRef, SedonaType}, + matchers::ArgMatcher, +} +use sedona_raster::display_functions::pretty_print_indb; + +/// RS_AsMatrix() scalar UDF implementation +/// +/// Returns the selected raster band rendered as a pretty-printed 2D matrix string +pub fn rs_asmatrix_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_asmatrix", + vec![Arc::new(RsAsMatrix {})], + Volatility::Immutable, + Some(rs_asmatrix_doc()), + ) +} + +fn rs_asmatrix_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + format!("Returns a string, that when printed, outputs the raster band as a pretty printed 2D matrix."), + format!("RS_AsMatrix(raster: Raster, band_number: Numeric, postDecimalPrecision: Numeric)"), + ) + .with_argument("raster", "Raster: Input raster") + .with_argument("band_number", "Numeric: Band number (1-based).") + .with_argument("postDecimalPrecision", "Numeric: Number of digits after decimal point. 
Optional, default is 6.") + .with_sql_example("SELECT RS_AsMatrix(raster, band_number)") + .with_sql_example("SELECT RS_AsMatrix(raster, band_number, postDecimalPrecision)") + .build() +} + +#[derive(Debug)] +struct RsAsMatrix {} + +impl SedonaScalarKernel for RsAsMatrix { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster(), + ArgMatcher::is_numeric(), + ArgMatcher::is_optional(ArgMatcher::is_numeric()) + ], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut builder = Utf8Builder::with_capacity(executor.num_iterations()); + + let band_number = extract_numeric_scalar(&args[1])? as usize; + let precision = if args.len() > 2 { + extract_numeric_scalar(&args[2])? as usize + } else { + 6usize + }; + + executor.execute_raster_void(|_i, raster_opt| { + match raster_opt { + Some(raster) => { + // TODO: maybe move this into gdal so that we can display outdb rasters too? 
+ builder.append_value(pretty_print_indb(raster, band_number, precision)); + } + None => builder.append_null(), + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Array, ArrayRef, Utf8Array}; + use datafusion_expr::ScalarUDF; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER, + }; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = rs_asmatrix_udf().into(); + assert_eq!(udf.name(), "rs_asmatrix"); + assert!(udf.documentation().is_some()); + } + + #[test] + fn udf_invoke() { + let raster_array = create_test_raster_array(); + + let kernel = RsAsMatrix {}; + let args = vec![ColumnarValue::Array(raster_array), + ColumnarValue::Scalar(ScalarValue::from(1usize)), + ColumnarValue::Scalar(ScalarValue::from(2usize)) + ]; + let arg_types = vec![RASTER, SedonaType::Arrow(DataType::UInt64), SedonaType::Arrow(DataType::UInt64)]; + + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + let expected_first = " 1.00 1.00 1.00 \n 1.00 1.00 1.00 \n"; + let expected_third = " 3.00 3.00 \n 3.00 3.00 \n 3.00 3.00 \n"; + + if let ColumnarValue::Array(result_array) = result { + let width_array = result_array.as_any().downcast_ref::().unwrap(); + + assert_eq!(width_array.len(), 3); + assert_eq!(width_array.value(0), expected_first); + assert!(width_array.is_null(1)); // Second raster is null + assert_eq!(width_array.value(2), expected_third); + } else { + panic!("Expected array result"); + } + } + + /// Create a test raster array with different widths for testing + // TODO: Parameterize the creation of rasters and move the + // function to sedona-testing + fn create_test_raster_array() -> ArrayRef { + let mut builder = RasterBuilder::new(3); + + // First raster: 3x2 (width x height) + let metadata1 = RasterMetadata { + width: 3, + height: 2, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + 
skew_y: 0.0, + bounding_box: None, + }; + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + builder.start_raster(&metadata1, None, None).unwrap(); + let test_data1 = vec![1u8; 3*2]; // width * height + builder.band_data_writer().append_value(&test_data1); + builder.finish_band(band_metadata.clone()).unwrap(); + builder.finish_raster().unwrap(); + + // Second raster: null + builder.append_null().unwrap(); + + // Third raster: 2x3 (width x height) + let metadata3 = RasterMetadata { + width: 2, + height: 3, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata3, None, None).unwrap(); + let test_data3 = vec![3u8; 2*3]; // width * height + builder.band_data_writer().append_value(&test_data3); + builder.finish_band(band_metadata).unwrap(); + builder.finish_raster().unwrap(); + + Arc::new(builder.finish().unwrap()) + } +} diff --git a/rust/sedona-gdal/src/dataset.rs b/rust/sedona-gdal/src/dataset.rs index 7f979277..14c0e7e9 100644 --- a/rust/sedona-gdal/src/dataset.rs +++ b/rust/sedona-gdal/src/dataset.rs @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. use arrow_schema::ArrowError; -use gdal::Dataset; +use gdal::{Dataset, Metadata}; use sedona_schema::datatypes::{BandMetadataRef, StorageType}; /// Get the out-db dataset reference from a raster band. 
-pub fn get_outdb_dataset(metadata: &dyn BandMetadataRef) -> Result { +pub fn outdb_dataset(metadata: &dyn BandMetadataRef) -> Result { if metadata.storage_type() != StorageType::OutDbRef { return Err(ArrowError::ParseError( "Raster band is not stored out-of-db".to_string(), @@ -47,3 +47,42 @@ fn open_outdb_band(url: &str) -> Result { let ds = Dataset::open(full_url).map_err(|e| ArrowError::ParseError(e.to_string()))?; Ok(ds) } + +/// Extract geotransform components from a GDAL dataset +/// Returns (upper_left_x, upper_left_y, pixel_width, pixel_height, x_skew, y_skew) +pub fn geotransform_components( + dataset: &Dataset, +) -> Result<(f64, f64, f64, f64, f64, f64), ArrowError> { + let geotransform = dataset + .geo_transform() + .map_err(|e| ArrowError::ParseError(format!("Failed to get geotransform: {e}")))?; + Ok(( + geotransform[0], // Upper-left X coordinate + geotransform[3], // Upper-left Y coordinate + geotransform[1], // Pixel width (scale_x) + geotransform[5], // Pixel height (scale_y, usually negative) + geotransform[2], // X-direction skew + geotransform[4], // Y-direction skew + )) +} + +/// Extract tile size from a GDAL dataset +/// If not provided, defaults to raster size +pub fn tile_size(dataset: &Dataset) -> Result<(usize, usize), ArrowError> { + let raster_width = dataset.raster_size().0; + let raster_height = dataset.raster_size().1; + + let tile_width = match dataset.metadata_item("TILEWIDTH", "") { + Some(val) => val.parse::().unwrap_or(raster_width), + None => raster_width, + }; + let tile_height = match dataset.metadata_item("TILEHEIGHT", "") { + Some(val) => val.parse::().unwrap_or(raster_height), + None => raster_height, + }; + + Ok((tile_width, tile_height)) +} + +#[cfg(test)] +mod test {} diff --git a/rust/sedona-gdal/src/lib.rs b/rust/sedona-gdal/src/lib.rs index 84d1b0d2..d1e637ca 100644 --- a/rust/sedona-gdal/src/lib.rs +++ b/rust/sedona-gdal/src/lib.rs @@ -15,6 +15,6 @@ // specific language governing permissions and limitations // 
under the License. -mod dataset; -pub mod loaders; +pub mod dataset; +pub mod readers; pub mod rs_value; diff --git a/rust/sedona-gdal/src/loaders.rs b/rust/sedona-gdal/src/loaders.rs deleted file mode 100644 index 46437fdf..00000000 --- a/rust/sedona-gdal/src/loaders.rs +++ /dev/null @@ -1,253 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use arrow_array::StructArray; -use arrow_schema::ArrowError; -use gdal::raster::{GdalDataType, RasterBand, ResampleAlg}; -use gdal::Dataset; -use sedona_raster::datatype_functions::{ - bytes_per_pixel, f64_to_bandtype_bytes, - cast_slice_to_u8, cast_slice_to_u16, cast_slice_to_i16, - cast_slice_to_u32, cast_slice_to_i32, cast_slice_to_f32, cast_slice_to_f64 -}; -use sedona_schema::datatypes::{ - BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, -}; -use std::sync::Arc; - -fn load_raster( - filepath: &str, - band_indexes: &Vec, // 1-based index (GDAL convention) - tile_width: Option, // Optional override, will use GDAL's block size if None - tile_height: Option, // Optional override, will use GDAL's block size if None -) -> Result, ArrowError> { - - let dataset = Dataset::open(filepath.to_string()) - .map_err(|err| ArrowError::ParseError(err.to_string()))?; - - // Determine tile size - use GDAL's natural block size if not specified - let (tile_width, tile_height) = if tile_width.is_some() && tile_height.is_some() { - (tile_width.unwrap(), tile_height.unwrap()) - } else { - // Get the natural block size from the first band - let first_band = dataset.rasterband(band_indexes[0]).map_err(|e| { - ArrowError::InvalidArgumentError(format!("Failed to read band {}: {e}", band_indexes[0])) - })?; - - let (block_width, block_height) = first_band.block_size(); - (block_width, block_height) - }; - - println!("Using tile size: {}×{}", tile_width, tile_height); - - // Get the geotransform which contains scale, skew, and origin information - let geotransform = dataset.geo_transform() - .map_err(|e| ArrowError::ParseError(format!("Failed to get geotransform: {e}")))?; - - // Extract geotransform components - // geotransform = [upperleft_x, scale_x, skew_x, upperleft_y, skew_y, scale_y] - let (origin_x, pixel_width, rotation_x, origin_y, rotation_y, pixel_height) = ( - geotransform[0], // Upper-left X coordinate - geotransform[1], // Pixel width (scale_x) - 
geotransform[2], // X-direction skew - geotransform[3], // Upper-left Y coordinate - geotransform[4], // Y-direction skew - geotransform[5], // Pixel height (scale_y, usually negative) - ); - - - let (raster_width, raster_height) = dataset.raster_size(); - let x_count = (raster_width + tile_width - 1) / tile_width; - let y_count = (raster_height + tile_height - 1) / tile_height; - - let mut raster_builder = RasterBuilder::new(x_count * y_count); - - // FIXME: deal with the edge tiles when things don't divide evenly - for tile_y in 0..y_count { - for tile_x in 0..x_count { - let x_offset = tile_x * tile_width; - let y_offset = tile_y * tile_height; - - // Calculate the actual geographic coordinates for this tile - // using the geotransform from the original raster - let tile_origin_x = origin_x + (x_offset as f64) * pixel_width + (y_offset as f64) * rotation_x; - let tile_origin_y = origin_y + (x_offset as f64) * rotation_y + (y_offset as f64) * pixel_height; - - // Create raster metadata for this tile with actual geotransform values - let tile_metadata = RasterMetadata { - width: tile_width as u64, - height: tile_height as u64, - upperleft_x: tile_origin_x, - upperleft_y: tile_origin_y, - scale_x: pixel_width, - scale_y: pixel_height, - skew_x: rotation_x, - skew_y: rotation_y, - bounding_box: None, - }; - - // Start the raster - raster_builder.start_raster(&tile_metadata, None, None)?; - - for band_index in band_indexes { - let band: RasterBand = dataset.rasterband(*band_index).map_err(|e| { - ArrowError::InvalidArgumentError(format!("Failed to read file: {filepath} {e}")) - })?; - - let x_size = if x_offset + tile_width > raster_width { - raster_width - x_offset - } else { - tile_width - }; - let y_size = if y_offset + tile_height > raster_height { - raster_height - y_offset - } else { - tile_height - }; - - let data_type = gdaldatatype_to_banddatatype(band.band_type())?; - let data_type_bytes = bytes_per_pixel(data_type.clone())?; - let buffer_size_bytes = 
x_size * y_size * data_type_bytes.clone(); - - // Get a mutable buffer slice for GDAL to write directly into - let (buffer, slice) = raster_builder.get_band_buffer_slice(buffer_size_bytes); - - // Note: If you want resampling, buffer_size can be different from window_size - band.read_into_slice( - (x_offset as isize, y_offset as isize), // window_origin - (x_size, y_size), // window_size - (x_size, y_size), // buffer_size (no resampling) - slice, // buffer - Some(ResampleAlg::Average), // TODO: consider other algorithms - ) - .map_err(|e| ArrowError::ParseError(format!("Failed to read file: {filepath} {e}")))?; - - // Commit the buffer to the band data - raster_builder.commit_band_buffer(buffer); - - let nodata_value = match band.no_data_value() { - Some(val) => Some(f64_to_bandtype_bytes(val, data_type.clone())?), - None => None, - }; - - // Create band metadata - let band_metadata = BandMetadata { - nodata_value: nodata_value, - storage_type: StorageType::InDb, - datatype: data_type, // Updated to use the correct variable - outdb_url: None, - outdb_band_id: None, - }; - - // Finish the band - raster_builder.finish_band(band_metadata)?; - } - - // Complete the raster - raster_builder.finish_raster()?; - } - } - - let raster_struct = raster_builder.finish()?; - Ok(Arc::new(raster_struct)) -} - -fn gdaldatatype_to_banddatatype( - gdal_data_type: GdalDataType, -) -> Result { - match gdal_data_type { - GdalDataType::UInt8 => Ok(BandDataType::UInt8), - GdalDataType::UInt16 => Ok(BandDataType::UInt16), - GdalDataType::Int16 => Ok(BandDataType::Int16), - GdalDataType::UInt32 => Ok(BandDataType::UInt32), - GdalDataType::Int32 => Ok(BandDataType::Int32), - GdalDataType::Float32 => Ok(BandDataType::Float32), - GdalDataType::Float64 => Ok(BandDataType::Float64), - _ => Err(ArrowError::InvalidArgumentError(format!( - "Unsupported GDAL data type: {:?}", - gdal_data_type - ))), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use gdal::Metadata; - - #[test] - fn 
test_load_raster() { - let filename = "/Users/jess/code/data/rasters/test1.tiff"; - let bands = vec![1]; - - // Test with auto-detected tile size - let _rasters = load_raster(filename, &bands, None, None).unwrap(); - - // Test with custom tile size - let _rasters = load_raster(filename, &bands, Some(128), Some(128)).unwrap(); - } - - #[test] - fn test_gdal_tile_properties() { - let filename = "/Users/jess/code/data/rasters/test1.tiff"; - - let dataset = Dataset::open(filename).unwrap(); - - println!("Dataset properties:"); - println!("- Raster size: {:?}", dataset.raster_size()); - println!("- Raster count: {}", dataset.raster_count()); - println!("- Driver: {:?}", dataset.driver().short_name()); - - // Check for tile/block size on the first band - if let Ok(band) = dataset.rasterband(1) { - println!("\nBand 1 properties:"); - println!("- Band type: {:?}", band.band_type()); - println!("- Size: {:?}", band.size()); - - // Check for block size (this is the natural tile/block size for the format) - println!("- Block size: {:?}", band.block_size()); - - // Check for overviews (pyramid levels) - println!("- Overview count: {:?}", band.overview_count()); - - // Check for no data value - if let Some(nodata) = band.no_data_value() { - println!("- No data value: {}", nodata); - } - } - - // Check dataset metadata for tiling information - println!("\nDataset metadata:"); - for entry in dataset.metadata() { - println!("- {}: {}", entry.key, entry.value); - } - - // Check for specific metadata items related to tiling - if let Some(tile_width) = dataset.metadata_item("TILEWIDTH", "") { - println!("- TILEWIDTH: {}", tile_width); - } - if let Some(tile_height) = dataset.metadata_item("TILEHEIGHT", "") { - println!("- TILEHEIGHT: {}", tile_height); - } - if let Some(block_x) = dataset.metadata_item("BLOCK_X_SIZE", "") { - println!("- BLOCK_X_SIZE: {}", block_x); - } - if let Some(block_y) = dataset.metadata_item("BLOCK_Y_SIZE", "") { - println!("- BLOCK_Y_SIZE: {}", block_y); - } 
- } -} \ No newline at end of file diff --git a/rust/sedona-gdal/src/readers.rs b/rust/sedona-gdal/src/readers.rs new file mode 100644 index 00000000..dccb14d5 --- /dev/null +++ b/rust/sedona-gdal/src/readers.rs @@ -0,0 +1,175 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::dataset::{geotransform_components, tile_size}; +use arrow_array::StructArray; +use arrow_schema::ArrowError; +use gdal::raster::{GdalDataType, RasterBand}; +use gdal::Dataset; +use sedona_raster::datatype_functions::{bytes_per_pixel, f64_to_bandtype_bytes}; +use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, +}; +use std::sync::Arc; + +pub fn read_raster( + filepath: &str, +) -> Result, ArrowError> { + let dataset = Dataset::open(filepath.to_string()) + .map_err(|err| ArrowError::ParseError(err.to_string()))?; + + // Extract geotransform components + let (origin_x, origin_y, pixel_width, pixel_height, rotation_x, rotation_y) = geotransform_components(&dataset)?; + + let (raster_width, raster_height) = dataset.raster_size(); + + let (tile_width, tile_height) = tile_size(&dataset)?; + + let x_tile_count = (raster_width + tile_width - 1) / tile_width; + let y_tile_count = (raster_height + tile_height - 1) / tile_height; + + let mut raster_builder = RasterBuilder::new(x_tile_count * y_tile_count); + let band_count = dataset.raster_count(); + + for tile_y in 0..y_tile_count { + for tile_x in 0..x_tile_count { + let x_offset = tile_x * tile_width; + let y_offset = tile_y * tile_height; + + // Calculate geographic coordinates for this tile + // using the geotransform from the original raster + let tile_origin_x = + origin_x + (x_offset as f64) * pixel_width + (y_offset as f64) * rotation_x; + let tile_origin_y = + origin_y + (x_offset as f64) * rotation_y + (y_offset as f64) * pixel_height; + + // Create raster metadata for this tile with actual geotransform values + let tile_metadata = RasterMetadata { + width: tile_width as u64, + height: tile_height as u64, + upperleft_x: tile_origin_x, + upperleft_y: tile_origin_y, + scale_x: pixel_width, + scale_y: pixel_height, + skew_x: rotation_x, + skew_y: rotation_y, + bounding_box: None, // TODO: should we calculate bounding box here? 
+ }; + + raster_builder.start_raster(&tile_metadata, None, None)?; + + for band_number in 1..=band_count { + let band: RasterBand = dataset.rasterband(band_number).unwrap(); + // This should be the same as tile width/height, except for edge tiles + // but we would need to update the width/height in the metadata above then. + // For now, fail if sizes don't match. + let (x_size, y_size) = band.size(); + if x_size != tile_width || y_size != tile_height { + return Err(ArrowError::ParseError(format!( + "Band size ({}, {}) does not match expected tile size ({}, {})", + x_size, y_size, tile_width, tile_height + ))); + } + + let data_type = gdaldatatype_to_banddatatype(band.band_type())?; + let data_type_bytes = bytes_per_pixel(data_type.clone())?; + let buffer_size_bytes = x_size * y_size * data_type_bytes.clone(); + + // Get a mutable buffer slice for GDAL to write directly into + let (buffer, slice) = raster_builder.get_band_buffer_slice(buffer_size_bytes); + + // TODO: Do we need resampling? If so set buffer_size to different from window_size + // and have a ResampleAlgorithm. 
+ band.read_into_slice( + (x_offset as isize, y_offset as isize), // window_origin + (x_size, y_size), // window_size + (x_size, y_size), // buffer_size (no resampling) + slice, // buffer + None, // resampling algorithms + ) + .map_err(|e| { + ArrowError::ParseError(format!("Failed to read band {band_number} {e}")) + })?; + + raster_builder.commit_band_buffer(buffer); + + let nodata_value = match band.no_data_value() { + Some(val) => Some(f64_to_bandtype_bytes(val, data_type.clone())?), + None => None, + }; + + let band_metadata = BandMetadata { + nodata_value: nodata_value, + storage_type: StorageType::InDb, + datatype: data_type, + outdb_url: None, + outdb_band_id: None, + }; + + // Finalize the band + raster_builder.finish_band(band_metadata)?; + } + + // Finalize the raster + raster_builder.finish_raster()?; + } + } + + // Finalize the raster struct array + let raster_struct = raster_builder.finish()?; + Ok(Arc::new(raster_struct)) +} + +fn gdaldatatype_to_banddatatype(gdal_data_type: GdalDataType) -> Result { + match gdal_data_type { + GdalDataType::UInt8 => Ok(BandDataType::UInt8), + GdalDataType::UInt16 => Ok(BandDataType::UInt16), + GdalDataType::Int16 => Ok(BandDataType::Int16), + GdalDataType::UInt32 => Ok(BandDataType::UInt32), + GdalDataType::Int32 => Ok(BandDataType::Int32), + GdalDataType::Float32 => Ok(BandDataType::Float32), + GdalDataType::Float64 => Ok(BandDataType::Float64), + _ => Err(ArrowError::InvalidArgumentError(format!( + "Unsupported GDAL data type: {:?}", + gdal_data_type + ))), + } +} + +#[cfg(test)] +mod tests { + // use super::*; + // use sedona_raster::display_functions::pretty_print_indb; + // use sedona_schema::datatypes::raster_iterator; + + #[test] + fn test_load_raster() { + // TODO: Add proper tests here. 
+ // To load a raster and view contents + // for prototyping fun: + // + // let filepath = "/test1.tiff"; + // let result = read_raster(filepath); + // assert!(result.is_ok()); + // + // To view loaded raster: + // let raster_array = result.unwrap(); + // for raster in raster_iterator(&raster_array) { + // println!("{}", pretty_print_indb(&raster, 1, 2).unwrap()); + // } + } +} diff --git a/rust/sedona-gdal/src/rs_value.rs b/rust/sedona-gdal/src/rs_value.rs index 03f6c95c..8e1702ce 100644 --- a/rust/sedona-gdal/src/rs_value.rs +++ b/rust/sedona-gdal/src/rs_value.rs @@ -23,9 +23,10 @@ use datafusion_expr::ColumnarValue; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::RasterExecutor; use sedona_raster::datatype_functions::{bytes_per_pixel, read_pixel_value}; -use sedona_schema::datatypes::{BandMetadataRef, BandRef, RasterRef, SedonaType}; +use sedona_schema::datatypes::{BandMetadataRef, BandRef, RasterRef, SedonaType, StorageType}; +use crate::dataset::outdb_dataset; -/// RS_Value() implementation using [DistanceExt] +/// RS_Value() implementation pub fn rs_value_impl() -> ScalarKernelRef { Arc::new(RSValue {}) } @@ -48,76 +49,9 @@ impl SedonaScalarKernel for RSValue { ) -> Result { let executor = RasterExecutor::new(arg_types, args); - // Extract coordinate and band arguments as scalars - let x = match &args[1] { - ColumnarValue::Scalar(scalar) => { - let val = scalar.cast_to(&DataType::Int64).map_err(|e| { - datafusion_common::DataFusionError::Execution(format!( - "Failed to cast x coordinate: {}", - e - )) - })?; - match val { - ScalarValue::Int64(Some(v)) => v as usize, - _ => { - return Err(datafusion_common::DataFusionError::NotImplemented( - "Invalid x coordinate".to_string(), - )) - } - } - } - _ => { - return Err(datafusion_common::DataFusionError::NotImplemented( - "Array x coordinates not supported".to_string(), - )) - } - }; - let y = match &args[2] { - ColumnarValue::Scalar(scalar) => { - let val = 
scalar.cast_to(&DataType::Int64).map_err(|e| { - datafusion_common::DataFusionError::Execution(format!( - "Failed to cast y coordinate: {}", - e - )) - })?; - match val { - ScalarValue::Int64(Some(v)) => v as usize, - _ => { - return Err(datafusion_common::DataFusionError::NotImplemented( - "Invalid y coordinate".to_string(), - )) - } - } - } - _ => { - return Err(datafusion_common::DataFusionError::NotImplemented( - "Array y coordinates not supported".to_string(), - )) - } - }; - let band_index = match &args[3] { - ColumnarValue::Scalar(scalar) => { - let val = scalar.cast_to(&DataType::Int64).map_err(|e| { - datafusion_common::DataFusionError::Execution(format!( - "Failed to cast band index: {}", - e - )) - })?; - match val { - ScalarValue::Int64(Some(v)) => (v as usize).saturating_sub(1), - _ => { - return Err(datafusion_common::DataFusionError::NotImplemented( - "Invalid band index".to_string(), - )) - } - } - } - _ => { - return Err(datafusion_common::DataFusionError::NotImplemented( - "Array band numbers not supported".to_string(), - )) - } - }; + let x = extract_numeric_scalar(&args[1])?; + let y = extract_numeric_scalar(&args[2])?; + let band_number = extract_numeric_scalar(&args[3])?; let mut builder = Float64Builder::with_capacity(executor.num_iterations()); @@ -125,9 +59,10 @@ impl SedonaScalarKernel for RSValue { match raster_opt { None => builder.append_null(), Some(raster) => { - match invoke_scalar(&raster, x, y, band_index) { + match invoke_scalar(&raster, x, y, band_number) { Ok(value) => builder.append_value(value), - Err(_) => builder.append_null(), // Handle errors by appending null + // TODO: Error or null on bad index? 
+ Err(_) => builder.append_null(), } } } @@ -142,7 +77,7 @@ fn invoke_scalar( raster: &dyn RasterRef, x: usize, y: usize, - band_index: usize, + band_number: usize, ) -> Result { // Extract metadata from the raster let metadata = raster.metadata(); @@ -156,29 +91,28 @@ fn invoke_scalar( )); } - // Get the band + // Get the band (using 1-based band numbering) let bands = raster.bands(); - if band_index >= bands.len() { - return Err(ArrowError::InvalidArgumentError( - "Specified band does not exist".to_string(), - )); + if band_number == 0 || band_number > bands.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "Band number {} does not exist (valid range: 1-{}, raster has {} bands)", + band_number, + bands.len(), + bands.len() + ))); } - let band = bands.band(band_index).ok_or_else(|| { + let band = bands.band(band_number).ok_or_else(|| { ArrowError::InvalidArgumentError("Failed to get band at index".to_string()) })?; let band_metadata = band.metadata(); match band_metadata.storage_type() { - sedona_schema::datatypes::StorageType::InDb => { - get_indb_pixel(band_metadata, &*band, x, y, width, height) - } - sedona_schema::datatypes::StorageType::OutDbRef => { - get_outdb_pixel(band_metadata, x, y, width, height) - } + StorageType::InDb => indb_pixel(band_metadata, &*band, x, y, width, height), + StorageType::OutDbRef => outdb_pixel(band_metadata, x, y, width, height), } } -fn get_indb_pixel( +fn indb_pixel( metadata: &dyn BandMetadataRef, band: &dyn BandRef, x: usize, @@ -187,11 +121,12 @@ fn get_indb_pixel( _height: usize, ) -> Result { if let Some(_nodata_bytes) = metadata.nodata_value() { - // TODO: Compare pixel value against nodata value + // TODO: Compare pixel value against nodata value? 
} let data_type = metadata.data_type(); let bytes_per_px = bytes_per_pixel(data_type.clone())?; + // TODO: we may want to consider a different ordering let offset = (y * width + x) * bytes_per_px; let band_data = band.data(); @@ -205,18 +140,18 @@ fn get_indb_pixel( read_pixel_value(pixel_bytes, data_type) } -fn get_outdb_pixel( +fn outdb_pixel( metadata: &dyn BandMetadataRef, x: usize, y: usize, _width: usize, _height: usize, ) -> Result { - use crate::dataset::get_outdb_dataset; - let dataset = get_outdb_dataset(metadata)?; - let band_index = match metadata.outdb_band_id() { + let dataset = outdb_dataset(metadata)?; + + let band_number = match metadata.outdb_band_id() { Some(index) => index, None => { return Err(ArrowError::ParseError( @@ -225,7 +160,7 @@ fn get_outdb_pixel( } }; - let band = dataset.rasterband(band_index as usize).map_err(|_| { + let band = dataset.rasterband(band_number as usize).map_err(|_| { ArrowError::ParseError("Failed to get raster band from dataset".to_string()) })?; @@ -237,6 +172,28 @@ fn get_outdb_pixel( Ok(pixel_data.data()[0]) } +fn extract_numeric_scalar(arg: &ColumnarValue) -> Result { + if let ColumnarValue::Scalar(scalar) = arg { + match scalar { + ScalarValue::Int8(Some(val)) => Ok(*val as usize), + ScalarValue::Int16(Some(val)) => Ok(*val as usize), + ScalarValue::Int32(Some(val)) => Ok(*val as usize), + ScalarValue::Int64(Some(val)) => Ok(*val as usize), + ScalarValue::UInt8(Some(val)) => Ok(*val as usize), + ScalarValue::UInt16(Some(val)) => Ok(*val as usize), + ScalarValue::UInt32(Some(val)) => Ok(*val as usize), + ScalarValue::UInt64(Some(val)) => Ok(*val as usize), + _ => Err(ArrowError::ParseError( + "Failed to extract numeric scalar: unsupported type or null value".to_string(), + )), + } + } else { + Err(ArrowError::ParseError( + "Failed to extract scalar value: expected scalar, got array".to_string(), + )) + } +} + #[cfg(test)] mod tests { use super::*; @@ -274,7 +231,6 @@ mod tests { assert_eq!(pixel_array.len(), 1); 
- // Expected pixel value at (2,3) for 10x12 raster: row 3 * width 10 + col 2 = 32 let expected_first = 201.0; assert_eq!(pixel_array.value(0), expected_first,); } else { @@ -458,8 +414,7 @@ mod tests { let url = "https://sentinel-cogs.s3.amazonaws.com/sentinel-s2-l2a-cogs/1/C/CV/2018/10/S2B_1CCV_20181004_0_L2A/AOT.tif"; let mut builder = RasterBuilder::new(3); - // First raster: 10x12 - let metadata1 = RasterMetadata { + let metadata = RasterMetadata { width: 10, height: 12, upperleft_x: 0.0, @@ -479,7 +434,7 @@ mod tests { outdb_band_id: Some(1), }; - builder.start_raster(&metadata1, None, None).unwrap(); + builder.start_raster(&metadata, None, None).unwrap(); let test_data1 = vec![0u8]; builder.band_data_writer().append_value(&test_data1); builder.finish_band(band_metadata.clone()).unwrap(); diff --git a/rust/sedona-raster/src/datatype_functions.rs b/rust/sedona-raster/src/datatype_functions.rs index d83a108f..4c613eb2 100644 --- a/rust/sedona-raster/src/datatype_functions.rs +++ b/rust/sedona-raster/src/datatype_functions.rs @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ use arrow_schema::ArrowError; use sedona_schema::datatypes::BandDataType; @@ -53,41 +70,6 @@ pub fn read_pixel_value(bytes: &[u8], data_type: BandDataType) -> Result &mut [u8] { - slice -} - -pub fn cast_slice_to_u16(slice: &mut [u8]) -> &mut [u16] { - let len = slice.len() / 2; - unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut u16, len) } -} - -pub fn cast_slice_to_i16(slice: &mut [u8]) -> &mut [i16] { - let len = slice.len() / 2; - unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut i16, len) } -} - -pub fn cast_slice_to_u32(slice: &mut [u8]) -> &mut [u32] { - let len = slice.len() / 4; - unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut u32, len) } -} - -pub fn cast_slice_to_i32(slice: &mut [u8]) -> &mut [i32] { - let len = slice.len() / 4; - unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut i32, len) } -} - -pub fn cast_slice_to_f32(slice: &mut [u8]) -> &mut [f32] { - let len = slice.len() / 4; - unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut f32, len) } -} - -pub fn cast_slice_to_f64(slice: &mut [u8]) -> &mut [f64] { - let len = slice.len() / 8; - unsafe { std::slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut f64, len) } -} - pub fn f64_to_bandtype_bytes(value: f64, data_type: BandDataType) -> Result, ArrowError> { match data_type { BandDataType::UInt8 => Ok(vec![value as u8]), @@ -146,8 +128,5 @@ mod tests { read_pixel_value(&17f64.to_le_bytes(), BandDataType::Float64).unwrap(), nodataval ); - - let invalid = -300.0 as f64; - assert!(read_pixel_value(&[invalid as u8], BandDataType::UInt8).is_err()); } } diff --git a/rust/sedona-raster/src/display_functions.rs b/rust/sedona-raster/src/display_functions.rs new file mode 100644 index 00000000..501af256 --- /dev/null +++ b/rust/sedona-raster/src/display_functions.rs @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::datatype_functions::{bytes_per_pixel, read_pixel_value}; +use arrow::error::ArrowError; +use sedona_schema::datatypes::{RasterRef, RasterRefImpl, StorageType}; + +/// Pretty print a raster band to a string with specified precision +pub fn pretty_print_indb( + raster: &RasterRefImpl, + band_number: usize, + precision: usize, // TODO: change this to an optional format string +) -> Result { + let band = raster.bands().band(band_number).unwrap(); + let metadata = raster.metadata(); + let height = metadata.height() as usize; + let width = metadata.width() as usize; + let mut result = String::new(); + + let slice = band.data() as &[u8]; + let data_type = band.metadata().data_type(); + if band.metadata().storage_type() != StorageType::InDb { + return Err(ArrowError::InvalidArgumentError( + "Pretty print indb not supported for non-InDb storage".to_string(), + )); + } + let bytes_per_pixel = bytes_per_pixel(data_type.clone()).unwrap_or(1); + for row in 0..height { + for col in 0..width { + let start = (row * width + col) * bytes_per_pixel; + let end = start + bytes_per_pixel; + let pixel_bytes = &slice[start..end]; + + match read_pixel_value(pixel_bytes, data_type.clone()) { + Ok(value) => result.push_str(&format!("{:8.*} ", precision, value)), + Err(_) => 
result.push_str(&format!("{:>8} ", "?")), // Well-spaced question mark + } + } + result.push('\n'); + } + + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, + }; + + #[test] + fn test_pretty_print() { + let mut raster_builder = RasterBuilder::new(1); + + let metadata1 = RasterMetadata { + width: 3, + height: 2, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + let band_data_type = BandDataType::Float32; + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: band_data_type.clone(), + outdb_url: None, + outdb_band_id: None, + }; + + raster_builder.start_raster(&metadata1, None, None).unwrap(); + let pixel_values: Vec = vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6111]; + let test_data1: Vec = pixel_values + .iter() + .flat_map(|&val| val.to_le_bytes()) + .collect(); + raster_builder.band_data_writer().append_value(&test_data1); + raster_builder.finish_band(band_metadata.clone()).unwrap(); + raster_builder.finish_raster().unwrap(); + + let raster_struct = raster_builder.finish().unwrap(); + let raster = sedona_schema::datatypes::RasterRefImpl::new(&raster_struct, 0); + + let pretty = pretty_print_indb(&raster, 0, 2).unwrap(); + + let expected = " 1.10 2.20 3.30 \n 4.40 5.50 6.61 \n"; + assert_eq!(pretty, expected); + } +} diff --git a/rust/sedona-raster/src/lib.rs b/rust/sedona-raster/src/lib.rs index 1452ef30..d5e19990 100644 --- a/rust/sedona-raster/src/lib.rs +++ b/rust/sedona-raster/src/lib.rs @@ -1 +1,2 @@ pub mod datatype_functions; +pub mod display_functions; diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs index 72c027fc..7298af51 100644 --- a/rust/sedona-schema/src/datatypes.rs +++ b/rust/sedona-schema/src/datatypes.rs @@ -1010,8 +1010,9 @@ pub trait BandsRef { fn 
is_empty(&self) -> bool { self.len() == 0 } - /// Get a specific band by index (returns None if out of bounds) - fn band(&self, index: usize) -> Option>; + /// Get a specific band by number (returns None if out of bounds) + /// By convention, band numbers are 1-based + fn band(&self, number: usize) -> Option>; /// Iterator over all bands fn iter(&self) -> BandIterator<'_>; } @@ -1229,11 +1230,15 @@ impl<'a> BandsRef for BandsRefImpl<'a> { end - start } - /// Get a specific band by index - /// IMPORTANT: This function is utilizing zero based band indexing. - /// We may want to consider one-based indexing to match - /// raster standard band conventions. - fn band(&self, index: usize) -> Option> { + /// Get a specific band by number (1-based index) + fn band(&self, number: usize) -> Option> { + // TODO: Error instead of returning None for out-of-bounds + if number == 0 { + return None; // Band numbers are 1-based + } + // By convention, band numbers are 1-based. + // Convert to zero-based index. 
+ let index = number - 1; if index >= self.len() { return None; } @@ -1275,7 +1280,7 @@ impl<'a> BandsRef for BandsRefImpl<'a> { fn iter(&self) -> BandIterator<'_> { BandIterator { bands: self, - current: 0, + current: 1, // Start at 1 for 1-based band numbering } } } @@ -1290,7 +1295,8 @@ impl<'a> Iterator for BandIterator<'a> { type Item = Box; fn next(&mut self) -> Option { - if self.current < self.bands.len() { + // current is 1-based, compare against len() + 1 + if self.current <= self.bands.len() { let band = self.bands.band(self.current); self.current += 1; band @@ -1300,7 +1306,8 @@ impl<'a> Iterator for BandIterator<'a> { } fn size_hint(&self) -> (usize, Option) { - let remaining = self.bands.len().saturating_sub(self.current); + // current is 1-based, so remaining calculation needs adjustment + let remaining = self.bands.len().saturating_sub(self.current - 1); (remaining, Some(remaining)) } } @@ -1873,7 +1880,8 @@ mod tests { assert_eq!(bands.len(), 1); assert!(!bands.is_empty()); - let band = bands.band(0).unwrap(); + // Access band with 1-based band_number + let band = bands.band(1).unwrap(); assert_eq!(band.data().len(), 100); assert_eq!(band.data()[0], 1u8); @@ -1934,8 +1942,10 @@ mod tests { assert_eq!(bands.len(), 3); // Test each band has different data + // Use 1-based band numbers for i in 0..3 { - let band = bands.band(i).unwrap(); + // Access band with 1-based band_number + let band = bands.band(i + 1).unwrap(); let expected_value = i as u8; assert!(band.data().iter().all(|&x| x == expected_value)); } @@ -2050,7 +2060,7 @@ mod tests { assert_eq!(target_bbox.max_x, -120.0); // But band data and metadata should be different - let target_band = target_raster.bands().band(0).unwrap(); + let target_band = target_raster.bands().band(1).unwrap(); let target_band_meta = target_band.metadata(); assert_eq!(target_band_meta.data_type(), BandDataType::UInt16); assert!(target_band_meta.nodata_value().is_none()); @@ -2104,7 +2114,8 @@ mod tests { 
assert_eq!(raster_2.metadata().height(), 3); assert_eq!(raster_2.metadata().upper_left_x(), 2.0); - let band = raster_2.bands().band(0).unwrap(); + // Access band data with 1-based band_number + let band = raster_2.bands().band(1).unwrap(); assert_eq!(band.data().len(), 9); assert!(band.data().iter().all(|&x| x == 2u8)); @@ -2373,8 +2384,10 @@ mod tests { BandDataType::Float64, ]; + // i is zero-based index for (i, expected_type) in expected_types.iter().enumerate() { - let band = bands.band(i).unwrap(); + // Bands are 1-based band_number + let band = bands.band(i + 1).unwrap(); let band_metadata = band.metadata(); let actual_type = band_metadata.data_type(); @@ -2442,7 +2455,7 @@ mod tests { assert_eq!(bands.len(), 2); // Test InDb band - let indb_band = bands.band(0).unwrap(); + let indb_band = bands.band(1).unwrap(); let indb_metadata = indb_band.metadata(); assert_eq!(indb_metadata.storage_type(), StorageType::InDb); assert_eq!(indb_metadata.data_type(), BandDataType::UInt8); @@ -2451,7 +2464,7 @@ mod tests { assert_eq!(indb_band.data().len(), 100); // Test OutDbRef band - let outdb_band = bands.band(1).unwrap(); + let outdb_band = bands.band(2).unwrap(); let outdb_metadata = outdb_band.metadata(); assert_eq!(outdb_metadata.storage_type(), StorageType::OutDbRef); assert_eq!(outdb_metadata.data_type(), BandDataType::Float32); From b766f224f5bb9fb736e04eaadc678aa8bf424ced Mon Sep 17 00:00:00 2001 From: jesspav <202656197+jesspav@users.noreply.github.com> Date: Fri, 24 Oct 2025 18:56:45 -0700 Subject: [PATCH 18/18] Add benchmarking --- Cargo.lock | 3 + rust/sedona-functions/Cargo.toml | 1 + .../benches/native-functions.rs | 5 + rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 1 + rust/sedona-functions/src/rs_asmatrix.rs | 87 +++++--- rust/sedona-gdal/Cargo.toml | 6 + rust/sedona-gdal/benches/gdal-functions.rs | 38 ++++ rust/sedona-gdal/src/lib.rs | 1 + rust/sedona-gdal/src/readers.rs | 33 ++- rust/sedona-gdal/src/register.rs | 13 +- 
rust/sedona-gdal/src/rs_value.rs | 4 +- rust/sedona-raster/src/display_functions.rs | 31 ++- rust/sedona-schema/src/datatypes.rs | 5 +- rust/sedona-testing/src/benchmark_util.rs | 198 +++++++++++++++++- rust/sedona-testing/src/testers.rs | 35 ++++ 16 files changed, 390 insertions(+), 72 deletions(-) create mode 100644 rust/sedona-gdal/benches/gdal-functions.rs diff --git a/Cargo.lock b/Cargo.lock index 958f1e1f..0c8a27be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4967,6 +4967,7 @@ dependencies = [ "sedona-common", "sedona-expr", "sedona-geometry", + "sedona-raster", "sedona-schema", "sedona-testing", "serde_json", @@ -4982,6 +4983,7 @@ dependencies = [ "arrow", "arrow-array", "arrow-schema", + "criterion", "datafusion-common", "datafusion-expr", "gdal", @@ -4991,6 +4993,7 @@ dependencies = [ "sedona-functions", "sedona-raster", "sedona-schema", + "sedona-testing", ] [[package]] diff --git a/rust/sedona-functions/Cargo.toml b/rust/sedona-functions/Cargo.toml index 8c5f409a..d552d568 100644 --- a/rust/sedona-functions/Cargo.toml +++ b/rust/sedona-functions/Cargo.toml @@ -44,6 +44,7 @@ geo-traits = { workspace = true } sedona-common = { path = "../sedona-common" } sedona-expr = { path = "../sedona-expr" } sedona-geometry = { path = "../sedona-geometry" } +sedona-raster = { path = "../sedona-raster" } sedona-schema = { path = "../sedona-schema" } wkb = { workspace = true } wkt = { workspace = true } diff --git a/rust/sedona-functions/benches/native-functions.rs b/rust/sedona-functions/benches/native-functions.rs index ef5ee81d..4b30e89b 100644 --- a/rust/sedona-functions/benches/native-functions.rs +++ b/rust/sedona-functions/benches/native-functions.rs @@ -21,6 +21,11 @@ use sedona_testing::benchmark_util::{benchmark, BenchmarkArgSpec::*, BenchmarkAr fn criterion_benchmark(c: &mut Criterion) { let f = sedona_functions::register::default_function_set(); + benchmark::scalar(c, &f, "native", "rs_width", Raster(128, 128, 1)); + + let args = 
BenchmarkArgs::ArrayScalar(Raster(8, 8, 1), Int32(1, 2)); + benchmark::scalar(c, &f, "native", "rs_asmatrix", args); + let st_asbinary: ScalarUDF = f.scalar_udf("st_asbinary").unwrap().clone().into(); let st_astext: ScalarUDF = f.scalar_udf("st_astext").unwrap().clone().into(); diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index c5d4a6a6..a8f68d44 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -21,6 +21,7 @@ mod overlay; mod predicates; mod referencing; pub mod register; +mod rs_asmatrix; mod rs_value; mod rs_width; mod sd_format; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index 7e1b71fa..4a2b5ed9 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -61,6 +61,7 @@ pub fn default_function_set() -> FunctionSet { crate::referencing::st_line_interpolate_point_udf, crate::referencing::st_line_locate_point_udf, crate::rs_width::rs_width_udf, + crate::rs_asmatrix::rs_asmatrix_udf, crate::sd_format::sd_format_udf, crate::st_area::st_area_udf, crate::st_asbinary::st_asbinary_udf, diff --git a/rust/sedona-functions/src/rs_asmatrix.rs b/rust/sedona-functions/src/rs_asmatrix.rs index da13de29..883ae113 100644 --- a/rust/sedona-functions/src/rs_asmatrix.rs +++ b/rust/sedona-functions/src/rs_asmatrix.rs @@ -17,18 +17,16 @@ use std::{sync::Arc, vec}; use crate::executor::RasterExecutor; -use arrow_array::builder::UInt64Builder; +use arrow_array::builder::StringBuilder; +use arrow_schema::ArrowError; use arrow_schema::DataType; -use datafusion_common::error::Result; +use datafusion_common::{error::Result, scalar::ScalarValue}; use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; -use sedona_schema::{ - datatypes::{RasterRef, SedonaType}, - matchers::ArgMatcher, -} -use 
sedona_raster::display_functions::pretty_print_indb; +use sedona_raster::display_functions::write_band_to_builder; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; /// RS_AsMatrix() scalar UDF implementation /// @@ -62,10 +60,11 @@ struct RsAsMatrix {} impl SedonaScalarKernel for RsAsMatrix { fn return_type(&self, args: &[SedonaType]) -> Result> { let matcher = ArgMatcher::new( - vec![ArgMatcher::is_raster(), - ArgMatcher::is_numeric(), - ArgMatcher::is_optional(ArgMatcher::is_numeric()) - ], + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_numeric(), + ArgMatcher::optional(ArgMatcher::is_numeric()), + ], SedonaType::Arrow(DataType::Utf8), ); @@ -78,7 +77,10 @@ impl SedonaScalarKernel for RsAsMatrix { args: &[ColumnarValue], ) -> Result { let executor = RasterExecutor::new(arg_types, args); - let mut builder = Utf8Builder::with_capacity(executor.num_iterations()); + let mut builder = StringBuilder::with_capacity( + executor.num_iterations(), + 256 * executor.num_iterations(), + ); let band_number = extract_numeric_scalar(&args[1])? as usize; let precision = if args.len() > 2 { @@ -90,8 +92,11 @@ impl SedonaScalarKernel for RsAsMatrix { executor.execute_raster_void(|_i, raster_opt| { match raster_opt { Some(raster) => { - // TODO: maybe move this into gdal so that we can display outdb rasters too? 
- builder.append_value(pretty_print_indb(raster, band_number, precision)); + let mut output = String::new(); // TODO: Perf can be improved here + match write_band_to_builder(&raster, band_number, precision, &mut output) { + Ok(_) => builder.append_value(&output), + Err(_) => builder.append_null(), + } } None => builder.append_null(), } @@ -102,10 +107,32 @@ impl SedonaScalarKernel for RsAsMatrix { } } +fn extract_numeric_scalar(arg: &ColumnarValue) -> Result { + if let ColumnarValue::Scalar(scalar) = arg { + match scalar { + ScalarValue::Int8(Some(val)) => Ok(*val as usize), + ScalarValue::Int16(Some(val)) => Ok(*val as usize), + ScalarValue::Int32(Some(val)) => Ok(*val as usize), + ScalarValue::Int64(Some(val)) => Ok(*val as usize), + ScalarValue::UInt8(Some(val)) => Ok(*val as usize), + ScalarValue::UInt16(Some(val)) => Ok(*val as usize), + ScalarValue::UInt32(Some(val)) => Ok(*val as usize), + ScalarValue::UInt64(Some(val)) => Ok(*val as usize), + _ => Err(ArrowError::ParseError( + "Failed to extract numeric scalar: unsupported type or null value".to_string(), + )), + } + } else { + Err(ArrowError::ParseError( + "Failed to extract scalar value: expected scalar, got array".to_string(), + )) + } +} + #[cfg(test)] mod tests { use super::*; - use arrow_array::{Array, ArrayRef, Utf8Array}; + use arrow_array::{Array, ArrayRef, StringArray}; use datafusion_expr::ScalarUDF; use sedona_schema::datatypes::{ BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER, @@ -123,24 +150,28 @@ mod tests { let raster_array = create_test_raster_array(); let kernel = RsAsMatrix {}; - let args = vec![ColumnarValue::Array(raster_array), - ColumnarValue::Scalar(ScalarValue::from(1usize)), - ColumnarValue::Scalar(ScalarValue::from(2usize)) + let args = vec![ + ColumnarValue::Array(raster_array), + ColumnarValue::Scalar(ScalarValue::from(1u64)), + ]; + let arg_types = vec![ + RASTER, + SedonaType::Arrow(DataType::UInt64), + SedonaType::Arrow(DataType::Utf8), ]; - 
let arg_types = vec![RASTER, SedonaType::Arrow(DataType::UInt64), SedonaType::Arrow(DataType::UInt64)]; let result = kernel.invoke_batch(&arg_types, &args).unwrap(); - let expected_first = " 1.00 1.00 1.00 \n 1.00 1.00 1.00 \n"; - let expected_third = " 3.00 3.00 \n 3.00 3.00 \n 3.00 3.00 \n"; + let expected_first = "1.000000 1.000000 1.000000 \n1.000000 1.000000 1.000000 \n"; + let expected_third = "3.000000 3.000000 \n3.000000 3.000000 \n3.000000 3.000000 \n"; if let ColumnarValue::Array(result_array) = result { - let width_array = result_array.as_any().downcast_ref::().unwrap(); + let string_array = result_array.as_any().downcast_ref::().unwrap(); - assert_eq!(width_array.len(), 3); - assert_eq!(width_array.value(0), expected_first); - assert!(width_array.is_null(1)); // Second raster is null - assert_eq!(width_array.value(2), expected_third); + assert_eq!(string_array.len(), 3); + assert_eq!(string_array.value(0), expected_first); + assert!(string_array.is_null(1)); // Second raster is null + assert_eq!(string_array.value(2), expected_third); } else { panic!("Expected array result"); } @@ -174,7 +205,7 @@ mod tests { }; builder.start_raster(&metadata1, None, None).unwrap(); - let test_data1 = vec![1u8; 3*2]; // width * height + let test_data1 = vec![1u8; 3 * 2]; // width * height builder.band_data_writer().append_value(&test_data1); builder.finish_band(band_metadata.clone()).unwrap(); builder.finish_raster().unwrap(); @@ -196,7 +227,7 @@ mod tests { }; builder.start_raster(&metadata3, None, None).unwrap(); - let test_data3 = vec![3u8; 2*3]; // width * height + let test_data3 = vec![3u8; 2 * 3]; // width * height builder.band_data_writer().append_value(&test_data3); builder.finish_band(band_metadata).unwrap(); builder.finish_raster().unwrap(); diff --git a/rust/sedona-gdal/Cargo.toml b/rust/sedona-gdal/Cargo.toml index 52730d55..5fe0618d 100644 --- a/rust/sedona-gdal/Cargo.toml +++ b/rust/sedona-gdal/Cargo.toml @@ -13,6 +13,8 @@ result_large_err = "allow" 
[dev-dependencies] rstest = { workspace = true } +sedona-testing = { path = "../../rust/sedona-testing", features = ["criterion"] } +criterion = { workspace = true} [dependencies] arrow = { workspace = true } @@ -26,3 +28,7 @@ sedona-expr = { path = "../sedona-expr" } sedona-functions = { path = "../sedona-functions" } sedona-raster = { path = "../sedona-raster" } sedona-schema = { path = "../sedona-schema" } + +[[bench]] +name = "gdal-functions" +harness = false \ No newline at end of file diff --git a/rust/sedona-gdal/benches/gdal-functions.rs b/rust/sedona-gdal/benches/gdal-functions.rs new file mode 100644 index 00000000..b736aec2 --- /dev/null +++ b/rust/sedona-gdal/benches/gdal-functions.rs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+use criterion::{criterion_group, criterion_main, Criterion}; +use sedona_expr::function_set::FunctionSet; +use sedona_testing::benchmark_util::{benchmark, BenchmarkArgSpec::*, BenchmarkArgs}; + +fn criterion_benchmark(c: &mut Criterion) { + let mut f = FunctionSet::new(); + for (name, kernel) in sedona_gdal::register::scalar_kernels() { + f.add_scalar_udf_kernel(name, kernel).unwrap(); + } + + let args = BenchmarkArgs::ArrayScalarScalarScalar( + Raster(128, 128, 1), + Int32(0, 127), + Int32(0, 127), + Int32(1, 2), + ); + + benchmark::scalar(c, &f, "sedona-gdal", "rs_value", args); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/rust/sedona-gdal/src/lib.rs b/rust/sedona-gdal/src/lib.rs index d1e637ca..7b860b10 100644 --- a/rust/sedona-gdal/src/lib.rs +++ b/rust/sedona-gdal/src/lib.rs @@ -17,4 +17,5 @@ pub mod dataset; pub mod readers; +pub mod register; pub mod rs_value; diff --git a/rust/sedona-gdal/src/readers.rs b/rust/sedona-gdal/src/readers.rs index dccb14d5..898f6a57 100644 --- a/rust/sedona-gdal/src/readers.rs +++ b/rust/sedona-gdal/src/readers.rs @@ -26,14 +26,13 @@ use sedona_schema::datatypes::{ }; use std::sync::Arc; -pub fn read_raster( - filepath: &str, -) -> Result, ArrowError> { +pub fn read_raster(filepath: &str) -> Result, ArrowError> { let dataset = Dataset::open(filepath.to_string()) .map_err(|err| ArrowError::ParseError(err.to_string()))?; // Extract geotransform components - let (origin_x, origin_y, pixel_width, pixel_height, rotation_x, rotation_y) = geotransform_components(&dataset)?; + let (origin_x, origin_y, pixel_width, pixel_height, rotation_x, rotation_y) = + geotransform_components(&dataset)?; let (raster_width, raster_height) = dataset.raster_size(); @@ -158,18 +157,18 @@ mod tests { #[test] fn test_load_raster() { - // TODO: Add proper tests here. 
- // To load a raster and view contents - // for prototyping fun: - // - // let filepath = "/test1.tiff"; - // let result = read_raster(filepath); - // assert!(result.is_ok()); - // - // To view loaded raster: - // let raster_array = result.unwrap(); - // for raster in raster_iterator(&raster_array) { - // println!("{}", pretty_print_indb(&raster, 1, 2).unwrap()); - // } + // TODO: Add proper tests here. + // To load a raster and view contents + // for prototyping fun: + // + // let filepath = "/test1.tiff"; + // let result = read_raster(filepath); + // assert!(result.is_ok()); + // + // To view loaded raster: + // let raster_array = result.unwrap(); + // for raster in raster_iterator(&raster_array) { + // println!("{}", pretty_print_indb(&raster, 1, 2).unwrap()); + // } } } diff --git a/rust/sedona-gdal/src/register.rs b/rust/sedona-gdal/src/register.rs index ef8087e4..201c5b8f 100644 --- a/rust/sedona-gdal/src/register.rs +++ b/rust/sedona-gdal/src/register.rs @@ -17,17 +17,12 @@ use sedona_expr::aggregate_udf::SedonaAccumulatorRef; use sedona_expr::scalar_udf::ScalarKernelRef; -use crate::{ - rs_value::rs_value_impl, -}; +use crate::rs_value::rs_value_impl; pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { - vec![ - ("rs_value", rs_value_impl()), - ] + vec![("rs_value", rs_value_impl())] } pub fn aggregate_kernels() -> Vec<(&'static str, SedonaAccumulatorRef)> { - vec![ - ] -} \ No newline at end of file + vec![] +} diff --git a/rust/sedona-gdal/src/rs_value.rs b/rust/sedona-gdal/src/rs_value.rs index 8e1702ce..45b41588 100644 --- a/rust/sedona-gdal/src/rs_value.rs +++ b/rust/sedona-gdal/src/rs_value.rs @@ -16,6 +16,7 @@ // under the License. 
use std::sync::Arc; +use crate::dataset::outdb_dataset; use arrow_array::builder::Float64Builder; use arrow_schema::{ArrowError, DataType}; use datafusion_common::{error::Result, scalar::ScalarValue}; @@ -24,7 +25,6 @@ use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::RasterExecutor; use sedona_raster::datatype_functions::{bytes_per_pixel, read_pixel_value}; use sedona_schema::datatypes::{BandMetadataRef, BandRef, RasterRef, SedonaType, StorageType}; -use crate::dataset::outdb_dataset; /// RS_Value() implementation pub fn rs_value_impl() -> ScalarKernelRef { @@ -147,8 +147,6 @@ fn outdb_pixel( _width: usize, _height: usize, ) -> Result { - - let dataset = outdb_dataset(metadata)?; let band_number = match metadata.outdb_band_id() { diff --git a/rust/sedona-raster/src/display_functions.rs b/rust/sedona-raster/src/display_functions.rs index 501af256..450a99d9 100644 --- a/rust/sedona-raster/src/display_functions.rs +++ b/rust/sedona-raster/src/display_functions.rs @@ -18,18 +18,19 @@ use crate::datatype_functions::{bytes_per_pixel, read_pixel_value}; use arrow::error::ArrowError; use sedona_schema::datatypes::{RasterRef, RasterRefImpl, StorageType}; +use std::fmt::Write; -/// Pretty print a raster band to a string with specified precision -pub fn pretty_print_indb( +/// Write raster band matrix directly to a StringBuilder with specified precision +pub fn write_band_to_builder( raster: &RasterRefImpl, band_number: usize, - precision: usize, // TODO: change this to an optional format string -) -> Result { + precision: usize, + out: &mut impl Write, +) -> Result<(), ArrowError> { let band = raster.bands().band(band_number).unwrap(); let metadata = raster.metadata(); let height = metadata.height() as usize; let width = metadata.width() as usize; - let mut result = String::new(); let slice = band.data() as &[u8]; let data_type = band.metadata().data_type(); @@ -39,6 +40,7 @@ pub fn pretty_print_indb( )); } let 
bytes_per_pixel = bytes_per_pixel(data_type.clone()).unwrap_or(1); + for row in 0..height { for col in 0..width { let start = (row * width + col) * bytes_per_pixel; @@ -46,19 +48,23 @@ pub fn pretty_print_indb( let pixel_bytes = &slice[start..end]; match read_pixel_value(pixel_bytes, data_type.clone()) { - Ok(value) => result.push_str(&format!("{:8.*} ", precision, value)), - Err(_) => result.push_str(&format!("{:>8} ", "?")), // Well-spaced question mark + Ok(value) => { + out.write_fmt(format_args!("{:8.*} ", precision, value)) + .unwrap(); + } + Err(_) => out.write_fmt(format_args!("{:>8} ", "?")).unwrap(), } } - result.push('\n'); + out.write_fmt(format_args!("\n")).unwrap(); } - Ok(result) + Ok(()) } #[cfg(test)] mod tests { use super::*; + use arrow::array::StringBuilder; use sedona_schema::datatypes::{ BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, }; @@ -101,9 +107,12 @@ mod tests { let raster_struct = raster_builder.finish().unwrap(); let raster = sedona_schema::datatypes::RasterRefImpl::new(&raster_struct, 0); - let pretty = pretty_print_indb(&raster, 0, 2).unwrap(); + let builder = StringBuilder::new(); + write_band_to_builder(&raster, 1, 2, &mut builder).unwrap(); + let binding = builder.finish(); + let result = binding.value(0); let expected = " 1.10 2.20 3.30 \n 4.40 5.50 6.61 \n"; - assert_eq!(pretty, expected); + assert_eq!(result, expected); } } diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs index 7298af51..e99912d0 100644 --- a/rust/sedona-schema/src/datatypes.rs +++ b/rust/sedona-schema/src/datatypes.rs @@ -104,9 +104,12 @@ impl SedonaType { /// Given an [`ExtensionType`], construct a SedonaType pub fn from_extension_type(extension: ExtensionType) -> Result { - let (edges, crs) = deserialize_edges_and_crs(&extension.extension_metadata)?; if extension.extension_name == "geoarrow.wkb" { + let (edges, crs) = deserialize_edges_and_crs(&extension.extension_metadata)?; 
sedona_type_wkb(edges, crs, extension.storage_type) + } else if extension.extension_name == "sedona.raster" { + // For raster extension types, return the RASTER constant + Ok(RASTER) } else { sedona_internal_err!( "Extension type not implemented: <{}>:{}", diff --git a/rust/sedona-testing/src/benchmark_util.rs b/rust/sedona-testing/src/benchmark_util.rs index 6b55848d..8ee45b10 100644 --- a/rust/sedona-testing/src/benchmark_util.rs +++ b/rust/sedona-testing/src/benchmark_util.rs @@ -26,7 +26,7 @@ use rand::{distributions::Uniform, rngs::StdRng, Rng, SeedableRng}; use sedona_common::sedona_internal_err; use sedona_geometry::types::GeometryTypeId; -use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY}; +use sedona_schema::datatypes::{SedonaType, RASTER, WKB_GEOMETRY}; use crate::{ datagen::RandomPartitionedDataBuilder, @@ -169,6 +169,13 @@ pub enum BenchmarkArgs { ArrayArray(BenchmarkArgSpec, BenchmarkArgSpec), /// Invoke a function with an array and two scalar inputs ArrayScalarScalar(BenchmarkArgSpec, BenchmarkArgSpec, BenchmarkArgSpec), + /// Invoke a function with an array and three scalar inputs + ArrayScalarScalarScalar( + BenchmarkArgSpec, + BenchmarkArgSpec, + BenchmarkArgSpec, + BenchmarkArgSpec, + ), /// Invoke a ternary function with two arrays and a scalar ArrayArrayScalar(BenchmarkArgSpec, BenchmarkArgSpec, BenchmarkArgSpec), /// Invoke a ternary function with three arrays @@ -204,7 +211,8 @@ impl BenchmarkArgs { | BenchmarkArgs::ArrayArrayArrayArray(_, _, _, _) => self.specs(), BenchmarkArgs::ScalarArray(_, col) | BenchmarkArgs::ArrayScalar(col, _) - | BenchmarkArgs::ArrayScalarScalar(col, _, _) => { + | BenchmarkArgs::ArrayScalarScalar(col, _, _) + | BenchmarkArgs::ArrayScalarScalarScalar(col, _, _, _) => { vec![col.clone()] } }; @@ -217,6 +225,9 @@ impl BenchmarkArgs { BenchmarkArgs::ArrayScalarScalar(_, col0, col1) => { vec![col0.clone(), col1.clone()] } + BenchmarkArgs::ArrayScalarScalarScalar(_, col0, col1, col2) => { + vec![col0.clone(), 
col1.clone(), col2.clone()] + } _ => vec![], }; @@ -253,7 +264,8 @@ impl BenchmarkArgs { | BenchmarkArgs::ArrayArrayArray(col0, col1, col2) => { vec![col0.clone(), col1.clone(), col2.clone()] } - BenchmarkArgs::ArrayArrayArrayArray(col0, col1, col2, col3) => { + BenchmarkArgs::ArrayArrayArrayArray(col0, col1, col2, col3) + | BenchmarkArgs::ArrayScalarScalarScalar(col0, col1, col2, col3) => { vec![col0.clone(), col1.clone(), col2.clone(), col3.clone()] } } @@ -276,11 +288,16 @@ pub enum BenchmarkArgSpec { MultiPoint(usize), /// Randomly generated floating point input with a given range of values Float64(f64, f64), + /// Randomly generated integer input with a given range of values + Int32(i32, i32), /// A transformation of any of the above based on a [ScalarUDF] accepting /// a single argument Transformed(Box, ScalarUDF), /// A string that will be a constant String(String), + /// Randomly generated raster input with a specified width, height and number + // of bands. + Raster(usize, usize, usize), } // Custom implementation of Debug because otherwise the output of Transformed() @@ -293,8 +310,15 @@ impl Debug for BenchmarkArgSpec { Self::Polygon(arg0) => f.debug_tuple("Polygon").field(arg0).finish(), Self::MultiPoint(arg0) => f.debug_tuple("MultiPoint").field(arg0).finish(), Self::Float64(arg0, arg1) => f.debug_tuple("Float64").field(arg0).field(arg1).finish(), + Self::Int32(arg0, arg1) => f.debug_tuple("Int32").field(arg0).field(arg1).finish(), Self::Transformed(inner, t) => write!(f, "{}({:?})", t.name(), inner), Self::String(s) => write!(f, "String({s})"), + Self::Raster(width, height, bands) => f + .debug_tuple("Raster") + .field(width) + .field(height) + .field(bands) + .finish(), } } } @@ -308,11 +332,13 @@ impl BenchmarkArgSpec { | BenchmarkArgSpec::LineString(_) | BenchmarkArgSpec::MultiPoint(_) => WKB_GEOMETRY, BenchmarkArgSpec::Float64(_, _) => SedonaType::Arrow(DataType::Float64), + BenchmarkArgSpec::Int32(_, _) => SedonaType::Arrow(DataType::Int32), 
BenchmarkArgSpec::Transformed(inner, t) => { let tester = ScalarUdfTester::new(t.clone(), vec![inner.sedona_type()]); tester.return_type().unwrap() } BenchmarkArgSpec::String(_) => SedonaType::Arrow(DataType::Utf8), + BenchmarkArgSpec::Raster(_, _, _) => RASTER, } } @@ -374,6 +400,17 @@ impl BenchmarkArgSpec { }) .collect() } + BenchmarkArgSpec::Int32(lo, hi) => { + let mut rng = self.rng(i); + let dist = Uniform::new(*lo, *hi); + (0..num_batches) + .map(|_| -> Result { + let int32_array: arrow_array::Int32Array = + (0..rows_per_batch).map(|_| rng.sample(dist)).collect(); + Ok(Arc::new(int32_array)) + }) + .collect() + } BenchmarkArgSpec::Transformed(inner, t) => { let inner_type = inner.sedona_type(); let inner_arrays = inner.build_arrays(i, num_batches, rows_per_batch)?; @@ -394,6 +431,15 @@ impl BenchmarkArgSpec { .collect::>>()?; Ok(string_array) } + BenchmarkArgSpec::Raster(width, height, band_count) => self.build_raster( + i, + num_batches, + rows_per_batch, + width, + height, + band_count, + rows_per_batch, + ), } } @@ -426,6 +472,72 @@ impl BenchmarkArgSpec { .collect() } + fn build_raster( + &self, + i: usize, + num_batches: usize, + rows_per_batch: usize, + width: &usize, + height: &usize, + band_count: &usize, + _rows_per_raster: usize, + ) -> Result> { + use rand::distributions::Standard; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, + }; + + let mut rng = self.rng(i); + let mut batches = Vec::with_capacity(num_batches); + + for _ in 0..num_batches { + let mut builder = RasterBuilder::new(rows_per_batch); + + for _ in 0..rows_per_batch { + // Keep metadata constant across all rasters + let metadata = RasterMetadata { + width: *width as u64, + height: *height as u64, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None)?; + + // Generate random data for each band + for _ 
in 0..*band_count { + let band_metadata = BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, // Use UInt8 for simplicity + outdb_url: None, + outdb_band_id: None, + }; + + // Generate random pixel data + let num_pixels = width * height; + let pixel_data: Vec = + (0..num_pixels).map(|_| rng.sample(Standard)).collect(); + + builder.band_data_writer().append_value(&pixel_data); + builder.finish_band(band_metadata)?; + } + + builder.finish_raster()?; + } + + let struct_array = builder.finish()?; + batches.push(Arc::new(struct_array) as ArrayRef); + } + + Ok(batches) + } + fn rng(&self, i: usize) -> impl Rng { StdRng::seed_from_u64(42 + i as u64) } @@ -488,6 +600,19 @@ impl BenchmarkData { )?; } } + BenchmarkArgs::ArrayScalarScalarScalar(_, _, _, _) => { + let scalar0 = &self.scalars[0]; + let scalar1 = &self.scalars[1]; + let scalar2 = &self.scalars[2]; + for i in 0..self.num_batches { + tester.invoke_array_scalar_scalar_scalar( + self.arrays[0][i].clone(), + scalar0.clone(), + scalar1.clone(), + scalar2.clone(), + )?; + } + } BenchmarkArgs::ArrayArrayScalar(_, _, _) => { for i in 0..self.num_batches { tester.invoke_array_array_scalar( @@ -701,6 +826,35 @@ mod test { assert_eq!(data.scalars[0].data_type(), DataType::Float64); } + #[test] + fn args_array_scalar_scalar_scalar() { + let spec = BenchmarkArgs::ArrayScalarScalarScalar( + BenchmarkArgSpec::Point, + BenchmarkArgSpec::Float64(1.0, 2.0), + BenchmarkArgSpec::String("test".to_string()), + BenchmarkArgSpec::Int32(1, 10), + ); + assert_eq!( + spec.sedona_types(), + [ + WKB_GEOMETRY, + SedonaType::Arrow(DataType::Float64), + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int32) + ] + ); + + let data = spec.build_data(2, ROWS_PER_BATCH).unwrap(); + assert_eq!(data.num_batches, 2); + assert_eq!(data.arrays.len(), 1); + assert_eq!(data.scalars.len(), 3); + assert_eq!(data.arrays[0].len(), 2); + assert_eq!(WKB_GEOMETRY.storage_type(), 
data.arrays[0][0].data_type()); + assert_eq!(data.scalars[0].data_type(), DataType::Float64); + assert_eq!(data.scalars[1].data_type(), DataType::Utf8); + assert_eq!(data.scalars[2].data_type(), DataType::Int32); + } + #[test] fn args_scalar_array() { let spec = BenchmarkArgs::ScalarArray( @@ -857,4 +1011,42 @@ mod test { assert_eq!(data.arrays[3].len(), 2); assert_eq!(data.arrays[3][0].data_type(), &DataType::Float64); } + + #[test] + fn test_raster_generation() { + use sedona_schema::datatypes::{raster_iterator, RasterRef, RASTER}; + + let spec = BenchmarkArgs::Array(BenchmarkArgSpec::Raster(10, 5, 3)); + + assert_eq!(spec.sedona_types(), [RASTER]); + + let data = spec.build_data(2, 4).unwrap(); // 2 batches, 4 rasters per batch + assert_eq!(data.num_batches, 2); + assert_eq!(data.arrays.len(), 1); + assert_eq!(data.scalars.len(), 0); + assert_eq!(data.arrays[0].len(), 2); // 2 batches + + // Check that it's a raster type + assert_eq!(data.arrays[0][0].data_type(), RASTER.storage_type()); + + // Check the first batch has the right structure + let first_batch = data.arrays[0][0].clone(); + let raster_array = first_batch + .as_any() + .downcast_ref::() + .unwrap(); + + let iterator = raster_iterator(raster_array); + assert_eq!(iterator.len(), 4); // 4 rasters per batch + + // Check first raster metadata + let first_raster = iterator.get(0).unwrap(); + assert_eq!(first_raster.metadata().width(), 10); + assert_eq!(first_raster.metadata().height(), 5); + assert_eq!(first_raster.bands().len(), 3); + + // Check that band data exists and has the right size + let first_band = first_raster.bands().band(1).unwrap(); + assert_eq!(first_band.data().len(), 50); // 10 * 5 pixels + } } diff --git a/rust/sedona-testing/src/testers.rs b/rust/sedona-testing/src/testers.rs index a97bbeee..16113d71 100644 --- a/rust/sedona-testing/src/testers.rs +++ b/rust/sedona-testing/src/testers.rs @@ -296,6 +296,17 @@ impl ScalarUdfTester { self.invoke_arrays_scalar_scalar(vec![array], arg0, 
arg1) } + /// Invoke a binary function with an array and three scalars + pub fn invoke_array_scalar_scalar_scalar( + &self, + array: ArrayRef, + arg0: impl Literal, + arg1: impl Literal, + arg2: impl Literal, + ) -> Result { + self.invoke_arrays_scalar_scalar_scalar(vec![array], arg0, arg1, arg2) + } + /// Invoke a binary function with a scalar and an array pub fn invoke_scalar_array(&self, arg: impl Literal, array: ArrayRef) -> Result { self.invoke_scalar_arrays(arg, vec![array]) @@ -370,6 +381,30 @@ impl ScalarUdfTester { } } + fn invoke_arrays_scalar_scalar_scalar( + &self, + arrays: Vec, + arg0: impl Literal, + arg1: impl Literal, + arg2: impl Literal, + ) -> Result { + let mut args = zip(arrays, &self.arg_types) + .map(|(array, sedona_type)| { + ColumnarValue::Array(array).cast_to(sedona_type.storage_type(), None) + }) + .collect::>>()?; + let index = args.len(); + args.push(Self::scalar_arg(arg0, &self.arg_types[index])?); + args.push(Self::scalar_arg(arg1, &self.arg_types[index + 1])?); + args.push(Self::scalar_arg(arg2, &self.arg_types[index + 2])?); + + if let ColumnarValue::Array(array) = self.invoke(args)? { + Ok(array) + } else { + sedona_internal_err!("Expected array result from array/scalar invoke") + } + } + // Invoke a function with a set of arrays pub fn invoke_arrays(&self, arrays: Vec) -> Result { let args = zip(arrays, &self.arg_types)