diff --git a/Cargo.lock b/Cargo.lock index 7eca687c..0c8a27be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -950,22 +950,25 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.71.1" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.13.0", + "itertools 0.11.0", + "lazy_static", + "lazycell", "log", "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn 2.0.106", + "which", ] [[package]] @@ -983,7 +986,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.1", "shlex", "syn 2.0.106", ] @@ -1471,7 +1474,7 @@ dependencies = [ "crossterm_winapi", "document-features", "parking_lot", - "rustix", + "rustix 1.1.2", "winapi", ] @@ -2404,7 +2407,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", - "rustix", + "rustix 1.1.2", "windows-sys 0.59.0", ] @@ -2581,6 +2584,34 @@ dependencies = [ "slab", ] +[[package]] +name = "gdal" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82ab834e8be6b54fee3d0141fce5e776ad405add1f9d0da054281926e0d35a9f" +dependencies = [ + "bitflags", + "chrono", + "gdal-sys", + "geo-types", + "libc", + "once_cell", + "semver", + "thiserror 1.0.69", +] + +[[package]] +name = "gdal-sys" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18ad5d608ee6726efcf6e1d91261eb6dec7da3ee7db6bda984cdfb8a7d65ebf9" +dependencies = [ + "bindgen 0.69.5", + "libc", + "pkg-config", + "semver", +] + [[package]] name = "generational-arena" version = "0.2.9" @@ -3321,6 +3352,18 @@ dependencies = [ "wasm-bindgen", ] 
+[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "lexical-core" version = "1.0.6" @@ -3489,6 +3532,12 @@ dependencies = [ "cc", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -4209,7 +4258,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror 2.0.17", @@ -4229,7 +4278,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", @@ -4572,6 +4621,12 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -4587,6 +4642,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + [[package]] name = "rustix" version = "1.1.2" @@ -4596,7 +4664,7 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", + 
"linux-raw-sys 0.11.0", "windows-sys 0.61.2", ] @@ -4899,6 +4967,7 @@ dependencies = [ "sedona-common", "sedona-expr", "sedona-geometry", + "sedona-raster", "sedona-schema", "sedona-testing", "serde_json", @@ -4907,6 +4976,26 @@ dependencies = [ "wkt 0.14.0", ] +[[package]] +name = "sedona-gdal" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "criterion", + "datafusion-common", + "datafusion-expr", + "gdal", + "gdal-sys", + "rstest", + "sedona-expr", + "sedona-functions", + "sedona-raster", + "sedona-schema", + "sedona-testing", +] + [[package]] name = "sedona-geo" version = "0.2.0" @@ -5087,6 +5176,16 @@ dependencies = [ "wkb", ] +[[package]] +name = "sedona-raster" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-schema", + "rstest", + "sedona-schema", +] + [[package]] name = "sedona-s2geography" version = "0.2.0" @@ -5113,6 +5212,7 @@ dependencies = [ name = "sedona-schema" version = "0.2.0" dependencies = [ + "arrow", "arrow-array", "arrow-schema", "datafusion-common", @@ -5584,7 +5684,7 @@ dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix", + "rustix 1.1.2", "windows-sys 0.61.2", ] @@ -6185,6 +6285,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + [[package]] name = "winapi" version = "0.3.9" @@ -6504,7 +6616,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix", + "rustix 1.1.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f314e539..00c9a898 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,9 +27,11 @@ members = [ "rust/sedona-adbc", "rust/sedona-expr", "rust/sedona-functions", + 
"rust/sedona-gdal", "rust/sedona-geo", "rust/sedona-geometry", "rust/sedona-geoparquet", + "rust/sedona-raster", "rust/sedona-schema", "rust/sedona-spatial-join", "rust/sedona-testing", @@ -90,6 +92,9 @@ mimalloc = { version = "0.1", default-features = false } libmimalloc-sys = { version = "0.1", default-features = false } once_cell = "1.20" +gdal = { version = "0.17", features = ["bindgen"] } +gdal-sys = { version = "0.10", features = ["bindgen"] } + geos = { version = "10.0.0", features = ["geo", "v3_10_0"] } geo-types = "0.7.17" diff --git a/c/sedona-geoarrow-c/src/geoarrow_c.rs b/c/sedona-geoarrow-c/src/geoarrow_c.rs index 67b2608b..a0d3006f 100644 --- a/c/sedona-geoarrow-c/src/geoarrow_c.rs +++ b/c/sedona-geoarrow-c/src/geoarrow_c.rs @@ -281,6 +281,11 @@ fn geoarrow_type_id(sedona_type: &SedonaType) -> Result { + return Err(GeoArrowCError::Invalid( + "GeoArrow type ID not defined for Raster types".to_string(), + )); + } }; Ok(type_id) diff --git a/rust/sedona-functions/Cargo.toml b/rust/sedona-functions/Cargo.toml index 8c5f409a..d552d568 100644 --- a/rust/sedona-functions/Cargo.toml +++ b/rust/sedona-functions/Cargo.toml @@ -44,6 +44,7 @@ geo-traits = { workspace = true } sedona-common = { path = "../sedona-common" } sedona-expr = { path = "../sedona-expr" } sedona-geometry = { path = "../sedona-geometry" } +sedona-raster = { path = "../sedona-raster" } sedona-schema = { path = "../sedona-schema" } wkb = { workspace = true } wkt = { workspace = true } diff --git a/rust/sedona-functions/benches/native-functions.rs b/rust/sedona-functions/benches/native-functions.rs index ef5ee81d..4b30e89b 100644 --- a/rust/sedona-functions/benches/native-functions.rs +++ b/rust/sedona-functions/benches/native-functions.rs @@ -21,6 +21,11 @@ use sedona_testing::benchmark_util::{benchmark, BenchmarkArgSpec::*, BenchmarkAr fn criterion_benchmark(c: &mut Criterion) { let f = sedona_functions::register::default_function_set(); + benchmark::scalar(c, &f, "native", "rs_width", 
Raster(128, 128, 1)); + + let args = BenchmarkArgs::ArrayScalar(Raster(8, 8, 1), Int32(1, 2)); + benchmark::scalar(c, &f, "native", "rs_asmatrix", args); + let st_asbinary: ScalarUDF = f.scalar_udf("st_asbinary").unwrap().clone().into(); let st_astext: ScalarUDF = f.scalar_udf("st_astext").unwrap().clone().into(); diff --git a/rust/sedona-functions/src/executor.rs b/rust/sedona-functions/src/executor.rs index 0a98e71d..200364cd 100644 --- a/rust/sedona-functions/src/executor.rs +++ b/rust/sedona-functions/src/executor.rs @@ -16,14 +16,14 @@ // under the License. use std::iter::zip; -use arrow_array::ArrayRef; +use arrow_array::{Array, ArrayRef, StructArray}; use arrow_schema::DataType; use datafusion_common::cast::{as_binary_array, as_binary_view_array}; use datafusion_common::error::Result; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::ColumnarValue; use sedona_common::sedona_internal_err; -use sedona_schema::datatypes::SedonaType; +use sedona_schema::datatypes::{raster_iterator, SedonaType}; use wkb::reader::Wkb; /// Helper for writing general kernel implementations with geometry @@ -75,6 +75,116 @@ pub struct GenericExecutor<'a, 'b, Factory0, Factory1> { /// Alias for an executor that iterates over geometries as [Wkb] pub type WkbExecutor<'a, 'b> = GenericExecutor<'a, 'b, WkbGeometryFactory, WkbGeometryFactory>; +/// Helper for writing raster kernel implementations +/// +/// The [RasterExecutor] provides a simplified interface for executing functions +/// on raster arrays, handling the common pattern of downcasting to StructArray, +/// creating raster iterators, and handling null values. 
+pub struct RasterExecutor<'a, 'b> { + pub arg_types: &'a [SedonaType], + pub args: &'b [ColumnarValue], + num_iterations: usize, +} + +impl<'a, 'b> RasterExecutor<'a, 'b> { + /// Create a new [RasterExecutor] + pub fn new(arg_types: &'a [SedonaType], args: &'b [ColumnarValue]) -> Self { + Self { + arg_types, + args, + num_iterations: Self::calc_num_iterations(args), + } + } + + /// Return the number of iterations that will be performed + pub fn num_iterations(&self) -> usize { + self.num_iterations + } + + /// Execute a function by iterating over rasters in the first argument + /// + /// This handles the common pattern of: + /// 1. Downcasting array to StructArray + /// 2. Creating raster iterator + /// 3. Iterating with null checks + /// 4. Calling the provided function with each raster + pub fn execute_raster_void(&self, mut func: F) -> Result<()> + where + F: FnMut(usize, Option>) -> Result<()>, + { + let raster_array = match &self.args[0] { + ColumnarValue::Array(array) => array, + ColumnarValue::Scalar(_) => { + return Err(DataFusionError::NotImplemented( + "Scalar raster input not yet supported".to_string(), + )); + } + }; + + // Downcast to StructArray (rasters are stored as structs) + let raster_struct = raster_array + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal("Expected StructArray for raster data".to_string()) + })?; + + // Create raster iterator + let iterator = raster_iterator(raster_struct); + + // Iterate through each raster in the array + for i in 0..self.num_iterations { + if raster_struct.is_null(i) { + func(i, None)?; + } else { + // Get the raster at this index + let raster = iterator.get(i).ok_or_else(|| { + DataFusionError::Internal(format!("Failed to get raster at index {}", i)) + })?; + func(i, Some(raster))?; + } + } + + Ok(()) + } + + /// Finish an [ArrayRef] output as the appropriate [ColumnarValue] + /// + /// Converts the output into a [ColumnarValue::Scalar] if all arguments were scalars, + /// or a 
[ColumnarValue::Array] otherwise. + pub fn finish(&self, out: ArrayRef) -> Result { + for arg in self.args { + match arg { + // If any argument was an array, we return an array + ColumnarValue::Array(_) => { + return Ok(ColumnarValue::Array(out)); + } + ColumnarValue::Scalar(_) => {} + } + } + + // For all scalar arguments, we return a scalar + Ok(ColumnarValue::Scalar(ScalarValue::try_from_array(&out, 0)?)) + } + + /// Calculates the number of iterations that should happen based on the + /// argument ColumnarValue types + fn calc_num_iterations(args: &[ColumnarValue]) -> usize { + for arg in args { + match arg { + // If any argument is an array, we have to iterate array.len() times + ColumnarValue::Array(array) => { + return array.len(); + } + ColumnarValue::Scalar(_) => {} + } + } + + // All scalars: we iterate once + 1 + } +} + impl<'a, 'b, Factory0: GeometryFactory, Factory1: GeometryFactory> GenericExecutor<'a, 'b, Factory0, Factory1> { diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 6c144f7c..a8f68d44 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -21,6 +21,9 @@ mod overlay; mod predicates; mod referencing; pub mod register; +mod rs_asmatrix; +mod rs_value; +mod rs_width; mod sd_format; pub mod st_analyze_aggr; mod st_area; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index b08338ca..4a2b5ed9 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -60,6 +60,8 @@ pub fn default_function_set() -> FunctionSet { crate::predicates::st_within_udf, crate::referencing::st_line_interpolate_point_udf, crate::referencing::st_line_locate_point_udf, + crate::rs_width::rs_width_udf, + crate::rs_asmatrix::rs_asmatrix_udf, crate::sd_format::sd_format_udf, crate::st_area::st_area_udf, crate::st_asbinary::st_asbinary_udf, @@ -127,6 +129,7 @@ pub mod stubs { pub use crate::overlay::*; pub use 
crate::predicates::*; pub use crate::referencing::*; + pub use crate::rs_value::rs_value_udf; pub use crate::st_area::st_area_udf; pub use crate::st_azimuth::st_azimuth_udf; pub use crate::st_centroid::st_centroid_udf; diff --git a/rust/sedona-functions/src/rs_asmatrix.rs b/rust/sedona-functions/src/rs_asmatrix.rs new file mode 100644 index 00000000..883ae113 --- /dev/null +++ b/rust/sedona-functions/src/rs_asmatrix.rs @@ -0,0 +1,237 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+use std::{sync::Arc, vec}; + +use crate::executor::RasterExecutor; +use arrow_array::builder::StringBuilder; +use arrow_schema::ArrowError; +use arrow_schema::DataType; +use datafusion_common::{error::Result, scalar::ScalarValue}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_raster::display_functions::write_band_to_builder; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// RS_AsMatrix() scalar UDF implementation +/// +/// Render a raster band as a pretty-printed 2D matrix string +pub fn rs_asmatrix_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_asmatrix", + vec![Arc::new(RsAsMatrix {})], + Volatility::Immutable, + Some(rs_asmatrix_doc()), + ) +} + +fn rs_asmatrix_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + format!("Returns a string, that when printed, outputs the raster band as a pretty printed 2D matrix."), + format!("RS_AsMatrix(raster: Raster, band_number: Numeric, postDecimalPrecision: Numeric)"), + ) + .with_argument("raster", "Raster: Input raster") + .with_argument("band_number", "Numeric: Band number (1-based).") + .with_argument("postDecimalPrecision", "Numeric: Number of digits after decimal point. 
Optional, default is 6.") + .with_sql_example("SELECT RS_AsMatrix(raster, band_number)") + .with_sql_example("SELECT RS_AsMatrix(raster, band_number, postDecimalPrecision)") + .build() +} + +#[derive(Debug)] +struct RsAsMatrix {} + +impl SedonaScalarKernel for RsAsMatrix { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_numeric(), + ArgMatcher::optional(ArgMatcher::is_numeric()), + ], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut builder = StringBuilder::with_capacity( + executor.num_iterations(), + 256 * executor.num_iterations(), + ); + + let band_number = extract_numeric_scalar(&args[1])? as usize; + let precision = if args.len() > 2 { + extract_numeric_scalar(&args[2])? as usize + } else { + 6usize + }; + + executor.execute_raster_void(|_i, raster_opt| { + match raster_opt { + Some(raster) => { + let mut output = String::new(); // TODO: Perf can be improved here + match write_band_to_builder(&raster, band_number, precision, &mut output) { + Ok(_) => builder.append_value(&output), + Err(_) => builder.append_null(), + } + } + None => builder.append_null(), + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +fn extract_numeric_scalar(arg: &ColumnarValue) -> Result { + if let ColumnarValue::Scalar(scalar) = arg { + match scalar { + ScalarValue::Int8(Some(val)) => Ok(*val as usize), + ScalarValue::Int16(Some(val)) => Ok(*val as usize), + ScalarValue::Int32(Some(val)) => Ok(*val as usize), + ScalarValue::Int64(Some(val)) => Ok(*val as usize), + ScalarValue::UInt8(Some(val)) => Ok(*val as usize), + ScalarValue::UInt16(Some(val)) => Ok(*val as usize), + ScalarValue::UInt32(Some(val)) => Ok(*val as usize), + ScalarValue::UInt64(Some(val)) => Ok(*val as usize), 
+ _ => Err(ArrowError::ParseError( + "Failed to extract numeric scalar: unsupported type or null value".to_string(), + )), + } + } else { + Err(ArrowError::ParseError( + "Failed to extract scalar value: expected scalar, got array".to_string(), + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Array, ArrayRef, StringArray}; + use datafusion_expr::ScalarUDF; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER, + }; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = rs_asmatrix_udf().into(); + assert_eq!(udf.name(), "rs_asmatrix"); + assert!(udf.documentation().is_some()); + } + + #[test] + fn udf_invoke() { + let raster_array = create_test_raster_array(); + + let kernel = RsAsMatrix {}; + let args = vec![ + ColumnarValue::Array(raster_array), + ColumnarValue::Scalar(ScalarValue::from(1u64)), + ]; + let arg_types = vec![ + RASTER, + SedonaType::Arrow(DataType::UInt64), + SedonaType::Arrow(DataType::Utf8), + ]; + + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + let expected_first = "1.000000 1.000000 1.000000 \n1.000000 1.000000 1.000000 \n"; + let expected_third = "3.000000 3.000000 \n3.000000 3.000000 \n3.000000 3.000000 \n"; + + if let ColumnarValue::Array(result_array) = result { + let string_array = result_array.as_any().downcast_ref::().unwrap(); + + assert_eq!(string_array.len(), 3); + assert_eq!(string_array.value(0), expected_first); + assert!(string_array.is_null(1)); // Second raster is null + assert_eq!(string_array.value(2), expected_third); + } else { + panic!("Expected array result"); + } + } + + /// Create a test raster array with different widths for testing + // TODO: Parameterize the creation of rasters and move the + // function to sedona-testing + fn create_test_raster_array() -> ArrayRef { + let mut builder = RasterBuilder::new(3); + + // First raster: 3x2 (width x height) + let metadata1 = RasterMetadata { + width: 3, + height: 2, + upperleft_x: 
0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + builder.start_raster(&metadata1, None, None).unwrap(); + let test_data1 = vec![1u8; 3 * 2]; // width * height + builder.band_data_writer().append_value(&test_data1); + builder.finish_band(band_metadata.clone()).unwrap(); + builder.finish_raster().unwrap(); + + // Second raster: null + builder.append_null().unwrap(); + + // Third raster: 2x3 (width x height) + let metadata3 = RasterMetadata { + width: 2, + height: 3, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata3, None, None).unwrap(); + let test_data3 = vec![3u8; 2 * 3]; // width * height + builder.band_data_writer().append_value(&test_data3); + builder.finish_band(band_metadata).unwrap(); + builder.finish_raster().unwrap(); + + Arc::new(builder.finish().unwrap()) + } +} diff --git a/rust/sedona-functions/src/rs_value.rs b/rust/sedona-functions/src/rs_value.rs new file mode 100644 index 00000000..17b33bcf --- /dev/null +++ b/rust/sedona-functions/src/rs_value.rs @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::vec; + +use arrow_schema::DataType; +use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility}; +use sedona_expr::scalar_udf::SedonaScalarUDF; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// RS_Value() scalar UDF implementation +/// +/// Extracts the pixel value at a specified location from a raster band. +/// +/// This function samples a raster at the given column and row coordinates (colX, colY) +/// within the specified band. The coordinates are 0-based pixel indices where: +/// - colX: column index (0 to raster width - 1) +/// - colY: row index (0 to raster height - 1) +/// - band: band number (1-based index, where 1 is the first band) +/// +/// Returns Float64 to provide a unified return type that can represent values from +/// different raster data types (UInt8, UInt16, Float32, etc.). Returns null if: +/// - The input raster is null +/// +/// Throws an exception if: +/// - The coordinates are outside the raster bounds +/// - The specified band does not exist +/// +/// TODO: should we return null if the pixel value is nodata? +/// +/// Future versions may support point geometry input for coordinate specification. 
+pub fn rs_value_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new_stub( + "rs_value", + ArgMatcher::new( + vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_numeric(), + ArgMatcher::is_numeric(), + ArgMatcher::is_numeric(), + ], + SedonaType::Arrow(DataType::Float64), + ), + Volatility::Immutable, + Some(rs_value_doc()), + ) +} + +fn rs_value_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + format!("Returns the value at the given point in the raster.",), + format!("RS_Value (raster: Raster, colX: Integer, colY: Integer, band: Integer)"), + ) + .with_argument("raster", "Raster: Input raster") + .with_argument("x", "Integer: X coordinate") + .with_argument("y", "Integer: Y coordinate") + .with_argument("band_id", "Integer: Band number (1-based index)") + .with_sql_example(format!("SELECT RS_Value(raster, x, y, band_id)",)) + .build() +} + +#[cfg(test)] +mod tests { + use super::*; + use datafusion_expr::ScalarUDF; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = rs_value_udf().into(); + assert_eq!(udf.name(), "rs_value"); + assert!(udf.documentation().is_some()); + } +} diff --git a/rust/sedona-functions/src/rs_width.rs b/rust/sedona-functions/src/rs_width.rs new file mode 100644 index 00000000..7ee3ab47 --- /dev/null +++ b/rust/sedona-functions/src/rs_width.rs @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::{sync::Arc, vec}; + +use crate::executor::RasterExecutor; +use arrow_array::builder::UInt64Builder; +use arrow_schema::DataType; +use datafusion_common::error::Result; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_schema::{ + datatypes::{RasterRef, SedonaType}, + matchers::ArgMatcher, +}; + +/// RS_Width() scalar UDF implementation +/// +/// Extract the width of the raster +pub fn rs_width_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_width", + vec![Arc::new(RsWidth {})], + Volatility::Immutable, + Some(rs_width_doc()), + ) +} + +fn rs_width_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + format!("Return the width component of a raster",), + format!("RS_Width(raster: Raster)"), + ) + .with_argument("raster", "Raster: Input raster") + .with_sql_example(format!("SELECT RS_Width(raster)",)) + .build() +} + +#[derive(Debug)] +struct RsWidth {} + +impl SedonaScalarKernel for RsWidth { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_raster()], + SedonaType::Arrow(DataType::UInt64), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let mut builder = UInt64Builder::with_capacity(executor.num_iterations()); + + executor.execute_raster_void(|_i, raster_opt| { + 
match raster_opt { + None => builder.append_null(), + Some(raster) => { + let width = raster.metadata().width(); + builder.append_value(width); + } + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Array, ArrayRef, UInt64Array}; + use datafusion_expr::ScalarUDF; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER, + }; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = rs_width_udf().into(); + assert_eq!(udf.name(), "rs_width"); + assert!(udf.documentation().is_some()); + } + + #[test] + fn udf_invoke() { + // Create test rasters with different widths + let raster_array = create_test_raster_array(); + + // Create the UDF and invoke it + let kernel = RsWidth {}; + let args = vec![ColumnarValue::Array(raster_array)]; + let arg_types = vec![RASTER]; + + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + // Check the result + if let ColumnarValue::Array(result_array) = result { + let width_array = result_array.as_any().downcast_ref::().unwrap(); + + assert_eq!(width_array.len(), 3); + assert_eq!(width_array.value(0), 10); // First raster width + assert!(width_array.is_null(1)); // Second raster is null + assert_eq!(width_array.value(2), 30); // Third raster width + } else { + panic!("Expected array result"); + } + } + + /// Create a test raster array with different widths for testing + // TODO: Parameterize the creation of rasters and move the + // function to sedona-testing + fn create_test_raster_array() -> ArrayRef { + let mut builder = RasterBuilder::new(3); + + // First raster: 10x12 + let metadata1 = RasterMetadata { + width: 10, + height: 12, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: 
BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + builder.start_raster(&metadata1, None, None).unwrap(); + let test_data1 = vec![1u8; 10 * 12]; // width * height + builder.band_data_writer().append_value(&test_data1); + builder.finish_band(band_metadata.clone()).unwrap(); + builder.finish_raster().unwrap(); + + // Second raster: null + builder.append_null().unwrap(); + + // Third raster: 30x15 + let metadata3 = RasterMetadata { + width: 30, + height: 15, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata3, None, None).unwrap(); + let test_data3 = vec![3u8; 30 * 15]; // width * height + builder.band_data_writer().append_value(&test_data3); + builder.finish_band(band_metadata).unwrap(); + builder.finish_raster().unwrap(); + + Arc::new(builder.finish().unwrap()) + } +} diff --git a/rust/sedona-functions/src/sd_format.rs b/rust/sedona-functions/src/sd_format.rs index b1bb33ad..e060c7f7 100644 --- a/rust/sedona-functions/src/sd_format.rs +++ b/rust/sedona-functions/src/sd_format.rs @@ -127,6 +127,7 @@ impl SedonaScalarKernel for SDFormatDefault { fn sedona_type_to_formatted_type(sedona_type: &SedonaType) -> Result { match sedona_type { SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => Ok(SedonaType::Arrow(DataType::Utf8)), + SedonaType::Raster(_) => Ok(SedonaType::Arrow(DataType::Utf8)), SedonaType::Arrow(arrow_type) => { // dive into the arrow type and translate geospatial types into Utf8 match arrow_type { @@ -166,6 +167,9 @@ fn columnar_value_to_formatted_value( SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => { geospatial_value_to_formatted_value(sedona_type, columnar_value, maybe_width_hint) } + SedonaType::Raster(_) => { + geospatial_value_to_formatted_value(sedona_type, columnar_value, maybe_width_hint) + } SedonaType::Arrow(arrow_type) => match arrow_type { DataType::Struct(fields) => match columnar_value { 
ColumnarValue::Array(array) => { diff --git a/rust/sedona-gdal/Cargo.lock b/rust/sedona-gdal/Cargo.lock new file mode 100644 index 00000000..8271b820 --- /dev/null +++ b/rust/sedona-gdal/Cargo.lock @@ -0,0 +1,2994 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "arrow" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + 
+[[package]] +name = "arrow-arith" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", +] + +[[package]] +name = "arrow-array" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" +dependencies = [ + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "55.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", + "zstd", +] + +[[package]] +name = "arrow-json" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-ord" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-row" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" +dependencies = [ + "bitflags", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-select" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bigdecimal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.107", +] + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cc" +version = "1.2.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "num-traits", + "windows-link", +] + +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "comfy-table" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" 
+dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69dfeda1633bf8ec75b068d9f6c27cdc392ffcf5ff83128d5dbab65b73c1fd02" +dependencies = [ + "arrow", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + 
"datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "rand", + "regex", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", +] + +[[package]] +name = "datafusion-catalog" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2848fd1e85e2953116dab9cc2eb109214b0888d7bbd2230e30c07f1794f642c0" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051a1634628c2d1296d4e326823e7536640d87a118966cdaff069b68821ad53b" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765e4ad4ef7a4500e389a3f1e738791b71ff4c29fd00912c2f541d62b25da096" +dependencies = [ + "ahash", + "arrow", + "arrow-ipc", + "base64", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "libc", + "log", + "object_store", + "paste", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"40a2ae8393051ce25d232a6065c4558ab5a535c9637d5373bacfd464ac88ea12" +dependencies = [ + "futures", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90cd841a77f378bc1a5c4a1c37345e1885a9203b008203f9f4b3a769729bf330" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand", + "tokio", + "url", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77f4a2c64939c6f0dd15b246723a699fa30d59d0133eb36a86e8ff8c6e2a8dc6" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11387aaf931b2993ad9273c63ddca33f05aef7d02df9b70fb757429b4b71cdae" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "object_store", + "serde_json", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "49.0.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ff336d1d755399753a9e4fbab001180e346fc8bfa063a97f1214b82274c00f8" + +[[package]] +name = "datafusion-execution" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "042ea192757d1b2d7dcf71643e7ff33f6542c7704f00228d8b85b40003fd8e0f" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025222545d6d7fab71e2ae2b356526a1df67a2872222cbae7535e557a42abd2e" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5c267104849d5fa6d81cf5ba88f35ecd58727729c5eb84066c25227b644ae2" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c620d105aa208fcee45c588765483314eb415f5571cfd6c1bae3a59c5b4d15bb" +dependencies = [ + "arrow", + "arrow-buffer", + "base64", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools 0.14.0", + "log", + "rand", + "regex", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "49.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "35f61d5198a35ed368bf3aacac74f0d0fa33de7a7cb0c57e9f68ab1346d2f952" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13efdb17362be39b5024f6da0d977ffe49c0212929ec36eec550e07e2bc7812f" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-table" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecf156589cc21ef59fe39c7a9a841b4a97394549643bbfa88cc44e8588cf8fe5" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edcb25e3e369f1366ec9a261456e45b5aad6ea1c0c8b4ce546587207c501ed9e" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8996a8e11174d0bd7c62dc2f316485affc6ae5ffd5b8a68b508137ace2310294" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" 
+version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ee8d1be549eb7316f437035f2cec7ec42aba8374096d807c4de006a3b5d78a" +dependencies = [ + "datafusion-expr", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "datafusion-optimizer" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9fa98671458254928af854e5f6c915e66b860a8bde505baea0ff2892deab74d" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "indexmap", + "itertools 0.14.0", + "log", + "regex", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3515d51531cca5f7b5a6f3ea22742b71bb36fc378b465df124ff9a2fa349b002" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.14.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24485475d9c618a1d33b2a3dad003d946dc7a7bbf0354d125301abc0a5a79e3e" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9da411a0a64702f941a12af2b979434d14ec5d36c6f49296966b2c7639cbb3a" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + 
"datafusion-pruning", + "itertools 0.14.0", + "log", +] + +[[package]] +name = "datafusion-physical-plan" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6d168282bb7b54880bb3159f89b51c047db4287f5014d60c3ef4c6e1468212b" +dependencies = [ + "ahash", + "arrow", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-pruning" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391a457b9d23744c53eeb89edd1027424cba100581488d89800ed841182df905" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", +] + +[[package]] +name = "datafusion-session" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053201c2bb729c7938f85879034df2b5a52cfaba16f1b3b66ab8505c81b2aad3" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "49.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9082779be8ce4882189b229c0cff4393bd0808282a7194130c9f32159f185e25" +dependencies = [ + "arrow", + "bigdecimal", + 
"datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "flatbuffers" +version = "25.9.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gdal" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e721cea67b420fd4b5cb15ba8145f2f1d3a6931a27fdbfadb46cff02015e1cde" +dependencies = [ + "bitflags", + "chrono", + "gdal-sys", + "geo-types", + "semver", + "thiserror 2.0.17", +] + +[[package]] +name = "gdal-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "febef67dc08a956a9ecb04de2b40dbd15ad56be49421aad9ae0cdcbe9a24166c" +dependencies = [ + "bindgen", + "pkg-config", + "semver", +] + +[[package]] +name = "geo-traits" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + +[[package]] +name = "geo-types" +version = "0.7.17" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99" +dependencies = [ + "approx", + "num-traits", + "serde", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + +[[package]] +name = "hex" +version = "0.4.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "http" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "humantime" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +dependencies = [ 
+ "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" + +[[package]] +name = "icu_properties" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "potential_utf", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" + +[[package]] +name = "icu_provider" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +dependencies = [ + "displaydoc", + "icu_locale_core", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + 
"icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +dependencies = [ + "equivalent", + "hashbrown 0.16.0", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lexical-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" 
+dependencies = [ + "lexical-parse-integer", + "lexical-util", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "lexical-util" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" + +[[package]] +name = "lexical-write-float" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" +dependencies = [ + "lexical-util", + "lexical-write-integer", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" +dependencies = [ + "lexical-util", +] + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "litemap" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lz4_flex" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + 
+[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + 
"proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "object_store" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "http", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror 2.0.17", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap", + "serde", +] + +[[package]] 
+name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "potential_utf" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.107", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" 
+dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = 
"regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + +[[package]] +name = "rstest" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03e905296805ab93e13c1ec3a03f4b6c4f35e9498a3d5fa96dc626d22c03cd89" +dependencies = [ + "futures-timer", + "futures-util", + "rstest_macros", + "rustc_version", +] + +[[package]] +name = "rstest_macros" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef0053bbffce09062bee4bcc499b0fbe7a57b879f1efe088d6d8d4c7adcdef9b" +dependencies = [ + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.107", + "unicode-ident", +] + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags", + 
"errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sedona-common" +version = "0.2.0" +dependencies = [ + "datafusion", + "datafusion-common", + "regex", +] + +[[package]] +name = "sedona-expr" +version = "0.2.0" +dependencies = [ + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "geo-traits", + "sedona-common", + "sedona-geometry", + "sedona-schema", + "serde", + "serde_json", +] + +[[package]] +name = "sedona-functions" +version = "0.2.0" +dependencies = [ + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "geo-traits", + "sedona-common", + "sedona-expr", + "sedona-geometry", + "sedona-schema", + "serde_json", + "wkb", + "wkt", +] + +[[package]] +name = "sedona-gdal" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "gdal", + "rstest", + "sedona-expr", + "sedona-functions", + "sedona-raster", + "sedona-schema", +] + +[[package]] +name = "sedona-geometry" +version = "0.2.0" +dependencies = [ + "geo-traits", + "lru", + "serde", + "serde_with", + 
"thiserror 2.0.17", + "wkb", +] + +[[package]] +name = "sedona-raster" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-schema", + "sedona-schema", +] + +[[package]] +name = "sedona-schema" +version = "0.2.0" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "sedona-common", + "serde_json", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "serde_with" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" +dependencies = [ + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "sqlparser" +version = "0.55.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tokio" +version = "1.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +dependencies = [ + "bytes", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "toml_datetime" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow", +] + +[[package]] +name = 
"tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", +] + +[[package]] +name = "twox-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "url" +version = "2.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "uuid" +version = "1.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +dependencies = [ + "getrandom 0.3.4", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn 2.0.107", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.54" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "winnow" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "wkb" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "908e92c755a5f5ff8781c1c7ebcedb60ce5443879d20f4a0b6a1ee8fb3e6dfb6" +dependencies = [ + "byteorder", + "geo-traits", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +dependencies = [ + "geo-traits", + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + +[[package]] +name = "writeable" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" + +[[package]] +name = "yoke" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + 
+[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.107", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" 
+dependencies = [ + "cc", + "pkg-config", +] diff --git a/rust/sedona-gdal/Cargo.toml b/rust/sedona-gdal/Cargo.toml new file mode 100644 index 00000000..5fe0618d --- /dev/null +++ b/rust/sedona-gdal/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "sedona-gdal" +version.workspace = true +homepage.workspace = true +repository.workspace = true +description.workspace = true +readme.workspace = true +edition.workspace = true +rust-version.workspace = true + +[lints.clippy] +result_large_err = "allow" + +[dev-dependencies] +rstest = { workspace = true } +sedona-testing = { path = "../../rust/sedona-testing", features = ["criterion"] } +criterion = { workspace = true} + +[dependencies] +arrow = { workspace = true } +arrow-array = { workspace = true } +arrow-schema = { workspace = true } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +gdal = {workspace = true} +gdal-sys = {workspace = true} +sedona-expr = { path = "../sedona-expr" } +sedona-functions = { path = "../sedona-functions" } +sedona-raster = { path = "../sedona-raster" } +sedona-schema = { path = "../sedona-schema" } + +[[bench]] +name = "gdal-functions" +harness = false \ No newline at end of file diff --git a/rust/sedona-gdal/benches/gdal-functions.rs b/rust/sedona-gdal/benches/gdal-functions.rs new file mode 100644 index 00000000..b736aec2 --- /dev/null +++ b/rust/sedona-gdal/benches/gdal-functions.rs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use criterion::{criterion_group, criterion_main, Criterion}; +use sedona_expr::function_set::FunctionSet; +use sedona_testing::benchmark_util::{benchmark, BenchmarkArgSpec::*, BenchmarkArgs}; + +fn criterion_benchmark(c: &mut Criterion) { + let mut f = FunctionSet::new(); + for (name, kernel) in sedona_gdal::register::scalar_kernels() { + f.add_scalar_udf_kernel(name, kernel).unwrap(); + } + + let args = BenchmarkArgs::ArrayScalarScalarScalar( + Raster(128, 128, 1), + Int32(0, 127), + Int32(0, 127), + Int32(1, 2), + ); + + benchmark::scalar(c, &f, "sedona-gdal", "rs_value", args); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/rust/sedona-gdal/src/dataset.rs b/rust/sedona-gdal/src/dataset.rs new file mode 100644 index 00000000..14c0e7e9 --- /dev/null +++ b/rust/sedona-gdal/src/dataset.rs @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. +use arrow_schema::ArrowError; +use gdal::{Dataset, Metadata}; +use sedona_schema::datatypes::{BandMetadataRef, StorageType}; + +/// Get the out-db dataset reference from a raster band. +pub fn outdb_dataset(metadata: &dyn BandMetadataRef) -> Result<Dataset, ArrowError> { + if metadata.storage_type() != StorageType::OutDbRef { + return Err(ArrowError::ParseError( + "Raster band is not stored out-of-db".to_string(), + )); + } + + let url = match metadata.outdb_url() { + Some(url) => url, + None => { + return Err(ArrowError::ParseError( + "Raster band does not have an out-db URL".to_string(), + )) + } + }; + + // These datasets may appear in multiple rasters and be opened repeatedly. + // Adding a caching layer here would improve performance. + // Could also consider having a pool of these datasets - gdal dataset has a GetRefCount + // that may be helpful for keeping track of references. + open_outdb_band(&url) +} + +fn open_outdb_band(url: &str) -> Result<Dataset, ArrowError> { + let full_url = format!("/vsicurl/{}", url); + let ds = Dataset::open(full_url).map_err(|e| ArrowError::ParseError(e.to_string()))?; + Ok(ds) +} + +/// Extract geotransform components from a GDAL dataset +/// Returns (upper_left_x, upper_left_y, pixel_width, pixel_height, x_skew, y_skew) +pub fn geotransform_components( + dataset: &Dataset, +) -> Result<(f64, f64, f64, f64, f64, f64), ArrowError> { + let geotransform = dataset + .geo_transform() + .map_err(|e| ArrowError::ParseError(format!("Failed to get geotransform: {e}")))?; + Ok(( + geotransform[0], // Upper-left X coordinate + geotransform[3], // Upper-left Y coordinate + geotransform[1], // Pixel width (scale_x) + geotransform[5], // Pixel height (scale_y, usually negative) + geotransform[2], // X-direction skew + geotransform[4], // Y-direction skew + )) +} + +/// Extract tile size from a GDAL dataset +/// If not provided, defaults to raster size +pub fn tile_size(dataset:
&Dataset) -> Result<(usize, usize), ArrowError> { + let raster_width = dataset.raster_size().0; + let raster_height = dataset.raster_size().1; + + let tile_width = match dataset.metadata_item("TILEWIDTH", "") { + Some(val) => val.parse::<usize>().unwrap_or(raster_width), + None => raster_width, + }; + let tile_height = match dataset.metadata_item("TILEHEIGHT", "") { + Some(val) => val.parse::<usize>().unwrap_or(raster_height), + None => raster_height, + }; + + Ok((tile_width, tile_height)) +} + +#[cfg(test)] +mod test {} diff --git a/rust/sedona-gdal/src/lib.rs b/rust/sedona-gdal/src/lib.rs new file mode 100644 index 00000000..7b860b10 --- /dev/null +++ b/rust/sedona-gdal/src/lib.rs @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod dataset; +pub mod readers; +pub mod register; +pub mod rs_value; diff --git a/rust/sedona-gdal/src/readers.rs b/rust/sedona-gdal/src/readers.rs new file mode 100644 index 00000000..898f6a57 --- /dev/null +++ b/rust/sedona-gdal/src/readers.rs @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership.
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::dataset::{geotransform_components, tile_size}; +use arrow_array::StructArray; +use arrow_schema::ArrowError; +use gdal::raster::{GdalDataType, RasterBand}; +use gdal::Dataset; +use sedona_raster::datatype_functions::{bytes_per_pixel, f64_to_bandtype_bytes}; +use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, +}; +use std::sync::Arc; + +pub fn read_raster(filepath: &str) -> Result<Arc<StructArray>, ArrowError> { + let dataset = Dataset::open(filepath.to_string()) + .map_err(|err| ArrowError::ParseError(err.to_string()))?; + + // Extract geotransform components + let (origin_x, origin_y, pixel_width, pixel_height, rotation_x, rotation_y) = + geotransform_components(&dataset)?; + + let (raster_width, raster_height) = dataset.raster_size(); + + let (tile_width, tile_height) = tile_size(&dataset)?; + + let x_tile_count = (raster_width + tile_width - 1) / tile_width; + let y_tile_count = (raster_height + tile_height - 1) / tile_height; + + let mut raster_builder = RasterBuilder::new(x_tile_count * y_tile_count); + let band_count = dataset.raster_count(); + + for tile_y in 0..y_tile_count { + for tile_x in 0..x_tile_count { + let x_offset = tile_x * tile_width; + let y_offset = tile_y * tile_height; + + // Calculate geographic coordinates for this tile + // using the geotransform from the original raster + let tile_origin_x = + origin_x +
(x_offset as f64) * pixel_width + (y_offset as f64) * rotation_x; + let tile_origin_y = + origin_y + (x_offset as f64) * rotation_y + (y_offset as f64) * pixel_height; + + // Create raster metadata for this tile with actual geotransform values + let tile_metadata = RasterMetadata { + width: tile_width as u64, + height: tile_height as u64, + upperleft_x: tile_origin_x, + upperleft_y: tile_origin_y, + scale_x: pixel_width, + scale_y: pixel_height, + skew_x: rotation_x, + skew_y: rotation_y, + bounding_box: None, // TODO: should we calculate bounding box here? + }; + + raster_builder.start_raster(&tile_metadata, None, None)?; + + for band_number in 1..=band_count { + let band: RasterBand = dataset.rasterband(band_number).unwrap(); + // This should be the same as tile width/height, except for edge tiles + // but we would need to update the width/height in the metadata above then. + // For now, fail if sizes don't match. + let (x_size, y_size) = band.size(); + if x_size != tile_width || y_size != tile_height { + return Err(ArrowError::ParseError(format!( + "Band size ({}, {}) does not match expected tile size ({}, {})", + x_size, y_size, tile_width, tile_height + ))); + } + + let data_type = gdaldatatype_to_banddatatype(band.band_type())?; + let data_type_bytes = bytes_per_pixel(data_type.clone())?; + let buffer_size_bytes = x_size * y_size * data_type_bytes.clone(); + + // Get a mutable buffer slice for GDAL to write directly into + let (buffer, slice) = raster_builder.get_band_buffer_slice(buffer_size_bytes); + + // TODO: Do we need resampling? If so set buffer_size to different from window_size + // and have a ResampleAlgorithm. 
+ band.read_into_slice( + (x_offset as isize, y_offset as isize), // window_origin + (x_size, y_size), // window_size + (x_size, y_size), // buffer_size (no resampling) + slice, // buffer + None, // resampling algorithms + ) + .map_err(|e| { + ArrowError::ParseError(format!("Failed to read band {band_number} {e}")) + })?; + + raster_builder.commit_band_buffer(buffer); + + let nodata_value = match band.no_data_value() { + Some(val) => Some(f64_to_bandtype_bytes(val, data_type.clone())?), + None => None, + }; + + let band_metadata = BandMetadata { + nodata_value: nodata_value, + storage_type: StorageType::InDb, + datatype: data_type, + outdb_url: None, + outdb_band_id: None, + }; + + // Finalize the band + raster_builder.finish_band(band_metadata)?; + } + + // Finalize the raster + raster_builder.finish_raster()?; + } + } + + // Finalize the raster struct array + let raster_struct = raster_builder.finish()?; + Ok(Arc::new(raster_struct)) +} + +fn gdaldatatype_to_banddatatype(gdal_data_type: GdalDataType) -> Result<BandDataType, ArrowError> { + match gdal_data_type { + GdalDataType::UInt8 => Ok(BandDataType::UInt8), + GdalDataType::UInt16 => Ok(BandDataType::UInt16), + GdalDataType::Int16 => Ok(BandDataType::Int16), + GdalDataType::UInt32 => Ok(BandDataType::UInt32), + GdalDataType::Int32 => Ok(BandDataType::Int32), + GdalDataType::Float32 => Ok(BandDataType::Float32), + GdalDataType::Float64 => Ok(BandDataType::Float64), + _ => Err(ArrowError::InvalidArgumentError(format!( + "Unsupported GDAL data type: {:?}", + gdal_data_type + ))), + } +} + +#[cfg(test)] +mod tests { + // use super::*; + // use sedona_raster::display_functions::pretty_print_indb; + // use sedona_schema::datatypes::raster_iterator; + + #[test] + fn test_load_raster() { + // TODO: Add proper tests here.
+ // To load a raster and view contents + // for prototyping fun: + // + // let filepath = "/test1.tiff"; + // let result = read_raster(filepath); + // assert!(result.is_ok()); + // + // To view loaded raster: + // let raster_array = result.unwrap(); + // for raster in raster_iterator(&raster_array) { + // println!("{}", pretty_print_indb(&raster, 1, 2).unwrap()); + // } + } +} diff --git a/rust/sedona-gdal/src/register.rs b/rust/sedona-gdal/src/register.rs new file mode 100644 index 00000000..201c5b8f --- /dev/null +++ b/rust/sedona-gdal/src/register.rs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+use sedona_expr::aggregate_udf::SedonaAccumulatorRef; +use sedona_expr::scalar_udf::ScalarKernelRef; + +use crate::rs_value::rs_value_impl; + +pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { + vec![("rs_value", rs_value_impl())] +} + +pub fn aggregate_kernels() -> Vec<(&'static str, SedonaAccumulatorRef)> { + vec![] +} diff --git a/rust/sedona-gdal/src/rs_value.rs b/rust/sedona-gdal/src/rs_value.rs new file mode 100644 index 00000000..45b41588 --- /dev/null +++ b/rust/sedona-gdal/src/rs_value.rs @@ -0,0 +1,443 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+use std::sync::Arc; + +use crate::dataset::outdb_dataset; +use arrow_array::builder::Float64Builder; +use arrow_schema::{ArrowError, DataType}; +use datafusion_common::{error::Result, scalar::ScalarValue}; +use datafusion_expr::ColumnarValue; +use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; +use sedona_functions::executor::RasterExecutor; +use sedona_raster::datatype_functions::{bytes_per_pixel, read_pixel_value}; +use sedona_schema::datatypes::{BandMetadataRef, BandRef, RasterRef, SedonaType, StorageType}; + +/// RS_Value() implementation +pub fn rs_value_impl() -> ScalarKernelRef { + Arc::new(RSValue {}) +} + +#[derive(Debug)] +struct RSValue {} + +impl SedonaScalarKernel for RSValue { + fn return_type( + &self, + _arg_types: &[SedonaType], + ) -> Result<Option<SedonaType>, datafusion_common::DataFusionError> { + Ok(Some(SedonaType::Arrow(DataType::Float64))) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + let executor = RasterExecutor::new(arg_types, args); + + let x = extract_numeric_scalar(&args[1])?; + let y = extract_numeric_scalar(&args[2])?; + let band_number = extract_numeric_scalar(&args[3])?; + + let mut builder = Float64Builder::with_capacity(executor.num_iterations()); + + executor.execute_raster_void(|_i, raster_opt| { + match raster_opt { + None => builder.append_null(), + Some(raster) => { + match invoke_scalar(&raster, x, y, band_number) { + Ok(value) => builder.append_value(value), + // TODO: Error or null on bad index?
+ Err(_) => builder.append_null(), + } + } + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +fn invoke_scalar( + raster: &dyn RasterRef, + x: usize, + y: usize, + band_number: usize, +) -> Result<f64, ArrowError> { + // Extract metadata from the raster + let metadata = raster.metadata(); + let width = metadata.width() as usize; + let height = metadata.height() as usize; + + // Check that x,y are within width/height + if x >= width || y >= height { + return Err(ArrowError::InvalidArgumentError( + "Coordinates are outside raster bounds".to_string(), + )); + } + + // Get the band (using 1-based band numbering) + let bands = raster.bands(); + if band_number == 0 || band_number > bands.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "Band number {} does not exist (valid range: 1-{}, raster has {} bands)", + band_number, + bands.len(), + bands.len() + ))); + } + let band = bands.band(band_number).ok_or_else(|| { + ArrowError::InvalidArgumentError("Failed to get band at index".to_string()) + })?; + let band_metadata = band.metadata(); + + match band_metadata.storage_type() { + StorageType::InDb => indb_pixel(band_metadata, &*band, x, y, width, height), + StorageType::OutDbRef => outdb_pixel(band_metadata, x, y, width, height), + } +} + +fn indb_pixel( + metadata: &dyn BandMetadataRef, + band: &dyn BandRef, + x: usize, + y: usize, + width: usize, + _height: usize, +) -> Result<f64, ArrowError> { + if let Some(_nodata_bytes) = metadata.nodata_value() { + // TODO: Compare pixel value against nodata value?
+ } + + let data_type = metadata.data_type(); + let bytes_per_px = bytes_per_pixel(data_type.clone())?; + // TODO: we may want to consider a different ordering + let offset = (y * width + x) * bytes_per_px; + + let band_data = band.data(); + if offset + bytes_per_px > band_data.len() { + return Err(ArrowError::InvalidArgumentError( + "Pixel offset exceeds band data length".to_string(), + )); + } + + let pixel_bytes = &band_data[offset..offset + bytes_per_px]; + read_pixel_value(pixel_bytes, data_type) +} + +fn outdb_pixel( + metadata: &dyn BandMetadataRef, + x: usize, + y: usize, + _width: usize, + _height: usize, +) -> Result<f64, ArrowError> { + let dataset = outdb_dataset(metadata)?; + + let band_number = match metadata.outdb_band_id() { + Some(index) => index, + None => { + return Err(ArrowError::ParseError( + "Raster band does not have a band index".to_string(), + )) + } + }; + + let band = dataset.rasterband(band_number as usize).map_err(|_| { + ArrowError::ParseError("Failed to get raster band from dataset".to_string()) + })?; + + // Read a single pixel at the specified coordinates + let pixel_data = band + .read_as::<f64>((x as isize, y as isize), (1, 1), (1, 1), None) + .map_err(|_| ArrowError::ParseError("Failed to read pixel data from GDAL".to_string()))?; + + Ok(pixel_data.data()[0]) +} + +fn extract_numeric_scalar(arg: &ColumnarValue) -> Result<usize, ArrowError> { + if let ColumnarValue::Scalar(scalar) = arg { + match scalar { + ScalarValue::Int8(Some(val)) => Ok(*val as usize), + ScalarValue::Int16(Some(val)) => Ok(*val as usize), + ScalarValue::Int32(Some(val)) => Ok(*val as usize), + ScalarValue::Int64(Some(val)) => Ok(*val as usize), + ScalarValue::UInt8(Some(val)) => Ok(*val as usize), + ScalarValue::UInt16(Some(val)) => Ok(*val as usize), + ScalarValue::UInt32(Some(val)) => Ok(*val as usize), + ScalarValue::UInt64(Some(val)) => Ok(*val as usize), + _ => Err(ArrowError::ParseError( + "Failed to extract numeric scalar: unsupported type or null value".to_string(), + )), + } + } else { +
Err(ArrowError::ParseError( + "Failed to extract scalar value: expected scalar, got array".to_string(), + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Array, ArrayRef, Float64Array}; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, RASTER, + }; + + #[test] + fn udf_invoke_outdb() { + let raster_array = create_outdb_test_raster_array(); + let kernel = RSValue {}; + // Get pixel at (2,3) in band 1 + let args = vec![ + ColumnarValue::Array(raster_array), + ColumnarValue::Scalar(ScalarValue::from(2i64)), + ColumnarValue::Scalar(ScalarValue::from(3i64)), + ColumnarValue::Scalar(ScalarValue::from(1i64)), + ]; + let arg_types = vec![ + RASTER, + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + ]; + + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + // Check the result + if let ColumnarValue::Array(result_array) = result { + let pixel_array = result_array + .as_any() + .downcast_ref::<Float64Array>() + .unwrap(); + + assert_eq!(pixel_array.len(), 1); + + let expected_first = 201.0; + assert_eq!(pixel_array.value(0), expected_first,); + } else { + panic!("Expected array result for outdb"); + } + } + + #[test] + fn udf_invoke_indb_all_band_types() { + // Test with different band data types + let band_types = vec![ + BandDataType::UInt8, + BandDataType::Int16, + BandDataType::UInt16, + BandDataType::Int32, + BandDataType::UInt32, + BandDataType::Float32, + BandDataType::Float64, + ]; + + for band_data_type in band_types { + let raster_array = create_indb_test_raster_array(band_data_type.clone()); + + let kernel = RSValue {}; + // Get pixel at (2,3) in band 1 + let args = vec![ + ColumnarValue::Array(raster_array), + ColumnarValue::Scalar(ScalarValue::from(2i64)), + ColumnarValue::Scalar(ScalarValue::from(3i64)), +
ColumnarValue::Scalar(ScalarValue::from(1i64)), + ]; + let arg_types = vec![ + RASTER, + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + sedona_schema::datatypes::SedonaType::Arrow(DataType::Int64), + ]; + + let result = kernel.invoke_batch(&arg_types, &args).unwrap(); + + // Check the result + if let ColumnarValue::Array(result_array) = result { + let pixel_array = result_array + .as_any() + .downcast_ref::<Float64Array>() + .unwrap(); + + assert_eq!(pixel_array.len(), 3); + + // Expected pixel value at (2,3) for 10x12 raster: row 3 * width 10 + col 2 = 32 + let expected_first = 32.0; + assert_eq!( + pixel_array.value(0), + expected_first, + "Failed for band type {:?}", + band_data_type + ); + assert!( + pixel_array.is_null(1), + "Second raster should be null for band type {:?}", + band_data_type + ); + + // Expected pixel value at (2,3) for 30x15 raster: row 3 * width 30 + col 2 = 92 + let expected_third = 92.0; + assert_eq!( + pixel_array.value(2), + expected_third, + "Failed for band type {:?}", + band_data_type + ); + } else { + panic!("Expected array result for band type {:?}", band_data_type); + } + } + } + + /// Create a test raster array with different widths for testing + // TODO: Parameterize the creation of rasters and move the + // function to sedona-testing + fn create_indb_test_raster_array(band_data_type: BandDataType) -> ArrayRef { + let mut builder = RasterBuilder::new(3); + + // First raster: 10x12 + let metadata1 = RasterMetadata { + width: 10, + height: 12, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: band_data_type.clone(), + outdb_url: None, + outdb_band_id: None, + }; + + builder.start_raster(&metadata1, None, None).unwrap(); + let test_data1 = gen_sequential(10 * 12,
band_data_type.clone()); + builder.band_data_writer().append_value(&test_data1); + builder.finish_band(band_metadata.clone()).unwrap(); + builder.finish_raster().unwrap(); + + // Second raster: null + builder.append_null().unwrap(); + + // Third raster: 30x15 of band data (NOTE(review): metadata height below is 5, not 15 - confirm) + let metadata3 = RasterMetadata { + width: 30, + height: 5, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata3, None, None).unwrap(); + let test_data3 = gen_sequential(30 * 15, band_data_type.clone()); + builder.band_data_writer().append_value(&test_data3); + builder.finish_band(band_metadata).unwrap(); + builder.finish_raster().unwrap(); + + Arc::new(builder.finish().unwrap()) + } + + /// Generates sequential pixel values of BandDataType for testing + /// TODO: Add no-data values for testing + fn gen_sequential(num_pixels: usize, band_data_type: BandDataType) -> Vec<u8> { + let bytes_per_px = bytes_per_pixel(band_data_type.clone()).unwrap(); + let total_bytes = num_pixels * bytes_per_px; + let mut data = Vec::with_capacity(total_bytes); + + for i in 0..num_pixels { + match band_data_type { + BandDataType::UInt8 => { + data.push(i as u8); + } + BandDataType::Int16 => { + let bytes = (i as i16).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::UInt16 => { + let bytes = (i as u16).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::Int32 => { + let bytes = (i as i32).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::UInt32 => { + let bytes = (i as u32).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::Float32 => { + let bytes = (i as f32).to_le_bytes(); + data.extend_from_slice(&bytes); + } + BandDataType::Float64 => { + let bytes = (i as f64).to_le_bytes(); + data.extend_from_slice(&bytes); + } + } + } + + data + } + + fn create_outdb_test_raster_array() -> ArrayRef { + // TODO: Unit tests should not query external resources.
+ // This function is for proof-of-concept purposes only. + let url = "https://sentinel-cogs.s3.amazonaws.com/sentinel-s2-l2a-cogs/1/C/CV/2018/10/S2B_1CCV_20181004_0_L2A/AOT.tif"; + let mut builder = RasterBuilder::new(3); + + let metadata = RasterMetadata { + width: 10, + height: 12, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::OutDbRef, + datatype: BandDataType::UInt16, + outdb_url: Some(url.to_string()), + outdb_band_id: Some(1), + }; + + builder.start_raster(&metadata, None, None).unwrap(); + let test_data1 = vec![0u8]; + builder.band_data_writer().append_value(&test_data1); + builder.finish_band(band_metadata.clone()).unwrap(); + builder.finish_raster().unwrap(); + + Arc::new(builder.finish().unwrap()) + } +} diff --git a/rust/sedona-raster/Cargo.toml b/rust/sedona-raster/Cargo.toml new file mode 100644 index 00000000..965ced8f --- /dev/null +++ b/rust/sedona-raster/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "sedona-raster" +version.workspace = true +homepage.workspace = true +repository.workspace = true +description.workspace = true +readme.workspace = true +edition.workspace = true +rust-version.workspace = true + +[lints.clippy] +result_large_err = "allow" + +[dev-dependencies] +rstest = { workspace = true } + + +[dependencies] +arrow = { workspace = true } +arrow-schema = { workspace = true } +sedona-schema = { path = "../sedona-schema" } \ No newline at end of file diff --git a/rust/sedona-raster/src/datatype_functions.rs b/rust/sedona-raster/src/datatype_functions.rs new file mode 100644 index 00000000..4c613eb2 --- /dev/null +++ b/rust/sedona-raster/src/datatype_functions.rs @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_schema::ArrowError;
+use sedona_schema::datatypes::BandDataType;
+
+/// Number of bytes used to store one pixel of `data_type`.
+///
+/// Every current variant has a known size, so this never returns `Err`;
+/// the `Result` is kept so callers do not have to change.
+pub fn bytes_per_pixel(data_type: BandDataType) -> Result<usize, ArrowError> {
+    match data_type {
+        BandDataType::UInt8 => Ok(1),
+        BandDataType::Int16 | BandDataType::UInt16 => Ok(2),
+        BandDataType::Int32 | BandDataType::UInt32 | BandDataType::Float32 => Ok(4),
+        BandDataType::Float64 => Ok(8),
+    }
+}
+
+/// Extract a pixel value from raw bytes and convert to f64
+///
+/// `bytes` must hold exactly one little-endian pixel of `data_type`.
+///
+/// # Errors
+/// Returns `ArrowError::InvalidArgumentError` when `bytes.len()` differs from
+/// the pixel size of `data_type`.
+pub fn read_pixel_value(bytes: &[u8], data_type: BandDataType) -> Result<f64, ArrowError> {
+    /// Copies `bytes` into an `N`-byte array, failing unless it is exactly `N` bytes long.
+    fn exact<const N: usize>(bytes: &[u8]) -> Result<[u8; N], ArrowError> {
+        bytes.try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(
+                "Invalid byte length for specified data type".to_string(),
+            )
+        })
+    }
+
+    // `N` is inferred from each `from_le_bytes` signature, so the length check
+    // always matches the width of the type being decoded.
+    Ok(match data_type {
+        BandDataType::UInt8 => f64::from(u8::from_le_bytes(exact(bytes)?)),
+        BandDataType::Int16 => f64::from(i16::from_le_bytes(exact(bytes)?)),
+        BandDataType::UInt16 => f64::from(u16::from_le_bytes(exact(bytes)?)),
+        BandDataType::Int32 => f64::from(i32::from_le_bytes(exact(bytes)?)),
+        BandDataType::UInt32 => f64::from(u32::from_le_bytes(exact(bytes)?)),
+        BandDataType::Float32 => f64::from(f32::from_le_bytes(exact(bytes)?)),
+        BandDataType::Float64 => f64::from_le_bytes(exact(bytes)?),
+    })
+}
+
+/// Encode `value` as the little-endian bytes of one pixel of `data_type`.
+///
+/// The conversion uses `as` casts: for integer targets the fractional part is
+/// dropped, out-of-range values saturate at the target's bounds and NaN
+/// becomes 0. No current variant produces an `Err`.
+pub fn f64_to_bandtype_bytes(value: f64, data_type: BandDataType) -> Result<Vec<u8>, ArrowError> {
+    match data_type {
+        BandDataType::UInt8 => Ok(vec![value as u8]),
+        BandDataType::Int16 => Ok((value as i16).to_le_bytes().to_vec()),
+        BandDataType::UInt16 => Ok((value as u16).to_le_bytes().to_vec()),
+        BandDataType::Int32 => Ok((value as i32).to_le_bytes().to_vec()),
+        BandDataType::UInt32 => Ok((value as u32).to_le_bytes().to_vec()),
+        BandDataType::Float32 => Ok((value as f32).to_le_bytes().to_vec()),
+        BandDataType::Float64 => Ok(value.to_le_bytes().to_vec()),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn basic_bytes_per_pixel_tests() {
+        assert_eq!(bytes_per_pixel(BandDataType::UInt8).unwrap(), 1);
+        assert_eq!(bytes_per_pixel(BandDataType::Int16).unwrap(), 2);
+        assert_eq!(bytes_per_pixel(BandDataType::UInt16).unwrap(), 2);
+        assert_eq!(bytes_per_pixel(BandDataType::Int32).unwrap(), 4);
+        assert_eq!(bytes_per_pixel(BandDataType::UInt32).unwrap(), 4);
+        assert_eq!(bytes_per_pixel(BandDataType::Float32).unwrap(), 4);
+        assert_eq!(bytes_per_pixel(BandDataType::Float64).unwrap(), 8);
+    }
+
+    #[test]
+    fn basic_read_pixel_value_tests() {
+        let nodataval = 17.0_f64;
+        assert_eq!(
+            read_pixel_value(&[17u8], BandDataType::UInt8).unwrap(),
+            nodataval
+        );
+        assert_eq!(
+            read_pixel_value(&17i16.to_le_bytes(), BandDataType::Int16).unwrap(),
+            nodataval
+        );
+        assert_eq!(
+            read_pixel_value(&17u16.to_le_bytes(), BandDataType::UInt16).unwrap(),
+            nodataval
+        );
+        assert_eq!(
+            read_pixel_value(&17i32.to_le_bytes(), BandDataType::Int32).unwrap(),
+            nodataval
+        );
+        assert_eq!(
+            read_pixel_value(&17u32.to_le_bytes(),
BandDataType::UInt32).unwrap(), + nodataval + ); + assert_eq!( + read_pixel_value(&17f32.to_le_bytes(), BandDataType::Float32).unwrap(), + nodataval + ); + assert_eq!( + read_pixel_value(&17f64.to_le_bytes(), BandDataType::Float64).unwrap(), + nodataval + ); + } +} diff --git a/rust/sedona-raster/src/display_functions.rs b/rust/sedona-raster/src/display_functions.rs new file mode 100644 index 00000000..450a99d9 --- /dev/null +++ b/rust/sedona-raster/src/display_functions.rs @@ -0,0 +1,118 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+use crate::datatype_functions::{bytes_per_pixel, read_pixel_value};
+use arrow::error::ArrowError;
+use sedona_schema::datatypes::{RasterRef, RasterRefImpl, StorageType};
+use std::fmt::Write;
+
+/// Write raster band matrix directly to a StringBuilder with specified precision
+///
+/// Each pixel is written right-aligned in a field of width 8 with `precision`
+/// fractional digits, followed by a space; each raster row ends with `\n`.
+///
+/// # Arguments
+/// * `raster` - Raster whose band is printed
+/// * `band_number` - Band to print, passed unchanged to `BandsRef::band`
+///   (the first band is number 1 in the test below)
+/// * `precision` - Number of digits after the decimal point
+/// * `out` - Any `std::fmt::Write` sink, e.g. a `String` or Arrow `StringBuilder`
+///
+/// # Errors
+/// Returns an `ArrowError` when the band does not exist, the band is not stored
+/// InDb, the band data is shorter than `width * height` pixels, or `out`
+/// reports a write failure.
+pub fn write_band_to_builder(
+    raster: &RasterRefImpl,
+    band_number: usize,
+    precision: usize,
+    out: &mut impl Write,
+) -> Result<(), ArrowError> {
+    let band = raster.bands().band(band_number).ok_or_else(|| {
+        ArrowError::InvalidArgumentError(format!("Band {band_number} does not exist"))
+    })?;
+
+    // Reject unsupported storage before touching the data buffer.
+    if band.metadata().storage_type() != StorageType::InDb {
+        return Err(ArrowError::InvalidArgumentError(
+            "Pretty print is only supported for InDb storage".to_string(),
+        ));
+    }
+
+    let metadata = raster.metadata();
+    let dimension = |value: u64, label: &str| {
+        usize::try_from(value).map_err(|_| {
+            ArrowError::InvalidArgumentError(format!(
+                "Raster {label} {value} does not fit in usize"
+            ))
+        })
+    };
+    let width = dimension(metadata.width(), "width")?;
+    let height = dimension(metadata.height(), "height")?;
+
+    let data_type = band.metadata().data_type();
+    let pixel_size = bytes_per_pixel(data_type.clone())?;
+
+    // Validate the buffer length once so the loops below cannot panic.
+    let row_len = width.checked_mul(pixel_size);
+    let total_len = row_len.and_then(|len| len.checked_mul(height));
+    let (Some(row_len), Some(total_len)) = (row_len, total_len) else {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Raster dimensions {width}x{height} overflow the addressable size"
+        )));
+    };
+    let data = band.data();
+    let Some(mut remaining) = data.get(..total_len) else {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Band {band_number} holds {} bytes but a {width}x{height} raster needs {total_len}",
+            data.len()
+        )));
+    };
+
+    let fmt_err = |err: std::fmt::Error| ArrowError::ExternalError(Box::new(err));
+
+    for _ in 0..height {
+        let (row, tail) = remaining.split_at(row_len);
+        remaining = tail;
+        for pixel in row.chunks_exact(pixel_size) {
+            let value = read_pixel_value(pixel, data_type.clone())?;
+            write!(out, "{value:8.precision$} ").map_err(fmt_err)?;
+        }
+        out.write_char('\n').map_err(fmt_err)?;
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::StringBuilder;
+    use sedona_schema::datatypes::{
+        BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType,
+    };
+
+    #[test]
+    fn test_pretty_print() {
+        let mut raster_builder = RasterBuilder::new(1);
+
+        let metadata1 = RasterMetadata {
+            width: 3,
+            height: 2,
+            upperleft_x: 0.0,
+            upperleft_y: 0.0,
+            scale_x: 1.0,
+            scale_y: -1.0,
+            skew_x: 0.0,
+            skew_y: 0.0,
+            bounding_box: None,
+        };
+
+        let band_data_type = BandDataType::Float32;
+        let band_metadata = BandMetadata {
+            nodata_value: Some(vec![255u8]),
+            storage_type: StorageType::InDb,
+            datatype: band_data_type.clone(),
+            outdb_url: None,
+            outdb_band_id: None,
+        };
+
+        raster_builder.start_raster(&metadata1, None, None).unwrap();
+        let pixel_values: Vec<f32> = vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6111];
+        let test_data1: Vec<u8> = pixel_values
+            .iter()
+            .flat_map(|&val| val.to_le_bytes())
+            .collect();
+        raster_builder.band_data_writer().append_value(&test_data1);
+        raster_builder.finish_band(band_metadata.clone()).unwrap();
+        raster_builder.finish_raster().unwrap();
+
+        let raster_struct = raster_builder.finish().unwrap();
+        let raster = sedona_schema::datatypes::RasterRefImpl::new(&raster_struct, 0);
+
+        let mut builder = StringBuilder::new();
+        write_band_to_builder(&raster, 1, 2, &mut builder).unwrap();
+        // Writing through `fmt::Write` only extends the in-progress value;
+        // `append_value("")` terminates it so the array contains one row.
+        builder.append_value("");
+
+        let binding = builder.finish();
+        let result = binding.value(0);
+        // Width-8 right-aligned values, each followed by a single space.
+        let expected = "    1.10     2.20     3.30 \n    4.40     5.50     6.61 \n";
+        assert_eq!(result, expected);
+    }
+}
diff --git a/rust/sedona-raster/src/lib.rs b/rust/sedona-raster/src/lib.rs
new file mode 100644
index 00000000..d5e19990
--- /dev/null
+++ b/rust/sedona-raster/src/lib.rs
@@ -0,0 +1,2 @@
+pub mod datatype_functions;
+pub mod display_functions;
diff --git a/rust/sedona-schema/Cargo.toml b/rust/sedona-schema/Cargo.toml
index 223989df..11d26f38 100644
--- a/rust/sedona-schema/Cargo.toml
+++ b/rust/sedona-schema/Cargo.toml
@@ -28,6 +28,7 @@ rust-version.workspace = true
 result_large_err = "allow"
 
 [dependencies]
+arrow = { workspace = true }
 arrow-schema = { workspace = true }
 arrow-array = { workspace = true }
 datafusion-common = { workspace = true }
diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs
index 254ca254..e99912d0 100644
--- a/rust/sedona-schema/src/datatypes.rs
+++ b/rust/sedona-schema/src/datatypes.rs
@@ -14,11 +14,21 @@
 // KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use arrow_schema::{DataType, Field}; +use arrow::buffer::MutableBuffer; +use arrow_array::{ + builder::{ + BinaryBuilder, Float64Builder, ListBuilder, StringBuilder, StructBuilder, UInt32Builder, + UInt64Builder, + }, + Array, BinaryArray, Float64Array, ListArray, StringArray, StructArray, UInt32Array, + UInt64Array, +}; +use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields}; use datafusion_common::error::{DataFusionError, Result}; use sedona_common::sedona_internal_err; use serde_json::Value; use std::fmt::{Debug, Display}; +use std::sync::LazyLock; use crate::crs::{deserialize_crs, Crs}; use crate::extension_type::ExtensionType; @@ -29,6 +39,7 @@ pub enum SedonaType { Arrow(DataType), Wkb(Edges, Crs), WkbView(Edges, Crs), + Raster(RasterSchema), } impl From for SedonaType { @@ -72,7 +83,15 @@ pub const WKB_GEOGRAPHY: SedonaType = SedonaType::Wkb(Edges::Spherical, Crs::Non /// See [`WKB_GEOGRAPHY`] pub const WKB_VIEW_GEOGRAPHY: SedonaType = SedonaType::WkbView(Edges::Spherical, Crs::None); -// Implementation details +/// Sentinel for [`Sedona::RasterSchema`] +/// +/// The CRS is stored within the raster schema. 
+pub const RASTER: SedonaType = SedonaType::Raster(RasterSchema); + +/// Create a static value for the [`SedonaType::Raster`] that's initialized exactly once, +/// on first access +static RASTER_DATATYPE: LazyLock = + LazyLock::new(|| DataType::Struct(RasterSchema::fields())); impl SedonaType { /// Given a field as it would appear in an external Schema return the appropriate SedonaType @@ -85,9 +104,12 @@ impl SedonaType { /// Given an [`ExtensionType`], construct a SedonaType pub fn from_extension_type(extension: ExtensionType) -> Result { - let (edges, crs) = deserialize_edges_and_crs(&extension.extension_metadata)?; if extension.extension_name == "geoarrow.wkb" { + let (edges, crs) = deserialize_edges_and_crs(&extension.extension_metadata)?; sedona_type_wkb(edges, crs, extension.storage_type) + } else if extension.extension_name == "sedona.raster" { + // For raster extension types, return the RASTER constant + Ok(RASTER) } else { sedona_internal_err!( "Extension type not implemented: <{}>:{}", @@ -111,6 +133,7 @@ impl SedonaType { SedonaType::Arrow(data_type) => data_type, SedonaType::Wkb(_, _) => &DataType::Binary, SedonaType::WkbView(_, _) => &DataType::BinaryView, + SedonaType::Raster(_) => &RASTER_DATATYPE, } } @@ -119,6 +142,7 @@ impl SedonaType { match self { SedonaType::Arrow(_) => None, SedonaType::Wkb(_, _) | SedonaType::WkbView(_, _) => Some("geoarrow.wkb"), + SedonaType::Raster(_) => Some("sedona.raster"), } } @@ -132,6 +156,11 @@ impl SedonaType { Some(serialize_edges_and_crs(edges, crs)), )) } + SedonaType::Raster(_) => Some(ExtensionType::new( + self.extension_name().unwrap(), + self.storage_type().clone(), + None, + )), _ => None, } } @@ -179,6 +208,7 @@ impl SedonaType { } } }, + SedonaType::Raster(_) => "raster".to_string(), } } @@ -195,6 +225,7 @@ impl SedonaType { (SedonaType::WkbView(edges, _), SedonaType::WkbView(other_edges, _)) => { edges == other_edges } + (SedonaType::Raster(_), SedonaType::Raster(_)) => true, _ => false, } } @@ -208,6 
+239,7 @@ impl Display for SedonaType { SedonaType::Arrow(data_type) => Display::fmt(data_type, f), SedonaType::Wkb(edges, crs) => display_geometry("Wkb", edges, crs, f), SedonaType::WkbView(edges, crs) => display_geometry("WkbView", edges, crs, f), + SedonaType::Raster(_) => write!(f, "Raster"), } } } @@ -333,6 +365,1241 @@ fn deserialize_edges(edges: &Value) -> Result { } } +/// Schema for storing raster data in Apache Arrow format. +/// Utilizing nested structs and lists to represent raster metadata and bands. +#[derive(Debug, PartialEq, Clone)] +pub struct RasterSchema; +impl RasterSchema { + // Raster schema: + pub fn fields() -> Fields { + Fields::from(vec![ + Field::new(column::METADATA, Self::metadata_type(), false), + Field::new(column::CRS, Self::crs_type(), true), + Field::new(column::BBOX, Self::bounding_box_type(), true), + Field::new(column::BANDS, Self::bands_type(), true), + ]) + } + + /// Raster metadata schema + pub fn metadata_type() -> DataType { + DataType::Struct(Fields::from(vec![ + // Raster dimensions + Field::new(column::WIDTH, DataType::UInt64, false), + Field::new(column::HEIGHT, DataType::UInt64, false), + // Geospatial transformation parameters + Field::new(column::UPPERLEFT_X, DataType::Float64, false), + Field::new(column::UPPERLEFT_Y, DataType::Float64, false), + Field::new(column::SCALE_X, DataType::Float64, false), + Field::new(column::SCALE_Y, DataType::Float64, false), + Field::new(column::SKEW_X, DataType::Float64, false), + Field::new(column::SKEW_Y, DataType::Float64, false), + ])) + } + + /// Bounding box schema + pub fn bounding_box_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::MIN_X, DataType::Float64, false), + Field::new(column::MIN_Y, DataType::Float64, false), + Field::new(column::MAX_X, DataType::Float64, false), + Field::new(column::MAX_Y, DataType::Float64, false), + ])) + } + + /// Bands list schema + pub fn bands_type() -> DataType { + DataType::List(FieldRef::new(Field::new( + 
column::BAND, + Self::band_type(), + false, + ))) + } + + /// Individual band schema + pub fn band_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::METADATA, Self::band_metadata_type(), false), + Field::new(column::DATA, Self::band_data_type(), false), + ])) + } + + /// Band metadata schema + pub fn band_metadata_type() -> DataType { + DataType::Struct(Fields::from(vec![ + Field::new(column::NODATAVALUE, DataType::Binary, true), // Allow null nodata values + Field::new(column::STORAGE_TYPE, DataType::UInt32, false), + Field::new(column::DATATYPE, DataType::UInt32, false), + // OutDb reference fields - only used when storage_type == OutDbRef + Field::new(column::OUTDB_URL, DataType::Utf8, true), + Field::new(column::OUTDB_BAND_ID, DataType::UInt32, true), + ])) + } + + /// Band data schema (single binary blob) + pub fn band_data_type() -> DataType { + DataType::Binary // consider switching to BinaryView + } + + /// CRS schema to store json representation + pub fn crs_type() -> DataType { + DataType::Utf8 // TODO: Consider Utf8View + } +} + +#[repr(u16)] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum BandDataType { + UInt8 = 0, + UInt16 = 1, + Int16 = 2, + UInt32 = 3, + Int32 = 4, + Float32 = 5, + Float64 = 6, + // Consider support for complex types for scientific data +} + +/// Storage strategy for raster band data within Apache Arrow arrays. +/// +/// This enum defines how raster data is physically stored and accessed: +/// +/// **InDb**: Raster data is embedded directly in the Arrow array as binary blobs. +/// - Self-contained, no external dependencies, fast access for small-medium rasters +/// - Increases Arrow array size, memory usage grows and copy times increase with raster size +/// - Best for: Tiles, thumbnails, processed results, small rasters (<10MB per band) +/// +/// **OutDbRef**: Raster data is stored externally with references in the Arrow array. 
+/// - Keeps Arrow arrays lightweight, supports massive rasters, enables lazy loading +/// - Requires external storage management, potential for broken references +/// - Best for: Large satellite imagery, time series data, cloud-native workflows +/// - Supported backends: S3, GCS, Azure Blob, local filesystem, HTTP endpoints +#[repr(u16)] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum StorageType { + InDb = 0, + OutDbRef = 1, +} + +/// Builder for constructing raster arrays with zero-copy band data writing +pub struct RasterBuilder { + main_builder: StructBuilder, +} + +impl RasterBuilder { + /// Create a new raster builder with the specified capacity + pub fn new(capacity: usize) -> Self { + // Create individual builders that we know work + let metadata_builder = StructBuilder::from_fields( + match RasterSchema::metadata_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for metadata"), + }, + capacity, + ); + + let crs_builder = StringBuilder::new(); + + let bbox_builder = StructBuilder::from_fields( + match RasterSchema::bounding_box_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for bounding box"), + }, + capacity, + ); + + let band_struct_builder = StructBuilder::from_fields( + match RasterSchema::band_type() { + DataType::Struct(fields) => fields, + _ => panic!("Expected struct type for band"), + }, + 0, + ); + + let bands_builder = ListBuilder::new(band_struct_builder).with_field(Field::new( + column::BAND, + RasterSchema::band_type(), + false, + )); + + // Now create the main builder with pre-built components + let main_builder = StructBuilder::new( + RasterSchema::fields(), + vec![ + Box::new(metadata_builder), + Box::new(crs_builder), + Box::new(bbox_builder), + Box::new(bands_builder), + ], + ); + + Self { main_builder } + } + + /// Start a new raster with metadata, optional CRS, and optional bounding box + /// + /// This is the unified method for starting a raster with all optional 
parameters.
+    ///
+    /// # Arguments
+    /// * `metadata` - Raster metadata (dimensions, geotransform parameters)
+    /// * `crs` - Optional coordinate reference system as string
+    /// * `bbox` - Optional bounding box coordinates
+    ///
+    /// # Examples
+    /// ```ignore
+    /// // From iterator - copy all fields from existing raster
+    /// builder.start_raster(raster.metadata(), raster.crs(), raster.bounding_box(0).as_ref())?;
+    ///
+    /// // From RasterMetadata struct with all fields
+    /// builder.start_raster(&metadata, Some("EPSG:4326"), metadata.bounding_box.as_ref())?;
+    ///
+    /// // Minimal - just metadata
+    /// builder.start_raster(&metadata, None, None)?;
+    /// ```
+    pub fn start_raster(
+        &mut self,
+        metadata: &dyn MetadataRef,
+        crs: Option<&str>,
+        bbox: Option<&BoundingBox>,
+    ) -> Result<(), ArrowError> {
+        self.append_metadata_from_ref(metadata)?;
+        self.set_crs(crs)?;
+        self.append_bounding_box(bbox)?;
+        Ok(())
+    }
+
+    /// Get direct access to the BinaryBuilder for writing the current band's data
+    ///
+    /// # Panics
+    /// Panics if the bands builder or the band data builder does not have the
+    /// type set up in [`RasterBuilder::new`].
+    pub fn band_data_writer(&mut self) -> &mut BinaryBuilder {
+        let bands_builder = self
+            .main_builder
+            .field_builder::<ListBuilder<StructBuilder>>(raster_indices::BANDS)
+            .unwrap();
+        let band_builder = bands_builder.values();
+        // Ensure we have at least one field (band metadata and data)
+        // Field 0 = metadata (StructBuilder), Field 1 = data (BinaryBuilder)
+        band_builder.field_builder::<BinaryBuilder>(1).unwrap()
+    }
+
+    /// Create a MutableBuffer that can be written to directly
+    ///
+    /// Returns a zero-filled buffer of `capacity` bytes together with a commit
+    /// closure. Passing the filled buffer to the closure appends its bytes as
+    /// the current band's data. The closure mutably borrows the builder until
+    /// it is called or dropped.
+    pub fn create_band_buffer(
+        &mut self,
+        capacity: usize,
+    ) -> (MutableBuffer, impl FnOnce(MutableBuffer) + '_) {
+        let mut buffer = MutableBuffer::with_capacity(capacity);
+
+        // Pre-allocate the buffer to the exact size
+        buffer.resize(capacity, 0);
+
+        let commit = move |buffer: MutableBuffer| {
+            // Convert MutableBuffer to &[u8] and append to BinaryBuilder
+            let data = buffer.as_slice();
+            self.band_data_writer().append_value(data);
+        };
+
+        (buffer, commit)
+    }
+
+    /// Alternative: Get a mutable slice from a MutableBuffer for GDAL
+    /// This provides the most direct access for zero-copy operations
+    ///
+    /// Returns a zero-filled buffer of `size` bytes and a mutable slice over
+    /// the same bytes. Pass the buffer to [`RasterBuilder::commit_band_buffer`]
+    /// once the slice has been filled.
+    ///
+    /// # Soundness hazard
+    /// The returned slice points into the returned buffer, but its lifetime is
+    /// tied to the `&mut self` borrow, not to the buffer. The compiler therefore
+    /// cannot stop the caller from dropping, resizing or reading the buffer
+    /// while the slice is still in use. Callers must finish all use of the
+    /// slice before touching the buffer. Prefer
+    /// [`RasterBuilder::create_band_buffer`], which hands out only the buffer.
+    /// TODO: have this 3 different way.... pick one!!
+    pub fn get_band_buffer_slice(&mut self, size: usize) -> (MutableBuffer, &mut [u8]) {
+        let mut buffer = MutableBuffer::with_capacity(size);
+        buffer.resize(size, 0);
+
+        // Get mutable slice that GDAL can write to
+        let slice = unsafe {
+            // SAFETY: `resize` above leaves `buffer` with exactly `size`
+            // initialized (zeroed) bytes, so the pointer is valid for `size`
+            // bytes here. This only holds while the caller keeps `buffer`
+            // alive, unresized and otherwise unaccessed; see the hazard
+            // described in the doc comment.
+            std::slice::from_raw_parts_mut(buffer.as_mut_ptr(), size)
+        };
+
+        (buffer, slice)
+    }
+
+    /// Commit a MutableBuffer to the band data
+    ///
+    /// Appends the buffer's bytes as the data value of the current band.
+    pub fn commit_band_buffer(&mut self, buffer: MutableBuffer) {
+        let data = buffer.as_slice();
+        self.band_data_writer().append_value(data);
+    }
+
+    /// Finish writing the current band with its metadata
+    /// TODO: The band_metadata is in the finish in the band call, but in the
+    /// start in the raster call. Make it consistent.
+    pub fn finish_band(&mut self, band_metadata: BandMetadata) -> Result<(), ArrowError> {
+        let bands_builder = self
+            .main_builder
+            .field_builder::>(raster_indices::BANDS)
+            .unwrap();
+        let band_builder = bands_builder.values();
+
+        let metadata_builder = band_builder.field_builder::(0).unwrap();
+
+        if let Some(nodata) = band_metadata.nodata_value {
+            metadata_builder
+                .field_builder::(band_metadata_indices::NODATAVALUE)
+                .unwrap()
+                .append_value(&nodata);
+        } else {
+            metadata_builder
+                .field_builder::(band_metadata_indices::NODATAVALUE)
+                .unwrap()
+                .append_null();
+        }
+
+        metadata_builder
+            .field_builder::(band_metadata_indices::STORAGE_TYPE)
+            .unwrap()
+            .append_value(band_metadata.storage_type as u32);
+
+        metadata_builder
+            .field_builder::(band_metadata_indices::DATATYPE)
+            .unwrap()
+            .append_value(band_metadata.datatype as u32);
+
+        // Handle OutDb URL
+        if let Some(url) = band_metadata.outdb_url {
+            metadata_builder
+                .field_builder::(band_metadata_indices::OUTDB_URL)
+                .unwrap()
+                .append_value(&url);
+        } else {
+            metadata_builder
+
.field_builder::(band_metadata_indices::OUTDB_URL) + .unwrap() + .append_null(); + } + + // Handle OutDb band ID + if let Some(band_id) = band_metadata.outdb_band_id { + metadata_builder + .field_builder::(band_metadata_indices::OUTDB_BAND_ID) + .unwrap() + .append_value(band_id); + } else { + metadata_builder + .field_builder::(band_metadata_indices::OUTDB_BAND_ID) + .unwrap() + .append_null(); + } + + metadata_builder.append(true); + + // Finish the band + band_builder.append(true); + Ok(()) + } + + /// Finish all bands for the current raster + pub fn finish_raster(&mut self) -> Result<(), ArrowError> { + let bands_builder = self + .main_builder + .field_builder::>(raster_indices::BANDS) + .unwrap(); + bands_builder.append(true); + // Mark this raster as valid (not null) in the main struct + self.main_builder.append(true); + Ok(()) + } + + /// Append raster metadata from a MetadataRef trait object + fn append_metadata_from_ref(&mut self, metadata: &dyn MetadataRef) -> Result<(), ArrowError> { + let metadata_builder = self + .main_builder + .field_builder::(raster_indices::METADATA) + .unwrap(); + + // Width + metadata_builder + .field_builder::(metadata_indices::WIDTH) + .unwrap() + .append_value(metadata.width()); + + // Height + metadata_builder + .field_builder::(metadata_indices::HEIGHT) + .unwrap() + .append_value(metadata.height()); + + // Geotransform parameters + metadata_builder + .field_builder::(metadata_indices::UPPERLEFT_X) + .unwrap() + .append_value(metadata.upper_left_x()); + + metadata_builder + .field_builder::(metadata_indices::UPPERLEFT_Y) + .unwrap() + .append_value(metadata.upper_left_y()); + + metadata_builder + .field_builder::(metadata_indices::SCALE_X) + .unwrap() + .append_value(metadata.scale_x()); + + metadata_builder + .field_builder::(metadata_indices::SCALE_Y) + .unwrap() + .append_value(metadata.scale_y()); + + metadata_builder + .field_builder::(metadata_indices::SKEW_X) + .unwrap() + .append_value(metadata.skew_x()); + + 
metadata_builder + .field_builder::(metadata_indices::SKEW_Y) + .unwrap() + .append_value(metadata.skew_y()); + + metadata_builder.append(true); + + Ok(()) + } + + /// Set the CRS for the current raster + pub fn set_crs(&mut self, crs: Option<&str>) -> Result<(), ArrowError> { + let crs_builder = self + .main_builder + .field_builder::(raster_indices::CRS) + .unwrap(); + match crs { + Some(crs_data) => crs_builder.append_value(crs_data), + None => crs_builder.append_null(), + } + Ok(()) + } + + /// Append a bounding box to the current raster + pub fn append_bounding_box(&mut self, bbox: Option<&BoundingBox>) -> Result<(), ArrowError> { + let bbox_builder = self + .main_builder + .field_builder::(raster_indices::BBOX) + .unwrap(); + + if let Some(bbox) = bbox { + bbox_builder + .field_builder::(bounding_box_indices::MIN_X) + .unwrap() + .append_value(bbox.min_x); + + bbox_builder + .field_builder::(bounding_box_indices::MIN_Y) + .unwrap() + .append_value(bbox.min_y); + + bbox_builder + .field_builder::(bounding_box_indices::MAX_X) + .unwrap() + .append_value(bbox.max_x); + + bbox_builder + .field_builder::(bounding_box_indices::MAX_Y) + .unwrap() + .append_value(bbox.max_y); + + bbox_builder.append(true); + } else { + // Append null bounding box - need to fill in null values for all fields + bbox_builder + .field_builder::(bounding_box_indices::MIN_X) + .unwrap() + .append_null(); + + bbox_builder + .field_builder::(bounding_box_indices::MIN_Y) + .unwrap() + .append_null(); + + bbox_builder + .field_builder::(bounding_box_indices::MAX_X) + .unwrap() + .append_null(); + + bbox_builder + .field_builder::(bounding_box_indices::MAX_Y) + .unwrap() + .append_null(); + + bbox_builder.append(false); + } + Ok(()) + } + + /// Append a null raster + pub fn append_null(&mut self) -> Result<(), ArrowError> { + // Since metadata fields are non-nullable, provide default values + let metadata_builder = self + .main_builder + .field_builder::(raster_indices::METADATA) + .unwrap(); + 
+ metadata_builder + .field_builder::(metadata_indices::WIDTH) + .unwrap() + .append_value(0u64); + + metadata_builder + .field_builder::(metadata_indices::HEIGHT) + .unwrap() + .append_value(0u64); + + metadata_builder + .field_builder::(metadata_indices::UPPERLEFT_X) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::UPPERLEFT_Y) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::SCALE_X) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::SCALE_Y) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::SKEW_X) + .unwrap() + .append_value(0.0f64); + + metadata_builder + .field_builder::(metadata_indices::SKEW_Y) + .unwrap() + .append_value(0.0f64); + + // Mark the metadata struct as valid since it has valid values + metadata_builder.append(true); + + // Append null CRS (now using StringBuilder instead of StringViewBuilder) + let crs_builder = self + .main_builder + .field_builder::(raster_indices::CRS) + .unwrap(); + crs_builder.append_null(); + + // Append null bounding box + self.append_bounding_box(None)?; + + // Append null bands + let bands_builder = self + .main_builder + .field_builder::>(raster_indices::BANDS) + .unwrap(); + bands_builder.append(false); + + // Mark this raster as null in the main struct + self.main_builder.append(false); + + Ok(()) + } + + /// Finish building and return the constructed StructArray + pub fn finish(mut self) -> Result { + Ok(self.main_builder.finish()) + } +} + +/// Convenience wrapper for the zero-copy band writing approach +impl RasterBuilder { + /// High-level method that allows for zero-copy with a callback approach + pub fn append_raster_with_callback( + &mut self, + metadata: RasterMetadata, + band_count: usize, + mut write_bands: F, + ) -> Result<(), ArrowError> + where + F: FnMut(usize, &mut BinaryBuilder) -> Result, + { + 
self.start_raster(&metadata, None, metadata.bounding_box.as_ref())?; + + for band_index in 0..band_count { + let band_metadata = { + let binary_builder = self.band_data_writer(); + write_bands(band_index, binary_builder)? + }; + self.finish_band(band_metadata)?; + } + + self.finish_raster()?; + Ok(()) + } +} + +/// Iterator and accessor traits for reading raster data from Arrow arrays. +/// +/// These traits provide a zero-copy interface for accessing raster metadata and band data +/// from the Arrow-based storage format. The implementation handles both InDb and OutDbRef +/// storage types seamlessly. + +/// Trait for accessing raster metadata (dimensions, geotransform, bounding box, etc.) +pub trait MetadataRef { + /// Width of the raster in pixels (using u64 to match schema) + fn width(&self) -> u64; + /// Height of the raster in pixels (using u64 to match schema) + fn height(&self) -> u64; + /// X coordinate of the upper-left corner + fn upper_left_x(&self) -> f64; + /// Y coordinate of the upper-left corner + fn upper_left_y(&self) -> f64; + /// X-direction pixel size (scale) + fn scale_x(&self) -> f64; + /// Y-direction pixel size (scale) + fn scale_y(&self) -> f64; + /// X-direction skew/rotation + fn skew_x(&self) -> f64; + /// Y-direction skew/rotation + fn skew_y(&self) -> f64; +} + +/// Implement MetadataRef for RasterMetadata to allow direct use with builder +impl MetadataRef for RasterMetadata { + fn width(&self) -> u64 { + self.width + } + fn height(&self) -> u64 { + self.height + } + fn upper_left_x(&self) -> f64 { + self.upperleft_x + } + fn upper_left_y(&self) -> f64 { + self.upperleft_y + } + fn scale_x(&self) -> f64 { + self.scale_x + } + fn scale_y(&self) -> f64 { + self.scale_y + } + fn skew_x(&self) -> f64 { + self.skew_x + } + fn skew_y(&self) -> f64 { + self.skew_y + } +} + +/// Trait for accessing individual band metadata +pub trait BandMetadataRef { + /// No-data value as raw bytes (None if null) + fn nodata_value(&self) -> Option<&[u8]>; + 
/// Storage type (InDb, OutDbRef, etc) + fn storage_type(&self) -> StorageType; + /// Band data type (UInt8, Float32, etc.) + fn data_type(&self) -> BandDataType; + /// OutDb URL (only used when storage_type == OutDbRef) + fn outdb_url(&self) -> Option<&str>; + /// OutDb band ID (only used when storage_type == OutDbRef) + fn outdb_band_id(&self) -> Option; +} + +/// Trait for accessing individual band data +pub trait BandRef { + /// Band metadata accessor + fn metadata(&self) -> &dyn BandMetadataRef; + /// Raw band data as bytes (zero-copy access) + fn data(&self) -> &[u8]; +} + +/// Trait for accessing all bands in a raster +pub trait BandsRef { + /// Number of bands in the raster + fn len(&self) -> usize; + /// Check if no bands are present + fn is_empty(&self) -> bool { + self.len() == 0 + } + /// Get a specific band by number (returns None if out of bounds) + /// By convention, band numbers are 1-based + fn band(&self, number: usize) -> Option>; + /// Iterator over all bands + fn iter(&self) -> BandIterator<'_>; +} + +/// Trait for accessing complete raster data +pub trait RasterRef { + /// Raster metadata accessor + fn metadata(&self) -> &dyn MetadataRef; + /// CRS accessor + fn crs(&self) -> Option<&str>; + /// Bands accessor + fn bands(&self) -> &dyn BandsRef; +} + +/// Implementation of MetadataRef for Arrow StructArray +struct MetadataRefImpl<'a> { + metadata_struct: &'a StructArray, + index: usize, +} + +impl<'a> MetadataRef for MetadataRefImpl<'a> { + fn width(&self) -> u64 { + self.metadata_struct + .column(metadata_indices::WIDTH) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn height(&self) -> u64 { + self.metadata_struct + .column(metadata_indices::HEIGHT) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn upper_left_x(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::UPPERLEFT_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn upper_left_y(&self) -> f64 
{ + self.metadata_struct + .column(metadata_indices::UPPERLEFT_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn scale_x(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::SCALE_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn scale_y(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::SCALE_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn skew_x(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::SKEW_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } + + fn skew_y(&self) -> f64 { + self.metadata_struct + .column(metadata_indices::SKEW_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(self.index) + } +} + +/// Implementation of BandMetadataRef for Arrow StructArray +struct BandMetadataRefImpl<'a> { + metadata_struct: &'a StructArray, + band_index: usize, +} + +impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> { + fn nodata_value(&self) -> Option<&[u8]> { + let nodata_array = self + .metadata_struct + .column(band_metadata_indices::NODATAVALUE) + .as_any() + .downcast_ref::() + .expect("Expected BinaryArray for nodata"); + + if nodata_array.is_null(self.band_index) { + None + } else { + Some(nodata_array.value(self.band_index)) + } + } + + fn storage_type(&self) -> StorageType { + let storage_type_array = self + .metadata_struct + .column(band_metadata_indices::STORAGE_TYPE) + .as_any() + .downcast_ref::() + .expect("Expected UInt32Array for storage_type"); + + match storage_type_array.value(self.band_index) { + 0 => StorageType::InDb, + 1 => StorageType::OutDbRef, + _ => panic!( + "Unknown storage type: {}", + storage_type_array.value(self.band_index) + ), + } + } + + fn data_type(&self) -> BandDataType { + let datatype_array = self + .metadata_struct + .column(band_metadata_indices::DATATYPE) + .as_any() + .downcast_ref::() + .expect("Expected UInt32Array for datatype"); + + match 
datatype_array.value(self.band_index) { + 0 => BandDataType::UInt8, + 1 => BandDataType::UInt16, + 2 => BandDataType::Int16, + 3 => BandDataType::UInt32, + 4 => BandDataType::Int32, + 5 => BandDataType::Float32, + 6 => BandDataType::Float64, + _ => panic!( + "Unknown band data type: {}", + datatype_array.value(self.band_index) + ), + } + } + + fn outdb_url(&self) -> Option<&str> { + let url_array = self + .metadata_struct + .column(band_metadata_indices::OUTDB_URL) + .as_any() + .downcast_ref::() + .expect("Expected StringArray for outdb_url"); + + if url_array.is_null(self.band_index) { + None + } else { + Some(url_array.value(self.band_index)) + } + } + + fn outdb_band_id(&self) -> Option { + let band_id_array = self + .metadata_struct + .column(band_metadata_indices::OUTDB_BAND_ID) + .as_any() + .downcast_ref::() + .expect("Expected UInt32Array for outdb_band_id"); + + if band_id_array.is_null(self.band_index) { + None + } else { + Some(band_id_array.value(self.band_index)) + } + } +} + +/// Implementation of BandRef for accessing individual band data +struct BandRefImpl<'a> { + band_metadata: BandMetadataRefImpl<'a>, + band_data: &'a [u8], +} + +impl<'a> BandRef for BandRefImpl<'a> { + fn metadata(&self) -> &dyn BandMetadataRef { + &self.band_metadata + } + + fn data(&self) -> &[u8] { + self.band_data + } +} + +/// Implementation of BandsRef for accessing all bands in a raster +struct BandsRefImpl<'a> { + bands_list: &'a ListArray, + raster_index: usize, +} + +impl<'a> BandsRef for BandsRefImpl<'a> { + fn len(&self) -> usize { + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let end = self.bands_list.value_offsets()[self.raster_index + 1] as usize; + end - start + } + + /// Get a specific band by number (1-based index) + fn band(&self, number: usize) -> Option> { + // TODO: Error instead of returning None for out-of-bounds + if number == 0 { + return None; // Band numbers are 1-based + } + // By convention, band numbers are 1-based. 
+ // Convert to zero-based index. + let index = number - 1; + if index >= self.len() { + return None; + } + + let start = self.bands_list.value_offsets()[self.raster_index] as usize; + let band_row = start + index; + + let bands_struct = self + .bands_list + .values() + .as_any() + .downcast_ref::()?; + + // Get the metadata substructure from the band struct + let band_metadata_struct = bands_struct + .column(band_indices::METADATA) + .as_any() + .downcast_ref::()?; + + let band_metadata = BandMetadataRefImpl { + metadata_struct: band_metadata_struct, + band_index: band_row, + }; + + // Get band data from the Binary column within the band struct + let band_data_array = bands_struct + .column(band_indices::DATA) + .as_any() + .downcast_ref::()?; + + let band_data = band_data_array.value(band_row); + + Some(Box::new(BandRefImpl { + band_metadata, + band_data, + })) + } + + fn iter(&self) -> BandIterator<'_> { + BandIterator { + bands: self, + current: 1, // Start at 1 for 1-based band numbering + } + } +} + +/// Iterator for bands within a raster +pub struct BandIterator<'a> { + bands: &'a dyn BandsRef, + current: usize, +} + +impl<'a> Iterator for BandIterator<'a> { + type Item = Box; + + fn next(&mut self) -> Option { + // current is 1-based, so the last valid band number equals len() + if self.current <= self.bands.len() { + let band = self.bands.band(self.current); + self.current += 1; + band + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + // current is 1-based, so remaining calculation needs adjustment + let remaining = self.bands.len().saturating_sub(self.current - 1); + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for BandIterator<'_> {} + +/// Implementation of RasterRef for complete raster access +pub struct RasterRefImpl<'a> { + metadata: MetadataRefImpl<'a>, + crs: &'a StringArray, + bbox: &'a StructArray, + bands: BandsRefImpl<'a>, +} + +impl<'a> RasterRefImpl<'a> { + /// Create a new RasterRefImpl from a struct array and index 
using hard-coded indices + pub fn new(raster_struct: &'a StructArray, raster_index: usize) -> Self { + let metadata_struct = raster_struct + .column(raster_indices::METADATA) + .as_any() + .downcast_ref::() + .unwrap(); + + let crs = raster_struct + .column(raster_indices::CRS) + .as_any() + .downcast_ref::() + .unwrap(); + + let bbox = raster_struct + .column(raster_indices::BBOX) + .as_any() + .downcast_ref::() + .unwrap(); + + let bands_list = raster_struct + .column(raster_indices::BANDS) + .as_any() + .downcast_ref::() + .unwrap(); + + let metadata = MetadataRefImpl { + metadata_struct, + index: raster_index, + }; + + let bands = BandsRefImpl { + bands_list, + raster_index, + }; + + Self { + metadata, + crs, + bbox, + bands, + } + } + + /// Access the bounding box for this raster + pub fn bounding_box(&self, raster_index: usize) -> Option { + if self.bbox.is_null(raster_index) { + None + } else { + Some(BoundingBox { + min_x: self + .bbox + .column(bounding_box_indices::MIN_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + min_y: self + .bbox + .column(bounding_box_indices::MIN_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + max_x: self + .bbox + .column(bounding_box_indices::MAX_X) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + max_y: self + .bbox + .column(bounding_box_indices::MAX_Y) + .as_any() + .downcast_ref::() + .unwrap() + .value(raster_index), + }) + } + } +} + +impl<'a> RasterRef for RasterRefImpl<'a> { + fn metadata(&self) -> &dyn MetadataRef { + &self.metadata + } + + fn crs(&self) -> Option<&str> { + if self.crs.is_null(self.bands.raster_index) { + None + } else { + Some(&self.crs.value(self.bands.raster_index)) + } + } + + fn bands(&self) -> &dyn BandsRef { + &self.bands + } +} + +/// Iterator over raster structs in an Arrow StructArray +/// +/// This provides efficient, zero-copy access to raster data stored in Arrow format. 
+/// Each iteration yields a `RasterRefImpl` that provides access to both metadata and band data. +pub struct RasterStructIterator<'a> { + raster_array: &'a StructArray, + current_row: usize, +} + +impl<'a> RasterStructIterator<'a> { + /// Create a new iterator over the raster struct array + pub fn new(raster_array: &'a StructArray) -> Self { + Self { + raster_array, + current_row: 0, + } + } + + /// Get the total number of rasters in the array + pub fn len(&self) -> usize { + self.raster_array.len() + } + + /// Check if the array is empty + pub fn is_empty(&self) -> bool { + self.raster_array.is_empty() + } + + /// Get a specific raster by index without consuming the iterator + pub fn get(&self, index: usize) -> Option> { + if index >= self.raster_array.len() { + return None; + } + + Some(RasterRefImpl::new(self.raster_array, index)) + } +} + +impl<'a> Iterator for RasterStructIterator<'a> { + type Item = RasterRefImpl<'a>; + + fn next(&mut self) -> Option { + if self.current_row < self.raster_array.len() { + let result = self.get(self.current_row)?; + self.current_row += 1; + Some(result) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.raster_array.len().saturating_sub(self.current_row); + (remaining, Some(remaining)) + } +} + +impl ExactSizeIterator for RasterStructIterator<'_> {} + +/// Convenience constructor function for creating a raster iterator +pub fn raster_iterator(raster_struct: &StructArray) -> RasterStructIterator<'_> { + RasterStructIterator::new(raster_struct) +} + +/// Metadata for a raster +#[derive(Debug, Clone)] +pub struct RasterMetadata { + pub width: u64, + pub height: u64, + pub upperleft_x: f64, + pub upperleft_y: f64, + pub scale_x: f64, + pub scale_y: f64, + pub skew_x: f64, + pub skew_y: f64, + pub bounding_box: Option, +} + +/// Bounding box coordinates +#[derive(Debug, Clone)] +pub struct BoundingBox { + pub min_x: f64, + pub min_y: f64, + pub max_x: f64, + pub max_y: f64, +} + +/// 
Metadata for a single band +#[derive(Debug, Clone)] +pub struct BandMetadata { + pub nodata_value: Option>, + pub storage_type: StorageType, + pub datatype: BandDataType, + /// URL for OutDb reference (only used when storage_type == OutDbRef) + pub outdb_url: Option, + /// Band ID within the OutDb resource (only used when storage_type == OutDbRef) + pub outdb_band_id: Option, +} + +// Private field column name and index constants +// used across schema, builders and iterators +mod column { + pub const METADATA: &str = "metadata"; + pub const BANDS: &str = "bands"; + pub const BAND: &str = "band"; + pub const DATA: &str = "data"; + + // Raster metadata fields + pub const WIDTH: &str = "width"; + pub const HEIGHT: &str = "height"; + pub const UPPERLEFT_X: &str = "upperleft_x"; + pub const UPPERLEFT_Y: &str = "upperleft_y"; + pub const SCALE_X: &str = "scale_x"; + pub const SCALE_Y: &str = "scale_y"; + pub const SKEW_X: &str = "skew_x"; + pub const SKEW_Y: &str = "skew_y"; + pub const BBOX: &str = "bbox"; + pub const CRS: &str = "crs"; + + // Bounding box fields + pub const MIN_X: &str = "min_x"; + pub const MIN_Y: &str = "min_y"; + pub const MAX_X: &str = "max_x"; + pub const MAX_Y: &str = "max_y"; + + // Band metadata fields + pub const NODATAVALUE: &str = "nodata_value"; + pub const STORAGE_TYPE: &str = "storage_type"; + pub const DATATYPE: &str = "data_type"; + pub const OUTDB_URL: &str = "outdb_url"; + pub const OUTDB_BAND_ID: &str = "outdb_band_id"; +} + +/// Hard-coded column indices for maximum performance +/// These must match the exact order defined in RasterSchema::metadata_type() +mod metadata_indices { + pub const WIDTH: usize = 0; + pub const HEIGHT: usize = 1; + pub const UPPERLEFT_X: usize = 2; + pub const UPPERLEFT_Y: usize = 3; + pub const SCALE_X: usize = 4; + pub const SCALE_Y: usize = 5; + pub const SKEW_X: usize = 6; + pub const SKEW_Y: usize = 7; +} + +mod bounding_box_indices { + pub const MIN_X: usize = 0; + pub const MIN_Y: usize = 1; + pub 
const MAX_X: usize = 2; + pub const MAX_Y: usize = 3; +} + +mod band_metadata_indices { + pub const NODATAVALUE: usize = 0; + pub const STORAGE_TYPE: usize = 1; + pub const DATATYPE: usize = 2; + pub const OUTDB_URL: usize = 3; + pub const OUTDB_BAND_ID: usize = 4; +} + +mod band_indices { + pub const METADATA: usize = 0; + pub const DATA: usize = 1; +} + +mod raster_indices { + pub const METADATA: usize = 0; + pub const CRS: usize = 1; + pub const BBOX: usize = 2; + pub const BANDS: usize = 3; +} + #[cfg(test)] mod tests { use crate::crs::lnglat; @@ -549,4 +1816,666 @@ mod tests { .message() .contains("Unsupported edges value")); } + + #[test] + fn test_iterator_basic_functionality() { + // Create a simple raster for testing using the correct API + let mut builder = RasterBuilder::new(10); // capacity + + let metadata = RasterMetadata { + width: 10, + height: 10, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: Some(BoundingBox { + min_x: 0.0, + min_y: -10.0, + max_x: 10.0, + max_y: 0.0, + }), + }; + + let epsg4326 = "EPSG:4326"; + builder + .start_raster(&metadata, Some(&epsg4326), metadata.bounding_box.as_ref()) + .unwrap(); + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + // Add a single band with some test data using the correct API + let test_data = vec![1u8; 100]; // 10x10 raster with value 1 + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + let result = builder.finish_raster(); + assert!(result.is_ok()); + + let raster_array = builder.finish().unwrap(); + + // Test the iterator + let mut iterator = raster_iterator(&raster_array); + + assert_eq!(iterator.len(), 1); + assert!(!iterator.is_empty()); + + let raster = iterator.next().unwrap(); + let metadata = raster.metadata(); + + 
assert_eq!(metadata.width(), 10); + assert_eq!(metadata.height(), 10); + assert_eq!(metadata.scale_x(), 1.0); + assert_eq!(metadata.scale_y(), -1.0); + + let bbox = raster.bounding_box(0).unwrap(); + assert_eq!(bbox.min_x, 0.0); + assert_eq!(bbox.max_x, 10.0); + + let bands = raster.bands(); + assert_eq!(bands.len(), 1); + assert!(!bands.is_empty()); + + // Access band with 1-based band_number + let band = bands.band(1).unwrap(); + assert_eq!(band.data().len(), 100); + assert_eq!(band.data()[0], 1u8); + + let band_meta = band.metadata(); + assert_eq!(band_meta.storage_type(), StorageType::InDb); + assert_eq!(band_meta.data_type(), BandDataType::UInt8); + + let crs = raster.crs().unwrap(); + assert_eq!(crs, epsg4326); + + // Test iterator over bands + let band_iter: Vec<_> = bands.iter().collect(); + assert_eq!(band_iter.len(), 1); + } + + #[test] + fn test_multi_band_iterator() { + let mut builder = RasterBuilder::new(10); + + let metadata = RasterMetadata { + width: 5, + height: 5, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + // Add three bands using the correct API + for band_idx in 0..3 { + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + let test_data = vec![band_idx as u8; 25]; // 5x5 raster + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + } + + let result = builder.finish_raster(); + assert!(result.is_ok()); + + let raster_array = builder.finish().unwrap(); + + let mut iterator = raster_iterator(&raster_array); + let raster = iterator.next().unwrap(); + let bands = raster.bands(); + + assert_eq!(bands.len(), 3); + + // Test each band has different data + // Use 1-based band numbers + for i in 0..3 { + // Access band with 
1-based band_number + let band = bands.band(i + 1).unwrap(); + let expected_value = i as u8; + assert!(band.data().iter().all(|&x| x == expected_value)); + } + + // Test iterator + let band_values: Vec = bands + .iter() + .enumerate() + .map(|(i, band)| { + assert_eq!(band.data()[0], i as u8); + band.data()[0] + }) + .collect(); + + assert_eq!(band_values, vec![0, 1, 2]); + } + + #[test] + fn test_copy_metadata_from_iterator() { + // Create an original raster + let mut source_builder = RasterBuilder::new(10); + + let original_metadata = RasterMetadata { + width: 42, + height: 24, + upperleft_x: -122.0, + upperleft_y: 37.8, + scale_x: 0.1, + scale_y: -0.1, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: Some(BoundingBox { + min_x: -122.0, + min_y: 35.4, + max_x: -120.0, + max_y: 37.8, + }), + }; + + source_builder + .start_raster( + &original_metadata, + None, + original_metadata.bounding_box.as_ref(), + ) + .unwrap(); + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + let test_data = vec![42u8; 1008]; // 42x24 raster + source_builder.band_data_writer().append_value(&test_data); + source_builder.finish_band(band_metadata).unwrap(); + source_builder.finish_raster().unwrap(); + + let source_array = source_builder.finish().unwrap(); + + // Now create a new raster using metadata from the iterator - this is the key feature! + let mut target_builder = RasterBuilder::new(10); + let iterator = raster_iterator(&source_array); + let source_raster = iterator.get(0).unwrap(); + + // Use metadata directly from the iterator (zero-copy!) 
+ target_builder + .start_raster( + source_raster.metadata(), + source_raster.crs(), + source_raster.bounding_box(0).as_ref(), + ) + .unwrap(); + + // Add new band data while preserving original metadata + let new_band_metadata = BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: BandDataType::UInt16, + outdb_url: None, + outdb_band_id: None, + }; + + let new_data = vec![100u16; 1008]; // Different data, same dimensions + let new_data_bytes: Vec = new_data.iter().flat_map(|&x| x.to_le_bytes()).collect(); + + target_builder + .band_data_writer() + .append_value(&new_data_bytes); + target_builder.finish_band(new_band_metadata).unwrap(); + target_builder.finish_raster().unwrap(); + + let target_array = target_builder.finish().unwrap(); + + // Verify the metadata was copied correctly + let target_iterator = raster_iterator(&target_array); + let target_raster = target_iterator.get(0).unwrap(); + let target_metadata = target_raster.metadata(); + + // All metadata should match the original + assert_eq!(target_metadata.width(), 42); + assert_eq!(target_metadata.height(), 24); + assert_eq!(target_metadata.upper_left_x(), -122.0); + assert_eq!(target_metadata.upper_left_y(), 37.8); + assert_eq!(target_metadata.scale_x(), 0.1); + assert_eq!(target_metadata.scale_y(), -0.1); + + let target_bbox = target_raster.bounding_box(0).unwrap(); + assert_eq!(target_bbox.min_x, -122.0); + assert_eq!(target_bbox.max_x, -120.0); + + // But band data and metadata should be different + let target_band = target_raster.bands().band(1).unwrap(); + let target_band_meta = target_band.metadata(); + assert_eq!(target_band_meta.data_type(), BandDataType::UInt16); + assert!(target_band_meta.nodata_value().is_none()); + assert_eq!(target_band.data().len(), 2016); // 1008 * 2 bytes per u16 + } + + #[test] + fn test_random_access() { + let mut builder = RasterBuilder::new(10); + + // Add multiple rasters + for raster_idx in 0..3 { + let metadata = RasterMetadata { + 
width: raster_idx as u64 + 1, + height: raster_idx as u64 + 1, + upperleft_x: raster_idx as f64, + upperleft_y: raster_idx as f64, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + let band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + let size = (raster_idx + 1) * (raster_idx + 1); + let test_data = vec![raster_idx as u8; size]; + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + let result = builder.finish_raster(); + assert!(result.is_ok()); + } + + let raster_array = builder.finish().unwrap(); + let iterator = raster_iterator(&raster_array); + + assert_eq!(iterator.len(), 3); + + // Test random access + let raster_2 = iterator.get(2).unwrap(); + assert_eq!(raster_2.metadata().width(), 3); + assert_eq!(raster_2.metadata().height(), 3); + assert_eq!(raster_2.metadata().upper_left_x(), 2.0); + + // Access band data with 1-based band_number + let band = raster_2.bands().band(1).unwrap(); + assert_eq!(band.data().len(), 9); + assert!(band.data().iter().all(|&x| x == 2u8)); + + // Test out of bounds + assert!(iterator.get(10).is_none()); + } + + /// Comprehensive test to verify all hard-coded indices match the actual schema + #[test] + fn test_hardcoded_indices_match_schema() { + // Test raster-level indices + let raster_fields = RasterSchema::fields(); + assert_eq!(raster_fields.len(), 4, "Expected exactly 4 raster fields"); + assert_eq!( + raster_fields[raster_indices::METADATA].name(), + column::METADATA, + "Raster metadata index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::CRS].name(), + column::CRS, + "Raster CRS index mismatch" + ); + assert_eq!( + raster_fields[raster_indices::BBOX].name(), + column::BBOX, + "Raster BBOX index mismatch" + ); + assert_eq!( + 
raster_fields[raster_indices::BANDS].name(), + column::BANDS, + "Raster bands index mismatch" + ); + + // Test metadata indices + let metadata_type = RasterSchema::metadata_type(); + if let DataType::Struct(metadata_fields) = metadata_type { + assert_eq!( + metadata_fields.len(), + 8, + "Expected exactly 8 metadata fields" + ); + assert_eq!( + metadata_fields[metadata_indices::WIDTH].name(), + column::WIDTH, + "Metadata width index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::HEIGHT].name(), + column::HEIGHT, + "Metadata height index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::UPPERLEFT_X].name(), + column::UPPERLEFT_X, + "Metadata upperleft_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::UPPERLEFT_Y].name(), + column::UPPERLEFT_Y, + "Metadata upperleft_y index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SCALE_X].name(), + column::SCALE_X, + "Metadata scale_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SCALE_Y].name(), + column::SCALE_Y, + "Metadata scale_y index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SKEW_X].name(), + column::SKEW_X, + "Metadata skew_x index mismatch" + ); + assert_eq!( + metadata_fields[metadata_indices::SKEW_Y].name(), + column::SKEW_Y, + "Metadata skew_y index mismatch" + ); + } else { + panic!("Expected Struct type for metadata"); + } + + // Test bounding box indices + let bbox_type = RasterSchema::bounding_box_type(); + if let DataType::Struct(bbox_fields) = bbox_type { + assert_eq!( + bbox_fields.len(), + 4, + "Expected exactly 4 bounding box fields" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MIN_X].name(), + column::MIN_X, + "Bounding box min_x index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MIN_Y].name(), + column::MIN_Y, + "Bounding box min_y index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MAX_X].name(), + column::MAX_X, + "Bounding box max_x 
index mismatch" + ); + assert_eq!( + bbox_fields[bounding_box_indices::MAX_Y].name(), + column::MAX_Y, + "Bounding box max_y index mismatch" + ); + } else { + panic!("Expected Struct type for bounding box"); + } + + // Test band metadata indices + let band_metadata_type = RasterSchema::band_metadata_type(); + if let DataType::Struct(band_metadata_fields) = band_metadata_type { + assert_eq!( + band_metadata_fields.len(), + 5, + "Expected exactly 5 band metadata fields" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::NODATAVALUE].name(), + column::NODATAVALUE, + "Band metadata nodatavalue index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::STORAGE_TYPE].name(), + column::STORAGE_TYPE, + "Band metadata storage_type index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::DATATYPE].name(), + column::DATATYPE, + "Band metadata datatype index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::OUTDB_URL].name(), + column::OUTDB_URL, + "Band metadata outdb_url index mismatch" + ); + assert_eq!( + band_metadata_fields[band_metadata_indices::OUTDB_BAND_ID].name(), + column::OUTDB_BAND_ID, + "Band metadata outdb_band_id index mismatch" + ); + } else { + panic!("Expected Struct type for band metadata"); + } + + // Test band indices + let band_type = RasterSchema::band_type(); + if let DataType::Struct(band_fields) = band_type { + assert_eq!(band_fields.len(), 2, "Expected exactly 2 band fields"); + assert_eq!( + band_fields[band_indices::METADATA].name(), + column::METADATA, + "Band metadata index mismatch" + ); + assert_eq!( + band_fields[band_indices::DATA].name(), + column::DATA, + "Band data index mismatch" + ); + } else { + panic!("Expected Struct type for band"); + } + } + + #[test] + fn test_band_data_type_conversion() { + // Create a test raster with bands of different data types + let mut builder = RasterBuilder::new(10); + + let metadata = RasterMetadata { + width: 2, + 
height: 2, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + // Test all BandDataType variants + let test_cases = vec![ + (BandDataType::UInt8, vec![1u8, 2u8, 3u8, 4u8]), + ( + BandDataType::UInt16, + vec![1u8, 0u8, 2u8, 0u8, 3u8, 0u8, 4u8, 0u8], + ), // little-endian u16 + ( + BandDataType::Int16, + vec![255u8, 255u8, 254u8, 255u8, 253u8, 255u8, 252u8, 255u8], + ), // little-endian i16 + ( + BandDataType::UInt32, + vec![ + 1u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, 0u8, 3u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, + ], + ), // little-endian u32 + ( + BandDataType::Int32, + vec![ + 255u8, 255u8, 255u8, 255u8, 254u8, 255u8, 255u8, 255u8, 253u8, 255u8, 255u8, + 255u8, 252u8, 255u8, 255u8, 255u8, + ], + ), // little-endian i32 + ( + BandDataType::Float32, + vec![ + 0u8, 0u8, 128u8, 63u8, 0u8, 0u8, 0u8, 64u8, 0u8, 0u8, 64u8, 64u8, 0u8, 0u8, + 128u8, 64u8, + ], + ), // little-endian f32: 1.0, 2.0, 3.0, 4.0 + ( + BandDataType::Float64, + vec![ + 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 240u8, 63u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, + 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 8u8, 64u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, + 16u8, 64u8, + ], + ), // little-endian f64: 1.0, 2.0, 3.0, 4.0 + ]; + + for (expected_data_type, test_data) in test_cases { + let band_metadata = BandMetadata { + nodata_value: None, + storage_type: StorageType::InDb, + datatype: expected_data_type.clone(), + outdb_url: None, + outdb_band_id: None, + }; + + builder.band_data_writer().append_value(&test_data); + builder.finish_band(band_metadata).unwrap(); + } + + builder.finish_raster().unwrap(); + let raster_array = builder.finish().unwrap(); + + // Test the data type conversion for each band + let iterator = raster_iterator(&raster_array); + let raster = iterator.get(0).unwrap(); + let bands = raster.bands(); + + assert_eq!(bands.len(), 7, "Expected 7 bands for all data types"); + + // Verify 
each band returns the correct data type + let expected_types = vec![ + BandDataType::UInt8, + BandDataType::UInt16, + BandDataType::Int16, + BandDataType::UInt32, + BandDataType::Int32, + BandDataType::Float32, + BandDataType::Float64, + ]; + + // i is zero-based index + for (i, expected_type) in expected_types.iter().enumerate() { + // Bands are 1-based band_number + let band = bands.band(i + 1).unwrap(); + let band_metadata = band.metadata(); + let actual_type = band_metadata.data_type(); + + assert_eq!( + actual_type, *expected_type, + "Band {} expected data type {:?}, got {:?}", + i, expected_type, actual_type + ); + } + } + + #[test] + fn test_outdb_metadata_fields() { + // Test creating raster with OutDb reference metadata + let mut builder = RasterBuilder::new(10); + + let metadata = RasterMetadata { + width: 1024, + height: 1024, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None).unwrap(); + + // Test InDb band (should have null OutDb fields) + let indb_band_metadata = BandMetadata { + nodata_value: Some(vec![255u8]), + storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, + outdb_url: None, + outdb_band_id: None, + }; + + let test_data = vec![1u8; 100]; + builder.band_data_writer().append_value(&test_data); + builder.finish_band(indb_band_metadata).unwrap(); + + // Test OutDbRef band (should have OutDb fields populated) + let outdb_band_metadata = BandMetadata { + nodata_value: None, + storage_type: StorageType::OutDbRef, + datatype: BandDataType::Float32, + outdb_url: Some("s3://mybucket/satellite_image.tif".to_string()), + outdb_band_id: Some(2), + }; + + // For OutDbRef, data field could be empty or contain metadata/thumbnail + builder.band_data_writer().append_value(&[]); + builder.finish_band(outdb_band_metadata).unwrap(); + + builder.finish_raster().unwrap(); + let raster_array = builder.finish().unwrap(); + + // 
Verify the band metadata + let iterator = raster_iterator(&raster_array); + let raster = iterator.get(0).unwrap(); + let bands = raster.bands(); + + assert_eq!(bands.len(), 2); + + // Test InDb band + let indb_band = bands.band(1).unwrap(); + let indb_metadata = indb_band.metadata(); + assert_eq!(indb_metadata.storage_type(), StorageType::InDb); + assert_eq!(indb_metadata.data_type(), BandDataType::UInt8); + assert!(indb_metadata.outdb_url().is_none()); + assert!(indb_metadata.outdb_band_id().is_none()); + assert_eq!(indb_band.data().len(), 100); + + // Test OutDbRef band + let outdb_band = bands.band(2).unwrap(); + let outdb_metadata = outdb_band.metadata(); + assert_eq!(outdb_metadata.storage_type(), StorageType::OutDbRef); + assert_eq!(outdb_metadata.data_type(), BandDataType::Float32); + assert_eq!( + outdb_metadata.outdb_url().unwrap(), + "s3://mybucket/satellite_image.tif" + ); + assert_eq!(outdb_metadata.outdb_band_id().unwrap(), 2); + assert_eq!(outdb_band.data().len(), 0); // Empty data for OutDbRef + } } diff --git a/rust/sedona-schema/src/matchers.rs b/rust/sedona-schema/src/matchers.rs index 57a74ddc..faa39827 100644 --- a/rust/sedona-schema/src/matchers.rs +++ b/rust/sedona-schema/src/matchers.rs @@ -21,7 +21,7 @@ use arrow_schema::DataType; use datafusion_common::{plan_err, Result}; use sedona_common::sedona_internal_err; -use crate::datatypes::{Edges, SedonaType, WKB_GEOGRAPHY, WKB_GEOMETRY}; +use crate::datatypes::{Edges, SedonaType, RASTER, WKB_GEOGRAPHY, WKB_GEOMETRY}; /// Helper to match arguments and compute return types #[derive(Debug)] @@ -170,6 +170,10 @@ impl ArgMatcher { Arc::new(IsGeography {}) } + pub fn is_raster() -> Arc { + Arc::new(IsExact { exact_type: RASTER }) + } + /// Matches a null argument pub fn is_null() -> Arc { Arc::new(IsNull {}) @@ -478,6 +482,10 @@ mod tests { ArgMatcher::is_boolean().type_if_null(), Some(SedonaType::Arrow(DataType::Boolean)) ); + + assert!(ArgMatcher::is_raster().match_type(&RASTER)); + 
assert!(!ArgMatcher::is_raster().match_type(&SedonaType::Arrow(DataType::Int32))); + assert!(!ArgMatcher::is_raster().match_type(&WKB_GEOMETRY)); } #[test] diff --git a/rust/sedona-testing/src/benchmark_util.rs b/rust/sedona-testing/src/benchmark_util.rs index 6b55848d..8ee45b10 100644 --- a/rust/sedona-testing/src/benchmark_util.rs +++ b/rust/sedona-testing/src/benchmark_util.rs @@ -26,7 +26,7 @@ use rand::{distributions::Uniform, rngs::StdRng, Rng, SeedableRng}; use sedona_common::sedona_internal_err; use sedona_geometry::types::GeometryTypeId; -use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY}; +use sedona_schema::datatypes::{SedonaType, RASTER, WKB_GEOMETRY}; use crate::{ datagen::RandomPartitionedDataBuilder, @@ -169,6 +169,13 @@ pub enum BenchmarkArgs { ArrayArray(BenchmarkArgSpec, BenchmarkArgSpec), /// Invoke a function with an array and two scalar inputs ArrayScalarScalar(BenchmarkArgSpec, BenchmarkArgSpec, BenchmarkArgSpec), + /// Invoke a function with an array and three scalar inputs + ArrayScalarScalarScalar( + BenchmarkArgSpec, + BenchmarkArgSpec, + BenchmarkArgSpec, + BenchmarkArgSpec, + ), /// Invoke a ternary function with two arrays and a scalar ArrayArrayScalar(BenchmarkArgSpec, BenchmarkArgSpec, BenchmarkArgSpec), /// Invoke a ternary function with three arrays @@ -204,7 +211,8 @@ impl BenchmarkArgs { | BenchmarkArgs::ArrayArrayArrayArray(_, _, _, _) => self.specs(), BenchmarkArgs::ScalarArray(_, col) | BenchmarkArgs::ArrayScalar(col, _) - | BenchmarkArgs::ArrayScalarScalar(col, _, _) => { + | BenchmarkArgs::ArrayScalarScalar(col, _, _) + | BenchmarkArgs::ArrayScalarScalarScalar(col, _, _, _) => { vec![col.clone()] } }; @@ -217,6 +225,9 @@ impl BenchmarkArgs { BenchmarkArgs::ArrayScalarScalar(_, col0, col1) => { vec![col0.clone(), col1.clone()] } + BenchmarkArgs::ArrayScalarScalarScalar(_, col0, col1, col2) => { + vec![col0.clone(), col1.clone(), col2.clone()] + } _ => vec![], }; @@ -253,7 +264,8 @@ impl BenchmarkArgs { | 
BenchmarkArgs::ArrayArrayArray(col0, col1, col2) => { vec![col0.clone(), col1.clone(), col2.clone()] } - BenchmarkArgs::ArrayArrayArrayArray(col0, col1, col2, col3) => { + BenchmarkArgs::ArrayArrayArrayArray(col0, col1, col2, col3) + | BenchmarkArgs::ArrayScalarScalarScalar(col0, col1, col2, col3) => { vec![col0.clone(), col1.clone(), col2.clone(), col3.clone()] } } @@ -276,11 +288,16 @@ pub enum BenchmarkArgSpec { MultiPoint(usize), /// Randomly generated floating point input with a given range of values Float64(f64, f64), + /// Randomly generated integer input with a given range of values + Int32(i32, i32), /// A transformation of any of the above based on a [ScalarUDF] accepting /// a single argument Transformed(Box, ScalarUDF), /// A string that will be a constant String(String), + /// Randomly generated raster input with a specified width, height and number + /// of bands. + Raster(usize, usize, usize), } // Custom implementation of Debug because otherwise the output of Transformed() @@ -293,8 +310,15 @@ impl Debug for BenchmarkArgSpec { Self::Polygon(arg0) => f.debug_tuple("Polygon").field(arg0).finish(), Self::MultiPoint(arg0) => f.debug_tuple("MultiPoint").field(arg0).finish(), Self::Float64(arg0, arg1) => f.debug_tuple("Float64").field(arg0).field(arg1).finish(), + Self::Int32(arg0, arg1) => f.debug_tuple("Int32").field(arg0).field(arg1).finish(), Self::Transformed(inner, t) => write!(f, "{}({:?})", t.name(), inner), Self::String(s) => write!(f, "String({s})"), + Self::Raster(width, height, bands) => f + .debug_tuple("Raster") + .field(width) + .field(height) + .field(bands) + .finish(), } } } @@ -308,11 +332,13 @@ impl BenchmarkArgSpec { | BenchmarkArgSpec::LineString(_) | BenchmarkArgSpec::MultiPoint(_) => WKB_GEOMETRY, BenchmarkArgSpec::Float64(_, _) => SedonaType::Arrow(DataType::Float64), + BenchmarkArgSpec::Int32(_, _) => SedonaType::Arrow(DataType::Int32), BenchmarkArgSpec::Transformed(inner, t) => { let tester = ScalarUdfTester::new(t.clone(), 
vec![inner.sedona_type()]); tester.return_type().unwrap() } BenchmarkArgSpec::String(_) => SedonaType::Arrow(DataType::Utf8), + BenchmarkArgSpec::Raster(_, _, _) => RASTER, } } @@ -374,6 +400,17 @@ impl BenchmarkArgSpec { }) .collect() } + BenchmarkArgSpec::Int32(lo, hi) => { + let mut rng = self.rng(i); + let dist = Uniform::new(*lo, *hi); + (0..num_batches) + .map(|_| -> Result { + let int32_array: arrow_array::Int32Array = + (0..rows_per_batch).map(|_| rng.sample(dist)).collect(); + Ok(Arc::new(int32_array)) + }) + .collect() + } BenchmarkArgSpec::Transformed(inner, t) => { let inner_type = inner.sedona_type(); let inner_arrays = inner.build_arrays(i, num_batches, rows_per_batch)?; @@ -394,6 +431,15 @@ impl BenchmarkArgSpec { .collect::>>()?; Ok(string_array) } + BenchmarkArgSpec::Raster(width, height, band_count) => self.build_raster( + i, + num_batches, + rows_per_batch, + width, + height, + band_count, + rows_per_batch, + ), } } @@ -426,6 +472,72 @@ impl BenchmarkArgSpec { .collect() } + fn build_raster( + &self, + i: usize, + num_batches: usize, + rows_per_batch: usize, + width: &usize, + height: &usize, + band_count: &usize, + _rows_per_raster: usize, + ) -> Result> { + use rand::distributions::Standard; + use sedona_schema::datatypes::{ + BandDataType, BandMetadata, RasterBuilder, RasterMetadata, StorageType, + }; + + let mut rng = self.rng(i); + let mut batches = Vec::with_capacity(num_batches); + + for _ in 0..num_batches { + let mut builder = RasterBuilder::new(rows_per_batch); + + for _ in 0..rows_per_batch { + // Keep metadata constant across all rasters + let metadata = RasterMetadata { + width: *width as u64, + height: *height as u64, + upperleft_x: 0.0, + upperleft_y: 0.0, + scale_x: 1.0, + scale_y: -1.0, + skew_x: 0.0, + skew_y: 0.0, + bounding_box: None, + }; + + builder.start_raster(&metadata, None, None)?; + + // Generate random data for each band + for _ in 0..*band_count { + let band_metadata = BandMetadata { + nodata_value: None, + 
storage_type: StorageType::InDb, + datatype: BandDataType::UInt8, // Use UInt8 for simplicity + outdb_url: None, + outdb_band_id: None, + }; + + // Generate random pixel data + let num_pixels = width * height; + let pixel_data: Vec = + (0..num_pixels).map(|_| rng.sample(Standard)).collect(); + + builder.band_data_writer().append_value(&pixel_data); + builder.finish_band(band_metadata)?; + } + + builder.finish_raster()?; + } + + let struct_array = builder.finish()?; + batches.push(Arc::new(struct_array) as ArrayRef); + } + + Ok(batches) + } + fn rng(&self, i: usize) -> impl Rng { StdRng::seed_from_u64(42 + i as u64) } @@ -488,6 +600,19 @@ impl BenchmarkData { )?; } } + BenchmarkArgs::ArrayScalarScalarScalar(_, _, _, _) => { + let scalar0 = &self.scalars[0]; + let scalar1 = &self.scalars[1]; + let scalar2 = &self.scalars[2]; + for i in 0..self.num_batches { + tester.invoke_array_scalar_scalar_scalar( + self.arrays[0][i].clone(), + scalar0.clone(), + scalar1.clone(), + scalar2.clone(), + )?; + } + } BenchmarkArgs::ArrayArrayScalar(_, _, _) => { for i in 0..self.num_batches { tester.invoke_array_array_scalar( @@ -701,6 +826,35 @@ mod test { assert_eq!(data.scalars[0].data_type(), DataType::Float64); } + #[test] + fn args_array_scalar_scalar_scalar() { + let spec = BenchmarkArgs::ArrayScalarScalarScalar( + BenchmarkArgSpec::Point, + BenchmarkArgSpec::Float64(1.0, 2.0), + BenchmarkArgSpec::String("test".to_string()), + BenchmarkArgSpec::Int32(1, 10), + ); + assert_eq!( + spec.sedona_types(), + [ + WKB_GEOMETRY, + SedonaType::Arrow(DataType::Float64), + SedonaType::Arrow(DataType::Utf8), + SedonaType::Arrow(DataType::Int32) + ] + ); + + let data = spec.build_data(2, ROWS_PER_BATCH).unwrap(); + assert_eq!(data.num_batches, 2); + assert_eq!(data.arrays.len(), 1); + assert_eq!(data.scalars.len(), 3); + assert_eq!(data.arrays[0].len(), 2); + assert_eq!(WKB_GEOMETRY.storage_type(), data.arrays[0][0].data_type()); + assert_eq!(data.scalars[0].data_type(), DataType::Float64); + 
assert_eq!(data.scalars[1].data_type(), DataType::Utf8); + assert_eq!(data.scalars[2].data_type(), DataType::Int32); + } + #[test] fn args_scalar_array() { let spec = BenchmarkArgs::ScalarArray( @@ -857,4 +1011,42 @@ mod test { assert_eq!(data.arrays[3].len(), 2); assert_eq!(data.arrays[3][0].data_type(), &DataType::Float64); } + + #[test] + fn test_raster_generation() { + use sedona_schema::datatypes::{raster_iterator, RasterRef, RASTER}; + + let spec = BenchmarkArgs::Array(BenchmarkArgSpec::Raster(10, 5, 3)); + + assert_eq!(spec.sedona_types(), [RASTER]); + + let data = spec.build_data(2, 4).unwrap(); // 2 batches, 4 rasters per batch + assert_eq!(data.num_batches, 2); + assert_eq!(data.arrays.len(), 1); + assert_eq!(data.scalars.len(), 0); + assert_eq!(data.arrays[0].len(), 2); // 2 batches + + // Check that it's a raster type + assert_eq!(data.arrays[0][0].data_type(), RASTER.storage_type()); + + // Check the first batch has the right structure + let first_batch = data.arrays[0][0].clone(); + let raster_array = first_batch + .as_any() + .downcast_ref::() + .unwrap(); + + let iterator = raster_iterator(raster_array); + assert_eq!(iterator.len(), 4); // 4 rasters per batch + + // Check first raster metadata + let first_raster = iterator.get(0).unwrap(); + assert_eq!(first_raster.metadata().width(), 10); + assert_eq!(first_raster.metadata().height(), 5); + assert_eq!(first_raster.bands().len(), 3); + + // Check that band data exists and has the right size + let first_band = first_raster.bands().band(1).unwrap(); + assert_eq!(first_band.data().len(), 50); // 10 * 5 pixels + } } diff --git a/rust/sedona-testing/src/testers.rs b/rust/sedona-testing/src/testers.rs index a97bbeee..16113d71 100644 --- a/rust/sedona-testing/src/testers.rs +++ b/rust/sedona-testing/src/testers.rs @@ -296,6 +296,17 @@ impl ScalarUdfTester { self.invoke_arrays_scalar_scalar(vec![array], arg0, arg1) } + /// Invoke a function with an array and three scalars + pub fn 
invoke_array_scalar_scalar_scalar( + &self, + array: ArrayRef, + arg0: impl Literal, + arg1: impl Literal, + arg2: impl Literal, + ) -> Result { + self.invoke_arrays_scalar_scalar_scalar(vec![array], arg0, arg1, arg2) + } + /// Invoke a binary function with a scalar and an array pub fn invoke_scalar_array(&self, arg: impl Literal, array: ArrayRef) -> Result { self.invoke_scalar_arrays(arg, vec![array]) @@ -370,6 +381,30 @@ impl ScalarUdfTester { } } + fn invoke_arrays_scalar_scalar_scalar( + &self, + arrays: Vec, + arg0: impl Literal, + arg1: impl Literal, + arg2: impl Literal, + ) -> Result { + let mut args = zip(arrays, &self.arg_types) + .map(|(array, sedona_type)| { + ColumnarValue::Array(array).cast_to(sedona_type.storage_type(), None) + }) + .collect::>>()?; + let index = args.len(); + args.push(Self::scalar_arg(arg0, &self.arg_types[index])?); + args.push(Self::scalar_arg(arg1, &self.arg_types[index + 1])?); + args.push(Self::scalar_arg(arg2, &self.arg_types[index + 2])?); + + if let ColumnarValue::Array(array) = self.invoke(args)? { + Ok(array) + } else { + sedona_internal_err!("Expected array result from array/scalar invoke") + } + } + // Invoke a function with a set of arrays pub fn invoke_arrays(&self, arrays: Vec) -> Result { let args = zip(arrays, &self.arg_types)