From 0c5972c438376443096a3fa0884e1c324d2d2664 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Sat, 8 Nov 2025 06:46:10 +0100 Subject: [PATCH 01/12] st_polygonize --- c/sedona-geos/src/lib.rs | 1 + c/sedona-geos/src/register.rs | 6 + c/sedona-geos/src/st_polygonize.rs | 311 ++++++++++++++++++ .../tests/functions/test_aggregate.py | 60 ++++ rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 1 + rust/sedona-functions/src/st_polygonize.rs | 65 ++++ rust/sedona/src/context.rs | 4 + 8 files changed, 449 insertions(+) create mode 100644 c/sedona-geos/src/st_polygonize.rs create mode 100644 rust/sedona-functions/src/st_polygonize.rs diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs index fc589a1e..5f5ed6e5 100644 --- a/c/sedona-geos/src/lib.rs +++ b/c/sedona-geos/src/lib.rs @@ -31,6 +31,7 @@ mod st_isvalid; mod st_isvalidreason; mod st_length; mod st_perimeter; +mod st_polygonize; mod st_reverse; mod st_simplifypreservetopology; mod st_unaryunion; diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs index f24404ef..d29091a2 100644 --- a/c/sedona-geos/src/register.rs +++ b/c/sedona-geos/src/register.rs @@ -14,6 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +use sedona_expr::aggregate_udf::SedonaAccumulatorRef; use sedona_expr::scalar_udf::ScalarKernelRef; use crate::{ @@ -29,6 +30,7 @@ use crate::{ st_isvalidreason::st_is_valid_reason_impl, st_length::st_length_impl, st_perimeter::st_perimeter_impl, + st_polygonize::st_polygonize_impl, st_reverse::st_reverse_impl, st_simplifypreservetopology::st_simplify_preserve_topology_impl, st_unaryunion::st_unary_union_impl, @@ -80,3 +82,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { ("st_within", st_within_impl()), ] } + +pub fn aggregate_kernels() -> Vec<(&'static str, SedonaAccumulatorRef)> { + vec![("st_polygonize", st_polygonize_impl())] +} diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs new file mode 100644 index 00000000..35edd768 --- /dev/null +++ b/c/sedona-geos/src/st_polygonize.rs @@ -0,0 +1,311 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_array::{ + builder::OffsetBufferBuilder, cast::as_list_array, Array, ArrayRef, BinaryArray, +}; +use arrow_schema::{DataType, Field, FieldRef}; +use datafusion_common::{cast::as_binary_array, error::Result, DataFusionError, ScalarValue}; +use datafusion_expr::{Accumulator, ColumnarValue}; +use geos::Geom; +use sedona_expr::aggregate_udf::{SedonaAccumulator, SedonaAccumulatorRef}; +use sedona_schema::{ + datatypes::{SedonaType, WKB_GEOMETRY}, + matchers::ArgMatcher, +}; + +/// ST_Polygonize() aggregate implementation using GEOS +pub fn st_polygonize_impl() -> SedonaAccumulatorRef { + Arc::new(STPolygonize {}) +} + +#[derive(Debug)] +struct STPolygonize {} + +impl SedonaAccumulator for STPolygonize { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY); + matcher.match_args(args) + } + + fn accumulator( + &self, + args: &[SedonaType], + _output_type: &SedonaType, + ) -> Result> { + Ok(Box::new(PolygonizeAccumulator::new(args[0].clone()))) + } + + fn state_fields(&self, _args: &[SedonaType]) -> Result> { + Ok(vec![Arc::new(Field::new( + "geometries", + DataType::List(Arc::new(Field::new("item", DataType::Binary, true))), + false, + ))]) + } +} + +#[derive(Debug)] +struct PolygonizeAccumulator { + input_type: SedonaType, + geometries: Vec>, +} + +impl PolygonizeAccumulator { + pub fn new(input_type: SedonaType) -> Self { + Self { + input_type, + geometries: Vec::new(), + } + } + + fn make_wkb_result(&self) -> Result>> { + if self.geometries.is_empty() { + return Ok(None); + } + + let mut geos_geoms = Vec::with_capacity(self.geometries.len()); + for wkb in &self.geometries { + let geom = geos::Geometry::new_from_wkb(wkb).map_err(|e| { + DataFusionError::Execution(format!("Failed to convert WKB to GEOS: {e}")) + })?; + geos_geoms.push(geom); + } + + let result = geos::Geometry::polygonize(&geos_geoms) + .map_err(|e| DataFusionError::Execution(format!("Failed to polygonize: {e}")))?; + + let wkb = result.to_wkb().map_err(|e| { + DataFusionError::Execution(format!("Failed to convert result to WKB: {e}")) + })?; + + Ok(Some(wkb.into())) + } +} + +impl Accumulator for PolygonizeAccumulator { + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + if values.is_empty() { + return Err(DataFusionError::Internal( + "No input arrays provided to accumulator in update_batch".to_string(), + )); + } + + let arg_types = [self.input_type.clone()]; + let args = [ColumnarValue::Array(values[0].clone())]; + let executor = sedona_functions::executor::WkbExecutor::new(&arg_types, &args); + + executor.execute_wkb_void(|maybe_item| { + if let Some(item) = maybe_item { + self.geometries.push(item.buf().to_vec()); + } + Ok(()) + })?; + + Ok(()) + } + + fn evaluate(&mut self) -> Result { + let wkb = self.make_wkb_result()?; + Ok(ScalarValue::Binary(wkb)) + } + + fn size(&self) -> usize { + std::mem::size_of::() + self.geometries.iter().map(|g| g.capacity()).sum::() + } + + fn state(&mut self) -> Result> { + let binary_array = + BinaryArray::from_iter(self.geometries.iter().map(|g| Some(g.as_slice()))); + let mut offsets_builder = OffsetBufferBuilder::new(1); + offsets_builder.push_length(binary_array.len()); + let offsets = offsets_builder.finish(); + + let list_array = arrow_array::ListArray::new( + Arc::new(Field::new("item", DataType::Binary, true)), + offsets, + Arc::new(binary_array), + None, + ); + + Ok(vec![ScalarValue::List(Arc::new(list_array))]) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + if states.is_empty() { + return Err(DataFusionError::Internal( + "No input arrays provided to accumulator in merge_batch".to_string(), + )); + } + + let list_array = as_list_array(&states[0]); + + for i in 0..list_array.len() { + if list_array.is_null(i) { + continue; + } + + let value_ref = list_array.value(i); + let binary_array = as_binary_array(&value_ref)?; + for j in 0..binary_array.len() { + if !binary_array.is_null(j) { + self.geometries.push(binary_array.value(j).to_vec()); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use datafusion_expr::AggregateUDF; + use rstest::rstest; + use sedona_expr::aggregate_udf::SedonaAggregateUDF; + use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY}; + use sedona_testing::{compare::assert_scalar_equal_wkb_geometry, testers::AggregateUdfTester}; + + use super::*; + + fn create_udf() -> SedonaAggregateUDF { + SedonaAggregateUDF::new( + "st_polygonize", + vec![st_polygonize_impl()], + datafusion_expr::Volatility::Immutable, + None, + ) + } + + #[test] + fn udf_metadata() { + let udf = create_udf(); + let aggregate_udf: AggregateUDF = udf.into(); + assert_eq!(aggregate_udf.name(), "st_polygonize"); + } + + #[rstest] + fn basic_triangle(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + assert_eq!(tester.return_type().unwrap(), WKB_GEOMETRY); + + let batches = vec![vec![ + Some("LINESTRING (0 0, 10 0)"), + Some("LINESTRING (10 0, 10 10)"), + Some("LINESTRING (10 10, 0 0)"), + ]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry( + &result, + Some("GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))"), + ); + } + + #[rstest] + fn polygonize_with_nulls(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![ + Some("LINESTRING (0 0, 10 0)"), + None, + Some("LINESTRING (10 0, 10 10)"), + None, + Some("LINESTRING (10 10, 0 0)"), + ]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry( + &result, + Some("GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))"), + ); + } + + #[rstest] + fn polygonize_empty_input(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches: Vec>> = vec![]; + assert_scalar_equal_wkb_geometry(&tester.aggregate_wkt(batches).unwrap(), None); + } + + #[rstest] + fn polygonize_no_polygons_formed( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![ + Some("LINESTRING (0 0, 10 0)"), + Some("LINESTRING (20 0, 30 0)"), + ]]; + assert_scalar_equal_wkb_geometry( + &tester.aggregate_wkt(batches).unwrap(), + Some("GEOMETRYCOLLECTION EMPTY"), + ); + } + + #[rstest] + fn polygonize_multiple_polygons( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![ + Some("LINESTRING (0 0, 10 0)"), + Some("LINESTRING (10 0, 5 10)"), + Some("LINESTRING (5 10, 0 0)"), + Some("LINESTRING (20 0, 30 0)"), + Some("LINESTRING (30 0, 25 10)"), + Some("LINESTRING (25 10, 20 0)"), + ]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry( + &result, + Some("GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 5 10, 10 0)), POLYGON ((30 0, 20 0, 25 10, 30 0)))"), + ); + } + + #[rstest] + fn polygonize_multiple_batches( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + // Testing merge_batch + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![ + vec![Some("LINESTRING (0 0, 10 0)")], + vec![Some("LINESTRING (10 0, 10 10)")], + vec![Some("LINESTRING (10 10, 0 0)")], + ]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry( + &result, + Some("GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))"), + ); + } +} diff --git a/python/sedonadb/tests/functions/test_aggregate.py b/python/sedonadb/tests/functions/test_aggregate.py index 15defddf..87652e3e 100644 --- a/python/sedonadb/tests/functions/test_aggregate.py +++ b/python/sedonadb/tests/functions/test_aggregate.py @@ -115,3 +115,63 @@ def test_st_collect_zero_input(eng): ) AS t(geom) WHERE false""", None, ) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +def test_st_polygonize_basic_triangle(eng): + eng = eng.create_or_skip() + eng.assert_query_result( + """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + VALUES + ('LINESTRING (0 0, 10 0)'), + ('LINESTRING (10 0, 10 10)'), + ('LINESTRING (10 10, 0 0)') + ) AS t(geom)""", + "GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))", + ) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +def test_st_polygonize_with_nulls(eng): + eng = eng.create_or_skip() + eng.assert_query_result( + """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + VALUES + ('LINESTRING (0 0, 10 0)'), + (NULL), + ('LINESTRING (10 0, 10 10)'), + (NULL), + ('LINESTRING (10 10, 0 0)') + ) AS t(geom)""", + "GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))", + ) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +def test_st_polygonize_no_polygons_formed(eng): + eng = eng.create_or_skip() + eng.assert_query_result( + """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + VALUES + ('LINESTRING (0 0, 10 0)'), + ('LINESTRING (20 0, 30 0)') + ) AS t(geom)""", + "GEOMETRYCOLLECTION EMPTY", + ) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +def test_st_polygonize_multiple_polygons(eng): + eng = eng.create_or_skip() + eng.assert_query_result( + """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + VALUES + ('LINESTRING (0 0, 10 0)'), + ('LINESTRING (10 0, 5 10)'), + ('LINESTRING (5 10, 0 0)'), + ('LINESTRING (20 0, 30 0)'), + ('LINESTRING (30 0, 25 10)'), + ('LINESTRING (25 10, 20 0)') + ) AS t(geom)""", + "GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 5 10, 10 0)), POLYGON ((30 0, 20 0, 25 10, 30 0)))", + ) diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 0014629e..aa421c9b 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -52,6 +52,7 @@ mod st_point; mod st_pointn; mod st_points; mod st_pointzm; +mod st_polygonize; mod st_setsrid; mod st_srid; mod st_start_point; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index ba9f82d0..12312dc2 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -123,6 +123,7 @@ pub fn default_function_set() -> FunctionSet { crate::st_collect::st_collect_udf, crate::st_envelope_aggr::st_envelope_aggr_udf, crate::st_intersection_aggr::st_intersection_aggr_udf, + crate::st_polygonize::st_polygonize_udf, crate::st_union_aggr::st_union_aggr_udf, ); diff --git a/rust/sedona-functions/src/st_polygonize.rs b/rust/sedona-functions/src/st_polygonize.rs new file mode 100644 index 00000000..023265ed --- /dev/null +++ b/rust/sedona-functions/src/st_polygonize.rs @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::vec; + +use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility}; +use sedona_expr::aggregate_udf::SedonaAggregateUDF; +use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher}; + +/// ST_Polygonize() aggregate UDF implementation +/// +/// Creates polygons from a set of linework that forms closed rings. +pub fn st_polygonize_udf() -> SedonaAggregateUDF { + SedonaAggregateUDF::new_stub( + "st_polygonize", + ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY), + Volatility::Immutable, + Some(st_polygonize_doc()), + ) +} + +fn st_polygonize_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Creates a GeometryCollection containing polygons formed from the linework of a set of geometries. \ + Returns an empty GeometryCollection if no polygons can be formed.", + "ST_Polygonize (geom: Geometry)", + ) + .with_argument("geom", "geometry: Input geometry (typically linestrings that form closed rings)") + .with_sql_example( + "SELECT ST_AsText(ST_Polygonize(geom)) FROM (VALUES \ + (ST_GeomFromText('LINESTRING (0 0, 10 0)')), \ + (ST_GeomFromText('LINESTRING (10 0, 10 10)')), \ + (ST_GeomFromText('LINESTRING (10 10, 0 0)')) \ + ) AS t(geom)" + ) + .build() +} + +#[cfg(test)] +mod test { + use datafusion_expr::AggregateUDF; + + use super::*; + + #[test] + fn udf_metadata() { + let udf: AggregateUDF = st_polygonize_udf().into(); + assert_eq!(udf.name(), "st_polygonize"); + assert!(udf.documentation().is_some()); + } +} diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs index 744dca22..bd503e5d 100644 --- a/rust/sedona/src/context.rs +++ b/rust/sedona/src/context.rs @@ -129,6 +129,10 @@ impl SedonaContext { #[cfg(feature = "geos")] out.register_scalar_kernels(sedona_geos::register::scalar_kernels().into_iter())?; + // Register geos aggregate kernels if built with geos support + #[cfg(feature = "geos")] + out.register_aggregate_kernels(sedona_geos::register::aggregate_kernels().into_iter())?; + // Register geo kernels if built with geo support #[cfg(feature = "geo")] out.register_scalar_kernels(sedona_geo::register::scalar_kernels().into_iter())?; From 4f50e2b78fc07dde91f29083305a192f02127231 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Sun, 9 Nov 2025 07:35:05 +0100 Subject: [PATCH 02/12] add single geometry tests --- c/sedona-geos/src/st_polygonize.rs | 115 ++++++++++++++++++ .../tests/functions/test_aggregate.py | 33 +++++ 2 files changed, 148 insertions(+) diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index 35edd768..12c715c9 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -308,4 +308,119 @@ mod tests { Some("GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))"), ); } + + #[rstest] + fn polygonize_single_polygon( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![Some("POLYGON ((10 0, 0 0, 10 10, 10 0))")]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry( + &result, + Some("GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))"), + ); + } + + #[rstest] + fn polygonize_multipolygon(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![Some( + "MULTIPOLYGON (((0 0, 1 0, 0 1, 0 0)), ((10 10, 11 10, 10 11, 10 10)))", + )]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry( + &result, + Some("GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 0, 0 0)), POLYGON ((10 10, 10 11, 11 10, 10 10)))"), + ); + } + + #[rstest] + fn polygonize_closed_ring_linestring( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![Some("LINESTRING (0 0, 0 1, 1 1, 1 0, 0 0)")]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry( + &result, + Some("GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))"), + ); + } + + #[rstest] + fn polygonize_point_returns_empty( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![Some("POINT (0 0)")]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry(&result, Some("GEOMETRYCOLLECTION EMPTY")); + } + + #[rstest] + fn polygonize_multipoint_returns_empty( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![Some("MULTIPOINT ((0 0), (1 1))")]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry(&result, Some("GEOMETRYCOLLECTION EMPTY")); + } + + #[rstest] + fn polygonize_multilinestring_returns_empty( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![Some("MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))")]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry(&result, Some("GEOMETRYCOLLECTION EMPTY")); + } + + #[rstest] + fn polygonize_geometrycollection_returns_empty( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![Some( + "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))", + )]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry(&result, Some("GEOMETRYCOLLECTION EMPTY")); + } + + #[rstest] + fn polygonize_empty_linestring_returns_empty( + #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType, + ) { + let udf = create_udf(); + let tester = AggregateUdfTester::new(udf.into(), vec![sedona_type.clone()]); + + let batches = vec![vec![Some("LINESTRING EMPTY")]]; + + let result = tester.aggregate_wkt(batches).unwrap(); + assert_scalar_equal_wkb_geometry(&result, Some("GEOMETRYCOLLECTION EMPTY")); + } } diff --git a/python/sedonadb/tests/functions/test_aggregate.py b/python/sedonadb/tests/functions/test_aggregate.py index 87652e3e..e7b1c127 100644 --- a/python/sedonadb/tests/functions/test_aggregate.py +++ b/python/sedonadb/tests/functions/test_aggregate.py @@ -175,3 +175,36 @@ def test_st_polygonize_multiple_polygons(eng): ) AS t(geom)""", "GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 5 10, 10 0)), POLYGON ((30 0, 20 0, 25 10, 30 0)))", ) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom", "expected"), + [ + ( + "POLYGON ((10 0, 0 0, 10 10, 10 0))", + "GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))", + ), + ( + "LINESTRING (0 0, 0 1, 1 1, 1 0, 0 0)", + "GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))", + ), + ("POINT (0 0)", "GEOMETRYCOLLECTION EMPTY"), + ("MULTIPOINT ((0 0), (1 1))", "GEOMETRYCOLLECTION EMPTY"), + ("MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))", "GEOMETRYCOLLECTION EMPTY"), + ( + "MULTIPOLYGON (((0 0, 1 0, 0 1, 0 0)), ((10 10, 11 10, 10 11, 10 10)))", + "GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 0, 0 0)), POLYGON ((10 10, 10 11, 11 10, 10 10)))", + ), + ("GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))", "GEOMETRYCOLLECTION EMPTY"), + ("LINESTRING EMPTY", "GEOMETRYCOLLECTION EMPTY"), + ], +) +def test_st_polygonize_single_geom(eng, geom, expected): + eng = eng.create_or_skip() + eng.assert_query_result( + f"""SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + VALUES ('{geom}') + ) AS t(geom)""", + expected, + ) From 693196ec4a8feebfd5f363c29b20f8a74bdfc3f1 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sat, 8 Nov 2025 09:16:13 -0800 Subject: [PATCH 03/12] Avoid unnecessary .to_vec() calls that copy --- c/sedona-geos/src/st_polygonize.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index 12c715c9..0a3f01f3 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -64,7 +64,7 @@ impl SedonaAccumulator for STPolygonize { #[derive(Debug)] struct PolygonizeAccumulator { input_type: SedonaType, - geometries: Vec>, + geometries: Vec>, } impl PolygonizeAccumulator { @@ -107,13 +107,13 @@ impl Accumulator for PolygonizeAccumulator { )); } - let arg_types = [self.input_type.clone()]; + let arg_types = std::slice::from_ref(&self.input_type); let args = [ColumnarValue::Array(values[0].clone())]; - let executor = sedona_functions::executor::WkbExecutor::new(&arg_types, &args); + let executor = sedona_functions::executor::WkbExecutor::new(arg_types, &args); executor.execute_wkb_void(|maybe_item| { if let Some(item) = maybe_item { - self.geometries.push(item.buf().to_vec()); + self.geometries.push(item.buf().into()); } Ok(()) })?; @@ -127,12 +127,11 @@ impl Accumulator for PolygonizeAccumulator { } fn size(&self) -> usize { - std::mem::size_of::() + self.geometries.iter().map(|g| g.capacity()).sum::() + std::mem::size_of::() + self.geometries.iter().map(|g| g.len()).sum::() } fn state(&mut self) -> Result> { - let binary_array = - BinaryArray::from_iter(self.geometries.iter().map(|g| Some(g.as_slice()))); + let binary_array = BinaryArray::from_iter(self.geometries.iter().map(|g| Some(g.as_ref()))); let mut offsets_builder = OffsetBufferBuilder::new(1); offsets_builder.push_length(binary_array.len()); let offsets = offsets_builder.finish(); @@ -165,7 +164,7 @@ impl Accumulator for PolygonizeAccumulator { let binary_array = as_binary_array(&value_ref)?; for j in 0..binary_array.len() { if !binary_array.is_null(j) { - self.geometries.push(binary_array.value(j).to_vec()); + self.geometries.push(binary_array.value(j).into()); } } } From 870f87168cc933e5f92bdb87488d497ff818bf33 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Sun, 9 Nov 2025 13:44:02 +0100 Subject: [PATCH 04/12] use GEOS_WKB_FACTORY --- c/sedona-geos/src/st_polygonize.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index 0a3f01f3..9dd0888e 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use crate::wkb_to_geos::GEOSWkbFactory; use arrow_array::{ builder::OffsetBufferBuilder, cast::as_list_array, Array, ArrayRef, BinaryArray, }; @@ -29,6 +30,19 @@ use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, matchers::ArgMatcher, }; +use wkb::reader::{read_wkb, Wkb}; + +thread_local! { + static GEOS_WKB_FACTORY: GEOSWkbFactory = GEOSWkbFactory::new(); +} + +fn wkb_to_geos_geometry(wkb: &Wkb) -> Result { + GEOS_WKB_FACTORY.with(|factory| { + factory.create(wkb).map_err(|e| { + DataFusionError::Execution(format!("Failed to create geometry from WKB: {e}")) + }) + }) +} /// ST_Polygonize() aggregate implementation using GEOS pub fn st_polygonize_impl() -> SedonaAccumulatorRef { @@ -81,10 +95,10 @@ impl PolygonizeAccumulator { } let mut geos_geoms = Vec::with_capacity(self.geometries.len()); - for wkb in &self.geometries { - let geom = geos::Geometry::new_from_wkb(wkb).map_err(|e| { - DataFusionError::Execution(format!("Failed to convert WKB to GEOS: {e}")) - })?; + for wkb_bytes in &self.geometries { + let wkb = read_wkb(wkb_bytes) + .map_err(|e| DataFusionError::Execution(format!("Failed to read WKB: {e}")))?; + let geom = wkb_to_geos_geometry(&wkb)?; geos_geoms.push(geom); } From ec9a3680017efbb9dd645dd2dfd03d36b2c1ac62 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Mon, 10 Nov 2025 06:27:49 +0100 Subject: [PATCH 05/12] fix pre-commit --- python/sedonadb/tests/functions/test_aggregate.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/sedonadb/tests/functions/test_aggregate.py b/python/sedonadb/tests/functions/test_aggregate.py index e7b1c127..d79336c3 100644 --- a/python/sedonadb/tests/functions/test_aggregate.py +++ b/python/sedonadb/tests/functions/test_aggregate.py @@ -196,7 +196,10 @@ def test_st_polygonize_multiple_polygons(eng): "MULTIPOLYGON (((0 0, 1 0, 0 1, 0 0)), ((10 10, 11 10, 10 11, 10 10)))", "GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 0, 0 0)), POLYGON ((10 10, 10 11, 11 10, 10 10)))", ), - ("GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))", "GEOMETRYCOLLECTION EMPTY"), + ( + "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))", + "GEOMETRYCOLLECTION EMPTY", + ), ("LINESTRING EMPTY", "GEOMETRYCOLLECTION EMPTY"), ], ) From 0269bdff3aa8c70c41517c0ebd06d588a7935f42 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Tue, 11 Nov 2025 05:54:44 +0100 Subject: [PATCH 06/12] remove thread local --- c/sedona-geos/src/st_polygonize.rs | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index 9dd0888e..3ce53929 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -30,19 +30,7 @@ use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, matchers::ArgMatcher, }; -use wkb::reader::{read_wkb, Wkb}; - -thread_local! { - static GEOS_WKB_FACTORY: GEOSWkbFactory = GEOSWkbFactory::new(); -} - -fn wkb_to_geos_geometry(wkb: &Wkb) -> Result { - GEOS_WKB_FACTORY.with(|factory| { - factory.create(wkb).map_err(|e| { - DataFusionError::Execution(format!("Failed to create geometry from WKB: {e}")) - }) - }) -} +use wkb::reader::read_wkb; /// ST_Polygonize() aggregate implementation using GEOS pub fn st_polygonize_impl() -> SedonaAccumulatorRef { @@ -94,11 +82,14 @@ impl PolygonizeAccumulator { return Ok(None); } + let factory = GEOSWkbFactory::new(); let mut geos_geoms = Vec::with_capacity(self.geometries.len()); for wkb_bytes in &self.geometries { let wkb = read_wkb(wkb_bytes) .map_err(|e| DataFusionError::Execution(format!("Failed to read WKB: {e}")))?; - let geom = wkb_to_geos_geometry(&wkb)?; + let geom = factory + .create(&wkb) + .map_err(|e| DataFusionError::Execution(format!("Failed to create geometry from WKB: {e}")))?; geos_geoms.push(geom); } From 96ff34407cf43251a565b9eae96810c001198780 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Tue, 11 Nov 2025 07:38:46 +0100 Subject: [PATCH 07/12] use single buffer implementation --- c/sedona-geos/src/st_polygonize.rs | 125 ++++++++++++++++++----------- 1 file changed, 77 insertions(+), 48 deletions(-) diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index 3ce53929..df328d6e 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -18,14 +18,14 @@ use std::sync::Arc; use crate::wkb_to_geos::GEOSWkbFactory; -use arrow_array::{ - builder::OffsetBufferBuilder, cast::as_list_array, Array, ArrayRef, BinaryArray, -}; +use arrow_array::{cast::AsArray, types::UInt64Type, Array, ArrayRef}; use arrow_schema::{DataType, Field, FieldRef}; use datafusion_common::{cast::as_binary_array, error::Result, DataFusionError, ScalarValue}; use datafusion_expr::{Accumulator, ColumnarValue}; +use geo_traits::Dimensions; use geos::Geom; use sedona_expr::aggregate_udf::{SedonaAccumulator, SedonaAccumulatorRef}; +use sedona_geometry::wkb_factory::write_wkb_geometrycollection_header; use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, matchers::ArgMatcher, @@ -55,42 +55,67 @@ impl SedonaAccumulator for STPolygonize { } fn state_fields(&self, _args: &[SedonaType]) -> Result> { - Ok(vec![Arc::new(Field::new( - "geometries", - DataType::List(Arc::new(Field::new("item", DataType::Binary, true))), - false, - ))]) + Ok(vec![ + Arc::new(Field::new("count", DataType::UInt64, false)), + Arc::new(Field::new("item", DataType::Binary, true)), + ]) } } #[derive(Debug)] struct PolygonizeAccumulator { input_type: SedonaType, - geometries: Vec>, + item: Option>, + count: usize, } +const WKB_HEADER_SIZE: usize = 1 + 4 + 4; + impl PolygonizeAccumulator { pub fn new(input_type: SedonaType) -> Self { + let mut item = Vec::new(); + write_wkb_geometrycollection_header(&mut item, Dimensions::Xy, 0) + .expect("Failed to write initial GeometryCollection header"); + Self { input_type, - geometries: Vec::new(), + item: Some(item), + count: 0, } } - fn make_wkb_result(&self) -> Result>> { - if self.geometries.is_empty() { + fn make_wkb_result(&mut self) -> Result>> { + if self.count == 0 { return Ok(None); } + let collection_wkb = self.item.as_mut().unwrap(); + let mut header = Vec::new(); + write_wkb_geometrycollection_header(&mut header, Dimensions::Xy, self.count) + .map_err(|e| DataFusionError::Execution(format!("Failed to write header: {e}")))?; + collection_wkb[0..WKB_HEADER_SIZE].copy_from_slice(&header); + + let wkb = read_wkb(collection_wkb) + .map_err(|e| DataFusionError::Execution(format!("Failed to read WKB: {e}")))?; + let factory = GEOSWkbFactory::new(); - let mut geos_geoms = Vec::with_capacity(self.geometries.len()); - for wkb_bytes in &self.geometries { - let wkb = read_wkb(wkb_bytes) - .map_err(|e| DataFusionError::Execution(format!("Failed to read WKB: {e}")))?; - let geom = factory - .create(&wkb) - .map_err(|e| DataFusionError::Execution(format!("Failed to create geometry from WKB: {e}")))?; - geos_geoms.push(geom); + let collection = factory.create(&wkb).map_err(|e| { + DataFusionError::Execution(format!("Failed to create geometry from WKB: {e}")) + })?; + + let num_geoms = collection.get_num_geometries().map_err(|e| { + DataFusionError::Execution(format!("Failed to get number of geometries: {e}")) + })?; + + let mut geos_geoms = Vec::with_capacity(num_geoms); + for i in 0..num_geoms { + let geom = collection.get_geometry_n(i).map_err(|e| { + DataFusionError::Execution(format!("Failed to get geometry {}: {e}", i)) + })?; + // Clone is necessary: get_geometry_n() returns ConstGeometry<'_> which doesn't + // implement Borrow. The GEOS polygonize() function signature requires + // T: Borrow, so we must clone to get owned Geometry instances. + geos_geoms.push(geom.clone()); } let result = geos::Geometry::polygonize(&geos_geoms) @@ -116,9 +141,11 @@ impl Accumulator for PolygonizeAccumulator { let args = [ColumnarValue::Array(values[0].clone())]; let executor = sedona_functions::executor::WkbExecutor::new(arg_types, &args); + let item_ref = self.item.as_mut().unwrap(); executor.execute_wkb_void(|maybe_item| { if let Some(item) = maybe_item { - self.geometries.push(item.buf().into()); + item_ref.extend_from_slice(item.buf()); + self.count += 1; } Ok(()) })?; @@ -132,44 +159,46 @@ impl Accumulator for PolygonizeAccumulator { } fn size(&self) -> usize { - std::mem::size_of::() + self.geometries.iter().map(|g| g.len()).sum::() + let item_capacity = self.item.as_ref().map(|e| e.capacity()).unwrap_or(0); + size_of::() + item_capacity } fn state(&mut self) -> Result> { - let binary_array = BinaryArray::from_iter(self.geometries.iter().map(|g| Some(g.as_ref()))); - let mut offsets_builder = OffsetBufferBuilder::new(1); - offsets_builder.push_length(binary_array.len()); - let offsets = offsets_builder.finish(); + let serialized_count = ScalarValue::UInt64(Some(self.count as u64)); + let serialized_item = ScalarValue::Binary(self.item.take()); - let list_array = arrow_array::ListArray::new( - Arc::new(Field::new("item", DataType::Binary, true)), - offsets, - Arc::new(binary_array), - None, - ); + let mut item = Vec::new(); + write_wkb_geometrycollection_header(&mut item, Dimensions::Xy, 0) + .expect("Failed to write initial GeometryCollection header"); + self.item = Some(item); + self.count = 0; - Ok(vec![ScalarValue::List(Arc::new(list_array))]) + Ok(vec![serialized_count, serialized_item]) } fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { - if states.is_empty() { - return Err(DataFusionError::Internal( - "No input arrays provided to accumulator in merge_batch".to_string(), - )); + if states.len() != 2 { + return Err(DataFusionError::Internal(format!( + "Unexpected number of state fields for st_polygonize() (expected 2, got {})", + states.len() + ))); } - let list_array = as_list_array(&states[0]); - - for i in 0..list_array.len() { - if list_array.is_null(i) { - continue; - } + let item_ref = self.item.as_mut().ok_or_else(|| { + DataFusionError::Internal("Unexpected internal state in ST_Polygonize()".to_string()) + })?; - let value_ref = list_array.value(i); - let binary_array = as_binary_array(&value_ref)?; - for j in 0..binary_array.len() { - if !binary_array.is_null(j) { - self.geometries.push(binary_array.value(j).into()); + let count_array = states[0].as_primitive::(); + let item_array = as_binary_array(&states[1])?; + + for i in 0..count_array.len() { + let count = count_array.value(i) as usize; + if count > 0 { + if !item_array.is_null(i) { + let item = item_array.value(i); + // Skip the header and append the geometry data + item_ref.extend_from_slice(&item[WKB_HEADER_SIZE..]); + self.count += count; } } } From d2e5c9b271e07a8c54c0c68257636310a0eadbc4 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Tue, 11 Nov 2025 17:48:55 +0100 Subject: [PATCH 08/12] fix clippy --- c/sedona-geos/src/st_polygonize.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index df328d6e..bf641eb3 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -193,14 +193,13 @@ impl Accumulator for PolygonizeAccumulator { for i in 0..count_array.len() { let count = count_array.value(i) as usize; - if count > 0 { - if !item_array.is_null(i) { + if count > 0 + && !item_array.is_null(i) { let item = item_array.value(i); // Skip the header and append the geometry data item_ref.extend_from_slice(&item[WKB_HEADER_SIZE..]); self.count += count; } - } } Ok(()) From efbdfccd1b09911859e628fc8c629d1725b3c04f Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Tue, 11 Nov 2025 18:05:30 +0100 Subject: [PATCH 09/12] fix clippy2 --- c/sedona-geos/src/st_polygonize.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index bf641eb3..eae89d9a 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -193,13 +193,12 @@ impl Accumulator for PolygonizeAccumulator { for i in 0..count_array.len() { let count = count_array.value(i) as usize; - if count > 0 - && !item_array.is_null(i) { - let item = item_array.value(i); - // Skip the header and append the geometry data - item_ref.extend_from_slice(&item[WKB_HEADER_SIZE..]); - self.count += count; - } + if count > 0 && !item_array.is_null(i) { + let item = item_array.value(i); + // Skip the header and append the geometry data + item_ref.extend_from_slice(&item[WKB_HEADER_SIZE..]); + self.count += count; + } } Ok(()) From 08cb6fba57ce56c9c7e48c31d4c480e916f10e7b Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Tue, 11 Nov 2025 18:07:12 +0100 Subject: [PATCH 10/12] remove comments --- c/sedona-geos/src/st_polygonize.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index eae89d9a..54d24857 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -112,9 +112,6 @@ impl PolygonizeAccumulator { let geom = collection.get_geometry_n(i).map_err(|e| { DataFusionError::Execution(format!("Failed to get geometry {}: {e}", i)) })?; - // Clone is necessary: get_geometry_n() returns ConstGeometry<'_> which doesn't - // implement Borrow. The GEOS polygonize() function signature requires - // T: Borrow, so we must clone to get owned Geometry instances. geos_geoms.push(geom.clone()); } From af3aac04d06fbb03548712a16fb74035773dacc6 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Wed, 12 Nov 2025 06:18:00 +0100 Subject: [PATCH 11/12] use sedona_internal_err macro instead of DdataFusionError::Internal --- Cargo.lock | 1 + c/sedona-geos/Cargo.toml | 1 + c/sedona-geos/src/st_polygonize.rs | 16 ++++++++-------- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0af964a..20e02b3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5108,6 +5108,7 @@ dependencies = [ "geos", "rstest", "sedona", + "sedona-common", "sedona-expr", "sedona-functions", "sedona-geometry", diff --git a/c/sedona-geos/Cargo.toml b/c/sedona-geos/Cargo.toml index d92f076f..549a76bd 100644 --- a/c/sedona-geos/Cargo.toml +++ b/c/sedona-geos/Cargo.toml @@ -39,6 +39,7 @@ arrow-array = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } geos = { workspace = true } +sedona-common = { path = "../../rust/sedona-common" } sedona-expr = { path = "../../rust/sedona-expr" } sedona-functions = { path = "../../rust/sedona-functions" } sedona-geometry = { path = "../../rust/sedona-geometry" } diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize.rs index 54d24857..eb41c32f 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize.rs @@ -24,6 +24,7 @@ use datafusion_common::{cast::as_binary_array, error::Result, DataFusionError, S use datafusion_expr::{Accumulator, ColumnarValue}; use geo_traits::Dimensions; use geos::Geom; +use sedona_common::sedona_internal_err; use sedona_expr::aggregate_udf::{SedonaAccumulator, SedonaAccumulatorRef}; use sedona_geometry::wkb_factory::write_wkb_geometrycollection_header; use sedona_schema::{ @@ -129,9 +130,7 @@ impl PolygonizeAccumulator { impl Accumulator for PolygonizeAccumulator { fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { if values.is_empty() { - return Err(DataFusionError::Internal( - "No input arrays provided to accumulator in update_batch".to_string(), - )); + return sedona_internal_err!("No input arrays provided to accumulator in update_batch"); } let arg_types = std::slice::from_ref(&self.input_type); @@ -175,15 +174,16 @@ impl Accumulator for PolygonizeAccumulator { fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { if states.len() != 2 { - return Err(DataFusionError::Internal(format!( + return sedona_internal_err!( "Unexpected number of state fields for st_polygonize() (expected 2, got {})", states.len() - ))); + ); } - let item_ref = self.item.as_mut().ok_or_else(|| { - DataFusionError::Internal("Unexpected internal state in ST_Polygonize()".to_string()) - })?; + let item_ref = match self.item.as_mut() { + Some(item) => item, + None => return sedona_internal_err!("Unexpected internal state in ST_Polygonize()"), + }; let count_array = states[0].as_primitive::(); let item_array = as_binary_array(&states[1])?; From be9364fa914cb64695750a4a82035f7aa5c4cb62 Mon Sep 17 00:00:00 2001 From: joonaspessi Date: Wed, 12 Nov 2025 06:51:42 +0100 Subject: [PATCH 12/12] change st_polygonize to st_polygonize_agg --- c/sedona-geos/src/lib.rs | 2 +- c/sedona-geos/src/register.rs | 4 ++-- ...{st_polygonize.rs => st_polygonize_agg.rs} | 16 +++++++-------- .../tests/functions/test_aggregate.py | 20 ++++++++++++++----- rust/sedona-functions/src/lib.rs | 2 +- rust/sedona-functions/src/register.rs | 2 +- ...{st_polygonize.rs => st_polygonize_agg.rs} | 18 ++++++++--------- 7 files changed, 37 insertions(+), 27 deletions(-) rename c/sedona-geos/src/{st_polygonize.rs => st_polygonize_agg.rs} (97%) rename rust/sedona-functions/src/{st_polygonize.rs => st_polygonize_agg.rs} (81%) diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs index 5f5ed6e5..282c7687 100644 --- a/c/sedona-geos/src/lib.rs +++ b/c/sedona-geos/src/lib.rs @@ -31,7 +31,7 @@ mod st_isvalid; mod st_isvalidreason; mod st_length; mod st_perimeter; -mod st_polygonize; +mod st_polygonize_agg; mod st_reverse; mod st_simplifypreservetopology; mod st_unaryunion; diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs index d29091a2..705f234b 100644 --- a/c/sedona-geos/src/register.rs +++ b/c/sedona-geos/src/register.rs @@ -30,7 +30,7 @@ use crate::{ st_isvalidreason::st_is_valid_reason_impl, st_length::st_length_impl, st_perimeter::st_perimeter_impl, - st_polygonize::st_polygonize_impl, + st_polygonize_agg::st_polygonize_agg_impl, st_reverse::st_reverse_impl, st_simplifypreservetopology::st_simplify_preserve_topology_impl, st_unaryunion::st_unary_union_impl, @@ -84,5 +84,5 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { } pub fn aggregate_kernels() -> Vec<(&'static str, SedonaAccumulatorRef)> { - vec![("st_polygonize", st_polygonize_impl())] + vec![("st_polygonize_agg", st_polygonize_agg_impl())] } diff --git a/c/sedona-geos/src/st_polygonize.rs b/c/sedona-geos/src/st_polygonize_agg.rs similarity index 97% rename from c/sedona-geos/src/st_polygonize.rs rename to c/sedona-geos/src/st_polygonize_agg.rs index eb41c32f..13e35d02 100644 --- a/c/sedona-geos/src/st_polygonize.rs +++ b/c/sedona-geos/src/st_polygonize_agg.rs @@ -33,15 +33,15 @@ use sedona_schema::{ }; use wkb::reader::read_wkb; -/// ST_Polygonize() aggregate implementation using GEOS -pub fn st_polygonize_impl() -> SedonaAccumulatorRef { - Arc::new(STPolygonize {}) +/// ST_Polygonize_Agg() aggregate implementation using GEOS +pub fn st_polygonize_agg_impl() -> SedonaAccumulatorRef { + Arc::new(STPolygonizeAgg {}) } #[derive(Debug)] -struct STPolygonize {} +struct STPolygonizeAgg {} -impl SedonaAccumulator for STPolygonize { +impl SedonaAccumulator for STPolygonizeAgg { fn return_type(&self, args: &[SedonaType]) -> Result> { let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY); matcher.match_args(args) @@ -214,8 +214,8 @@ mod tests { fn create_udf() -> SedonaAggregateUDF { SedonaAggregateUDF::new( - "st_polygonize", - vec![st_polygonize_impl()], + "st_polygonize_agg", + vec![st_polygonize_agg_impl()], datafusion_expr::Volatility::Immutable, None, ) @@ -225,7 +225,7 @@ mod tests { fn udf_metadata() { let udf = create_udf(); let aggregate_udf: AggregateUDF = udf.into(); - assert_eq!(aggregate_udf.name(), "st_polygonize"); + assert_eq!(aggregate_udf.name(), "st_polygonize_agg"); } #[rstest] diff --git a/python/sedonadb/tests/functions/test_aggregate.py b/python/sedonadb/tests/functions/test_aggregate.py index d79336c3..5fddd9ad 100644 --- a/python/sedonadb/tests/functions/test_aggregate.py +++ b/python/sedonadb/tests/functions/test_aggregate.py @@ -19,6 +19,11 @@ from sedonadb.testing import PostGIS, SedonaDB +def polygonize_fn_suffix(eng): + """Return the appropriate suffix for the polygonize function for the given engine.""" + return "" if isinstance(eng, PostGIS) else "_Agg" + + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) def test_st_collect_points(eng): eng = eng.create_or_skip() @@ -120,8 +125,9 @@ def test_st_collect_zero_input(eng): @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) def test_st_polygonize_basic_triangle(eng): eng = eng.create_or_skip() + suffix = polygonize_fn_suffix(eng) eng.assert_query_result( - """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + f"""SELECT ST_Polygonize{suffix}(ST_GeomFromText(geom)) FROM ( VALUES ('LINESTRING (0 0, 10 0)'), ('LINESTRING (10 0, 10 10)'), @@ -134,8 +140,9 @@ def test_st_polygonize_basic_triangle(eng): @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) def test_st_polygonize_with_nulls(eng): eng = eng.create_or_skip() + suffix = polygonize_fn_suffix(eng) eng.assert_query_result( - """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + f"""SELECT ST_Polygonize{suffix}(ST_GeomFromText(geom)) FROM ( VALUES ('LINESTRING (0 0, 10 0)'), (NULL), @@ -150,8 +157,9 @@ def test_st_polygonize_with_nulls(eng): @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) def test_st_polygonize_no_polygons_formed(eng): eng = eng.create_or_skip() + suffix = polygonize_fn_suffix(eng) eng.assert_query_result( - """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + f"""SELECT ST_Polygonize{suffix}(ST_GeomFromText(geom)) FROM ( VALUES ('LINESTRING (0 0, 10 0)'), ('LINESTRING (20 0, 30 0)') @@ -163,8 +171,9 @@ def test_st_polygonize_no_polygons_formed(eng): @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) def test_st_polygonize_multiple_polygons(eng): eng = eng.create_or_skip() + suffix = polygonize_fn_suffix(eng) eng.assert_query_result( - """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + f"""SELECT ST_Polygonize{suffix}(ST_GeomFromText(geom)) FROM ( VALUES ('LINESTRING (0 0, 10 0)'), ('LINESTRING (10 0, 5 10)'), @@ -205,8 +214,9 @@ def test_st_polygonize_multiple_polygons(eng): ) def test_st_polygonize_single_geom(eng, geom, expected): eng = eng.create_or_skip() + suffix = polygonize_fn_suffix(eng) eng.assert_query_result( - f"""SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM ( + f"""SELECT ST_Polygonize{suffix}(ST_GeomFromText(geom)) FROM ( VALUES ('{geom}') ) AS t(geom)""", expected, diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index aa421c9b..25417e06 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -52,7 +52,7 @@ mod st_point; mod st_pointn; mod st_points; mod st_pointzm; -mod st_polygonize; +mod st_polygonize_agg; mod st_setsrid; mod st_srid; mod st_start_point; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index 12312dc2..9a06f258 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -123,7 +123,7 @@ pub fn default_function_set() -> FunctionSet { crate::st_collect::st_collect_udf, crate::st_envelope_aggr::st_envelope_aggr_udf, crate::st_intersection_aggr::st_intersection_aggr_udf, - crate::st_polygonize::st_polygonize_udf, + crate::st_polygonize_agg::st_polygonize_agg_udf, crate::st_union_aggr::st_union_aggr_udf, ); diff --git a/rust/sedona-functions/src/st_polygonize.rs b/rust/sedona-functions/src/st_polygonize_agg.rs similarity index 81% rename from rust/sedona-functions/src/st_polygonize.rs rename to rust/sedona-functions/src/st_polygonize_agg.rs index 023265ed..477a6523 100644 --- a/rust/sedona-functions/src/st_polygonize.rs +++ b/rust/sedona-functions/src/st_polygonize_agg.rs @@ -20,28 +20,28 @@ use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Vol use sedona_expr::aggregate_udf::SedonaAggregateUDF; use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher}; -/// ST_Polygonize() aggregate UDF implementation +/// ST_Polygonize_Agg() aggregate UDF implementation /// /// Creates polygons from a set of linework that forms closed rings. -pub fn st_polygonize_udf() -> SedonaAggregateUDF { +pub fn st_polygonize_agg_udf() -> SedonaAggregateUDF { SedonaAggregateUDF::new_stub( - "st_polygonize", + "st_polygonize_agg", ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY), Volatility::Immutable, - Some(st_polygonize_doc()), + Some(st_polygonize_agg_doc()), ) } -fn st_polygonize_doc() -> Documentation { +fn st_polygonize_agg_doc() -> Documentation { Documentation::builder( DOC_SECTION_OTHER, "Creates a GeometryCollection containing polygons formed from the linework of a set of geometries. \ Returns an empty GeometryCollection if no polygons can be formed.", - "ST_Polygonize (geom: Geometry)", + "ST_Polygonize_Agg (geom: Geometry)", ) .with_argument("geom", "geometry: Input geometry (typically linestrings that form closed rings)") .with_sql_example( - "SELECT ST_AsText(ST_Polygonize(geom)) FROM (VALUES \ + "SELECT ST_AsText(ST_Polygonize_Agg(geom)) FROM (VALUES \ (ST_GeomFromText('LINESTRING (0 0, 10 0)')), \ (ST_GeomFromText('LINESTRING (10 0, 10 10)')), \ (ST_GeomFromText('LINESTRING (10 10, 0 0)')) \ @@ -58,8 +58,8 @@ mod test { #[test] fn udf_metadata() { - let udf: AggregateUDF = st_polygonize_udf().into(); - assert_eq!(udf.name(), "st_polygonize"); + let udf: AggregateUDF = st_polygonize_agg_udf().into(); + assert_eq!(udf.name(), "st_polygonize_agg"); assert!(udf.documentation().is_some()); } }