Skip to content

Commit ca6ddf8

Browse files
committed
refactor: reorganize array expressions in proto for LLM-friendly development
- Group all array expressions together with inline documentation
- Add comments guiding future array function additions
- Preserve existing field numbers for backwards compatibility
- No functional changes, pure organization
1 parent 8c8e108 commit ca6ddf8

File tree

3 files changed

+496
-213
lines changed

3 files changed

+496
-213
lines changed

protos/logical_plan/v1/expressions.proto

Lines changed: 120 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,65 @@ import "logical_plan/v1/enums.proto";
99
// Base message for all logical expressions
1010
message LogicalExpr {
1111
oneof expr_type {
12-
// Basic expressions
12+
// ============================================================================
13+
// Core Basic Expressions
14+
// ============================================================================
1315
ColumnExpr column = 1;
1416
LiteralExpr literal = 2;
1517
AliasExpr alias = 3;
1618
SortExpr sort = 4;
1719
IndexExpr index = 5;
18-
ArrayExpr array = 6;
1920
StructExpr struct = 7;
2021
CastExpr cast = 8;
2122
NotExpr not_expr = 9; // "_expr" suffix because `not` is a reserved keyword
2223
CoalesceExpr coalesce = 10;
2324
InExpr in_expr = 11; // "_expr" suffix because `in` is a reserved keyword
2425
IsNullExpr is_null = 12;
25-
ArrayLengthExpr array_length = 13;
26-
ArrayContainsExpr array_contains = 14;
26+
UnresolvedLiteralExpr unresolved_literal = 17;
27+
28+
// ============================================================================
29+
// Array Expressions
30+
// When adding new array functions:
31+
// - Add field definition here in this grouped section
32+
// - Use next available number in 170-199 range
33+
// - Follow naming pattern: array_<operation> (e.g. array_sort), or plain <operation> for general ops (e.g. flatten, element_at)
34+
// - Add corresponding message definition below (search for "Array message definitions")
35+
// - Add serde in: src/fenic/core/_serde/proto/expressions/basic.py
36+
// ============================================================================
37+
38+
// Array construction and introspection
39+
ArrayExpr array = 6; // Construct array from elements
40+
ArrayLengthExpr array_length = 13; // Get array length (size)
41+
42+
// Array element operations
43+
ArrayContainsExpr array_contains = 14; // Check if array contains element
44+
ElementAtExpr element_at = 182; // Get element at index (1-based, PySpark compat)
45+
ArraySliceExpr array_slice = 181; // Extract subarray (start, length)
46+
47+
// Array transformation operations
48+
ArrayDistinctExpr array_distinct = 18; // Remove duplicates
49+
ArrayMaxExpr array_max = 170; // Get max element (primitives only)
50+
ArrayMinExpr array_min = 171; // Get min element (primitives only)
51+
ArraySortExpr array_sort = 172; // Sort ascending (primitives only, no comparator)
52+
ArrayReverseExpr array_reverse = 173; // Reverse array order
53+
ArrayRemoveExpr array_remove = 174; // Remove all occurrences of element
54+
ArrayCompactExpr array_compact = 178; // Remove null values
55+
ArrayRepeatExpr array_repeat = 179; // Create array by repeating element n times
56+
FlattenExpr flatten = 180; // Flatten array of arrays (one level)
57+
58+
// Array set operations (return distinct elements)
59+
ArrayUnionExpr array_union = 175; // Union of two arrays (distinct)
60+
ArrayIntersectExpr array_intersect = 176; // Intersection of two arrays
61+
ArrayExceptExpr array_except = 177; // Elements in first but not second
62+
ArraysOverlapExpr arrays_overlap = 183; // Check if arrays have common elements
63+
64+
// Field numbers 184-199 are available for future array functions (next free: 184)
65+
66+
// ============================================================================
67+
// Comparison and Logic Expressions
68+
// ============================================================================
2769
GreatestExpr greatest = 15;
2870
LeastExpr least = 16;
29-
UnresolvedLiteralExpr unresolved_literal = 17;
3071

3172
// Binary expressions
3273
ArithmeticExpr arithmetic = 20;
@@ -190,15 +231,89 @@ message IsNullExpr {
190231
bool is_null = 2;
191232
}
192233

234+
// =============================================================================
235+
// Array Expression Message Definitions
236+
// When adding a new array function, add the message definition here.
237+
// =============================================================================
238+
193239
message ArrayLengthExpr {
194240
LogicalExpr expr = 1;
195241
}
196242

243+
message ArrayDistinctExpr {
244+
LogicalExpr expr = 1;
245+
}
246+
197247
message ArrayContainsExpr {
198248
LogicalExpr expr = 1;
199249
LogicalExpr other = 2;
200250
}
201251

252+
message ArrayMaxExpr {
253+
LogicalExpr expr = 1;
254+
}
255+
256+
message ArrayMinExpr {
257+
LogicalExpr expr = 1;
258+
}
259+
260+
message ArraySortExpr {
261+
LogicalExpr expr = 1;
262+
}
263+
264+
message ArrayReverseExpr {
265+
LogicalExpr expr = 1;
266+
}
267+
268+
message ArrayRemoveExpr {
269+
LogicalExpr expr = 1;
270+
LogicalExpr element = 2;
271+
}
272+
273+
message ArrayUnionExpr {
274+
LogicalExpr left = 1;
275+
LogicalExpr right = 2;
276+
}
277+
278+
message ArrayIntersectExpr {
279+
LogicalExpr left = 1;
280+
LogicalExpr right = 2;
281+
}
282+
283+
message ArrayExceptExpr {
284+
LogicalExpr left = 1;
285+
LogicalExpr right = 2;
286+
}
287+
288+
message ArrayCompactExpr {
289+
LogicalExpr expr = 1;
290+
}
291+
292+
message ArrayRepeatExpr {
293+
LogicalExpr element = 1;
294+
LogicalExpr count = 2;
295+
}
296+
297+
message FlattenExpr {
298+
LogicalExpr expr = 1;
299+
}
300+
301+
message ArraySliceExpr {
302+
LogicalExpr expr = 1;
303+
LogicalExpr start = 2;
304+
LogicalExpr length = 3;
305+
}
306+
307+
message ElementAtExpr {
308+
LogicalExpr expr = 1;
309+
LogicalExpr index = 2;
310+
}
311+
312+
message ArraysOverlapExpr {
313+
LogicalExpr left = 1;
314+
LogicalExpr right = 2;
315+
}
316+
202317
// Binary expressions
203318
message ArithmeticExpr {
204319
LogicalExpr left = 1;

0 commit comments

Comments
 (0)