From 87a801ebec5c6f7adab500fed20f24200043453a Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 09:18:21 +0530 Subject: [PATCH 01/23] metadata proto and code generation Signed-off-by: Sreekanth --- packages/pynumaflow/Makefile | 15 +++--- .../pynumaflow/proto/common/metadata.proto | 37 ++++++++++++++ .../pynumaflow/proto/common/metadata_pb2.py | 51 +++++++++++++++++++ .../pynumaflow/proto/common/metadata_pb2.pyi | 44 ++++++++++++++++ .../proto/common/metadata_pb2_grpc.py | 24 +++++++++ 5 files changed, 164 insertions(+), 7 deletions(-) create mode 100644 packages/pynumaflow/pynumaflow/proto/common/metadata.proto create mode 100644 packages/pynumaflow/pynumaflow/proto/common/metadata_pb2.py create mode 100644 packages/pynumaflow/pynumaflow/proto/common/metadata_pb2.pyi create mode 100644 packages/pynumaflow/pynumaflow/proto/common/metadata_pb2_grpc.py diff --git a/packages/pynumaflow/Makefile b/packages/pynumaflow/Makefile index 65e62b70..29a454be 100644 --- a/packages/pynumaflow/Makefile +++ b/packages/pynumaflow/Makefile @@ -26,10 +26,11 @@ setup: poetry install --with dev --no-root proto: - poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/sinker=pynumaflow/proto/sinker --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/sinker/*.proto - poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/mapper=pynumaflow/proto/mapper --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/mapper/*.proto - poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/reducer=pynumaflow/proto/reducer --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/reducer/*.proto - poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/sourcetransformer=pynumaflow/proto/sourcetransformer --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/sourcetransformer/*.proto - poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/sideinput=pynumaflow/proto/sideinput --pyi_out=. --python_out=. --grpc_python_out=. 
pynumaflow/proto/sideinput/*.proto - poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/sourcer=pynumaflow/proto/sourcer --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/sourcer/*.proto - poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/accumulator=pynumaflow/proto/accumulator --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/accumulator/*.proto + poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/common=pynumaflow/proto/common --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/common/*.proto + poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/sinker=pynumaflow/proto/sinker -Ipynumaflow/proto/common=pynumaflow/proto/common --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/sinker/*.proto + poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/mapper=pynumaflow/proto/mapper -Ipynumaflow/proto/common=pynumaflow/proto/common --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/mapper/*.proto + poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/reducer=pynumaflow/proto/reducer -Ipynumaflow/proto/common=pynumaflow/proto/common --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/reducer/*.proto + poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/sourcetransformer=pynumaflow/proto/sourcetransformer -Ipynumaflow/proto/common=pynumaflow/proto/common --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/sourcetransformer/*.proto + poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/sideinput=pynumaflow/proto/sideinput -Ipynumaflow/proto/common=pynumaflow/proto/common --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/sideinput/*.proto + poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/sourcer=pynumaflow/proto/sourcer -Ipynumaflow/proto/common=pynumaflow/proto/common --pyi_out=. --python_out=. --grpc_python_out=. 
pynumaflow/proto/sourcer/*.proto + poetry run python3 -m grpc_tools.protoc -Ipynumaflow/proto/accumulator=pynumaflow/proto/accumulator -Ipynumaflow/proto/common=pynumaflow/proto/common --pyi_out=. --python_out=. --grpc_python_out=. pynumaflow/proto/accumulator/*.proto diff --git a/packages/pynumaflow/pynumaflow/proto/common/metadata.proto b/packages/pynumaflow/pynumaflow/proto/common/metadata.proto new file mode 100644 index 00000000..6cd16df8 --- /dev/null +++ b/packages/pynumaflow/pynumaflow/proto/common/metadata.proto @@ -0,0 +1,37 @@ +/* +Copyright 2022 The Numaproj Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +syntax = "proto3"; +option go_package = "github.com/numaproj/numaflow-go/pkg/apis/proto/common"; + +package common; + +// Metadata is the metadata of the message +message Metadata { + // PreviousVertex is the name of the previous vertex + string previous_vertex = 1; + // SystemMetadata is the system metadata of the message + // Key of the map is the group name + map<string, KeyValueGroup> sys_metadata = 2; + // UserMetadata is the user metadata of the message + // Key of the map is the group name + map<string, KeyValueGroup> user_metadata = 3; +} + +// KeyValueGroup is a group of key-value pairs for a given group. 
+message KeyValueGroup { + map key_value = 1; +} \ No newline at end of file diff --git a/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2.py b/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2.py new file mode 100644 index 00000000..5ee288e1 --- /dev/null +++ b/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# NO CHECKED-IN PROTOBUF GENCODE +# source: pynumaflow/proto/common/metadata.proto +# Protobuf Python Version: 6.31.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 6, + 31, + 1, + '', + 'pynumaflow/proto/common/metadata.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n&pynumaflow/proto/common/metadata.proto\x12\x06\x63ommon\"\xae\x02\n\x08Metadata\x12\x17\n\x0fprevious_vertex\x18\x01 \x01(\t\x12\x37\n\x0csys_metadata\x18\x02 \x03(\x0b\x32!.common.Metadata.SysMetadataEntry\x12\x39\n\ruser_metadata\x18\x03 \x03(\x0b\x32\".common.Metadata.UserMetadataEntry\x1aI\n\x10SysMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12$\n\x05value\x18\x02 \x01(\x0b\x32\x15.common.KeyValueGroup:\x02\x38\x01\x1aJ\n\x11UserMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12$\n\x05value\x18\x02 \x01(\x0b\x32\x15.common.KeyValueGroup:\x02\x38\x01\"x\n\rKeyValueGroup\x12\x36\n\tkey_value\x18\x01 \x03(\x0b\x32#.common.KeyValueGroup.KeyValueEntry\x1a/\n\rKeyValueEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\x0c:\x02\x38\x01\x42\x37Z5github.com/numaproj/numaflow-go/pkg/apis/proto/commonb\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'pynumaflow.proto.common.metadata_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals['DESCRIPTOR']._loaded_options = None + _globals['DESCRIPTOR']._serialized_options = b'Z5github.com/numaproj/numaflow-go/pkg/apis/proto/common' + _globals['_METADATA_SYSMETADATAENTRY']._loaded_options = None + _globals['_METADATA_SYSMETADATAENTRY']._serialized_options = b'8\001' + _globals['_METADATA_USERMETADATAENTRY']._loaded_options = None + _globals['_METADATA_USERMETADATAENTRY']._serialized_options = b'8\001' + _globals['_KEYVALUEGROUP_KEYVALUEENTRY']._loaded_options = None + _globals['_KEYVALUEGROUP_KEYVALUEENTRY']._serialized_options = b'8\001' + _globals['_METADATA']._serialized_start=51 + _globals['_METADATA']._serialized_end=353 + _globals['_METADATA_SYSMETADATAENTRY']._serialized_start=204 + _globals['_METADATA_SYSMETADATAENTRY']._serialized_end=277 + _globals['_METADATA_USERMETADATAENTRY']._serialized_start=279 + _globals['_METADATA_USERMETADATAENTRY']._serialized_end=353 + _globals['_KEYVALUEGROUP']._serialized_start=355 + _globals['_KEYVALUEGROUP']._serialized_end=475 + _globals['_KEYVALUEGROUP_KEYVALUEENTRY']._serialized_start=428 + _globals['_KEYVALUEGROUP_KEYVALUEENTRY']._serialized_end=475 +# @@protoc_insertion_point(module_scope) diff --git a/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2.pyi b/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2.pyi new file mode 100644 index 00000000..c664af2f --- /dev/null +++ b/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2.pyi @@ -0,0 +1,44 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from collections.abc import Mapping as 
_Mapping +from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class Metadata(_message.Message): + __slots__ = ("previous_vertex", "sys_metadata", "user_metadata") + class SysMetadataEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: KeyValueGroup + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[KeyValueGroup, _Mapping]] = ...) -> None: ... + class UserMetadataEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: KeyValueGroup + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[KeyValueGroup, _Mapping]] = ...) -> None: ... + PREVIOUS_VERTEX_FIELD_NUMBER: _ClassVar[int] + SYS_METADATA_FIELD_NUMBER: _ClassVar[int] + USER_METADATA_FIELD_NUMBER: _ClassVar[int] + previous_vertex: str + sys_metadata: _containers.MessageMap[str, KeyValueGroup] + user_metadata: _containers.MessageMap[str, KeyValueGroup] + def __init__(self, previous_vertex: _Optional[str] = ..., sys_metadata: _Optional[_Mapping[str, KeyValueGroup]] = ..., user_metadata: _Optional[_Mapping[str, KeyValueGroup]] = ...) -> None: ... + +class KeyValueGroup(_message.Message): + __slots__ = ("key_value",) + class KeyValueEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: bytes + def __init__(self, key: _Optional[str] = ..., value: _Optional[bytes] = ...) -> None: ... + KEY_VALUE_FIELD_NUMBER: _ClassVar[int] + key_value: _containers.ScalarMap[str, bytes] + def __init__(self, key_value: _Optional[_Mapping[str, bytes]] = ...) -> None: ... 
diff --git a/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2_grpc.py b/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2_grpc.py new file mode 100644 index 00000000..2e60d47a --- /dev/null +++ b/packages/pynumaflow/pynumaflow/proto/common/metadata_pb2_grpc.py @@ -0,0 +1,24 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import warnings + + +GRPC_GENERATED_VERSION = '1.75.0' +GRPC_VERSION = grpc.__version__ +_version_not_supported = False + +try: + from grpc._utilities import first_version_is_lower + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + raise RuntimeError( + f'The grpc package installed is at version {GRPC_VERSION},' + + f' but the generated code in pynumaflow/proto/common/metadata_pb2_grpc.py depends on' + + f' grpcio>={GRPC_GENERATED_VERSION}.' + + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' 
+ ) From 7e285275b3c6dd373114758b8d837e6ff5f4f0b2 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 09:20:47 +0530 Subject: [PATCH 02/23] include metadata in accumulator Signed-off-by: Sreekanth --- .../proto/accumulator/accumulator.proto | 3 ++ .../proto/accumulator/accumulator_pb2.py | 39 ++++++++++--------- .../proto/accumulator/accumulator_pb2.pyi | 7 +++- 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator.proto b/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator.proto index acde986b..111f2f9e 100644 --- a/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator.proto +++ b/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator.proto @@ -21,6 +21,7 @@ option java_package = "io.numaproj.numaflow.accumulator.v1"; import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; +import "pynumaflow/proto/common/metadata.proto"; package accumulator.v1; @@ -44,6 +45,8 @@ message Payload { google.protobuf.Timestamp watermark = 4; string id = 5; map<string, string> headers = 6; + // metadata of the message + common.Metadata metadata = 7; } // AccumulatorRequest represents a request element. 
diff --git a/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator_pb2.py b/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator_pb2.py index f12f012f..d173e34e 100644 --- a/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator_pb2.py +++ b/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator_pb2.py @@ -24,9 +24,10 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 +from pynumaflow.proto.common import metadata_pb2 as pynumaflow_dot_proto_dot_common_dot_metadata__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n.pynumaflow/proto/accumulator/accumulator.proto\x12\x0e\x61\x63\x63umulator.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\xf8\x01\n\x07Payload\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\n\n\x02id\x18\x05 \x01(\t\x12\x35\n\x07headers\x18\x06 \x03(\x0b\x32$.accumulator.v1.Payload.HeadersEntry\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xbe\x02\n\x12\x41\x63\x63umulatorRequest\x12(\n\x07payload\x18\x01 \x01(\x0b\x32\x17.accumulator.v1.Payload\x12\x45\n\toperation\x18\x02 \x01(\x0b\x32\x32.accumulator.v1.AccumulatorRequest.WindowOperation\x1a\xb6\x01\n\x0fWindowOperation\x12G\n\x05\x65vent\x18\x01 \x01(\x0e\x32\x38.accumulator.v1.AccumulatorRequest.WindowOperation.Event\x12\x30\n\x0bkeyedWindow\x18\x02 \x01(\x0b\x32\x1b.accumulator.v1.KeyedWindow\"(\n\x05\x45vent\x12\x08\n\x04OPEN\x10\x00\x12\t\n\x05\x43LOSE\x10\x01\x12\n\n\x06\x41PPEND\x10\x02\"}\n\x0bKeyedWindow\x12)\n\x05start\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\'\n\x03\x65nd\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04slot\x18\x03 
\x01(\t\x12\x0c\n\x04keys\x18\x04 \x03(\t\"\x87\x01\n\x13\x41\x63\x63umulatorResponse\x12(\n\x07payload\x18\x01 \x01(\x0b\x32\x17.accumulator.v1.Payload\x12+\n\x06window\x18\x02 \x01(\x0b\x32\x1b.accumulator.v1.KeyedWindow\x12\x0c\n\x04tags\x18\x03 \x03(\t\x12\x0b\n\x03\x45OF\x18\x04 \x01(\x08\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32\xac\x01\n\x0b\x41\x63\x63umulator\x12[\n\x0c\x41\x63\x63umulateFn\x12\".accumulator.v1.AccumulatorRequest\x1a#.accumulator.v1.AccumulatorResponse(\x01\x30\x01\x12@\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x1d.accumulator.v1.ReadyResponseBd\n#io.numaproj.numaflow.accumulator.v1Z=github.com/numaproj/numaflow-go/pkg/apis/proto/accumulator/v1b\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n.pynumaflow/proto/accumulator/accumulator.proto\x12\x0e\x61\x63\x63umulator.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&pynumaflow/proto/common/metadata.proto\"\x9c\x02\n\x07Payload\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\n\n\x02id\x18\x05 \x01(\t\x12\x35\n\x07headers\x18\x06 \x03(\x0b\x32$.accumulator.v1.Payload.HeadersEntry\x12\"\n\x08metadata\x18\x07 \x01(\x0b\x32\x10.common.Metadata\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xbe\x02\n\x12\x41\x63\x63umulatorRequest\x12(\n\x07payload\x18\x01 \x01(\x0b\x32\x17.accumulator.v1.Payload\x12\x45\n\toperation\x18\x02 \x01(\x0b\x32\x32.accumulator.v1.AccumulatorRequest.WindowOperation\x1a\xb6\x01\n\x0fWindowOperation\x12G\n\x05\x65vent\x18\x01 \x01(\x0e\x32\x38.accumulator.v1.AccumulatorRequest.WindowOperation.Event\x12\x30\n\x0bkeyedWindow\x18\x02 
\x01(\x0b\x32\x1b.accumulator.v1.KeyedWindow\"(\n\x05\x45vent\x12\x08\n\x04OPEN\x10\x00\x12\t\n\x05\x43LOSE\x10\x01\x12\n\n\x06\x41PPEND\x10\x02\"}\n\x0bKeyedWindow\x12)\n\x05start\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\'\n\x03\x65nd\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04slot\x18\x03 \x01(\t\x12\x0c\n\x04keys\x18\x04 \x03(\t\"\x87\x01\n\x13\x41\x63\x63umulatorResponse\x12(\n\x07payload\x18\x01 \x01(\x0b\x32\x17.accumulator.v1.Payload\x12+\n\x06window\x18\x02 \x01(\x0b\x32\x1b.accumulator.v1.KeyedWindow\x12\x0c\n\x04tags\x18\x03 \x03(\t\x12\x0b\n\x03\x45OF\x18\x04 \x01(\x08\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32\xac\x01\n\x0b\x41\x63\x63umulator\x12[\n\x0c\x41\x63\x63umulateFn\x12\".accumulator.v1.AccumulatorRequest\x1a#.accumulator.v1.AccumulatorResponse(\x01\x30\x01\x12@\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x1d.accumulator.v1.ReadyResponseBd\n#io.numaproj.numaflow.accumulator.v1Z=github.com/numaproj/numaflow-go/pkg/apis/proto/accumulator/v1b\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -36,22 +37,22 @@ _globals['DESCRIPTOR']._serialized_options = b'\n#io.numaproj.numaflow.accumulator.v1Z=github.com/numaproj/numaflow-go/pkg/apis/proto/accumulator/v1' _globals['_PAYLOAD_HEADERSENTRY']._loaded_options = None _globals['_PAYLOAD_HEADERSENTRY']._serialized_options = b'8\001' - _globals['_PAYLOAD']._serialized_start=129 - _globals['_PAYLOAD']._serialized_end=377 - _globals['_PAYLOAD_HEADERSENTRY']._serialized_start=331 - _globals['_PAYLOAD_HEADERSENTRY']._serialized_end=377 - _globals['_ACCUMULATORREQUEST']._serialized_start=380 - _globals['_ACCUMULATORREQUEST']._serialized_end=698 - _globals['_ACCUMULATORREQUEST_WINDOWOPERATION']._serialized_start=516 - _globals['_ACCUMULATORREQUEST_WINDOWOPERATION']._serialized_end=698 - _globals['_ACCUMULATORREQUEST_WINDOWOPERATION_EVENT']._serialized_start=658 - 
_globals['_ACCUMULATORREQUEST_WINDOWOPERATION_EVENT']._serialized_end=698 - _globals['_KEYEDWINDOW']._serialized_start=700 - _globals['_KEYEDWINDOW']._serialized_end=825 - _globals['_ACCUMULATORRESPONSE']._serialized_start=828 - _globals['_ACCUMULATORRESPONSE']._serialized_end=963 - _globals['_READYRESPONSE']._serialized_start=965 - _globals['_READYRESPONSE']._serialized_end=995 - _globals['_ACCUMULATOR']._serialized_start=998 - _globals['_ACCUMULATOR']._serialized_end=1170 + _globals['_PAYLOAD']._serialized_start=169 + _globals['_PAYLOAD']._serialized_end=453 + _globals['_PAYLOAD_HEADERSENTRY']._serialized_start=407 + _globals['_PAYLOAD_HEADERSENTRY']._serialized_end=453 + _globals['_ACCUMULATORREQUEST']._serialized_start=456 + _globals['_ACCUMULATORREQUEST']._serialized_end=774 + _globals['_ACCUMULATORREQUEST_WINDOWOPERATION']._serialized_start=592 + _globals['_ACCUMULATORREQUEST_WINDOWOPERATION']._serialized_end=774 + _globals['_ACCUMULATORREQUEST_WINDOWOPERATION_EVENT']._serialized_start=734 + _globals['_ACCUMULATORREQUEST_WINDOWOPERATION_EVENT']._serialized_end=774 + _globals['_KEYEDWINDOW']._serialized_start=776 + _globals['_KEYEDWINDOW']._serialized_end=901 + _globals['_ACCUMULATORRESPONSE']._serialized_start=904 + _globals['_ACCUMULATORRESPONSE']._serialized_end=1039 + _globals['_READYRESPONSE']._serialized_start=1041 + _globals['_READYRESPONSE']._serialized_end=1071 + _globals['_ACCUMULATOR']._serialized_start=1074 + _globals['_ACCUMULATOR']._serialized_end=1246 # @@protoc_insertion_point(module_scope) diff --git a/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator_pb2.pyi b/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator_pb2.pyi index e0630819..722c2b90 100644 --- a/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator_pb2.pyi +++ b/packages/pynumaflow/pynumaflow/proto/accumulator/accumulator_pb2.pyi @@ -2,6 +2,7 @@ import datetime from google.protobuf import empty_pb2 as _empty_pb2 from google.protobuf import timestamp_pb2 
as _timestamp_pb2 +from pynumaflow.proto.common import metadata_pb2 as _metadata_pb2 from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor @@ -12,7 +13,7 @@ from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor class Payload(_message.Message): - __slots__ = ("keys", "value", "event_time", "watermark", "id", "headers") + __slots__ = ("keys", "value", "event_time", "watermark", "id", "headers", "metadata") class HeadersEntry(_message.Message): __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] @@ -26,13 +27,15 @@ class Payload(_message.Message): WATERMARK_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] HEADERS_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] keys: _containers.RepeatedScalarFieldContainer[str] value: bytes event_time: _timestamp_pb2.Timestamp watermark: _timestamp_pb2.Timestamp id: str headers: _containers.ScalarMap[str, str] - def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., id: _Optional[str] = ..., headers: _Optional[_Mapping[str, str]] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., id: _Optional[str] = ..., headers: _Optional[_Mapping[str, str]] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... 
class AccumulatorRequest(_message.Message): __slots__ = ("payload", "operation") From 940b19829d2c659392cdad1704b2a2fba677f0c9 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 09:24:33 +0530 Subject: [PATCH 03/23] include metadata in map request/response Signed-off-by: Sreekanth --- .../pynumaflow/proto/mapper/map.proto | 5 +++ .../pynumaflow/proto/mapper/map_pb2.py | 39 ++++++++++--------- .../pynumaflow/proto/mapper/map_pb2.pyi | 13 +++++-- 3 files changed, 34 insertions(+), 23 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/proto/mapper/map.proto b/packages/pynumaflow/pynumaflow/proto/mapper/map.proto index 6f8c78d5..58518d94 100644 --- a/packages/pynumaflow/pynumaflow/proto/mapper/map.proto +++ b/packages/pynumaflow/pynumaflow/proto/mapper/map.proto @@ -4,6 +4,7 @@ option go_package = "github.com/numaproj/numaflow-go/pkg/apis/proto/map/v1"; import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; +import "pynumaflow/proto/common/metadata.proto"; package map.v1; @@ -25,6 +26,8 @@ message MapRequest { google.protobuf.Timestamp event_time = 3; google.protobuf.Timestamp watermark = 4; map<string, string> headers = 5; + // metadata of the message + common.Metadata metadata = 6; } Request request = 1; // This ID is used to uniquely identify a map request @@ -56,6 +59,8 @@ message MapResponse { repeated string keys = 1; bytes value = 2; repeated string tags = 3; + // metadata of the message + common.Metadata metadata = 4; } repeated Result results = 1; // This ID is used to refer the responses to the request it corresponds to. 
diff --git a/packages/pynumaflow/pynumaflow/proto/mapper/map_pb2.py b/packages/pynumaflow/pynumaflow/proto/mapper/map_pb2.py index a9bb7332..924d5f74 100644 --- a/packages/pynumaflow/pynumaflow/proto/mapper/map_pb2.py +++ b/packages/pynumaflow/pynumaflow/proto/mapper/map_pb2.py @@ -24,9 +24,10 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 +from pynumaflow.proto.common import metadata_pb2 as pynumaflow_dot_proto_dot_common_dot_metadata__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n!pynumaflow/proto/mapper/map.proto\x12\x06map.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\xac\x03\n\nMapRequest\x12+\n\x07request\x18\x01 \x01(\x0b\x32\x1a.map.v1.MapRequest.Request\x12\n\n\x02id\x18\x02 \x01(\t\x12)\n\thandshake\x18\x03 \x01(\x0b\x32\x11.map.v1.HandshakeH\x00\x88\x01\x01\x12/\n\x06status\x18\x04 \x01(\x0b\x32\x1a.map.v1.TransmissionStatusH\x01\x88\x01\x01\x1a\xef\x01\n\x07Request\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x38\n\x07headers\x18\x05 \x03(\x0b\x32\'.map.v1.MapRequest.Request.HeadersEntry\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_handshakeB\t\n\x07_status\"\x18\n\tHandshake\x12\x0b\n\x03sot\x18\x01 \x01(\x08\"!\n\x12TransmissionStatus\x12\x0b\n\x03\x65ot\x18\x01 \x01(\x08\"\xf0\x01\n\x0bMapResponse\x12+\n\x07results\x18\x01 \x03(\x0b\x32\x1a.map.v1.MapResponse.Result\x12\n\n\x02id\x18\x02 \x01(\t\x12)\n\thandshake\x18\x03 \x01(\x0b\x32\x11.map.v1.HandshakeH\x00\x88\x01\x01\x12/\n\x06status\x18\x04 \x01(\x0b\x32\x1a.map.v1.TransmissionStatusH\x01\x88\x01\x01\x1a\x33\n\x06Result\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 
\x01(\x0c\x12\x0c\n\x04tags\x18\x03 \x03(\tB\x0c\n\n_handshakeB\t\n\x07_status\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32u\n\x03Map\x12\x34\n\x05MapFn\x12\x12.map.v1.MapRequest\x1a\x13.map.v1.MapResponse(\x01\x30\x01\x12\x38\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x15.map.v1.ReadyResponseB7Z5github.com/numaproj/numaflow-go/pkg/apis/proto/map/v1b\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n!pynumaflow/proto/mapper/map.proto\x12\x06map.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&pynumaflow/proto/common/metadata.proto\"\xd0\x03\n\nMapRequest\x12+\n\x07request\x18\x01 \x01(\x0b\x32\x1a.map.v1.MapRequest.Request\x12\n\n\x02id\x18\x02 \x01(\t\x12)\n\thandshake\x18\x03 \x01(\x0b\x32\x11.map.v1.HandshakeH\x00\x88\x01\x01\x12/\n\x06status\x18\x04 \x01(\x0b\x32\x1a.map.v1.TransmissionStatusH\x01\x88\x01\x01\x1a\x93\x02\n\x07Request\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x38\n\x07headers\x18\x05 \x03(\x0b\x32\'.map.v1.MapRequest.Request.HeadersEntry\x12\"\n\x08metadata\x18\x06 \x01(\x0b\x32\x10.common.Metadata\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_handshakeB\t\n\x07_status\"\x18\n\tHandshake\x12\x0b\n\x03sot\x18\x01 \x01(\x08\"!\n\x12TransmissionStatus\x12\x0b\n\x03\x65ot\x18\x01 \x01(\x08\"\x94\x02\n\x0bMapResponse\x12+\n\x07results\x18\x01 \x03(\x0b\x32\x1a.map.v1.MapResponse.Result\x12\n\n\x02id\x18\x02 \x01(\t\x12)\n\thandshake\x18\x03 \x01(\x0b\x32\x11.map.v1.HandshakeH\x00\x88\x01\x01\x12/\n\x06status\x18\x04 \x01(\x0b\x32\x1a.map.v1.TransmissionStatusH\x01\x88\x01\x01\x1aW\n\x06Result\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12\x0c\n\x04tags\x18\x03 \x03(\t\x12\"\n\x08metadata\x18\x04 
\x01(\x0b\x32\x10.common.MetadataB\x0c\n\n_handshakeB\t\n\x07_status\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32u\n\x03Map\x12\x34\n\x05MapFn\x12\x12.map.v1.MapRequest\x1a\x13.map.v1.MapResponse(\x01\x30\x01\x12\x38\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x15.map.v1.ReadyResponseB7Z5github.com/numaproj/numaflow-go/pkg/apis/proto/map/v1b\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -36,22 +37,22 @@ _globals['DESCRIPTOR']._serialized_options = b'Z5github.com/numaproj/numaflow-go/pkg/apis/proto/map/v1' _globals['_MAPREQUEST_REQUEST_HEADERSENTRY']._loaded_options = None _globals['_MAPREQUEST_REQUEST_HEADERSENTRY']._serialized_options = b'8\001' - _globals['_MAPREQUEST']._serialized_start=108 - _globals['_MAPREQUEST']._serialized_end=536 - _globals['_MAPREQUEST_REQUEST']._serialized_start=272 - _globals['_MAPREQUEST_REQUEST']._serialized_end=511 - _globals['_MAPREQUEST_REQUEST_HEADERSENTRY']._serialized_start=465 - _globals['_MAPREQUEST_REQUEST_HEADERSENTRY']._serialized_end=511 - _globals['_HANDSHAKE']._serialized_start=538 - _globals['_HANDSHAKE']._serialized_end=562 - _globals['_TRANSMISSIONSTATUS']._serialized_start=564 - _globals['_TRANSMISSIONSTATUS']._serialized_end=597 - _globals['_MAPRESPONSE']._serialized_start=600 - _globals['_MAPRESPONSE']._serialized_end=840 - _globals['_MAPRESPONSE_RESULT']._serialized_start=764 - _globals['_MAPRESPONSE_RESULT']._serialized_end=815 - _globals['_READYRESPONSE']._serialized_start=842 - _globals['_READYRESPONSE']._serialized_end=872 - _globals['_MAP']._serialized_start=874 - _globals['_MAP']._serialized_end=991 + _globals['_MAPREQUEST']._serialized_start=148 + _globals['_MAPREQUEST']._serialized_end=612 + _globals['_MAPREQUEST_REQUEST']._serialized_start=312 + _globals['_MAPREQUEST_REQUEST']._serialized_end=587 + _globals['_MAPREQUEST_REQUEST_HEADERSENTRY']._serialized_start=541 + _globals['_MAPREQUEST_REQUEST_HEADERSENTRY']._serialized_end=587 + 
_globals['_HANDSHAKE']._serialized_start=614 + _globals['_HANDSHAKE']._serialized_end=638 + _globals['_TRANSMISSIONSTATUS']._serialized_start=640 + _globals['_TRANSMISSIONSTATUS']._serialized_end=673 + _globals['_MAPRESPONSE']._serialized_start=676 + _globals['_MAPRESPONSE']._serialized_end=952 + _globals['_MAPRESPONSE_RESULT']._serialized_start=840 + _globals['_MAPRESPONSE_RESULT']._serialized_end=927 + _globals['_READYRESPONSE']._serialized_start=954 + _globals['_READYRESPONSE']._serialized_end=984 + _globals['_MAP']._serialized_start=986 + _globals['_MAP']._serialized_end=1103 # @@protoc_insertion_point(module_scope) diff --git a/packages/pynumaflow/pynumaflow/proto/mapper/map_pb2.pyi b/packages/pynumaflow/pynumaflow/proto/mapper/map_pb2.pyi index ff5e5d84..1f94a05f 100644 --- a/packages/pynumaflow/pynumaflow/proto/mapper/map_pb2.pyi +++ b/packages/pynumaflow/pynumaflow/proto/mapper/map_pb2.pyi @@ -2,6 +2,7 @@ import datetime from google.protobuf import empty_pb2 as _empty_pb2 from google.protobuf import timestamp_pb2 as _timestamp_pb2 +from pynumaflow.proto.common import metadata_pb2 as _metadata_pb2 from google.protobuf.internal import containers as _containers from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -13,7 +14,7 @@ DESCRIPTOR: _descriptor.FileDescriptor class MapRequest(_message.Message): __slots__ = ("request", "id", "handshake", "status") class Request(_message.Message): - __slots__ = ("keys", "value", "event_time", "watermark", "headers") + __slots__ = ("keys", "value", "event_time", "watermark", "headers", "metadata") class HeadersEntry(_message.Message): __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] @@ -26,12 +27,14 @@ class MapRequest(_message.Message): EVENT_TIME_FIELD_NUMBER: _ClassVar[int] WATERMARK_FIELD_NUMBER: _ClassVar[int] HEADERS_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] keys: _containers.RepeatedScalarFieldContainer[str] value: bytes 
event_time: _timestamp_pb2.Timestamp watermark: _timestamp_pb2.Timestamp headers: _containers.ScalarMap[str, str] - def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., headers: _Optional[_Mapping[str, str]] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., headers: _Optional[_Mapping[str, str]] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... REQUEST_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] @@ -57,14 +60,16 @@ class TransmissionStatus(_message.Message): class MapResponse(_message.Message): __slots__ = ("results", "id", "handshake", "status") class Result(_message.Message): - __slots__ = ("keys", "value", "tags") + __slots__ = ("keys", "value", "tags", "metadata") KEYS_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] TAGS_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] keys: _containers.RepeatedScalarFieldContainer[str] value: bytes tags: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., tags: _Optional[_Iterable[str]] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., tags: _Optional[_Iterable[str]] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... 
RESULTS_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] From 354865154407c7d4a362919c6f62fb361055cf5b Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 09:26:45 +0530 Subject: [PATCH 04/23] include metadata in reduce request/response Signed-off-by: Sreekanth --- .../pynumaflow/proto/reducer/reduce.proto | 5 +++ .../pynumaflow/proto/reducer/reduce_pb2.py | 43 ++++++++++--------- .../pynumaflow/proto/reducer/reduce_pb2.pyi | 13 ++++-- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/proto/reducer/reduce.proto b/packages/pynumaflow/pynumaflow/proto/reducer/reduce.proto index 1e21390a..0ce661c3 100644 --- a/packages/pynumaflow/pynumaflow/proto/reducer/reduce.proto +++ b/packages/pynumaflow/pynumaflow/proto/reducer/reduce.proto @@ -2,6 +2,7 @@ syntax = "proto3"; import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; +import "pynumaflow/proto/common/metadata.proto"; package reduce.v1; @@ -38,6 +39,8 @@ message ReduceRequest { google.protobuf.Timestamp event_time = 3; google.protobuf.Timestamp watermark = 4; map headers = 5; + // metadata of the message + common.Metadata metadata = 6; } Payload payload = 1; @@ -61,6 +64,8 @@ message ReduceResponse { repeated string keys = 1; bytes value = 2; repeated string tags = 3; + // metadata of the message + common.Metadata metadata = 4; } Result result = 1; diff --git a/packages/pynumaflow/pynumaflow/proto/reducer/reduce_pb2.py b/packages/pynumaflow/pynumaflow/proto/reducer/reduce_pb2.py index 9ed9fe19..d1686f10 100644 --- a/packages/pynumaflow/pynumaflow/proto/reducer/reduce_pb2.py +++ b/packages/pynumaflow/pynumaflow/proto/reducer/reduce_pb2.py @@ -24,9 +24,10 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 +from pynumaflow.proto.common import metadata_pb2 as 
pynumaflow_dot_proto_dot_common_dot_metadata__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n%pynumaflow/proto/reducer/reduce.proto\x12\treduce.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\x98\x04\n\rReduceRequest\x12\x31\n\x07payload\x18\x01 \x01(\x0b\x32 .reduce.v1.ReduceRequest.Payload\x12;\n\toperation\x18\x02 \x01(\x0b\x32(.reduce.v1.ReduceRequest.WindowOperation\x1a\x9e\x01\n\x0fWindowOperation\x12=\n\x05\x65vent\x18\x01 \x01(\x0e\x32..reduce.v1.ReduceRequest.WindowOperation.Event\x12\"\n\x07windows\x18\x02 \x03(\x0b\x32\x11.reduce.v1.Window\"(\n\x05\x45vent\x12\x08\n\x04OPEN\x10\x00\x12\t\n\x05\x43LOSE\x10\x01\x12\n\n\x06\x41PPEND\x10\x04\x1a\xf5\x01\n\x07Payload\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12>\n\x07headers\x18\x05 \x03(\x0b\x32-.reduce.v1.ReduceRequest.Payload.HeadersEntry\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"j\n\x06Window\x12)\n\x05start\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\'\n\x03\x65nd\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04slot\x18\x03 \x01(\t\"\xa7\x01\n\x0eReduceResponse\x12\x30\n\x06result\x18\x01 \x01(\x0b\x32 .reduce.v1.ReduceResponse.Result\x12!\n\x06window\x18\x02 \x01(\x0b\x32\x11.reduce.v1.Window\x12\x0b\n\x03\x45OF\x18\x03 \x01(\x08\x1a\x33\n\x06Result\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12\x0c\n\x04tags\x18\x03 \x03(\t\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32\x8a\x01\n\x06Reduce\x12\x43\n\x08ReduceFn\x12\x18.reduce.v1.ReduceRequest\x1a\x19.reduce.v1.ReduceResponse(\x01\x30\x01\x12;\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x18.reduce.v1.ReadyResponseb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n%pynumaflow/proto/reducer/reduce.proto\x12\treduce.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&pynumaflow/proto/common/metadata.proto\"\xbc\x04\n\rReduceRequest\x12\x31\n\x07payload\x18\x01 \x01(\x0b\x32 .reduce.v1.ReduceRequest.Payload\x12;\n\toperation\x18\x02 \x01(\x0b\x32(.reduce.v1.ReduceRequest.WindowOperation\x1a\x9e\x01\n\x0fWindowOperation\x12=\n\x05\x65vent\x18\x01 \x01(\x0e\x32..reduce.v1.ReduceRequest.WindowOperation.Event\x12\"\n\x07windows\x18\x02 \x03(\x0b\x32\x11.reduce.v1.Window\"(\n\x05\x45vent\x12\x08\n\x04OPEN\x10\x00\x12\t\n\x05\x43LOSE\x10\x01\x12\n\n\x06\x41PPEND\x10\x04\x1a\x99\x02\n\x07Payload\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12>\n\x07headers\x18\x05 \x03(\x0b\x32-.reduce.v1.ReduceRequest.Payload.HeadersEntry\x12\"\n\x08metadata\x18\x06 \x01(\x0b\x32\x10.common.Metadata\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"j\n\x06Window\x12)\n\x05start\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\'\n\x03\x65nd\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04slot\x18\x03 \x01(\t\"\xcb\x01\n\x0eReduceResponse\x12\x30\n\x06result\x18\x01 \x01(\x0b\x32 .reduce.v1.ReduceResponse.Result\x12!\n\x06window\x18\x02 \x01(\x0b\x32\x11.reduce.v1.Window\x12\x0b\n\x03\x45OF\x18\x03 \x01(\x08\x1aW\n\x06Result\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12\x0c\n\x04tags\x18\x03 \x03(\t\x12\"\n\x08metadata\x18\x04 \x01(\x0b\x32\x10.common.Metadata\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 
\x01(\x08\x32\x8a\x01\n\x06Reduce\x12\x43\n\x08ReduceFn\x12\x18.reduce.v1.ReduceRequest\x1a\x19.reduce.v1.ReduceResponse(\x01\x30\x01\x12;\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x18.reduce.v1.ReadyResponseb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -35,24 +36,24 @@ DESCRIPTOR._loaded_options = None _globals['_REDUCEREQUEST_PAYLOAD_HEADERSENTRY']._loaded_options = None _globals['_REDUCEREQUEST_PAYLOAD_HEADERSENTRY']._serialized_options = b'8\001' - _globals['_REDUCEREQUEST']._serialized_start=115 - _globals['_REDUCEREQUEST']._serialized_end=651 - _globals['_REDUCEREQUEST_WINDOWOPERATION']._serialized_start=245 - _globals['_REDUCEREQUEST_WINDOWOPERATION']._serialized_end=403 - _globals['_REDUCEREQUEST_WINDOWOPERATION_EVENT']._serialized_start=363 - _globals['_REDUCEREQUEST_WINDOWOPERATION_EVENT']._serialized_end=403 - _globals['_REDUCEREQUEST_PAYLOAD']._serialized_start=406 - _globals['_REDUCEREQUEST_PAYLOAD']._serialized_end=651 - _globals['_REDUCEREQUEST_PAYLOAD_HEADERSENTRY']._serialized_start=605 - _globals['_REDUCEREQUEST_PAYLOAD_HEADERSENTRY']._serialized_end=651 - _globals['_WINDOW']._serialized_start=653 - _globals['_WINDOW']._serialized_end=759 - _globals['_REDUCERESPONSE']._serialized_start=762 - _globals['_REDUCERESPONSE']._serialized_end=929 - _globals['_REDUCERESPONSE_RESULT']._serialized_start=878 - _globals['_REDUCERESPONSE_RESULT']._serialized_end=929 - _globals['_READYRESPONSE']._serialized_start=931 - _globals['_READYRESPONSE']._serialized_end=961 - _globals['_REDUCE']._serialized_start=964 - _globals['_REDUCE']._serialized_end=1102 + _globals['_REDUCEREQUEST']._serialized_start=155 + _globals['_REDUCEREQUEST']._serialized_end=727 + _globals['_REDUCEREQUEST_WINDOWOPERATION']._serialized_start=285 + _globals['_REDUCEREQUEST_WINDOWOPERATION']._serialized_end=443 + _globals['_REDUCEREQUEST_WINDOWOPERATION_EVENT']._serialized_start=403 + 
_globals['_REDUCEREQUEST_WINDOWOPERATION_EVENT']._serialized_end=443 + _globals['_REDUCEREQUEST_PAYLOAD']._serialized_start=446 + _globals['_REDUCEREQUEST_PAYLOAD']._serialized_end=727 + _globals['_REDUCEREQUEST_PAYLOAD_HEADERSENTRY']._serialized_start=681 + _globals['_REDUCEREQUEST_PAYLOAD_HEADERSENTRY']._serialized_end=727 + _globals['_WINDOW']._serialized_start=729 + _globals['_WINDOW']._serialized_end=835 + _globals['_REDUCERESPONSE']._serialized_start=838 + _globals['_REDUCERESPONSE']._serialized_end=1041 + _globals['_REDUCERESPONSE_RESULT']._serialized_start=954 + _globals['_REDUCERESPONSE_RESULT']._serialized_end=1041 + _globals['_READYRESPONSE']._serialized_start=1043 + _globals['_READYRESPONSE']._serialized_end=1073 + _globals['_REDUCE']._serialized_start=1076 + _globals['_REDUCE']._serialized_end=1214 # @@protoc_insertion_point(module_scope) diff --git a/packages/pynumaflow/pynumaflow/proto/reducer/reduce_pb2.pyi b/packages/pynumaflow/pynumaflow/proto/reducer/reduce_pb2.pyi index 4a1551ee..eb955cf9 100644 --- a/packages/pynumaflow/pynumaflow/proto/reducer/reduce_pb2.pyi +++ b/packages/pynumaflow/pynumaflow/proto/reducer/reduce_pb2.pyi @@ -2,6 +2,7 @@ import datetime from google.protobuf import empty_pb2 as _empty_pb2 from google.protobuf import timestamp_pb2 as _timestamp_pb2 +from pynumaflow.proto.common import metadata_pb2 as _metadata_pb2 from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor @@ -29,7 +30,7 @@ class ReduceRequest(_message.Message): windows: _containers.RepeatedCompositeFieldContainer[Window] def __init__(self, event: _Optional[_Union[ReduceRequest.WindowOperation.Event, str]] = ..., windows: _Optional[_Iterable[_Union[Window, _Mapping]]] = ...) -> None: ... 
class Payload(_message.Message): - __slots__ = ("keys", "value", "event_time", "watermark", "headers") + __slots__ = ("keys", "value", "event_time", "watermark", "headers", "metadata") class HeadersEntry(_message.Message): __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] @@ -42,12 +43,14 @@ class ReduceRequest(_message.Message): EVENT_TIME_FIELD_NUMBER: _ClassVar[int] WATERMARK_FIELD_NUMBER: _ClassVar[int] HEADERS_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] keys: _containers.RepeatedScalarFieldContainer[str] value: bytes event_time: _timestamp_pb2.Timestamp watermark: _timestamp_pb2.Timestamp headers: _containers.ScalarMap[str, str] - def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., headers: _Optional[_Mapping[str, str]] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., headers: _Optional[_Mapping[str, str]] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... 
PAYLOAD_FIELD_NUMBER: _ClassVar[int] OPERATION_FIELD_NUMBER: _ClassVar[int] payload: ReduceRequest.Payload @@ -67,14 +70,16 @@ class Window(_message.Message): class ReduceResponse(_message.Message): __slots__ = ("result", "window", "EOF") class Result(_message.Message): - __slots__ = ("keys", "value", "tags") + __slots__ = ("keys", "value", "tags", "metadata") KEYS_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] TAGS_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] keys: _containers.RepeatedScalarFieldContainer[str] value: bytes tags: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., tags: _Optional[_Iterable[str]] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., tags: _Optional[_Iterable[str]] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... 
RESULT_FIELD_NUMBER: _ClassVar[int] WINDOW_FIELD_NUMBER: _ClassVar[int] EOF_FIELD_NUMBER: _ClassVar[int] From 62b3eab08573a7329ed0ab93d436ad054b3352b9 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 09:28:02 +0530 Subject: [PATCH 05/23] include metadata in sideinput request/response Signed-off-by: Sreekanth --- .../pynumaflow/proto/sideinput/sideinput.proto | 3 +++ .../pynumaflow/proto/sideinput/sideinput_pb2.py | 15 ++++++++------- .../pynumaflow/proto/sideinput/sideinput_pb2.pyi | 10 +++++++--- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput.proto b/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput.proto index c53f055e..7c656446 100644 --- a/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput.proto +++ b/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput.proto @@ -1,6 +1,7 @@ syntax = "proto3"; import "google/protobuf/empty.proto"; +import "pynumaflow/proto/common/metadata.proto"; package sideinput.v1; @@ -30,6 +31,8 @@ message SideInputResponse { // True if value should not be broadcasted // False if value should be broadcasted bool no_broadcast = 2; + // metadata of the message + common.Metadata metadata = 3; } /** diff --git a/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput_pb2.py b/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput_pb2.py index af68192d..3e876ea1 100644 --- a/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput_pb2.py +++ b/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput_pb2.py @@ -23,19 +23,20 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +from pynumaflow.proto.common import metadata_pb2 as pynumaflow_dot_proto_dot_common_dot_metadata__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n*pynumaflow/proto/sideinput/sideinput.proto\x12\x0csideinput.v1\x1a\x1bgoogle/protobuf/empty.proto\"8\n\x11SideInputResponse\x12\r\n\x05value\x18\x01 
\x01(\x0c\x12\x14\n\x0cno_broadcast\x18\x02 \x01(\x08\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32\x99\x01\n\tSideInput\x12L\n\x11RetrieveSideInput\x12\x16.google.protobuf.Empty\x1a\x1f.sideinput.v1.SideInputResponse\x12>\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x1b.sideinput.v1.ReadyResponseb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n*pynumaflow/proto/sideinput/sideinput.proto\x12\x0csideinput.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a&pynumaflow/proto/common/metadata.proto\"\\\n\x11SideInputResponse\x12\r\n\x05value\x18\x01 \x01(\x0c\x12\x14\n\x0cno_broadcast\x18\x02 \x01(\x08\x12\"\n\x08metadata\x18\x03 \x01(\x0b\x32\x10.common.Metadata\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32\x99\x01\n\tSideInput\x12L\n\x11RetrieveSideInput\x12\x16.google.protobuf.Empty\x1a\x1f.sideinput.v1.SideInputResponse\x12>\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x1b.sideinput.v1.ReadyResponseb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'pynumaflow.proto.sideinput.sideinput_pb2', _globals) if not _descriptor._USE_C_DESCRIPTORS: DESCRIPTOR._loaded_options = None - _globals['_SIDEINPUTRESPONSE']._serialized_start=89 - _globals['_SIDEINPUTRESPONSE']._serialized_end=145 - _globals['_READYRESPONSE']._serialized_start=147 - _globals['_READYRESPONSE']._serialized_end=177 - _globals['_SIDEINPUT']._serialized_start=180 - _globals['_SIDEINPUT']._serialized_end=333 + _globals['_SIDEINPUTRESPONSE']._serialized_start=129 + _globals['_SIDEINPUTRESPONSE']._serialized_end=221 + _globals['_READYRESPONSE']._serialized_start=223 + _globals['_READYRESPONSE']._serialized_end=253 + _globals['_SIDEINPUT']._serialized_start=256 + _globals['_SIDEINPUT']._serialized_end=409 # @@protoc_insertion_point(module_scope) diff --git a/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput_pb2.pyi 
b/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput_pb2.pyi index 0e2a86b3..dc97254f 100644 --- a/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput_pb2.pyi +++ b/packages/pynumaflow/pynumaflow/proto/sideinput/sideinput_pb2.pyi @@ -1,17 +1,21 @@ from google.protobuf import empty_pb2 as _empty_pb2 +from pynumaflow.proto.common import metadata_pb2 as _metadata_pb2 from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message -from typing import ClassVar as _ClassVar, Optional as _Optional +from collections.abc import Mapping as _Mapping +from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor class SideInputResponse(_message.Message): - __slots__ = ("value", "no_broadcast") + __slots__ = ("value", "no_broadcast", "metadata") VALUE_FIELD_NUMBER: _ClassVar[int] NO_BROADCAST_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] value: bytes no_broadcast: bool - def __init__(self, value: _Optional[bytes] = ..., no_broadcast: bool = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, value: _Optional[bytes] = ..., no_broadcast: bool = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... 
class ReadyResponse(_message.Message): __slots__ = ("ready",) From 933277724fe79372fbe811fdd56d382f77c206f3 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 09:30:25 +0530 Subject: [PATCH 06/23] include metadata in sink request/response Signed-off-by: Sreekanth --- .../pynumaflow/proto/sinker/sink.proto | 6 ++- .../pynumaflow/proto/sinker/sink_pb2.py | 43 ++++++++++--------- .../pynumaflow/proto/sinker/sink_pb2.pyi | 15 +++++-- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/proto/sinker/sink.proto b/packages/pynumaflow/pynumaflow/proto/sinker/sink.proto index 71dbb418..a94afd31 100644 --- a/packages/pynumaflow/pynumaflow/proto/sinker/sink.proto +++ b/packages/pynumaflow/pynumaflow/proto/sinker/sink.proto @@ -2,7 +2,7 @@ syntax = "proto3"; import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; - +import "pynumaflow/proto/common/metadata.proto"; package sink.v1; @@ -25,6 +25,8 @@ message SinkRequest { google.protobuf.Timestamp watermark = 4; string id = 5; map headers = 6; + // metadata of the message + common.Metadata metadata = 7; } // Required field indicating the request. Request request = 1; @@ -64,6 +66,7 @@ enum Status { SUCCESS = 0; FAILURE = 1; FALLBACK = 2; + SERVE = 3; } /** @@ -77,6 +80,7 @@ message SinkResponse { Status status = 2; // err_msg is the error message, set it if success is set to false. 
string err_msg = 3; + optional bytes serve_response = 4; } repeated Result results = 1; optional Handshake handshake = 2; diff --git a/packages/pynumaflow/pynumaflow/proto/sinker/sink_pb2.py b/packages/pynumaflow/pynumaflow/proto/sinker/sink_pb2.py index 10462d7c..92b0e505 100644 --- a/packages/pynumaflow/pynumaflow/proto/sinker/sink_pb2.py +++ b/packages/pynumaflow/pynumaflow/proto/sinker/sink_pb2.py @@ -24,9 +24,10 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 +from pynumaflow.proto.common import metadata_pb2 as pynumaflow_dot_proto_dot_common_dot_metadata__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\"pynumaflow/proto/sinker/sink.proto\x12\x07sink.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\xa3\x03\n\x0bSinkRequest\x12-\n\x07request\x18\x01 \x01(\x0b\x32\x1c.sink.v1.SinkRequest.Request\x12+\n\x06status\x18\x02 \x01(\x0b\x32\x1b.sink.v1.TransmissionStatus\x12*\n\thandshake\x18\x03 \x01(\x0b\x32\x12.sink.v1.HandshakeH\x00\x88\x01\x01\x1a\xfd\x01\n\x07Request\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\n\n\x02id\x18\x05 \x01(\t\x12:\n\x07headers\x18\x06 \x03(\x0b\x32).sink.v1.SinkRequest.Request.HeadersEntry\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_handshake\"\x18\n\tHandshake\x12\x0b\n\x03sot\x18\x01 \x01(\x08\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\"!\n\x12TransmissionStatus\x12\x0b\n\x03\x65ot\x18\x01 \x01(\x08\"\xfc\x01\n\x0cSinkResponse\x12-\n\x07results\x18\x01 \x03(\x0b\x32\x1c.sink.v1.SinkResponse.Result\x12*\n\thandshake\x18\x02 \x01(\x0b\x32\x12.sink.v1.HandshakeH\x00\x88\x01\x01\x12\x30\n\x06status\x18\x03 
\x01(\x0b\x32\x1b.sink.v1.TransmissionStatusH\x01\x88\x01\x01\x1a\x46\n\x06Result\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1f\n\x06status\x18\x02 \x01(\x0e\x32\x0f.sink.v1.Status\x12\x0f\n\x07\x65rr_msg\x18\x03 \x01(\tB\x0c\n\n_handshakeB\t\n\x07_status*0\n\x06Status\x12\x0b\n\x07SUCCESS\x10\x00\x12\x0b\n\x07\x46\x41ILURE\x10\x01\x12\x0c\n\x08\x46\x41LLBACK\x10\x02\x32|\n\x04Sink\x12\x39\n\x06SinkFn\x12\x14.sink.v1.SinkRequest\x1a\x15.sink.v1.SinkResponse(\x01\x30\x01\x12\x39\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x16.sink.v1.ReadyResponseb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\"pynumaflow/proto/sinker/sink.proto\x12\x07sink.v1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a&pynumaflow/proto/common/metadata.proto\"\xc7\x03\n\x0bSinkRequest\x12-\n\x07request\x18\x01 \x01(\x0b\x32\x1c.sink.v1.SinkRequest.Request\x12+\n\x06status\x18\x02 \x01(\x0b\x32\x1b.sink.v1.TransmissionStatus\x12*\n\thandshake\x18\x03 \x01(\x0b\x32\x12.sink.v1.HandshakeH\x00\x88\x01\x01\x1a\xa1\x02\n\x07Request\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\n\n\x02id\x18\x05 \x01(\t\x12:\n\x07headers\x18\x06 \x03(\x0b\x32).sink.v1.SinkRequest.Request.HeadersEntry\x12\"\n\x08metadata\x18\x07 \x01(\x0b\x32\x10.common.Metadata\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_handshake\"\x18\n\tHandshake\x12\x0b\n\x03sot\x18\x01 \x01(\x08\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\"!\n\x12TransmissionStatus\x12\x0b\n\x03\x65ot\x18\x01 \x01(\x08\"\xac\x02\n\x0cSinkResponse\x12-\n\x07results\x18\x01 \x03(\x0b\x32\x1c.sink.v1.SinkResponse.Result\x12*\n\thandshake\x18\x02 \x01(\x0b\x32\x12.sink.v1.HandshakeH\x00\x88\x01\x01\x12\x30\n\x06status\x18\x03 
\x01(\x0b\x32\x1b.sink.v1.TransmissionStatusH\x01\x88\x01\x01\x1av\n\x06Result\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1f\n\x06status\x18\x02 \x01(\x0e\x32\x0f.sink.v1.Status\x12\x0f\n\x07\x65rr_msg\x18\x03 \x01(\t\x12\x1b\n\x0eserve_response\x18\x04 \x01(\x0cH\x00\x88\x01\x01\x42\x11\n\x0f_serve_responseB\x0c\n\n_handshakeB\t\n\x07_status*;\n\x06Status\x12\x0b\n\x07SUCCESS\x10\x00\x12\x0b\n\x07\x46\x41ILURE\x10\x01\x12\x0c\n\x08\x46\x41LLBACK\x10\x02\x12\t\n\x05SERVE\x10\x03\x32|\n\x04Sink\x12\x39\n\x06SinkFn\x12\x14.sink.v1.SinkRequest\x1a\x15.sink.v1.SinkResponse(\x01\x30\x01\x12\x39\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x16.sink.v1.ReadyResponseb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -35,24 +36,24 @@ DESCRIPTOR._loaded_options = None _globals['_SINKREQUEST_REQUEST_HEADERSENTRY']._loaded_options = None _globals['_SINKREQUEST_REQUEST_HEADERSENTRY']._serialized_options = b'8\001' - _globals['_STATUS']._serialized_start=879 - _globals['_STATUS']._serialized_end=927 - _globals['_SINKREQUEST']._serialized_start=110 - _globals['_SINKREQUEST']._serialized_end=529 - _globals['_SINKREQUEST_REQUEST']._serialized_start=262 - _globals['_SINKREQUEST_REQUEST']._serialized_end=515 - _globals['_SINKREQUEST_REQUEST_HEADERSENTRY']._serialized_start=469 - _globals['_SINKREQUEST_REQUEST_HEADERSENTRY']._serialized_end=515 - _globals['_HANDSHAKE']._serialized_start=531 - _globals['_HANDSHAKE']._serialized_end=555 - _globals['_READYRESPONSE']._serialized_start=557 - _globals['_READYRESPONSE']._serialized_end=587 - _globals['_TRANSMISSIONSTATUS']._serialized_start=589 - _globals['_TRANSMISSIONSTATUS']._serialized_end=622 - _globals['_SINKRESPONSE']._serialized_start=625 - _globals['_SINKRESPONSE']._serialized_end=877 - _globals['_SINKRESPONSE_RESULT']._serialized_start=782 - _globals['_SINKRESPONSE_RESULT']._serialized_end=852 - _globals['_SINK']._serialized_start=929 - _globals['_SINK']._serialized_end=1053 + 
_globals['_STATUS']._serialized_start=1003 + _globals['_STATUS']._serialized_end=1062 + _globals['_SINKREQUEST']._serialized_start=150 + _globals['_SINKREQUEST']._serialized_end=605 + _globals['_SINKREQUEST_REQUEST']._serialized_start=302 + _globals['_SINKREQUEST_REQUEST']._serialized_end=591 + _globals['_SINKREQUEST_REQUEST_HEADERSENTRY']._serialized_start=545 + _globals['_SINKREQUEST_REQUEST_HEADERSENTRY']._serialized_end=591 + _globals['_HANDSHAKE']._serialized_start=607 + _globals['_HANDSHAKE']._serialized_end=631 + _globals['_READYRESPONSE']._serialized_start=633 + _globals['_READYRESPONSE']._serialized_end=663 + _globals['_TRANSMISSIONSTATUS']._serialized_start=665 + _globals['_TRANSMISSIONSTATUS']._serialized_end=698 + _globals['_SINKRESPONSE']._serialized_start=701 + _globals['_SINKRESPONSE']._serialized_end=1001 + _globals['_SINKRESPONSE_RESULT']._serialized_start=858 + _globals['_SINKRESPONSE_RESULT']._serialized_end=976 + _globals['_SINK']._serialized_start=1064 + _globals['_SINK']._serialized_end=1188 # @@protoc_insertion_point(module_scope) diff --git a/packages/pynumaflow/pynumaflow/proto/sinker/sink_pb2.pyi b/packages/pynumaflow/pynumaflow/proto/sinker/sink_pb2.pyi index 547d6327..57a3728b 100644 --- a/packages/pynumaflow/pynumaflow/proto/sinker/sink_pb2.pyi +++ b/packages/pynumaflow/pynumaflow/proto/sinker/sink_pb2.pyi @@ -2,6 +2,7 @@ import datetime from google.protobuf import empty_pb2 as _empty_pb2 from google.protobuf import timestamp_pb2 as _timestamp_pb2 +from pynumaflow.proto.common import metadata_pb2 as _metadata_pb2 from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor @@ -16,14 +17,16 @@ class Status(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): SUCCESS: _ClassVar[Status] FAILURE: _ClassVar[Status] FALLBACK: _ClassVar[Status] + SERVE: _ClassVar[Status] SUCCESS: Status FAILURE: Status FALLBACK: 
Status +SERVE: Status class SinkRequest(_message.Message): __slots__ = ("request", "status", "handshake") class Request(_message.Message): - __slots__ = ("keys", "value", "event_time", "watermark", "id", "headers") + __slots__ = ("keys", "value", "event_time", "watermark", "id", "headers", "metadata") class HeadersEntry(_message.Message): __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] @@ -37,13 +40,15 @@ class SinkRequest(_message.Message): WATERMARK_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] HEADERS_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] keys: _containers.RepeatedScalarFieldContainer[str] value: bytes event_time: _timestamp_pb2.Timestamp watermark: _timestamp_pb2.Timestamp id: str headers: _containers.ScalarMap[str, str] - def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., id: _Optional[str] = ..., headers: _Optional[_Mapping[str, str]] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., id: _Optional[str] = ..., headers: _Optional[_Mapping[str, str]] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... 
REQUEST_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] @@ -73,14 +78,16 @@ class TransmissionStatus(_message.Message): class SinkResponse(_message.Message): __slots__ = ("results", "handshake", "status") class Result(_message.Message): - __slots__ = ("id", "status", "err_msg") + __slots__ = ("id", "status", "err_msg", "serve_response") ID_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] ERR_MSG_FIELD_NUMBER: _ClassVar[int] + SERVE_RESPONSE_FIELD_NUMBER: _ClassVar[int] id: str status: Status err_msg: str - def __init__(self, id: _Optional[str] = ..., status: _Optional[_Union[Status, str]] = ..., err_msg: _Optional[str] = ...) -> None: ... + serve_response: bytes + def __init__(self, id: _Optional[str] = ..., status: _Optional[_Union[Status, str]] = ..., err_msg: _Optional[str] = ..., serve_response: _Optional[bytes] = ...) -> None: ... RESULTS_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] From 68286b7f79c4b8970640771e3c255cdbbb9a03d2 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 09:33:15 +0530 Subject: [PATCH 07/23] include metadata in source request/response Signed-off-by: Sreekanth --- .../pynumaflow/proto/sourcer/source.proto | 5 +- .../pynumaflow/proto/sourcer/source_pb2.py | 99 ++++++++++--------- .../pynumaflow/proto/sourcer/source_pb2.pyi | 7 +- 3 files changed, 59 insertions(+), 52 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/proto/sourcer/source.proto b/packages/pynumaflow/pynumaflow/proto/sourcer/source.proto index eab85847..c3551203 100644 --- a/packages/pynumaflow/pynumaflow/proto/sourcer/source.proto +++ b/packages/pynumaflow/pynumaflow/proto/sourcer/source.proto @@ -2,6 +2,7 @@ syntax = "proto3"; import "google/protobuf/timestamp.proto"; import "google/protobuf/empty.proto"; +import "pynumaflow/proto/common/metadata.proto"; package source.v1; @@ -80,6 +81,8 @@ message ReadResponse { // Headers 
are the metadata associated with the datum. // e.g. Kafka and Redis Stream message usually include information about the headers. map headers = 5; + // metadata of the message + common.Metadata metadata = 6; } message Status { // Code to indicate the status of the response. @@ -145,7 +148,7 @@ message AckResponse { message NackRequest { message Request { - // Required field holding the offset to be nacked + // Required field holding the offsets to be nacked repeated Offset offsets = 1; } // Required field holding the request. The list will be ordered and will have the same order as the original Read response. diff --git a/packages/pynumaflow/pynumaflow/proto/sourcer/source_pb2.py b/packages/pynumaflow/pynumaflow/proto/sourcer/source_pb2.py index f85d827f..b6ac0107 100644 --- a/packages/pynumaflow/pynumaflow/proto/sourcer/source_pb2.py +++ b/packages/pynumaflow/pynumaflow/proto/sourcer/source_pb2.py @@ -24,9 +24,10 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +from pynumaflow.proto.common import metadata_pb2 as pynumaflow_dot_proto_dot_common_dot_metadata__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n%pynumaflow/proto/sourcer/source.proto\x12\tsource.v1\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1bgoogle/protobuf/empty.proto\"\x18\n\tHandshake\x12\x0b\n\x03sot\x18\x01 \x01(\x08\"\xb1\x01\n\x0bReadRequest\x12/\n\x07request\x18\x01 \x01(\x0b\x32\x1e.source.v1.ReadRequest.Request\x12,\n\thandshake\x18\x02 \x01(\x0b\x32\x14.source.v1.HandshakeH\x00\x88\x01\x01\x1a\x35\n\x07Request\x12\x13\n\x0bnum_records\x18\x01 \x01(\x04\x12\x15\n\rtimeout_in_ms\x18\x02 \x01(\rB\x0c\n\n_handshake\"\x81\x05\n\x0cReadResponse\x12.\n\x06result\x18\x01 \x01(\x0b\x32\x1e.source.v1.ReadResponse.Result\x12.\n\x06status\x18\x02 \x01(\x0b\x32\x1e.source.v1.ReadResponse.Status\x12,\n\thandshake\x18\x03 
\x01(\x0b\x32\x14.source.v1.HandshakeH\x00\x88\x01\x01\x1a\xe8\x01\n\x06Result\x12\x0f\n\x07payload\x18\x01 \x01(\x0c\x12!\n\x06offset\x18\x02 \x01(\x0b\x32\x11.source.v1.Offset\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04keys\x18\x04 \x03(\t\x12<\n\x07headers\x18\x05 \x03(\x0b\x32+.source.v1.ReadResponse.Result.HeadersEntry\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xe9\x01\n\x06Status\x12\x0b\n\x03\x65ot\x18\x01 \x01(\x08\x12\x31\n\x04\x63ode\x18\x02 \x01(\x0e\x32#.source.v1.ReadResponse.Status.Code\x12\x38\n\x05\x65rror\x18\x03 \x01(\x0e\x32$.source.v1.ReadResponse.Status.ErrorH\x00\x88\x01\x01\x12\x10\n\x03msg\x18\x04 \x01(\tH\x01\x88\x01\x01\" \n\x04\x43ode\x12\x0b\n\x07SUCCESS\x10\x00\x12\x0b\n\x07\x46\x41ILURE\x10\x01\"\x1f\n\x05\x45rror\x12\x0b\n\x07UNACKED\x10\x00\x12\t\n\x05OTHER\x10\x01\x42\x08\n\x06_errorB\x06\n\x04_msgB\x0c\n\n_handshake\"\xa7\x01\n\nAckRequest\x12.\n\x07request\x18\x01 \x01(\x0b\x32\x1d.source.v1.AckRequest.Request\x12,\n\thandshake\x18\x02 \x01(\x0b\x32\x14.source.v1.HandshakeH\x00\x88\x01\x01\x1a-\n\x07Request\x12\"\n\x07offsets\x18\x01 \x03(\x0b\x32\x11.source.v1.OffsetB\x0c\n\n_handshake\"\xab\x01\n\x0b\x41\x63kResponse\x12-\n\x06result\x18\x01 \x01(\x0b\x32\x1d.source.v1.AckResponse.Result\x12,\n\thandshake\x18\x02 \x01(\x0b\x32\x14.source.v1.HandshakeH\x00\x88\x01\x01\x1a\x31\n\x06Result\x12\'\n\x07success\x18\x01 \x01(\x0b\x32\x16.google.protobuf.EmptyB\x0c\n\n_handshake\"m\n\x0bNackRequest\x12/\n\x07request\x18\x01 \x01(\x0b\x32\x1e.source.v1.NackRequest.Request\x1a-\n\x07Request\x12\"\n\x07offsets\x18\x01 \x03(\x0b\x32\x11.source.v1.Offset\"q\n\x0cNackResponse\x12.\n\x06result\x18\x01 \x01(\x0b\x32\x1e.source.v1.NackResponse.Result\x1a\x31\n\x06Result\x12\'\n\x07success\x18\x01 \x01(\x0b\x32\x16.google.protobuf.Empty\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\"]\n\x0fPendingResponse\x12\x31\n\x06result\x18\x01 
\x01(\x0b\x32!.source.v1.PendingResponse.Result\x1a\x17\n\x06Result\x12\r\n\x05\x63ount\x18\x01 \x01(\x03\"h\n\x12PartitionsResponse\x12\x34\n\x06result\x18\x01 \x01(\x0b\x32$.source.v1.PartitionsResponse.Result\x1a\x1c\n\x06Result\x12\x12\n\npartitions\x18\x01 \x03(\x05\".\n\x06Offset\x12\x0e\n\x06offset\x18\x01 \x01(\x0c\x12\x14\n\x0cpartition_id\x18\x02 \x01(\x05\x32\x83\x03\n\x06Source\x12=\n\x06ReadFn\x12\x16.source.v1.ReadRequest\x1a\x17.source.v1.ReadResponse(\x01\x30\x01\x12:\n\x05\x41\x63kFn\x12\x15.source.v1.AckRequest\x1a\x16.source.v1.AckResponse(\x01\x30\x01\x12\x39\n\x06NackFn\x12\x16.source.v1.NackRequest\x1a\x17.source.v1.NackResponse\x12?\n\tPendingFn\x12\x16.google.protobuf.Empty\x1a\x1a.source.v1.PendingResponse\x12\x45\n\x0cPartitionsFn\x12\x16.google.protobuf.Empty\x1a\x1d.source.v1.PartitionsResponse\x12;\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x18.source.v1.ReadyResponseb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n%pynumaflow/proto/sourcer/source.proto\x12\tsource.v1\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a&pynumaflow/proto/common/metadata.proto\"\x18\n\tHandshake\x12\x0b\n\x03sot\x18\x01 \x01(\x08\"\xb1\x01\n\x0bReadRequest\x12/\n\x07request\x18\x01 \x01(\x0b\x32\x1e.source.v1.ReadRequest.Request\x12,\n\thandshake\x18\x02 \x01(\x0b\x32\x14.source.v1.HandshakeH\x00\x88\x01\x01\x1a\x35\n\x07Request\x12\x13\n\x0bnum_records\x18\x01 \x01(\x04\x12\x15\n\rtimeout_in_ms\x18\x02 \x01(\rB\x0c\n\n_handshake\"\xa5\x05\n\x0cReadResponse\x12.\n\x06result\x18\x01 \x01(\x0b\x32\x1e.source.v1.ReadResponse.Result\x12.\n\x06status\x18\x02 \x01(\x0b\x32\x1e.source.v1.ReadResponse.Status\x12,\n\thandshake\x18\x03 \x01(\x0b\x32\x14.source.v1.HandshakeH\x00\x88\x01\x01\x1a\x8c\x02\n\x06Result\x12\x0f\n\x07payload\x18\x01 \x01(\x0c\x12!\n\x06offset\x18\x02 \x01(\x0b\x32\x11.source.v1.Offset\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04keys\x18\x04 
\x03(\t\x12<\n\x07headers\x18\x05 \x03(\x0b\x32+.source.v1.ReadResponse.Result.HeadersEntry\x12\"\n\x08metadata\x18\x06 \x01(\x0b\x32\x10.common.Metadata\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xe9\x01\n\x06Status\x12\x0b\n\x03\x65ot\x18\x01 \x01(\x08\x12\x31\n\x04\x63ode\x18\x02 \x01(\x0e\x32#.source.v1.ReadResponse.Status.Code\x12\x38\n\x05\x65rror\x18\x03 \x01(\x0e\x32$.source.v1.ReadResponse.Status.ErrorH\x00\x88\x01\x01\x12\x10\n\x03msg\x18\x04 \x01(\tH\x01\x88\x01\x01\" \n\x04\x43ode\x12\x0b\n\x07SUCCESS\x10\x00\x12\x0b\n\x07\x46\x41ILURE\x10\x01\"\x1f\n\x05\x45rror\x12\x0b\n\x07UNACKED\x10\x00\x12\t\n\x05OTHER\x10\x01\x42\x08\n\x06_errorB\x06\n\x04_msgB\x0c\n\n_handshake\"\xa7\x01\n\nAckRequest\x12.\n\x07request\x18\x01 \x01(\x0b\x32\x1d.source.v1.AckRequest.Request\x12,\n\thandshake\x18\x02 \x01(\x0b\x32\x14.source.v1.HandshakeH\x00\x88\x01\x01\x1a-\n\x07Request\x12\"\n\x07offsets\x18\x01 \x03(\x0b\x32\x11.source.v1.OffsetB\x0c\n\n_handshake\"\xab\x01\n\x0b\x41\x63kResponse\x12-\n\x06result\x18\x01 \x01(\x0b\x32\x1d.source.v1.AckResponse.Result\x12,\n\thandshake\x18\x02 \x01(\x0b\x32\x14.source.v1.HandshakeH\x00\x88\x01\x01\x1a\x31\n\x06Result\x12\'\n\x07success\x18\x01 \x01(\x0b\x32\x16.google.protobuf.EmptyB\x0c\n\n_handshake\"m\n\x0bNackRequest\x12/\n\x07request\x18\x01 \x01(\x0b\x32\x1e.source.v1.NackRequest.Request\x1a-\n\x07Request\x12\"\n\x07offsets\x18\x01 \x03(\x0b\x32\x11.source.v1.Offset\"q\n\x0cNackResponse\x12.\n\x06result\x18\x01 \x01(\x0b\x32\x1e.source.v1.NackResponse.Result\x1a\x31\n\x06Result\x12\'\n\x07success\x18\x01 \x01(\x0b\x32\x16.google.protobuf.Empty\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\"]\n\x0fPendingResponse\x12\x31\n\x06result\x18\x01 \x01(\x0b\x32!.source.v1.PendingResponse.Result\x1a\x17\n\x06Result\x12\r\n\x05\x63ount\x18\x01 \x01(\x03\"h\n\x12PartitionsResponse\x12\x34\n\x06result\x18\x01 
\x01(\x0b\x32$.source.v1.PartitionsResponse.Result\x1a\x1c\n\x06Result\x12\x12\n\npartitions\x18\x01 \x03(\x05\".\n\x06Offset\x12\x0e\n\x06offset\x18\x01 \x01(\x0c\x12\x14\n\x0cpartition_id\x18\x02 \x01(\x05\x32\x83\x03\n\x06Source\x12=\n\x06ReadFn\x12\x16.source.v1.ReadRequest\x1a\x17.source.v1.ReadResponse(\x01\x30\x01\x12:\n\x05\x41\x63kFn\x12\x15.source.v1.AckRequest\x1a\x16.source.v1.AckResponse(\x01\x30\x01\x12\x39\n\x06NackFn\x12\x16.source.v1.NackRequest\x1a\x17.source.v1.NackResponse\x12?\n\tPendingFn\x12\x16.google.protobuf.Empty\x1a\x1a.source.v1.PendingResponse\x12\x45\n\x0cPartitionsFn\x12\x16.google.protobuf.Empty\x1a\x1d.source.v1.PartitionsResponse\x12;\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a\x18.source.v1.ReadyResponseb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -35,52 +36,52 @@ DESCRIPTOR._loaded_options = None _globals['_READRESPONSE_RESULT_HEADERSENTRY']._loaded_options = None _globals['_READRESPONSE_RESULT_HEADERSENTRY']._serialized_options = b'8\001' - _globals['_HANDSHAKE']._serialized_start=114 - _globals['_HANDSHAKE']._serialized_end=138 - _globals['_READREQUEST']._serialized_start=141 - _globals['_READREQUEST']._serialized_end=318 - _globals['_READREQUEST_REQUEST']._serialized_start=251 - _globals['_READREQUEST_REQUEST']._serialized_end=304 - _globals['_READRESPONSE']._serialized_start=321 - _globals['_READRESPONSE']._serialized_end=962 - _globals['_READRESPONSE_RESULT']._serialized_start=480 - _globals['_READRESPONSE_RESULT']._serialized_end=712 - _globals['_READRESPONSE_RESULT_HEADERSENTRY']._serialized_start=666 - _globals['_READRESPONSE_RESULT_HEADERSENTRY']._serialized_end=712 - _globals['_READRESPONSE_STATUS']._serialized_start=715 - _globals['_READRESPONSE_STATUS']._serialized_end=948 - _globals['_READRESPONSE_STATUS_CODE']._serialized_start=865 - _globals['_READRESPONSE_STATUS_CODE']._serialized_end=897 - _globals['_READRESPONSE_STATUS_ERROR']._serialized_start=899 - 
_globals['_READRESPONSE_STATUS_ERROR']._serialized_end=930 - _globals['_ACKREQUEST']._serialized_start=965 - _globals['_ACKREQUEST']._serialized_end=1132 - _globals['_ACKREQUEST_REQUEST']._serialized_start=1073 - _globals['_ACKREQUEST_REQUEST']._serialized_end=1118 - _globals['_ACKRESPONSE']._serialized_start=1135 - _globals['_ACKRESPONSE']._serialized_end=1306 - _globals['_ACKRESPONSE_RESULT']._serialized_start=1243 - _globals['_ACKRESPONSE_RESULT']._serialized_end=1292 - _globals['_NACKREQUEST']._serialized_start=1308 - _globals['_NACKREQUEST']._serialized_end=1417 - _globals['_NACKREQUEST_REQUEST']._serialized_start=1073 - _globals['_NACKREQUEST_REQUEST']._serialized_end=1118 - _globals['_NACKRESPONSE']._serialized_start=1419 - _globals['_NACKRESPONSE']._serialized_end=1532 - _globals['_NACKRESPONSE_RESULT']._serialized_start=1243 - _globals['_NACKRESPONSE_RESULT']._serialized_end=1292 - _globals['_READYRESPONSE']._serialized_start=1534 - _globals['_READYRESPONSE']._serialized_end=1564 - _globals['_PENDINGRESPONSE']._serialized_start=1566 - _globals['_PENDINGRESPONSE']._serialized_end=1659 - _globals['_PENDINGRESPONSE_RESULT']._serialized_start=1636 - _globals['_PENDINGRESPONSE_RESULT']._serialized_end=1659 - _globals['_PARTITIONSRESPONSE']._serialized_start=1661 - _globals['_PARTITIONSRESPONSE']._serialized_end=1765 - _globals['_PARTITIONSRESPONSE_RESULT']._serialized_start=1737 - _globals['_PARTITIONSRESPONSE_RESULT']._serialized_end=1765 - _globals['_OFFSET']._serialized_start=1767 - _globals['_OFFSET']._serialized_end=1813 - _globals['_SOURCE']._serialized_start=1816 - _globals['_SOURCE']._serialized_end=2203 + _globals['_HANDSHAKE']._serialized_start=154 + _globals['_HANDSHAKE']._serialized_end=178 + _globals['_READREQUEST']._serialized_start=181 + _globals['_READREQUEST']._serialized_end=358 + _globals['_READREQUEST_REQUEST']._serialized_start=291 + _globals['_READREQUEST_REQUEST']._serialized_end=344 + _globals['_READRESPONSE']._serialized_start=361 + 
_globals['_READRESPONSE']._serialized_end=1038 + _globals['_READRESPONSE_RESULT']._serialized_start=520 + _globals['_READRESPONSE_RESULT']._serialized_end=788 + _globals['_READRESPONSE_RESULT_HEADERSENTRY']._serialized_start=742 + _globals['_READRESPONSE_RESULT_HEADERSENTRY']._serialized_end=788 + _globals['_READRESPONSE_STATUS']._serialized_start=791 + _globals['_READRESPONSE_STATUS']._serialized_end=1024 + _globals['_READRESPONSE_STATUS_CODE']._serialized_start=941 + _globals['_READRESPONSE_STATUS_CODE']._serialized_end=973 + _globals['_READRESPONSE_STATUS_ERROR']._serialized_start=975 + _globals['_READRESPONSE_STATUS_ERROR']._serialized_end=1006 + _globals['_ACKREQUEST']._serialized_start=1041 + _globals['_ACKREQUEST']._serialized_end=1208 + _globals['_ACKREQUEST_REQUEST']._serialized_start=1149 + _globals['_ACKREQUEST_REQUEST']._serialized_end=1194 + _globals['_ACKRESPONSE']._serialized_start=1211 + _globals['_ACKRESPONSE']._serialized_end=1382 + _globals['_ACKRESPONSE_RESULT']._serialized_start=1319 + _globals['_ACKRESPONSE_RESULT']._serialized_end=1368 + _globals['_NACKREQUEST']._serialized_start=1384 + _globals['_NACKREQUEST']._serialized_end=1493 + _globals['_NACKREQUEST_REQUEST']._serialized_start=1149 + _globals['_NACKREQUEST_REQUEST']._serialized_end=1194 + _globals['_NACKRESPONSE']._serialized_start=1495 + _globals['_NACKRESPONSE']._serialized_end=1608 + _globals['_NACKRESPONSE_RESULT']._serialized_start=1319 + _globals['_NACKRESPONSE_RESULT']._serialized_end=1368 + _globals['_READYRESPONSE']._serialized_start=1610 + _globals['_READYRESPONSE']._serialized_end=1640 + _globals['_PENDINGRESPONSE']._serialized_start=1642 + _globals['_PENDINGRESPONSE']._serialized_end=1735 + _globals['_PENDINGRESPONSE_RESULT']._serialized_start=1712 + _globals['_PENDINGRESPONSE_RESULT']._serialized_end=1735 + _globals['_PARTITIONSRESPONSE']._serialized_start=1737 + _globals['_PARTITIONSRESPONSE']._serialized_end=1841 + 
_globals['_PARTITIONSRESPONSE_RESULT']._serialized_start=1813 + _globals['_PARTITIONSRESPONSE_RESULT']._serialized_end=1841 + _globals['_OFFSET']._serialized_start=1843 + _globals['_OFFSET']._serialized_end=1889 + _globals['_SOURCE']._serialized_start=1892 + _globals['_SOURCE']._serialized_end=2279 # @@protoc_insertion_point(module_scope) diff --git a/packages/pynumaflow/pynumaflow/proto/sourcer/source_pb2.pyi b/packages/pynumaflow/pynumaflow/proto/sourcer/source_pb2.pyi index 0e158815..16925099 100644 --- a/packages/pynumaflow/pynumaflow/proto/sourcer/source_pb2.pyi +++ b/packages/pynumaflow/pynumaflow/proto/sourcer/source_pb2.pyi @@ -2,6 +2,7 @@ import datetime from google.protobuf import timestamp_pb2 as _timestamp_pb2 from google.protobuf import empty_pb2 as _empty_pb2 +from pynumaflow.proto.common import metadata_pb2 as _metadata_pb2 from google.protobuf.internal import containers as _containers from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper from google.protobuf import descriptor as _descriptor @@ -35,7 +36,7 @@ class ReadRequest(_message.Message): class ReadResponse(_message.Message): __slots__ = ("result", "status", "handshake") class Result(_message.Message): - __slots__ = ("payload", "offset", "event_time", "keys", "headers") + __slots__ = ("payload", "offset", "event_time", "keys", "headers", "metadata") class HeadersEntry(_message.Message): __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] @@ -48,12 +49,14 @@ class ReadResponse(_message.Message): EVENT_TIME_FIELD_NUMBER: _ClassVar[int] KEYS_FIELD_NUMBER: _ClassVar[int] HEADERS_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] payload: bytes offset: Offset event_time: _timestamp_pb2.Timestamp keys: _containers.RepeatedScalarFieldContainer[str] headers: _containers.ScalarMap[str, str] - def __init__(self, payload: _Optional[bytes] = ..., offset: _Optional[_Union[Offset, _Mapping]] = ..., event_time: _Optional[_Union[datetime.datetime, 
_timestamp_pb2.Timestamp, _Mapping]] = ..., keys: _Optional[_Iterable[str]] = ..., headers: _Optional[_Mapping[str, str]] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, payload: _Optional[bytes] = ..., offset: _Optional[_Union[Offset, _Mapping]] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., keys: _Optional[_Iterable[str]] = ..., headers: _Optional[_Mapping[str, str]] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... class Status(_message.Message): __slots__ = ("eot", "code", "error", "msg") class Code(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): From 49bb7fbc180f196450c5bd4f5e2bdee648eefb4f Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 09:34:39 +0530 Subject: [PATCH 08/23] include metadata in sourcetransformer request/response Signed-off-by: Sreekanth --- .../proto/sourcetransformer/transform.proto | 5 +++ .../proto/sourcetransformer/transform_pb2.py | 35 ++++++++++--------- .../proto/sourcetransformer/transform_pb2.pyi | 13 ++++--- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform.proto b/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform.proto index 3c338611..fb346167 100644 --- a/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform.proto +++ b/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform.proto @@ -2,6 +2,7 @@ syntax = "proto3"; import "google/protobuf/timestamp.proto"; import "google/protobuf/empty.proto"; +import "pynumaflow/proto/common/metadata.proto"; package sourcetransformer.v1; @@ -35,6 +36,8 @@ message SourceTransformRequest { map headers = 5; // This ID is used to uniquely identify a transform request string id = 6; + // metadata of the message + common.Metadata metadata = 7; } Request request = 1; optional Handshake handshake = 2; @@ -49,6 +52,8 @@ message SourceTransformResponse { 
bytes value = 2; google.protobuf.Timestamp event_time = 3; repeated string tags = 4; + // metadata of the message + common.Metadata metadata = 5; } repeated Result results = 1; // This ID is used to refer the responses to the request it corresponds to. diff --git a/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform_pb2.py b/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform_pb2.py index aebfb85a..60077698 100644 --- a/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform_pb2.py +++ b/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform_pb2.py @@ -24,9 +24,10 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +from pynumaflow.proto.common import metadata_pb2 as pynumaflow_dot_proto_dot_common_dot_metadata__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n2pynumaflow/proto/sourcetransformer/transform.proto\x12\x14sourcetransformer.v1\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1bgoogle/protobuf/empty.proto\"\x18\n\tHandshake\x12\x0b\n\x03sot\x18\x01 \x01(\x08\"\xbe\x03\n\x16SourceTransformRequest\x12\x45\n\x07request\x18\x01 \x01(\x0b\x32\x34.sourcetransformer.v1.SourceTransformRequest.Request\x12\x37\n\thandshake\x18\x02 \x01(\x0b\x32\x1f.sourcetransformer.v1.HandshakeH\x00\x88\x01\x01\x1a\x95\x02\n\x07Request\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12R\n\x07headers\x18\x05 \x03(\x0b\x32\x41.sourcetransformer.v1.SourceTransformRequest.Request.HeadersEntry\x12\n\n\x02id\x18\x06 \x01(\t\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_handshake\"\x98\x02\n\x17SourceTransformResponse\x12\x45\n\x07results\x18\x01 
\x03(\x0b\x32\x34.sourcetransformer.v1.SourceTransformResponse.Result\x12\n\n\x02id\x18\x02 \x01(\t\x12\x37\n\thandshake\x18\x03 \x01(\x0b\x32\x1f.sourcetransformer.v1.HandshakeH\x00\x88\x01\x01\x1a\x63\n\x06Result\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04tags\x18\x04 \x03(\tB\x0c\n\n_handshake\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32\xcf\x01\n\x0fSourceTransform\x12t\n\x11SourceTransformFn\x12,.sourcetransformer.v1.SourceTransformRequest\x1a-.sourcetransformer.v1.SourceTransformResponse(\x01\x30\x01\x12\x46\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a#.sourcetransformer.v1.ReadyResponseb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n2pynumaflow/proto/sourcetransformer/transform.proto\x12\x14sourcetransformer.v1\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a&pynumaflow/proto/common/metadata.proto\"\x18\n\tHandshake\x12\x0b\n\x03sot\x18\x01 \x01(\x08\"\xe2\x03\n\x16SourceTransformRequest\x12\x45\n\x07request\x18\x01 \x01(\x0b\x32\x34.sourcetransformer.v1.SourceTransformRequest.Request\x12\x37\n\thandshake\x18\x02 \x01(\x0b\x32\x1f.sourcetransformer.v1.HandshakeH\x00\x88\x01\x01\x1a\xb9\x02\n\x07Request\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12-\n\twatermark\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12R\n\x07headers\x18\x05 \x03(\x0b\x32\x41.sourcetransformer.v1.SourceTransformRequest.Request.HeadersEntry\x12\n\n\x02id\x18\x06 \x01(\t\x12\"\n\x08metadata\x18\x07 \x01(\x0b\x32\x10.common.Metadata\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_handshake\"\xbd\x02\n\x17SourceTransformResponse\x12\x45\n\x07results\x18\x01 
\x03(\x0b\x32\x34.sourcetransformer.v1.SourceTransformResponse.Result\x12\n\n\x02id\x18\x02 \x01(\t\x12\x37\n\thandshake\x18\x03 \x01(\x0b\x32\x1f.sourcetransformer.v1.HandshakeH\x00\x88\x01\x01\x1a\x87\x01\n\x06Result\x12\x0c\n\x04keys\x18\x01 \x03(\t\x12\r\n\x05value\x18\x02 \x01(\x0c\x12.\n\nevent_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x0c\n\x04tags\x18\x04 \x03(\t\x12\"\n\x08metadata\x18\x05 \x01(\x0b\x32\x10.common.MetadataB\x0c\n\n_handshake\"\x1e\n\rReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08\x32\xcf\x01\n\x0fSourceTransform\x12t\n\x11SourceTransformFn\x12,.sourcetransformer.v1.SourceTransformRequest\x1a-.sourcetransformer.v1.SourceTransformResponse(\x01\x30\x01\x12\x46\n\x07IsReady\x12\x16.google.protobuf.Empty\x1a#.sourcetransformer.v1.ReadyResponseb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -35,20 +36,20 @@ DESCRIPTOR._loaded_options = None _globals['_SOURCETRANSFORMREQUEST_REQUEST_HEADERSENTRY']._loaded_options = None _globals['_SOURCETRANSFORMREQUEST_REQUEST_HEADERSENTRY']._serialized_options = b'8\001' - _globals['_HANDSHAKE']._serialized_start=138 - _globals['_HANDSHAKE']._serialized_end=162 - _globals['_SOURCETRANSFORMREQUEST']._serialized_start=165 - _globals['_SOURCETRANSFORMREQUEST']._serialized_end=611 - _globals['_SOURCETRANSFORMREQUEST_REQUEST']._serialized_start=320 - _globals['_SOURCETRANSFORMREQUEST_REQUEST']._serialized_end=597 - _globals['_SOURCETRANSFORMREQUEST_REQUEST_HEADERSENTRY']._serialized_start=551 - _globals['_SOURCETRANSFORMREQUEST_REQUEST_HEADERSENTRY']._serialized_end=597 - _globals['_SOURCETRANSFORMRESPONSE']._serialized_start=614 - _globals['_SOURCETRANSFORMRESPONSE']._serialized_end=894 - _globals['_SOURCETRANSFORMRESPONSE_RESULT']._serialized_start=781 - _globals['_SOURCETRANSFORMRESPONSE_RESULT']._serialized_end=880 - _globals['_READYRESPONSE']._serialized_start=896 - _globals['_READYRESPONSE']._serialized_end=926 - 
_globals['_SOURCETRANSFORM']._serialized_start=929 - _globals['_SOURCETRANSFORM']._serialized_end=1136 + _globals['_HANDSHAKE']._serialized_start=178 + _globals['_HANDSHAKE']._serialized_end=202 + _globals['_SOURCETRANSFORMREQUEST']._serialized_start=205 + _globals['_SOURCETRANSFORMREQUEST']._serialized_end=687 + _globals['_SOURCETRANSFORMREQUEST_REQUEST']._serialized_start=360 + _globals['_SOURCETRANSFORMREQUEST_REQUEST']._serialized_end=673 + _globals['_SOURCETRANSFORMREQUEST_REQUEST_HEADERSENTRY']._serialized_start=627 + _globals['_SOURCETRANSFORMREQUEST_REQUEST_HEADERSENTRY']._serialized_end=673 + _globals['_SOURCETRANSFORMRESPONSE']._serialized_start=690 + _globals['_SOURCETRANSFORMRESPONSE']._serialized_end=1007 + _globals['_SOURCETRANSFORMRESPONSE_RESULT']._serialized_start=858 + _globals['_SOURCETRANSFORMRESPONSE_RESULT']._serialized_end=993 + _globals['_READYRESPONSE']._serialized_start=1009 + _globals['_READYRESPONSE']._serialized_end=1039 + _globals['_SOURCETRANSFORM']._serialized_start=1042 + _globals['_SOURCETRANSFORM']._serialized_end=1249 # @@protoc_insertion_point(module_scope) diff --git a/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform_pb2.pyi b/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform_pb2.pyi index e1e8fe7f..99b452a6 100644 --- a/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform_pb2.pyi +++ b/packages/pynumaflow/pynumaflow/proto/sourcetransformer/transform_pb2.pyi @@ -2,6 +2,7 @@ import datetime from google.protobuf import timestamp_pb2 as _timestamp_pb2 from google.protobuf import empty_pb2 as _empty_pb2 +from pynumaflow.proto.common import metadata_pb2 as _metadata_pb2 from google.protobuf.internal import containers as _containers from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -19,7 +20,7 @@ class Handshake(_message.Message): class SourceTransformRequest(_message.Message): __slots__ = ("request", "handshake") class 
Request(_message.Message): - __slots__ = ("keys", "value", "event_time", "watermark", "headers", "id") + __slots__ = ("keys", "value", "event_time", "watermark", "headers", "id", "metadata") class HeadersEntry(_message.Message): __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] @@ -33,13 +34,15 @@ class SourceTransformRequest(_message.Message): WATERMARK_FIELD_NUMBER: _ClassVar[int] HEADERS_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] keys: _containers.RepeatedScalarFieldContainer[str] value: bytes event_time: _timestamp_pb2.Timestamp watermark: _timestamp_pb2.Timestamp headers: _containers.ScalarMap[str, str] id: str - def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., headers: _Optional[_Mapping[str, str]] = ..., id: _Optional[str] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., watermark: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., headers: _Optional[_Mapping[str, str]] = ..., id: _Optional[str] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... 
REQUEST_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] request: SourceTransformRequest.Request @@ -49,16 +52,18 @@ class SourceTransformRequest(_message.Message): class SourceTransformResponse(_message.Message): __slots__ = ("results", "id", "handshake") class Result(_message.Message): - __slots__ = ("keys", "value", "event_time", "tags") + __slots__ = ("keys", "value", "event_time", "tags", "metadata") KEYS_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] EVENT_TIME_FIELD_NUMBER: _ClassVar[int] TAGS_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] keys: _containers.RepeatedScalarFieldContainer[str] value: bytes event_time: _timestamp_pb2.Timestamp tags: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., tags: _Optional[_Iterable[str]] = ...) -> None: ... + metadata: _metadata_pb2.Metadata + def __init__(self, keys: _Optional[_Iterable[str]] = ..., value: _Optional[bytes] = ..., event_time: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., tags: _Optional[_Iterable[str]] = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ... 
RESULTS_FIELD_NUMBER: _ClassVar[int] ID_FIELD_NUMBER: _ClassVar[int] HANDSHAKE_FIELD_NUMBER: _ClassVar[int] From fd81d4b2d0ebbbad55b60311d6f4e125da494f27 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 15:49:12 +0530 Subject: [PATCH 09/23] support metadata in mapper Signed-off-by: Sreekanth --- packages/pynumaflow/poetry.lock | 104 ++++++++++++++- .../pynumaflow/pynumaflow/mapper/__init__.py | 3 + .../pynumaflow/pynumaflow/mapper/_dtypes.py | 36 ++++- .../mapper/_servicer/_async_servicer.py | 18 ++- .../mapper/_servicer/_sync_servicer.py | 8 +- .../pynumaflow/pynumaflow/mapper/metadata.py | 123 ++++++++++++++++++ packages/pynumaflow/pyproject.toml | 4 + .../pynumaflow/tests/map/test_async_mapper.py | 27 ++-- packages/pynumaflow/tests/map/utils.py | 76 +++++++++++ 9 files changed, 368 insertions(+), 31 deletions(-) create mode 100644 packages/pynumaflow/pynumaflow/mapper/metadata.py diff --git a/packages/pynumaflow/poetry.lock b/packages/pynumaflow/poetry.lock index 3687c6cb..a8bd8a10 100644 --- a/packages/pynumaflow/poetry.lock +++ b/packages/pynumaflow/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. 
[[package]] name = "aiorun" @@ -434,6 +434,21 @@ protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4 [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0)"] +[[package]] +name = "grpc-stubs" +version = "1.53.0.6" +description = "Mypy stubs for gRPC" +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "grpc_stubs-1.53.0.6-py3-none-any.whl", hash = "sha256:3ffc5a6b5bd84ac46f3d84e2434e97936c1262b47b71b462bdedc43caaf227e1"}, + {file = "grpc_stubs-1.53.0.6.tar.gz", hash = "sha256:70a0840747bd73c2c82fe819699bbf4fcf6d59bd0ed27a4713a240e0c697e1ff"}, +] + +[package.dependencies] +grpcio = "*" + [[package]] name = "grpcio" version = "1.75.0" @@ -642,6 +657,67 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "mypy" +version = "1.18.2" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mypy-1.18.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eab0cf6294dafe397c261a75f96dc2c31bffe3b944faa24db5def4e2b0f77c"}, + {file = "mypy-1.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a780ca61fc239e4865968ebc5240bb3bf610ef59ac398de9a7421b54e4a207e"}, + {file = "mypy-1.18.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448acd386266989ef11662ce3c8011fd2a7b632e0ec7d61a98edd8e27472225b"}, + {file = "mypy-1.18.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f9e171c465ad3901dc652643ee4bffa8e9fef4d7d0eece23b428908c77a76a66"}, + {file = "mypy-1.18.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:592ec214750bc00741af1f80cbf96b5013d81486b7bb24cb052382c19e40b428"}, + {file = "mypy-1.18.2-cp310-cp310-win_amd64.whl", hash = "sha256:7fb95f97199ea11769ebe3638c29b550b5221e997c63b14ef93d2e971606ebed"}, + {file = 
"mypy-1.18.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:807d9315ab9d464125aa9fcf6d84fde6e1dc67da0b6f80e7405506b8ac72bc7f"}, + {file = "mypy-1.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:776bb00de1778caf4db739c6e83919c1d85a448f71979b6a0edd774ea8399341"}, + {file = "mypy-1.18.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1379451880512ffce14505493bd9fe469e0697543717298242574882cf8cdb8d"}, + {file = "mypy-1.18.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1331eb7fd110d60c24999893320967594ff84c38ac6d19e0a76c5fd809a84c86"}, + {file = "mypy-1.18.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ca30b50a51e7ba93b00422e486cbb124f1c56a535e20eff7b2d6ab72b3b2e37"}, + {file = "mypy-1.18.2-cp311-cp311-win_amd64.whl", hash = "sha256:664dc726e67fa54e14536f6e1224bcfce1d9e5ac02426d2326e2bb4e081d1ce8"}, + {file = "mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34"}, + {file = "mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764"}, + {file = "mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893"}, + {file = "mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914"}, + {file = "mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8"}, + {file = "mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074"}, + {file = "mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc"}, + {file = "mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e"}, + {file = "mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986"}, + {file = "mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d"}, + {file = "mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba"}, + {file = "mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544"}, + {file = "mypy-1.18.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:62f0e1e988ad41c2a110edde6c398383a889d95b36b3e60bcf155f5164c4fdce"}, + {file = "mypy-1.18.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8795a039bab805ff0c1dfdb8cd3344642c2b99b8e439d057aba30850b8d3423d"}, + {file = "mypy-1.18.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ca1e64b24a700ab5ce10133f7ccd956a04715463d30498e64ea8715236f9c9c"}, + {file = "mypy-1.18.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d924eef3795cc89fecf6bedc6ed32b33ac13e8321344f6ddbf8ee89f706c05cb"}, + {file = "mypy-1.18.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20c02215a080e3a2be3aa50506c67242df1c151eaba0dcbc1e4e557922a26075"}, + {file = "mypy-1.18.2-cp314-cp314-win_amd64.whl", hash = "sha256:749b5f83198f1ca64345603118a6f01a4e99ad4bf9d103ddc5a3200cc4614adf"}, + {file = "mypy-1.18.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:25a9c8fb67b00599f839cf472713f54249a62efd53a54b565eb61956a7e3296b"}, + {file = 
"mypy-1.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c2b9c7e284ee20e7598d6f42e13ca40b4928e6957ed6813d1ab6348aa3f47133"}, + {file = "mypy-1.18.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d6985ed057513e344e43a26cc1cd815c7a94602fb6a3130a34798625bc2f07b6"}, + {file = "mypy-1.18.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22f27105f1525ec024b5c630c0b9f36d5c1cc4d447d61fe51ff4bd60633f47ac"}, + {file = "mypy-1.18.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:030c52d0ea8144e721e49b1f68391e39553d7451f0c3f8a7565b59e19fcb608b"}, + {file = "mypy-1.18.2-cp39-cp39-win_amd64.whl", hash = "sha256:aa5e07ac1a60a253445797e42b8b2963c9675563a94f11291ab40718b016a7a0"}, + {file = "mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e"}, + {file = "mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b"}, +] + +[package.dependencies] +mypy_extensions = ">=1.0.0" +pathspec = ">=0.9.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing_extensions = ">=4.6.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +faster-cache = ["orjson"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -1070,6 +1146,30 @@ files = [ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] +[[package]] +name = "types-protobuf" +version = "6.32.1.20250918" +description = "Typing stubs for protobuf" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "types_protobuf-6.32.1.20250918-py3-none-any.whl", hash = "sha256:22ba6133d142d11cc34d3788ad6dead2732368ebb0406eaa7790ea6ae46c8d0b"}, + {file = "types_protobuf-6.32.1.20250918.tar.gz", hash = 
"sha256:44ce0ae98475909ca72379946ab61a4435eec2a41090821e713c17e8faf5b88f"}, +] + +[[package]] +name = "types-psutil" +version = "7.0.0.20251001" +description = "Typing stubs for psutil" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "types_psutil-7.0.0.20251001-py3-none-any.whl", hash = "sha256:adc31de8386d31c61bd4123112fd51e2c700c7502a001cad72a3d56ba6b463d1"}, + {file = "types_psutil-7.0.0.20251001.tar.gz", hash = "sha256:60d696200ddae28677e7d88cdebd6e960294e85adefbaafe0f6e5d0e7b4c1963"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -1176,4 +1276,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.1" python-versions = ">=3.9, <3.15" -content-hash = "b5c4326f453e5e1f74ecf77e1020da56d6aab4c701ad9b2e9a5f60c134a91f9f" +content-hash = "46d9b0b86df1b83df8ac12ea9feac6a88bcfdbf3505bec5ff19cd104429dcbf6" diff --git a/packages/pynumaflow/pynumaflow/mapper/__init__.py b/packages/pynumaflow/pynumaflow/mapper/__init__.py index a713d039..d4cb4af3 100644 --- a/packages/pynumaflow/pynumaflow/mapper/__init__.py +++ b/packages/pynumaflow/pynumaflow/mapper/__init__.py @@ -3,6 +3,7 @@ from pynumaflow.mapper.sync_server import MapServer from pynumaflow.mapper._dtypes import Message, Messages, Datum, DROP, Mapper +from pynumaflow.mapper.metadata import UserMetadata, SystemMetadata __all__ = [ "Message", @@ -13,4 +14,6 @@ "MapServer", "MapAsyncServer", "MapMultiprocServer", + "UserMetadata", + "SystemMetadata", ] diff --git a/packages/pynumaflow/pynumaflow/mapper/_dtypes.py b/packages/pynumaflow/pynumaflow/mapper/_dtypes.py index 341aa929..f3ac8d60 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_dtypes.py +++ b/packages/pynumaflow/pynumaflow/mapper/_dtypes.py @@ -1,11 +1,12 @@ from abc import ABCMeta, abstractmethod from collections.abc import Iterator, Sequence, Awaitable -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime 
import datetime from typing import TypeVar, Callable, Union, Optional from warnings import warn from pynumaflow._constants import DROP +from .metadata import UserMetadata, SystemMetadata M = TypeVar("M", bound="Message") Ms = TypeVar("Ms", bound="Messages") @@ -20,21 +21,24 @@ class Message: value: data in bytes keys: []string keys for vertex (optional) tags: []string tags for conditional forwarding (optional) + user_metadata: metadata for the message (optional) """ - __slots__ = ("_value", "_keys", "_tags") + __slots__ = ("_value", "_keys", "_tags", "_user_metadata") _value: bytes _keys: list[str] _tags: list[str] + _user_metadata: UserMetadata - def __init__(self, value: bytes, keys: list[str] = None, tags: list[str] = None): + def __init__(self, value: bytes, keys: list[str] = None, tags: list[str] = None, user_metadata: Optional[UserMetadata] = None): """ Creates a Message object to send value to a vertex. """ self._keys = keys or [] self._tags = tags or [] self._value = value or b"" + self._user_metadata = user_metadata or UserMetadata() # returns the Message Object which will be dropped @classmethod @@ -53,6 +57,10 @@ def keys(self) -> list[str]: def tags(self) -> list[str]: return self._tags + @property + def user_metadata(self) -> UserMetadata: + return self._user_metadata + class Messages(Sequence[M]): """ @@ -84,10 +92,10 @@ def __getitem__(self, index: int) -> M: raise TypeError("Slicing is not supported for Messages") return self._messages[index] - def append(self, message: Message) -> None: + def append(self, message: M) -> None: self._messages.append(message) - def items(self) -> list[Message]: + def items(self) -> Sequence[M]: warn( "Using items is deprecated and will be removed in v0.5. " "Iterate or index the Messages object instead.", @@ -124,13 +132,15 @@ class Datum: ... 
) """ - __slots__ = ("_keys", "_value", "_event_time", "_watermark", "_headers") + __slots__ = ("_keys", "_value", "_event_time", "_watermark", "_headers", "_user_metadata", "_system_metadata") _keys: list[str] _value: bytes _event_time: datetime _watermark: datetime _headers: dict[str, str] + _user_metadata: UserMetadata + _system_metadata: SystemMetadata def __init__( self, @@ -139,6 +149,8 @@ def __init__( event_time: datetime, watermark: datetime, headers: Optional[dict[str, str]] = None, + user_metadata: Optional[UserMetadata] = None, + system_metadata: Optional[SystemMetadata] = None, ): self._keys = keys or list() self._value = value or b"" @@ -149,6 +161,8 @@ def __init__( raise TypeError(f"Wrong data type: {type(watermark)} for Datum.watermark") self._watermark = watermark self._headers = headers or {} + self._user_metadata = user_metadata or UserMetadata() + self._system_metadata = system_metadata or SystemMetadata() @property def keys(self) -> list[str]: @@ -175,6 +189,16 @@ def headers(self) -> dict[str, str]: """Returns the headers of the event.""" return self._headers.copy() + @property + def user_metadata(self) -> UserMetadata: + """Returns the user metadata of the event.""" + return self._user_metadata + + @property + def system_metadata(self) -> SystemMetadata: + """Returns the system metadata of the event.""" + return self._system_metadata + class Mapper(metaclass=ABCMeta): """ diff --git a/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py b/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py index 4fd5b9a3..96e9abda 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py +++ b/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py @@ -1,11 +1,12 @@ import asyncio -from collections.abc import AsyncIterable +from collections.abc import AsyncIterator, Iterator from google.protobuf import empty_pb2 as _empty_pb2 from pynumaflow.shared.asynciter import NonBlockingIterator from 
pynumaflow._constants import _LOGGER, STREAM_EOF, ERR_UDF_EXCEPTION_STRING -from pynumaflow.mapper._dtypes import MapAsyncCallable, Datum, MapError +from pynumaflow.mapper._dtypes import MapAsyncCallable, Datum, MapError, Message, Messages +from pynumaflow.mapper.metadata import _user_and_system_metadata_from_proto from pynumaflow.proto.mapper import map_pb2, map_pb2_grpc from pynumaflow.shared.server import handle_async_error from pynumaflow.types import NumaflowServicerContext @@ -27,9 +28,9 @@ def __init__( async def MapFn( self, - request_iterator: AsyncIterable[map_pb2.MapRequest], + request_iterator: AsyncIterator[map_pb2.MapRequest], context: NumaflowServicerContext, - ) -> AsyncIterable[map_pb2.MapResponse]: + ) -> AsyncIterator[map_pb2.MapResponse]: """ Applies a function to each datum element. The pascal case function name comes from the proto map_pb2_grpc.py file. @@ -70,7 +71,7 @@ async def MapFn( async def _process_inputs( self, - request_iterator: AsyncIterable[map_pb2.MapRequest], + request_iterator: AsyncIterator[map_pb2.MapRequest], result_queue: NonBlockingIterator, ): """ @@ -99,6 +100,7 @@ async def _invoke_map(self, req: map_pb2.MapRequest, result_queue: NonBlockingIt """ Invokes the user defined function. 
""" + user_metadata, system_metadata = _user_and_system_metadata_from_proto(req.request.metadata) try: datum = Datum( keys=list(req.request.keys), @@ -106,12 +108,14 @@ async def _invoke_map(self, req: map_pb2.MapRequest, result_queue: NonBlockingIt event_time=req.request.event_time.ToDatetime(), watermark=req.request.watermark.ToDatetime(), headers=dict(req.request.headers), + user_metadata=user_metadata, + system_metadata=system_metadata, ) - msgs = await self.__map_handler(list(req.request.keys), datum) + msgs: Messages[Message] = await self.__map_handler(list(req.request.keys), datum) datums = [] for msg in msgs: datums.append( - map_pb2.MapResponse.Result(keys=msg.keys, value=msg.value, tags=msg.tags) + map_pb2.MapResponse.Result(keys=msg.keys, value=msg.value, tags=msg.tags, metadata=msg.user_metadata._to_proto()) ) await result_queue.put(map_pb2.MapResponse(results=datums, id=req.id)) except BaseException as err: diff --git a/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py b/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py index 40b6c7ca..6e7455b4 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py +++ b/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py @@ -1,6 +1,6 @@ import threading from concurrent.futures import ThreadPoolExecutor -from collections.abc import Iterable +from collections.abc import Iterator from google.protobuf import empty_pb2 as _empty_pb2 from pynumaflow.shared.server import exit_on_error @@ -28,9 +28,9 @@ def __init__(self, handler: MapSyncCallable, multiproc: bool = False): def MapFn( self, - request_iterator: Iterable[map_pb2.MapRequest], + request_iterator: Iterator[map_pb2.MapRequest], context: NumaflowServicerContext, - ) -> Iterable[map_pb2.MapResponse]: + ) -> Iterator[map_pb2.MapResponse]: """ Applies a function to each datum element. The pascal case function name comes from the proto map_pb2_grpc.py file. 
@@ -79,7 +79,7 @@ def MapFn( def _process_requests( self, context: NumaflowServicerContext, - request_iterator: Iterable[map_pb2.MapRequest], + request_iterator: Iterator[map_pb2.MapRequest], result_queue: SyncIterator, ): try: diff --git a/packages/pynumaflow/pynumaflow/mapper/metadata.py b/packages/pynumaflow/pynumaflow/mapper/metadata.py new file mode 100644 index 00000000..1efa693f --- /dev/null +++ b/packages/pynumaflow/pynumaflow/mapper/metadata.py @@ -0,0 +1,123 @@ +from dataclasses import dataclass, field +from collections import defaultdict +from pynumaflow.proto.common import metadata_pb2 + +""" + Metadata provides per-message metadata passed between vertices. + + A vertex could create one or more set of key-value pairs per group-name. + This is required because a vertex could forward a message to a + Kafka sink with Kafka headers, and a metrics Sink with some key/value points. + + There will be two kinds of metadata, + + system - generated by the system, exposed as read-only to UDFs + user - user generated with read-write access + | + | +-> [group-m] -> {k1:v1, ... } + | | + +-> [user] +-> [group-n] -> {k1:v1, ... } + | | + | +-> [group-o] -> {k1:v1, ... } + | + | +-> [group-h] -> {k1:v1, ... } + | | + +-> [sys] +-> [group-i] -> {k1:v1, ... } + | +""" + +@dataclass +class SystemMetadata: + """ + System metadata is the mapping of group name to key-value pairs for a given group. + System metadata wraps the system-generated metadata groups per message. It is read-only to UDFs. + """ + _data: dict[str, dict[str, bytes]] = field(default_factory=dict) + + def groups(self) -> list[str]: + """ + Returns the list of group names for the system metadata. + """ + return list(self._data.keys()) + + def keys(self, group: str) -> list[str]: + """ + Returns the list of keys for a given group. + """ + return list(self._data[group].keys()) + + def value(self, group: str, key: str) -> bytes: + """ + Returns the value for a given group and key. 
+ """ + return self._data[group][key] + + +@dataclass +class UserMetadata: + """ + UserMetadata wraps the user-generated metadata groups per message. It is read-write to UDFs. + """ + _data: defaultdict[str, dict[str, bytes]] = field(default_factory=lambda: defaultdict(dict)) + + def groups(self) -> list[str]: + """ + Returns the list of group names for the user metadata. + """ + return list(self._data.keys()) + + def keys(self, group: str) -> list[str]: + """ + Returns the list of keys for a given group. + """ + return list(self._data[group].keys()) + + def value(self, group: str, key: str) -> bytes: + """ + Returns the value for a given group and key. + """ + return self._data[group][key] + + def add(self, group: str, key: str, value: bytes): + """ + Adds the value for a given group and key. + """ + self._data[group][key] = value + + def set_group(self, group: str, data: dict[str, bytes]): + """ + Sets the data for a given group. + """ + self._data[group] = data + + def remove(self, group: str, key: str): + """ + Removes the key and its value for a given group. + """ + del self._data[group][key] + + def remove_group(self, group: str): + """ + Removes the group and all its keys and values. + """ + del self._data[group] + + def clear(self): + """ + Clears all the groups and all their keys and values. + """ + self._data.clear() + + def _to_proto(self) -> metadata_pb2.Metadata: + return metadata_pb2.Metadata( + user_metadata={group: metadata_pb2.KeyValueGroup(key_value=value) for group, value in self._data.items()}, + ) + + +def _user_and_system_metadata_from_proto(proto: metadata_pb2.Metadata) -> tuple[UserMetadata, SystemMetadata]: + """ + Converts the protobuf metadata to the UserMetadata and SystemMetadata objects. 
+ """ + user_metadata = {group: dict(kv.key_value) for group, kv in proto.user_metadata.items()} + system_metadata = {group: dict(kv.key_value) for group, kv in proto.sys_metadata.items()} + return UserMetadata(user_metadata), SystemMetadata(system_metadata) diff --git a/packages/pynumaflow/pyproject.toml b/packages/pynumaflow/pyproject.toml index c7eae207..0d3e7b59 100644 --- a/packages/pynumaflow/pyproject.toml +++ b/packages/pynumaflow/pyproject.toml @@ -44,6 +44,10 @@ black = "^23.1" grpcio-testing = "^1.48.1" ruff = "^0.0.264" pre-commit = "^3.3.1" +types-protobuf = "^6.32.1.20250918" +mypy = "^1.18.2" +grpc-stubs = "^1.53.0.6" +types-psutil = "^7.0.0.20251001" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/packages/pynumaflow/tests/map/test_async_mapper.py b/packages/pynumaflow/tests/map/test_async_mapper.py index 85ff5a70..6093f52a 100644 --- a/packages/pynumaflow/tests/map/test_async_mapper.py +++ b/packages/pynumaflow/tests/map/test_async_mapper.py @@ -1,12 +1,13 @@ import asyncio import logging import threading +from typing import Iterator import unittest from unittest.mock import patch import grpc from google.protobuf import empty_pb2 as _empty_pb2 -from grpc.aio._server import Server +from grpc.aio import Server from pynumaflow import setup_logging from pynumaflow._constants import MAX_MESSAGE_SIZE @@ -16,7 +17,8 @@ Message, ) from pynumaflow.mapper.async_server import MapAsyncServer -from pynumaflow.proto.mapper import map_pb2_grpc +from pynumaflow.proto.common import metadata_pb2 +from pynumaflow.proto.mapper import map_pb2, map_pb2_grpc from tests.map.utils import get_test_datums from tests.testing_utils import ( mock_terminate_on_stop, @@ -43,7 +45,7 @@ async def async_map_handler(keys: list[str], datum: Datum) -> Messages: ) val = bytes(msg, encoding="utf-8") messages = Messages() - messages.append(Message(str.encode(msg), keys=keys)) + messages.append(Message(str.encode(msg), keys=keys, user_metadata=datum.user_metadata)) return 
messages @@ -149,13 +151,13 @@ def test_run_server(self) -> None: def test_map(self) -> None: stub = map_pb2_grpc.MapStub(_channel) request = get_test_datums() - generator_response = None try: - generator_response = stub.MapFn(request_iterator=request_generator(request)) + generator_response: Iterator[map_pb2.MapResponse] = stub.MapFn(request_iterator=request_generator(request)) except grpc.RpcError as e: logging.error(e) + raise - responses = [] + responses: list[map_pb2.MapResponse] = [] # capture the output from the ReadFn generator and assert. for r in generator_response: responses.append(r) @@ -165,20 +167,21 @@ def test_map(self) -> None: self.assertTrue(responses[0].handshake.sot) - idx = 1 - while idx < len(responses): + for idx, resp in enumerate(responses[1:], 1): _id = "test-id-" + str(idx) - self.assertEqual(_id, responses[idx].id) + self.assertEqual(_id, resp.id) self.assertEqual( bytes( "payload:test_mock_message " "event_time:2022-09-12 16:00:00 watermark:2022-09-12 16:01:00", encoding="utf-8", ), - responses[idx].results[0].value, + resp.results[0].value, ) - self.assertEqual(1, len(responses[idx].results)) - idx += 1 + self.assertEqual(1, len(resp.results)) + self.assertEqual(resp.results[0].metadata.user_metadata['custom_info'], metadata_pb2.KeyValueGroup(key_value={"version": f'{idx}.0.0'.encode('utf-8')})) + # System metadata will be empty for user responses + self.assertEqual(resp.results[0].metadata.sys_metadata, {}) def test_map_grpc_error_no_handshake(self) -> None: stub = map_pb2_grpc.MapStub(_channel) diff --git a/packages/pynumaflow/tests/map/utils.py b/packages/pynumaflow/tests/map/utils.py index 1c063a28..5708e2c1 100644 --- a/packages/pynumaflow/tests/map/utils.py +++ b/packages/pynumaflow/tests/map/utils.py @@ -1,3 +1,4 @@ +from pynumaflow.proto.common import metadata_pb2 from pynumaflow.proto.mapper import map_pb2 from pynumaflow.mapper import Datum, Messages, Message, Mapper @@ -62,6 +63,31 @@ def get_test_datums(handshake=True): 
event_time=event_time_timestamp, watermark=watermark_timestamp, headers=mock_headers(), + metadata=metadata_pb2.Metadata( + previous_vertex="test-source", + sys_metadata={ + "numaflow_version_info": metadata_pb2.KeyValueGroup( + key_value={ + "version": b"1.0.0", + "build_time": b"2021-01-01 00:00:00", + "commit_hash": b"1234567890", + } + ), + }, + user_metadata={ + "custom_info": metadata_pb2.KeyValueGroup( + key_value={ + "version": b"1.0.0", + } + ), + "test_group": metadata_pb2.KeyValueGroup( + key_value={ + "key1": b"value1", + "key2": b"value2", + } + ) + }, + ), ), id="test-id-1", ), @@ -71,6 +97,31 @@ def get_test_datums(handshake=True): event_time=event_time_timestamp, watermark=watermark_timestamp, headers=mock_headers(), + metadata=metadata_pb2.Metadata( + previous_vertex="test-source", + sys_metadata={ + "numaflow_version_info": metadata_pb2.KeyValueGroup( + key_value={ + "version": b"1.0.0", + "build_time": b"2021-01-01 00:00:00", + "commit_hash": b"1234567890", + } + ), + }, + user_metadata={ + "custom_info": metadata_pb2.KeyValueGroup( + key_value={ + "version": b"2.0.0", + } + ), + "test_group": metadata_pb2.KeyValueGroup( + key_value={ + "key1": b"value1", + "key2": b"value2", + } + ) + }, + ), ), id="test-id-2", ), @@ -80,6 +131,31 @@ def get_test_datums(handshake=True): event_time=event_time_timestamp, watermark=watermark_timestamp, headers=mock_headers(), + metadata=metadata_pb2.Metadata( + previous_vertex="test-source", + sys_metadata={ + "numaflow_version_info": metadata_pb2.KeyValueGroup( + key_value={ + "version": b"1.0.0", + "build_time": b"2021-01-01 00:00:00", + "commit_hash": b"1234567890", + } + ), + }, + user_metadata={ + "custom_info": metadata_pb2.KeyValueGroup( + key_value={ + "version": b"3.0.0", + } + ), + "test_group": metadata_pb2.KeyValueGroup( + key_value={ + "key1": b"value1", + "key2": b"value2", + } + ) + }, + ), ), id="test-id-3", ), From 7c208130f8ded587dcd2d92b81529da93d1953f6 Mon Sep 17 00:00:00 2001 From: Sreekanth 
Date: Mon, 20 Oct 2025 16:58:09 +0530 Subject: [PATCH 10/23] file formatting Signed-off-by: Sreekanth --- .../pynumaflow/pynumaflow/mapper/_dtypes.py | 18 ++++++++++++++++-- .../mapper/_servicer/_async_servicer.py | 7 ++++++- .../pynumaflow/pynumaflow/mapper/metadata.py | 12 ++++++++++-- .../pynumaflow/tests/map/test_async_mapper.py | 9 +++++++-- packages/pynumaflow/tests/map/utils.py | 6 +++--- 5 files changed, 42 insertions(+), 10 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/mapper/_dtypes.py b/packages/pynumaflow/pynumaflow/mapper/_dtypes.py index f3ac8d60..975aa27b 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_dtypes.py +++ b/packages/pynumaflow/pynumaflow/mapper/_dtypes.py @@ -31,7 +31,13 @@ class Message: _tags: list[str] _user_metadata: UserMetadata - def __init__(self, value: bytes, keys: list[str] = None, tags: list[str] = None, user_metadata: Optional[UserMetadata] = None): + def __init__( + self, + value: bytes, + keys: list[str] = None, + tags: list[str] = None, + user_metadata: Optional[UserMetadata] = None, + ): """ Creates a Message object to send value to a vertex. """ @@ -132,7 +138,15 @@ class Datum: ... 
) """ - __slots__ = ("_keys", "_value", "_event_time", "_watermark", "_headers", "_user_metadata", "_system_metadata") + __slots__ = ( + "_keys", + "_value", + "_event_time", + "_watermark", + "_headers", + "_user_metadata", + "_system_metadata", + ) _keys: list[str] _value: bytes diff --git a/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py b/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py index 96e9abda..e3e484e4 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py +++ b/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py @@ -115,7 +115,12 @@ async def _invoke_map(self, req: map_pb2.MapRequest, result_queue: NonBlockingIt datums = [] for msg in msgs: datums.append( - map_pb2.MapResponse.Result(keys=msg.keys, value=msg.value, tags=msg.tags, metadata=msg.user_metadata._to_proto()) + map_pb2.MapResponse.Result( + keys=msg.keys, + value=msg.value, + tags=msg.tags, + metadata=msg.user_metadata._to_proto(), + ) ) await result_queue.put(map_pb2.MapResponse(results=datums, id=req.id)) except BaseException as err: diff --git a/packages/pynumaflow/pynumaflow/mapper/metadata.py b/packages/pynumaflow/pynumaflow/mapper/metadata.py index 1efa693f..9e367104 100644 --- a/packages/pynumaflow/pynumaflow/mapper/metadata.py +++ b/packages/pynumaflow/pynumaflow/mapper/metadata.py @@ -26,12 +26,14 @@ | """ + @dataclass class SystemMetadata: """ System metadata is the mapping of group name to key-value pairs for a given group. System metadata wraps the system-generated metadata groups per message. It is read-only to UDFs. """ + _data: dict[str, dict[str, bytes]] = field(default_factory=dict) def groups(self) -> list[str]: @@ -58,6 +60,7 @@ class UserMetadata: """ UserMetadata wraps the user-generated metadata groups per message. It is read-write to UDFs. 
""" + _data: defaultdict[str, dict[str, bytes]] = field(default_factory=lambda: defaultdict(dict)) def groups(self) -> list[str]: @@ -110,11 +113,16 @@ def clear(self): def _to_proto(self) -> metadata_pb2.Metadata: return metadata_pb2.Metadata( - user_metadata={group: metadata_pb2.KeyValueGroup(key_value=value) for group, value in self._data.items()}, + user_metadata={ + group: metadata_pb2.KeyValueGroup(key_value=value) + for group, value in self._data.items() + }, ) -def _user_and_system_metadata_from_proto(proto: metadata_pb2.Metadata) -> tuple[UserMetadata, SystemMetadata]: +def _user_and_system_metadata_from_proto( + proto: metadata_pb2.Metadata, +) -> tuple[UserMetadata, SystemMetadata]: """ Converts the protobuf metadata to the UserMetadata and SystemMetadata objects. """ diff --git a/packages/pynumaflow/tests/map/test_async_mapper.py b/packages/pynumaflow/tests/map/test_async_mapper.py index 6093f52a..acad0229 100644 --- a/packages/pynumaflow/tests/map/test_async_mapper.py +++ b/packages/pynumaflow/tests/map/test_async_mapper.py @@ -152,7 +152,9 @@ def test_map(self) -> None: stub = map_pb2_grpc.MapStub(_channel) request = get_test_datums() try: - generator_response: Iterator[map_pb2.MapResponse] = stub.MapFn(request_iterator=request_generator(request)) + generator_response: Iterator[map_pb2.MapResponse] = stub.MapFn( + request_iterator=request_generator(request) + ) except grpc.RpcError as e: logging.error(e) raise @@ -179,7 +181,10 @@ def test_map(self) -> None: resp.results[0].value, ) self.assertEqual(1, len(resp.results)) - self.assertEqual(resp.results[0].metadata.user_metadata['custom_info'], metadata_pb2.KeyValueGroup(key_value={"version": f'{idx}.0.0'.encode('utf-8')})) + self.assertEqual( + resp.results[0].metadata.user_metadata["custom_info"], + metadata_pb2.KeyValueGroup(key_value={"version": f"{idx}.0.0".encode("utf-8")}), + ) # System metadata will be empty for user responses self.assertEqual(resp.results[0].metadata.sys_metadata, {}) diff 
--git a/packages/pynumaflow/tests/map/utils.py b/packages/pynumaflow/tests/map/utils.py index 5708e2c1..c7fe303b 100644 --- a/packages/pynumaflow/tests/map/utils.py +++ b/packages/pynumaflow/tests/map/utils.py @@ -85,7 +85,7 @@ def get_test_datums(handshake=True): "key1": b"value1", "key2": b"value2", } - ) + ), }, ), ), @@ -119,7 +119,7 @@ def get_test_datums(handshake=True): "key1": b"value1", "key2": b"value2", } - ) + ), }, ), ), @@ -153,7 +153,7 @@ def get_test_datums(handshake=True): "key1": b"value1", "key2": b"value2", } - ) + ), }, ), ), From 9ab25ecaf3d662d0dc05a413f63c56e41c69aed3 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Oct 2025 17:01:01 +0530 Subject: [PATCH 11/23] ruff fixes Signed-off-by: Sreekanth --- packages/pynumaflow/pynumaflow/mapper/_dtypes.py | 2 +- .../pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py | 2 +- packages/pynumaflow/tests/map/test_async_mapper.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/mapper/_dtypes.py b/packages/pynumaflow/pynumaflow/mapper/_dtypes.py index 975aa27b..c6088a95 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_dtypes.py +++ b/packages/pynumaflow/pynumaflow/mapper/_dtypes.py @@ -1,6 +1,6 @@ from abc import ABCMeta, abstractmethod from collections.abc import Iterator, Sequence, Awaitable -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime from typing import TypeVar, Callable, Union, Optional from warnings import warn diff --git a/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py b/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py index e3e484e4..623aeffe 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py +++ b/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py @@ -1,5 +1,5 @@ import asyncio -from collections.abc import AsyncIterator, Iterator +from collections.abc import AsyncIterator from 
google.protobuf import empty_pb2 as _empty_pb2 from pynumaflow.shared.asynciter import NonBlockingIterator diff --git a/packages/pynumaflow/tests/map/test_async_mapper.py b/packages/pynumaflow/tests/map/test_async_mapper.py index acad0229..29a98694 100644 --- a/packages/pynumaflow/tests/map/test_async_mapper.py +++ b/packages/pynumaflow/tests/map/test_async_mapper.py @@ -1,7 +1,7 @@ import asyncio import logging import threading -from typing import Iterator +from collections.abc import Iterator import unittest from unittest.mock import patch @@ -183,7 +183,7 @@ def test_map(self) -> None: self.assertEqual(1, len(resp.results)) self.assertEqual( resp.results[0].metadata.user_metadata["custom_info"], - metadata_pb2.KeyValueGroup(key_value={"version": f"{idx}.0.0".encode("utf-8")}), + metadata_pb2.KeyValueGroup(key_value={"version": f"{idx}.0.0".encode()}), ) # System metadata will be empty for user responses self.assertEqual(resp.results[0].metadata.sys_metadata, {}) From 44be545baa99edd3ddb7351422105701cb68ae0a Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 09:58:23 +0530 Subject: [PATCH 12/23] make metadata module common Signed-off-by: Sreekanth --- .../pynumaflow/pynumaflow/{mapper/metadata.py => _metadata.py} | 0 packages/pynumaflow/pynumaflow/mapper/__init__.py | 2 +- packages/pynumaflow/pynumaflow/mapper/_dtypes.py | 2 +- .../pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename packages/pynumaflow/pynumaflow/{mapper/metadata.py => _metadata.py} (100%) diff --git a/packages/pynumaflow/pynumaflow/mapper/metadata.py b/packages/pynumaflow/pynumaflow/_metadata.py similarity index 100% rename from packages/pynumaflow/pynumaflow/mapper/metadata.py rename to packages/pynumaflow/pynumaflow/_metadata.py diff --git a/packages/pynumaflow/pynumaflow/mapper/__init__.py b/packages/pynumaflow/pynumaflow/mapper/__init__.py index d4cb4af3..bb699960 100644 --- 
a/packages/pynumaflow/pynumaflow/mapper/__init__.py +++ b/packages/pynumaflow/pynumaflow/mapper/__init__.py @@ -3,7 +3,7 @@ from pynumaflow.mapper.sync_server import MapServer from pynumaflow.mapper._dtypes import Message, Messages, Datum, DROP, Mapper -from pynumaflow.mapper.metadata import UserMetadata, SystemMetadata +from pynumaflow._metadata import UserMetadata, SystemMetadata __all__ = [ "Message", diff --git a/packages/pynumaflow/pynumaflow/mapper/_dtypes.py b/packages/pynumaflow/pynumaflow/mapper/_dtypes.py index c6088a95..155b71e7 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_dtypes.py +++ b/packages/pynumaflow/pynumaflow/mapper/_dtypes.py @@ -6,7 +6,7 @@ from warnings import warn from pynumaflow._constants import DROP -from .metadata import UserMetadata, SystemMetadata +from pynumaflow._metadata import UserMetadata, SystemMetadata M = TypeVar("M", bound="Message") Ms = TypeVar("Ms", bound="Messages") diff --git a/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py b/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py index 623aeffe..bda98ece 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py +++ b/packages/pynumaflow/pynumaflow/mapper/_servicer/_async_servicer.py @@ -6,7 +6,7 @@ from pynumaflow._constants import _LOGGER, STREAM_EOF, ERR_UDF_EXCEPTION_STRING from pynumaflow.mapper._dtypes import MapAsyncCallable, Datum, MapError, Message, Messages -from pynumaflow.mapper.metadata import _user_and_system_metadata_from_proto +from pynumaflow._metadata import _user_and_system_metadata_from_proto from pynumaflow.proto.mapper import map_pb2, map_pb2_grpc from pynumaflow.shared.server import handle_async_error from pynumaflow.types import NumaflowServicerContext From a2a28c70f840a244e76f2fea1842ba10c0bdf72a Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 11:15:07 +0530 Subject: [PATCH 13/23] support metadata in sink Signed-off-by: Sreekanth --- 
.../pynumaflow/pynumaflow/sinker/_dtypes.py | 29 ++++++++++++++++++- .../sinker/servicer/async_servicer.py | 6 ++-- .../sinker/servicer/sync_servicer.py | 6 ++-- .../pynumaflow/sinker/servicer/utils.py | 4 +++ .../pynumaflow/tests/sink/test_async_sink.py | 29 +++++++++++++++---- packages/pynumaflow/tests/sink/test_server.py | 19 ++++++++++++ 6 files changed, 80 insertions(+), 13 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/sinker/_dtypes.py b/packages/pynumaflow/pynumaflow/sinker/_dtypes.py index c90f1f2e..43ce3386 100644 --- a/packages/pynumaflow/pynumaflow/sinker/_dtypes.py +++ b/packages/pynumaflow/pynumaflow/sinker/_dtypes.py @@ -6,6 +6,8 @@ from collections.abc import Sequence, Iterator from warnings import warn +from pynumaflow._metadata import SystemMetadata, UserMetadata + R = TypeVar("R", bound="Response") Rs = TypeVar("Rs", bound="Responses") @@ -120,7 +122,16 @@ class Datum: ... ) """ - __slots__ = ("_keys", "_id", "_value", "_event_time", "_watermark", "_headers") + __slots__ = ( + "_keys", + "_id", + "_value", + "_event_time", + "_watermark", + "_headers", + "_user_metadata", + "_system_metadata", + ) _keys: list[str] _id: str @@ -128,6 +139,8 @@ class Datum: _event_time: datetime _watermark: datetime _headers: dict[str, str] + _user_metadata: UserMetadata + _system_metadata: SystemMetadata def __init__( self, @@ -137,6 +150,8 @@ def __init__( event_time: datetime, watermark: datetime, headers: Optional[dict[str, str]] = None, + user_metadata: Optional[UserMetadata] = None, + system_metadata: Optional[SystemMetadata] = None, ): self._keys = keys self._id = sink_msg_id or "" @@ -148,6 +163,8 @@ def __init__( raise TypeError(f"Wrong data type: {type(watermark)} for Datum.watermark") self._watermark = watermark self._headers = headers or {} + self._user_metadata = user_metadata or UserMetadata() + self._system_metadata = system_metadata or SystemMetadata() def __str__(self): value_string = self._value.decode("utf-8") @@ -192,6 +209,16 @@ def 
headers(self) -> dict[str, str]: """Returns the headers of the event.""" return self._headers.copy() + @property + def user_metadata(self) -> UserMetadata: + """Returns the user metadata of the event.""" + return self._user_metadata + + @property + def system_metadata(self) -> SystemMetadata: + """Returns the system metadata of the event.""" + return self._system_metadata + class Sinker(metaclass=ABCMeta): """ diff --git a/packages/pynumaflow/pynumaflow/sinker/servicer/async_servicer.py b/packages/pynumaflow/pynumaflow/sinker/servicer/async_servicer.py index ed9a5572..4b7d4a16 100644 --- a/packages/pynumaflow/pynumaflow/sinker/servicer/async_servicer.py +++ b/packages/pynumaflow/pynumaflow/sinker/servicer/async_servicer.py @@ -1,5 +1,5 @@ import asyncio -from collections.abc import AsyncIterable +from collections.abc import AsyncIterator from google.protobuf import empty_pb2 as _empty_pb2 from pynumaflow.shared.asynciter import NonBlockingIterator @@ -33,7 +33,7 @@ def __init__( async def SinkFn( self, - request_iterator: AsyncIterable[sink_pb2.SinkRequest], + request_iterator: AsyncIterator[sink_pb2.SinkRequest], context: NumaflowServicerContext, ) -> sink_pb2.SinkResponse: """ @@ -89,7 +89,7 @@ async def SinkFn( return async def __invoke_sink( - self, request_queue: AsyncIterable[Datum], context: NumaflowServicerContext + self, request_queue: AsyncIterator[Datum], context: NumaflowServicerContext ): try: # invoke the user function with the request queue diff --git a/packages/pynumaflow/pynumaflow/sinker/servicer/sync_servicer.py b/packages/pynumaflow/pynumaflow/sinker/servicer/sync_servicer.py index b3fc669a..bdcf98d7 100644 --- a/packages/pynumaflow/pynumaflow/sinker/servicer/sync_servicer.py +++ b/packages/pynumaflow/pynumaflow/sinker/servicer/sync_servicer.py @@ -1,4 +1,4 @@ -from collections.abc import Iterable +from collections.abc import Iterator from pynumaflow._constants import _LOGGER, STREAM_EOF @@ -26,8 +26,8 @@ def __init__(self, handler: 
SinkSyncCallable): self.handler: SinkSyncCallable = handler def SinkFn( - self, request_iterator: Iterable[sink_pb2.SinkRequest], context: NumaflowServicerContext - ) -> Iterable[sink_pb2.SinkResponse]: + self, request_iterator: Iterator[sink_pb2.SinkRequest], context: NumaflowServicerContext + ) -> Iterator[sink_pb2.SinkResponse]: """ Applies a sink function to datum elements. """ diff --git a/packages/pynumaflow/pynumaflow/sinker/servicer/utils.py b/packages/pynumaflow/pynumaflow/sinker/servicer/utils.py index e3d648c2..465240b4 100644 --- a/packages/pynumaflow/pynumaflow/sinker/servicer/utils.py +++ b/packages/pynumaflow/pynumaflow/sinker/servicer/utils.py @@ -1,3 +1,4 @@ +from pynumaflow._metadata import _user_and_system_metadata_from_proto from pynumaflow.proto.sinker import sink_pb2 from pynumaflow.sinker._dtypes import Response, Datum, Responses @@ -47,6 +48,7 @@ def datum_from_sink_req(d: sink_pb2.SinkRequest) -> Datum: Returns: Datum: A Datum object populated with the data from the input SinkRequest object. 
""" + user_metadata, system_metadata = _user_and_system_metadata_from_proto(d.request.metadata) datum = Datum( keys=list(d.request.keys), sink_msg_id=d.request.id, @@ -54,6 +56,8 @@ def datum_from_sink_req(d: sink_pb2.SinkRequest) -> Datum: event_time=d.request.event_time.ToDatetime(), watermark=d.request.watermark.ToDatetime(), headers=dict(d.request.headers), + user_metadata=user_metadata, + system_metadata=system_metadata, ) return datum diff --git a/packages/pynumaflow/tests/sink/test_async_sink.py b/packages/pynumaflow/tests/sink/test_async_sink.py index efade0cd..c3d91fe5 100644 --- a/packages/pynumaflow/tests/sink/test_async_sink.py +++ b/packages/pynumaflow/tests/sink/test_async_sink.py @@ -7,7 +7,7 @@ import grpc from google.protobuf import empty_pb2 as _empty_pb2 -from grpc.aio._server import Server +from grpc.aio import Server from pynumaflow import setup_logging from pynumaflow._constants import ( @@ -16,6 +16,7 @@ FALLBACK_SINK_SOCK_PATH, FALLBACK_SINK_SERVER_INFO_FILE_PATH, ) +from pynumaflow.proto.common import metadata_pb2 from pynumaflow.sinker import ( Datum, ) @@ -41,6 +42,10 @@ async def udsink_handler(datums: AsyncIterable[Datum]) -> Responses: elif msg.value.decode("utf-8") == "test_mock_fallback_message": responses.append(Response.as_fallback(msg.id)) else: + if msg.user_metadata.groups() != ["custom_info"]: + raise ValueError("user metadata groups do not match") + if msg.system_metadata.groups() != ["numaflow_version_info"]: + raise ValueError("system metadata groups do not match") responses.append(Response.as_success(msg.id)) return responses @@ -55,7 +60,21 @@ def start_sink_streaming_request(_id: str, req_type) -> (Datum, tuple): value = mock_fallback_message() request = sink_pb2.SinkRequest.Request( - value=value, event_time=event_time_timestamp, watermark=watermark_timestamp, id=_id + value=value, + event_time=event_time_timestamp, + watermark=watermark_timestamp, + id=_id, + metadata=metadata_pb2.Metadata( + 
previous_vertex="test-source", + user_metadata={ + "custom_info": metadata_pb2.KeyValueGroup(key_value={"version": b"1.0.0"}), + }, + sys_metadata={ + "numaflow_version_info": metadata_pb2.KeyValueGroup( + key_value={"version": b"1.0.0"} + ), + }, + ), ) return sink_pb2.SinkRequest(request=request) @@ -64,10 +83,8 @@ def request_generator(count, req_type="success", session=1, handshake=True): if handshake: yield sink_pb2.SinkRequest(handshake=sink_pb2.Handshake(sot=True)) - for j in range(session): - for i in range(count): - yield start_sink_streaming_request(str(i), req_type) - + for _ in range(session): + yield from (start_sink_streaming_request(str(i), req_type) for i in range(count)) yield sink_pb2.SinkRequest(status=sink_pb2.TransmissionStatus(eot=True)) diff --git a/packages/pynumaflow/tests/sink/test_server.py b/packages/pynumaflow/tests/sink/test_server.py index 318226d9..8baf9c96 100644 --- a/packages/pynumaflow/tests/sink/test_server.py +++ b/packages/pynumaflow/tests/sink/test_server.py @@ -15,6 +15,7 @@ FALLBACK_SINK_SOCK_PATH, FALLBACK_SINK_SERVER_INFO_FILE_PATH, ) +from pynumaflow.proto.common import metadata_pb2 from pynumaflow.proto.sinker import sink_pb2 from pynumaflow.sinker import Responses, Datum, Response, SinkServer from tests.testing_utils import mock_terminate_on_stop @@ -32,6 +33,10 @@ def udsink_handler(datums: Iterator[Datum]) -> Responses: elif "fallback" in msg.value.decode("utf-8"): results.append(Response.as_fallback(msg.id)) else: + if msg.user_metadata.groups() != ["custom_info"]: + raise ValueError("user metadata groups do not match") + if msg.system_metadata.groups() != ["numaflow_version_info"]: + raise ValueError("system metadata groups do not match") results.append(Response.as_success(msg.id)) return results @@ -68,6 +73,16 @@ def mock_watermark(): # We are mocking the terminate function from the psutil to not exit the program during testing @patch("psutil.Process.kill", mock_terminate_on_stop) class 
TestServer(unittest.TestCase): + metadata = metadata_pb2.Metadata( + previous_vertex="test-source", + user_metadata={ + "custom_info": metadata_pb2.KeyValueGroup(key_value={"version": b"1.0.0"}), + }, + sys_metadata={ + "numaflow_version_info": metadata_pb2.KeyValueGroup(key_value={"version": b"1.0.0"}), + }, + ) + def setUp(self) -> None: server = SinkServer(sinker_instance=udsink_handler) my_servicer = server.servicer @@ -147,6 +162,7 @@ def test_udsink_err(self): value=mock_message(), event_time=event_time_timestamp, watermark=watermark_timestamp, + metadata=self.metadata, ) ), sink_pb2.SinkRequest( @@ -155,6 +171,7 @@ def test_udsink_err(self): value=mock_err_message(), event_time=event_time_timestamp, watermark=watermark_timestamp, + metadata=self.metadata, ) ), sink_pb2.SinkRequest(status=sink_pb2.TransmissionStatus(eot=True)), @@ -201,6 +218,7 @@ def test_forward_message(self): value=mock_message(), event_time=event_time_timestamp, watermark=watermark_timestamp, + metadata=self.metadata, ) ), sink_pb2.SinkRequest( @@ -209,6 +227,7 @@ def test_forward_message(self): value=mock_err_message(), event_time=event_time_timestamp, watermark=watermark_timestamp, + metadata=self.metadata, ) ), sink_pb2.SinkRequest(status=sink_pb2.TransmissionStatus(eot=True)), From 1b71a8c463f4ecf193c4c2148d92b662cabe94d0 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 13:52:43 +0530 Subject: [PATCH 14/23] support metadata in source Signed-off-by: Sreekanth --- packages/pynumaflow/pynumaflow/_metadata.py | 80 +++++++++++++++---- .../pynumaflow/pynumaflow/shared/asynciter.py | 14 ++-- .../pynumaflow/pynumaflow/sinker/__init__.py | 12 ++- .../pynumaflow/pynumaflow/sourcer/__init__.py | 2 + .../pynumaflow/pynumaflow/sourcer/_dtypes.py | 12 ++- .../sourcer/servicer/async_servicer.py | 11 ++- .../tests/source/test_async_source.py | 22 ++++- .../tests/source/test_async_source_err.py | 2 +- packages/pynumaflow/tests/source/utils.py | 23 +++++- 9 files changed, 143 
insertions(+), 35 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/_metadata.py b/packages/pynumaflow/pynumaflow/_metadata.py index 9e367104..5b03429d 100644 --- a/packages/pynumaflow/pynumaflow/_metadata.py +++ b/packages/pynumaflow/pynumaflow/_metadata.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from collections import defaultdict +from typing import Optional from pynumaflow.proto.common import metadata_pb2 """ @@ -46,13 +46,13 @@ def keys(self, group: str) -> list[str]: """ Returns the list of keys for a given group. """ - return list(self._data[group].keys()) + return list(self._data.get(group, {}).keys()) - def value(self, group: str, key: str) -> bytes: + def value(self, group: str, key: str) -> Optional[bytes]: """ Returns the value for a given group and key. """ - return self._data[group][key] + return self._data.get(group, {}).get(key) @dataclass @@ -61,7 +61,7 @@ class UserMetadata: UserMetadata wraps the user-generated metadata groups per message. It is read-write to UDFs. """ - _data: defaultdict[str, dict[str, bytes]] = field(default_factory=lambda: defaultdict(dict)) + _data: dict[str, dict[str, bytes]] = field(default_factory=dict) def groups(self) -> list[str]: """ @@ -69,23 +69,61 @@ def groups(self) -> list[str]: """ return list(self._data.keys()) - def keys(self, group: str) -> list[str]: + def keys(self, group: str) -> Optional[list[str]]: """ Returns the list of keys for a given group. """ - return list(self._data[group].keys()) + keys = self._data.get(group) + if keys is None: + return None + return list(keys.keys()) - def value(self, group: str, key: str) -> bytes: + def __contains__(self, group: str) -> bool: """ - Returns the value for a given group and key. + Returns True if the group exists. + """ + return group in self._data + + def __getitem__(self, group: str) -> dict[str, bytes]: + """ + Returns the data for a given group. + Raises KeyError if the group does not exist. 
+ """ + return self._data[group] + + def __setitem__(self, group: str, data: dict[str, bytes]): + """ + Sets the data for a given group. + """ + self._data[group] = data + + def __delitem__(self, group: str): + """ + Removes the group and all its keys and values. + Raises KeyError if the group does not exist. + """ + del self._data[group] + + def __len__(self) -> int: """ - return self._data[group][key] + Returns the number of groups. + """ + return len(self._data) + + def value(self, group: str, key: str) -> Optional[bytes]: + """ + Returns the value for a given group and key. If the group or key does not exist, returns None. + """ + value = self._data.get(group) + if value is None: + return None + return value.get(key) def add(self, group: str, key: str, value: bytes): """ Adds the value for a given group and key. """ - self._data[group][key] = value + self._data.setdefault(group, {})[key] = value def set_group(self, group: str, data: dict[str, bytes]): """ @@ -93,17 +131,25 @@ def set_group(self, group: str, data: dict[str, bytes]): """ self._data[group] = data - def remove(self, group: str, key: str): + def remove(self, group: str, key: str) -> Optional[bytes]: """ - Removes the key and its value for a given group. + Removes the key and its value for a given group and returns the value. If this key is the only key in the group, the group will be removed. + Returns None if the group or key does not exist. """ - del self._data[group][key] + group_data = self._data.pop(group, None) + if group_data is None: + return None + value = group_data.pop(key, None) + if group_data: + self._data[group] = group_data + return value - def remove_group(self, group: str): + def remove_group(self, group: str) -> Optional[dict[str, bytes]]: """ - Removes the group and all its keys and values. + Removes the group and all its keys and values and returns the data. + Returns None if the group does not exist. 
""" - del self._data[group] + return self._data.pop(group, None) def clear(self): """ diff --git a/packages/pynumaflow/pynumaflow/shared/asynciter.py b/packages/pynumaflow/pynumaflow/shared/asynciter.py index 91155b93..87bdec84 100644 --- a/packages/pynumaflow/pynumaflow/shared/asynciter.py +++ b/packages/pynumaflow/pynumaflow/shared/asynciter.py @@ -1,17 +1,21 @@ import asyncio +from typing import Generic, TypeVar +from collections.abc import AsyncIterator from pynumaflow._constants import STREAM_EOF +T = TypeVar("T") -class NonBlockingIterator: + +class NonBlockingIterator(Generic[T]): """An Async Interator backed by a queue""" __slots__ = "_queue" - def __init__(self, size=0): - self._queue = asyncio.Queue(maxsize=size) + def __init__(self, size: int = 0) -> None: + self._queue: asyncio.Queue[T] = asyncio.Queue(maxsize=size) - async def read_iterator(self): + async def read_iterator(self) -> AsyncIterator[T]: item = await self._queue.get() while True: if item == STREAM_EOF: @@ -19,5 +23,5 @@ async def read_iterator(self): yield item item = await self._queue.get() - async def put(self, item): + async def put(self, item: T) -> None: await self._queue.put(item) diff --git a/packages/pynumaflow/pynumaflow/sinker/__init__.py b/packages/pynumaflow/pynumaflow/sinker/__init__.py index 322b5e81..1064d96a 100644 --- a/packages/pynumaflow/pynumaflow/sinker/__init__.py +++ b/packages/pynumaflow/pynumaflow/sinker/__init__.py @@ -2,6 +2,16 @@ from pynumaflow.sinker.server import SinkServer +from pynumaflow._metadata import UserMetadata, SystemMetadata from pynumaflow.sinker._dtypes import Response, Responses, Datum, Sinker -__all__ = ["Response", "Responses", "Datum", "Sinker", "SinkAsyncServer", "SinkServer"] +__all__ = [ + "Response", + "Responses", + "Datum", + "Sinker", + "SinkAsyncServer", + "SinkServer", + "UserMetadata", + "SystemMetadata", +] diff --git a/packages/pynumaflow/pynumaflow/sourcer/__init__.py b/packages/pynumaflow/pynumaflow/sourcer/__init__.py index 
013fac4e..73b62735 100644 --- a/packages/pynumaflow/pynumaflow/sourcer/__init__.py +++ b/packages/pynumaflow/pynumaflow/sourcer/__init__.py @@ -10,6 +10,7 @@ Sourcer, SourceCallable, ) +from pynumaflow._metadata import UserMetadata from pynumaflow.sourcer.async_server import SourceAsyncServer __all__ = [ @@ -24,4 +25,5 @@ "Sourcer", "SourceAsyncServer", "SourceCallable", + "UserMetadata", ] diff --git a/packages/pynumaflow/pynumaflow/sourcer/_dtypes.py b/packages/pynumaflow/pynumaflow/sourcer/_dtypes.py index 42c30515..faae8692 100644 --- a/packages/pynumaflow/pynumaflow/sourcer/_dtypes.py +++ b/packages/pynumaflow/pynumaflow/sourcer/_dtypes.py @@ -5,6 +5,7 @@ from datetime import datetime from typing import Callable, Optional +from pynumaflow._metadata import UserMetadata from pynumaflow.shared.asynciter import NonBlockingIterator @@ -56,15 +57,17 @@ class Message: event_time: event time of the message, usually extracted from the payload. keys: []string keys for vertex (optional) headers: dict of headers for the message (optional) + user_metadata: metadata for the message (optional) """ - __slots__ = ("_payload", "_offset", "_event_time", "_keys", "_headers") + __slots__ = ("_payload", "_offset", "_event_time", "_keys", "_headers", "_user_metadata") _payload: bytes _offset: Offset _event_time: datetime _keys: list[str] _headers: dict[str, str] + _user_metadata: UserMetadata def __init__( self, @@ -73,6 +76,7 @@ def __init__( event_time: datetime, keys: list[str] = None, headers: Optional[dict[str, str]] = None, + user_metadata: Optional[UserMetadata] = None, ): """ Creates a Message object to send value to a vertex. 
@@ -82,6 +86,7 @@ def __init__( self._event_time = event_time self._keys = keys or [] self._headers = headers or {} + self._user_metadata = user_metadata or UserMetadata() @property def payload(self) -> bytes: @@ -103,6 +108,11 @@ def event_time(self) -> datetime: def headers(self) -> dict[str, str]: return self._headers + @property + def user_metadata(self) -> UserMetadata: + """Returns the user metadata of the message.""" + return self._user_metadata + @dataclass(init=False) class ReadRequest: diff --git a/packages/pynumaflow/pynumaflow/sourcer/servicer/async_servicer.py b/packages/pynumaflow/pynumaflow/sourcer/servicer/async_servicer.py index bb8e58fd..a9da9340 100644 --- a/packages/pynumaflow/pynumaflow/sourcer/servicer/async_servicer.py +++ b/packages/pynumaflow/pynumaflow/sourcer/servicer/async_servicer.py @@ -1,5 +1,6 @@ import asyncio from collections.abc import AsyncIterator +from typing import Union from google.protobuf import timestamp_pb2 as _timestamp_pb2 from google.protobuf import empty_pb2 as _empty_pb2 @@ -9,6 +10,7 @@ from pynumaflow.sourcer import ReadRequest, Offset, NackRequest, AckRequest, SourceCallable from pynumaflow.proto.sourcer import source_pb2 from pynumaflow.proto.sourcer import source_pb2_grpc +from pynumaflow.sourcer._dtypes import Message from pynumaflow.types import NumaflowServicerContext from pynumaflow._constants import _LOGGER, STREAM_EOF, ERR_UDF_EXCEPTION_STRING @@ -31,7 +33,7 @@ def _create_ack_handshake_response(): ) -def _create_read_response(response): +def _create_read_response(response: Message): """Create a read response from the handler result.""" event_time_timestamp = _timestamp_pb2.Timestamp() event_time_timestamp.FromDatetime(dt=response.event_time) @@ -41,6 +43,7 @@ def _create_read_response(response): offset=response.offset.as_dict, event_time=event_time_timestamp, headers=response.headers, + metadata=response.user_metadata._to_proto(), ) status = source_pb2.ReadResponse.Status(eot=False, 
code=source_pb2.ReadResponse.Status.SUCCESS) return source_pb2.ReadResponse(result=result, status=status) @@ -98,7 +101,7 @@ async def ReadFn( async for req in request_iterator: # create an iterator to be provided to the user function where the responses will # be streamed - niter = NonBlockingIterator() + niter: NonBlockingIterator[Union[Message, Exception]] = NonBlockingIterator() riter = niter.read_iterator() task = asyncio.create_task(self.__invoke_read(req, niter)) # Save a reference to the result of this function, to avoid a @@ -121,7 +124,9 @@ async def ReadFn( _LOGGER.critical("User-Defined Source ReadFn error", exc_info=True) await handle_async_error(context, err, ERR_UDF_EXCEPTION_STRING) - async def __invoke_read(self, req, niter): + async def __invoke_read( + self, req: source_pb2.ReadRequest, niter: NonBlockingIterator[Union[Message, Exception]] + ): """Invoke the read handler and manage the iterator.""" try: await self.__source_read_handler( diff --git a/packages/pynumaflow/tests/source/test_async_source.py b/packages/pynumaflow/tests/source/test_async_source.py index e255b7c8..ae867709 100644 --- a/packages/pynumaflow/tests/source/test_async_source.py +++ b/packages/pynumaflow/tests/source/test_async_source.py @@ -1,13 +1,15 @@ import asyncio +from collections.abc import Iterator import logging import threading import unittest import grpc from google.protobuf import empty_pb2 as _empty_pb2 -from grpc.aio._server import Server +from grpc.aio import Server from pynumaflow import setup_logging +from pynumaflow._metadata import _user_and_system_metadata_from_proto from pynumaflow.proto.sourcer import source_pb2_grpc, source_pb2 from pynumaflow.sourcer import ( SourceAsyncServer, @@ -100,13 +102,13 @@ def test_read_source(self) -> None: stub = source_pb2_grpc.SourceStub(channel) request = read_req_source_fn() - generator_response = None try: - generator_response = stub.ReadFn( + generator_response: Iterator[source_pb2.ReadResponse] = stub.ReadFn( 
request_iterator=request_generator(1, request, "read") ) except grpc.RpcError as e: logging.error(e) + raise counter = 0 first = True @@ -139,6 +141,20 @@ def test_read_source(self) -> None: r.result.offset.partition_id, ) + print(r.result) + (user_metadata, sys_metadata) = _user_and_system_metadata_from_proto( + r.result.metadata + ) + print(user_metadata) + + self.assertCountEqual(user_metadata.groups(), ["custom_info", "test_info"]) + self.assertCountEqual( + user_metadata.keys("custom_info"), ["custom_key", "custom_key2"] + ) + self.assertIsNone(user_metadata.value("custom_info", "test_key")) + self.assertEqual(user_metadata.value("custom_info", "custom_key"), b"custom_value") + self.assertEqual(user_metadata.value("test_info", "test_key"), b"test_value") + self.assertFalse(first) self.assertTrue(last) diff --git a/packages/pynumaflow/tests/source/test_async_source_err.py b/packages/pynumaflow/tests/source/test_async_source_err.py index 0ce4fa3d..2c8d64d7 100644 --- a/packages/pynumaflow/tests/source/test_async_source_err.py +++ b/packages/pynumaflow/tests/source/test_async_source_err.py @@ -6,7 +6,7 @@ import grpc -from grpc.aio._server import Server +from grpc.aio import Server from pynumaflow import setup_logging from pynumaflow.proto.sourcer import source_pb2_grpc diff --git a/packages/pynumaflow/tests/source/utils.py b/packages/pynumaflow/tests/source/utils.py index 7baa91b7..ff5cbc11 100644 --- a/packages/pynumaflow/tests/source/utils.py +++ b/packages/pynumaflow/tests/source/utils.py @@ -1,8 +1,7 @@ from collections.abc import Iterable from pynumaflow.shared.asynciter import NonBlockingIterator - -from pynumaflow.sourcer import ReadRequest, Message +from pynumaflow.sourcer import ReadRequest, Message, UserMetadata from pynumaflow.sourcer import ( AckRequest, PendingResponse, @@ -29,9 +28,19 @@ async def read_handler(self, datum: ReadRequest, output: NonBlockingIterator): keys = ["test_key"] offset = mock_offset() event_time = mock_event_time() + metadata 
= UserMetadata() + metadata.add("custom_info", "custom_key", b"custom_value") + metadata.add("custom_info", "custom_key2", b"custom_value2") + metadata.add("test_info", "test_key", b"test_value") for i in range(10): await output.put( - Message(payload=payload, keys=keys, offset=offset, event_time=event_time) + Message( + payload=payload, + keys=keys, + offset=offset, + event_time=event_time, + user_metadata=metadata, + ) ) async def ack_handler(self, ack_request: AckRequest): @@ -98,7 +107,13 @@ async def read_handler(self, datum: ReadRequest, output: NonBlockingIterator): event_time = mock_event_time() for i in range(datum.num_records): await output.put( - Message(payload=payload, keys=keys, offset=offset, event_time=event_time) + Message( + payload=payload, + keys=keys, + offset=offset, + event_time=event_time, + user_metadata=UserMetadata(), + ) ) raise RuntimeError("Got a runtime error from read handler.") From a74fb116d3830f76732d8c53d5630a15489d5cc4 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 13:54:46 +0530 Subject: [PATCH 15/23] Fix formatting Signed-off-by: Sreekanth --- packages/pynumaflow/pynumaflow/_metadata.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/_metadata.py b/packages/pynumaflow/pynumaflow/_metadata.py index 5b03429d..243b135d 100644 --- a/packages/pynumaflow/pynumaflow/_metadata.py +++ b/packages/pynumaflow/pynumaflow/_metadata.py @@ -112,7 +112,8 @@ def __len__(self) -> int: def value(self, group: str, key: str) -> Optional[bytes]: """ - Returns the value for a given group and key. If the group or key does not exist, returns None. + Returns the value for a given group and key. + If the group or key does not exist, returns None. 
""" value = self._data.get(group) if value is None: @@ -133,7 +134,8 @@ def set_group(self, group: str, data: dict[str, bytes]): def remove(self, group: str, key: str) -> Optional[bytes]: """ - Removes the key and its value for a given group and returns the value. If this key is the only key in the group, the group will be removed. + Removes the key and its value for a given group and returns the value. + If this key is the only key in the group, the group will be removed. Returns None if the group or key does not exist. """ group_data = self._data.pop(group, None) From 426cea109b1121ecdee9a765680a7998b9a5c3ae Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 14:08:06 +0530 Subject: [PATCH 16/23] unit tests for metadata types Signed-off-by: Sreekanth --- packages/pynumaflow/tests/test_metadata.py | 432 +++++++++++++++++++++ 1 file changed, 432 insertions(+) create mode 100644 packages/pynumaflow/tests/test_metadata.py diff --git a/packages/pynumaflow/tests/test_metadata.py b/packages/pynumaflow/tests/test_metadata.py new file mode 100644 index 00000000..ecfa63bc --- /dev/null +++ b/packages/pynumaflow/tests/test_metadata.py @@ -0,0 +1,432 @@ +import pytest +from pynumaflow._metadata import UserMetadata, SystemMetadata + + +class TestSystemMetadata: + """Tests for SystemMetadata (read-only)""" + + def test_empty_system_metadata(self): + """Test empty SystemMetadata""" + metadata = SystemMetadata() + assert metadata.groups() == [] + assert metadata.keys("any_group") == [] + assert metadata.value("any_group", "any_key") is None + + def test_system_metadata_groups(self): + """Test groups() method""" + metadata = SystemMetadata(_data={ + "group1": {"key1": b"value1"}, + "group2": {"key2": b"value2"} + }) + groups = metadata.groups() + assert len(groups) == 2 + assert "group1" in groups + assert "group2" in groups + + def test_system_metadata_keys_existing_group(self): + """Test keys() method with existing group""" + metadata = SystemMetadata(_data={ + 
"group1": {"key1": b"value1", "key2": b"value2"} + }) + keys = metadata.keys("group1") + assert len(keys) == 2 + assert "key1" in keys + assert "key2" in keys + + def test_system_metadata_keys_nonexistent_group(self): + """Test keys() method with non-existent group""" + metadata = SystemMetadata(_data={"group1": {"key1": b"value1"}}) + keys = metadata.keys("nonexistent") + assert keys == [] + + def test_system_metadata_value_existing(self): + """Test value() method with existing group and key""" + metadata = SystemMetadata(_data={ + "group1": {"key1": b"value1"} + }) + assert metadata.value("group1", "key1") == b"value1" + + def test_system_metadata_value_nonexistent_group(self): + """Test value() method with non-existent group""" + metadata = SystemMetadata(_data={"group1": {"key1": b"value1"}}) + assert metadata.value("nonexistent", "key1") is None + + def test_system_metadata_value_nonexistent_key(self): + """Test value() method with non-existent key""" + metadata = SystemMetadata(_data={"group1": {"key1": b"value1"}}) + assert metadata.value("group1", "nonexistent") is None + + def test_system_metadata_value_nonexistent_both(self): + """Test value() method with non-existent group and key""" + metadata = SystemMetadata() + assert metadata.value("nonexistent", "nonexistent") is None + + +class TestUserMetadata: + """Tests for UserMetadata (read-write)""" + + def test_empty_user_metadata(self): + """Test empty UserMetadata""" + metadata = UserMetadata() + assert metadata.groups() == [] + assert len(metadata) == 0 + assert metadata.keys("any_group") is None + assert metadata.value("any_group", "any_key") is None + + def test_user_metadata_groups(self): + """Test groups() method""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group2", "key2", b"value2") + groups = metadata.groups() + assert len(groups) == 2 + assert "group1" in groups + assert "group2" in groups + + def test_user_metadata_keys_existing_group(self): + 
"""Test keys() method with existing group""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group1", "key2", b"value2") + keys = metadata.keys("group1") + assert keys is not None + assert len(keys) == 2 + assert "key1" in keys + assert "key2" in keys + + def test_user_metadata_keys_nonexistent_group(self): + """Test keys() method with non-existent group returns None""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + assert metadata.keys("nonexistent") is None + + def test_user_metadata_contains(self): + """Test __contains__ method""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + assert "group1" in metadata + assert "nonexistent" not in metadata + + def test_user_metadata_getitem_existing(self): + """Test __getitem__ with existing group""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group1", "key2", b"value2") + group_data = metadata["group1"] + assert group_data == {"key1": b"value1", "key2": b"value2"} + + def test_user_metadata_getitem_nonexistent_raises_keyerror(self): + """Test __getitem__ raises KeyError for non-existent group""" + metadata = UserMetadata() + with pytest.raises(KeyError): + _ = metadata["nonexistent"] + + def test_user_metadata_setitem(self): + """Test __setitem__ method""" + metadata = UserMetadata() + metadata["group1"] = {"key1": b"value1", "key2": b"value2"} + assert metadata["group1"] == {"key1": b"value1", "key2": b"value2"} + assert len(metadata) == 1 + + def test_user_metadata_setitem_overwrite(self): + """Test __setitem__ overwrites existing group""" + metadata = UserMetadata() + metadata["group1"] = {"key1": b"value1"} + metadata["group1"] = {"key2": b"value2"} + assert metadata["group1"] == {"key2": b"value2"} + assert len(metadata) == 1 + + def test_user_metadata_delitem_existing(self): + """Test __delitem__ with existing group""" + metadata = UserMetadata() + metadata.add("group1", "key1", 
b"value1") + metadata.add("group2", "key2", b"value2") + del metadata["group1"] + assert "group1" not in metadata + assert "group2" in metadata + assert len(metadata) == 1 + + def test_user_metadata_delitem_nonexistent_raises_keyerror(self): + """Test __delitem__ raises KeyError for non-existent group""" + metadata = UserMetadata() + with pytest.raises(KeyError): + del metadata["nonexistent"] + + def test_user_metadata_len(self): + """Test __len__ method""" + metadata = UserMetadata() + assert len(metadata) == 0 + metadata.add("group1", "key1", b"value1") + assert len(metadata) == 1 + metadata.add("group2", "key2", b"value2") + assert len(metadata) == 2 + metadata.add("group1", "key3", b"value3") + assert len(metadata) == 2 # Still 2 groups + + def test_user_metadata_value_existing(self): + """Test value() method with existing group and key""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + assert metadata.value("group1", "key1") == b"value1" + + def test_user_metadata_value_nonexistent_group(self): + """Test value() method with non-existent group returns None""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + assert metadata.value("nonexistent", "key1") is None + + def test_user_metadata_value_nonexistent_key(self): + """Test value() method with non-existent key returns None""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + assert metadata.value("group1", "nonexistent") is None + + def test_user_metadata_value_nonexistent_both(self): + """Test value() method with non-existent group and key returns None""" + metadata = UserMetadata() + assert metadata.value("nonexistent", "nonexistent") is None + + def test_user_metadata_add_new_group(self): + """Test add() method creates new group""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + assert "group1" in metadata + assert metadata.value("group1", "key1") == b"value1" + + def 
test_user_metadata_add_to_existing_group(self): + """Test add() method adds to existing group""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group1", "key2", b"value2") + assert metadata.value("group1", "key1") == b"value1" + assert metadata.value("group1", "key2") == b"value2" + assert len(metadata["group1"]) == 2 + + def test_user_metadata_add_overwrites_existing_key(self): + """Test add() method overwrites existing key""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group1", "key1", b"new_value") + assert metadata.value("group1", "key1") == b"new_value" + assert len(metadata["group1"]) == 1 + + def test_user_metadata_set_group(self): + """Test set_group() method""" + metadata = UserMetadata() + metadata.set_group("group1", {"key1": b"value1", "key2": b"value2"}) + assert metadata["group1"] == {"key1": b"value1", "key2": b"value2"} + + def test_user_metadata_set_group_overwrites(self): + """Test set_group() overwrites existing group""" + metadata = UserMetadata() + metadata.set_group("group1", {"key1": b"value1"}) + metadata.set_group("group1", {"key2": b"value2"}) + assert metadata["group1"] == {"key2": b"value2"} + assert "key1" not in metadata["group1"] + + def test_user_metadata_remove_existing_key(self): + """Test remove() method with existing key""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group1", "key2", b"value2") + removed_value = metadata.remove("group1", "key1") + assert removed_value == b"value1" + assert metadata.value("group1", "key1") is None + assert metadata.value("group1", "key2") == b"value2" + assert "group1" in metadata # Group still exists + + def test_user_metadata_remove_last_key_removes_group(self): + """Test remove() removes group when last key is removed""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + removed_value = metadata.remove("group1", "key1") + assert removed_value == 
b"value1" + assert "group1" not in metadata + + def test_user_metadata_remove_nonexistent_group(self): + """Test remove() with non-existent group returns None""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + removed_value = metadata.remove("nonexistent", "key1") + assert removed_value is None + + def test_user_metadata_remove_nonexistent_key(self): + """Test remove() with non-existent key returns None and keeps group""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group1", "key2", b"value2") + removed_value = metadata.remove("group1", "nonexistent") + assert removed_value is None + # Group remains because it still has other keys + assert "group1" in metadata + assert metadata.value("group1", "key1") == b"value1" + assert metadata.value("group1", "key2") == b"value2" + + def test_user_metadata_remove_nonexistent_key_single_key_group(self): + """Test remove() with non-existent key on single-key group keeps the group""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + removed_value = metadata.remove("group1", "nonexistent") + assert removed_value is None + # Group remains even though it only has one key and we tried to remove a different one + assert "group1" in metadata + assert metadata.value("group1", "key1") == b"value1" + + def test_user_metadata_remove_nonexistent_both(self): + """Test remove() with non-existent group and key returns None""" + metadata = UserMetadata() + removed_value = metadata.remove("nonexistent", "nonexistent") + assert removed_value is None + + def test_user_metadata_remove_group_existing(self): + """Test remove_group() with existing group""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group1", "key2", b"value2") + metadata.add("group2", "key3", b"value3") + removed_data = metadata.remove_group("group1") + assert removed_data == {"key1": b"value1", "key2": b"value2"} + assert "group1" not in metadata + 
assert "group2" in metadata + + def test_user_metadata_remove_group_nonexistent(self): + """Test remove_group() with non-existent group returns None""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + removed_data = metadata.remove_group("nonexistent") + assert removed_data is None + assert "group1" in metadata + + def test_user_metadata_clear(self): + """Test clear() method""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group2", "key2", b"value2") + metadata.add("group3", "key3", b"value3") + assert len(metadata) == 3 + metadata.clear() + assert len(metadata) == 0 + assert metadata.groups() == [] + + def test_user_metadata_clear_empty(self): + """Test clear() on empty metadata""" + metadata = UserMetadata() + metadata.clear() + assert len(metadata) == 0 + + def test_user_metadata_to_proto(self): + """Test _to_proto() method""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"value1") + metadata.add("group1", "key2", b"value2") + metadata.add("group2", "key3", b"value3") + proto = metadata._to_proto() + assert len(proto.user_metadata) == 2 + assert "group1" in proto.user_metadata + assert "group2" in proto.user_metadata + assert proto.user_metadata["group1"].key_value["key1"] == b"value1" + assert proto.user_metadata["group1"].key_value["key2"] == b"value2" + assert proto.user_metadata["group2"].key_value["key3"] == b"value3" + + def test_user_metadata_to_proto_empty(self): + """Test _to_proto() with empty metadata""" + metadata = UserMetadata() + proto = metadata._to_proto() + assert len(proto.user_metadata) == 0 + + def test_user_metadata_complex_scenario(self): + """Test complex scenario with multiple operations""" + metadata = UserMetadata() + + # Add multiple groups + metadata.add("headers", "content-type", b"application/json") + metadata.add("headers", "authorization", b"Bearer token123") + metadata.add("metrics", "counter", b"42") + metadata.add("metrics", "timestamp", 
b"1234567890") + + assert len(metadata) == 2 + assert len(metadata["headers"]) == 2 + assert len(metadata["metrics"]) == 2 + + # Remove a key + metadata.remove("headers", "authorization") + assert len(metadata["headers"]) == 1 + assert metadata.value("headers", "authorization") is None + + # Remove entire group + removed = metadata.remove_group("metrics") + assert removed == {"counter": b"42", "timestamp": b"1234567890"} + assert "metrics" not in metadata + + # Add new group + metadata["config"] = {"setting1": b"value1", "setting2": b"value2"} + assert len(metadata) == 2 + + # Clear all + metadata.clear() + assert len(metadata) == 0 + + +class TestUserMetadataEdgeCases: + """Edge cases and special scenarios for UserMetadata""" + + def test_empty_group_name(self): + """Test with empty string as group name""" + metadata = UserMetadata() + metadata.add("", "key1", b"value1") + assert "" in metadata + assert metadata.value("", "key1") == b"value1" + + def test_empty_key_name(self): + """Test with empty string as key name""" + metadata = UserMetadata() + metadata.add("group1", "", b"value1") + assert metadata.value("group1", "") == b"value1" + + def test_empty_value(self): + """Test with empty bytes as value""" + metadata = UserMetadata() + metadata.add("group1", "key1", b"") + assert metadata.value("group1", "key1") == b"" + + def test_multiple_groups_with_same_keys(self): + """Test different groups can have the same key names""" + metadata = UserMetadata() + metadata.add("group1", "key", b"value1") + metadata.add("group2", "key", b"value2") + assert metadata.value("group1", "key") == b"value1" + assert metadata.value("group2", "key") == b"value2" + + def test_special_characters_in_names(self): + """Test special characters in group and key names""" + metadata = UserMetadata() + metadata.add("group-1", "key_1", b"value1") + metadata.add("group.2", "key:2", b"value2") + metadata.add("group@3", "key#3", b"value3") + assert len(metadata) == 3 + assert 
metadata.value("group-1", "key_1") == b"value1" + assert metadata.value("group.2", "key:2") == b"value2" + assert metadata.value("group@3", "key#3") == b"value3" + + def test_large_values(self): + """Test with large byte values""" + metadata = UserMetadata() + large_value = b"x" * 10000 + metadata.add("group1", "large_key", large_value) + assert metadata.value("group1", "large_key") == large_value + + def test_many_groups(self): + """Test with many groups""" + metadata = UserMetadata() + for i in range(100): + metadata.add(f"group{i}", f"key{i}", f"value{i}".encode()) + assert len(metadata) == 100 + assert metadata.value("group50", "key50") == b"value50" + + def test_many_keys_in_group(self): + """Test with many keys in a single group""" + metadata = UserMetadata() + for i in range(100): + metadata.add("group1", f"key{i}", f"value{i}".encode()) + assert len(metadata["group1"]) == 100 + assert metadata.value("group1", "key50") == b"value50" From 42cf41b7c426d1616afc5046e9eb5b4eddd02196 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 14:09:34 +0530 Subject: [PATCH 17/23] Fix formatting Signed-off-by: Sreekanth --- packages/pynumaflow/tests/test_metadata.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/packages/pynumaflow/tests/test_metadata.py b/packages/pynumaflow/tests/test_metadata.py index ecfa63bc..87ceef5d 100644 --- a/packages/pynumaflow/tests/test_metadata.py +++ b/packages/pynumaflow/tests/test_metadata.py @@ -14,10 +14,9 @@ def test_empty_system_metadata(self): def test_system_metadata_groups(self): """Test groups() method""" - metadata = SystemMetadata(_data={ - "group1": {"key1": b"value1"}, - "group2": {"key2": b"value2"} - }) + metadata = SystemMetadata( + _data={"group1": {"key1": b"value1"}, "group2": {"key2": b"value2"}} + ) groups = metadata.groups() assert len(groups) == 2 assert "group1" in groups @@ -25,9 +24,7 @@ def test_system_metadata_groups(self): def 
test_system_metadata_keys_existing_group(self): """Test keys() method with existing group""" - metadata = SystemMetadata(_data={ - "group1": {"key1": b"value1", "key2": b"value2"} - }) + metadata = SystemMetadata(_data={"group1": {"key1": b"value1", "key2": b"value2"}}) keys = metadata.keys("group1") assert len(keys) == 2 assert "key1" in keys @@ -41,9 +38,7 @@ def test_system_metadata_keys_nonexistent_group(self): def test_system_metadata_value_existing(self): """Test value() method with existing group and key""" - metadata = SystemMetadata(_data={ - "group1": {"key1": b"value1"} - }) + metadata = SystemMetadata(_data={"group1": {"key1": b"value1"}}) assert metadata.value("group1", "key1") == b"value1" def test_system_metadata_value_nonexistent_group(self): From 70ea54f3a07ff1d72260793edeb34baf9cf2bc78 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 14:18:00 +0530 Subject: [PATCH 18/23] more tests Signed-off-by: Sreekanth --- packages/pynumaflow/tests/map/test_async_mapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/pynumaflow/tests/map/test_async_mapper.py b/packages/pynumaflow/tests/map/test_async_mapper.py index 29a98694..59367079 100644 --- a/packages/pynumaflow/tests/map/test_async_mapper.py +++ b/packages/pynumaflow/tests/map/test_async_mapper.py @@ -45,6 +45,8 @@ async def async_map_handler(keys: list[str], datum: Datum) -> Messages: ) val = bytes(msg, encoding="utf-8") messages = Messages() + if datum.system_metadata.value("numaflow_version_info", "version") != b"1.0.0": + raise ValueError("System metadata version mismatch") messages.append(Message(str.encode(msg), keys=keys, user_metadata=datum.user_metadata)) return messages From d3e93b43234906b1ebff97db73965df40c393a8b Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 16:01:40 +0530 Subject: [PATCH 19/23] bug fix Signed-off-by: Sreekanth --- .../pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py b/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py index 6e7455b4..c9b7cff0 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py +++ b/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py @@ -4,6 +4,7 @@ from google.protobuf import empty_pb2 as _empty_pb2 from pynumaflow.shared.server import exit_on_error +from pynumaflow._metadata import _user_and_system_metadata_from_proto from pynumaflow._constants import NUM_THREADS_DEFAULT, STREAM_EOF, _LOGGER, ERR_UDF_EXCEPTION_STRING from pynumaflow.mapper._dtypes import MapSyncCallable, Datum, MapError @@ -101,12 +102,15 @@ def _invoke_map( result_queue: SyncIterator, ): try: + (user_metadata, system_metadata) = _user_and_system_metadata_from_proto(request.request.metadata) d = Datum( keys=list(request.request.keys), value=request.request.value, event_time=request.request.event_time.ToDatetime(), watermark=request.request.watermark.ToDatetime(), headers=dict(request.request.headers), + user_metadata=user_metadata, + system_metadata=system_metadata, ) responses = self.__map_handler(list(request.request.keys), d) @@ -117,6 +121,7 @@ def _invoke_map( keys=list(resp.keys), value=resp.value, tags=resp.tags, + metadata=resp.user_metadata._to_proto(), ) ) result_queue.put(map_pb2.MapResponse(results=results, id=request.id)) From b0c7e28d6669967220d745541fdf9cfcd64b1b62 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 21 Oct 2025 16:12:46 +0530 Subject: [PATCH 20/23] fix formatting Signed-off-by: Sreekanth --- .../pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py b/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py index c9b7cff0..4154acc8 100644 --- a/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py +++ 
b/packages/pynumaflow/pynumaflow/mapper/_servicer/_sync_servicer.py @@ -102,7 +102,9 @@ def _invoke_map( result_queue: SyncIterator, ): try: - (user_metadata, system_metadata) = _user_and_system_metadata_from_proto(request.request.metadata) + (user_metadata, system_metadata) = _user_and_system_metadata_from_proto( + request.request.metadata + ) d = Datum( keys=list(request.request.keys), value=request.request.value, From 4cfcb735c417e0ee80ddd4f17a192c7ebee22e79 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Fri, 7 Nov 2025 08:56:35 +0530 Subject: [PATCH 21/23] add_key, remove_key methods Signed-off-by: Sreekanth --- packages/pynumaflow/pynumaflow/_metadata.py | 10 +- packages/pynumaflow/tests/test_metadata.py | 124 ++++++++++---------- 2 files changed, 66 insertions(+), 68 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/_metadata.py b/packages/pynumaflow/pynumaflow/_metadata.py index 243b135d..39dcfe1b 100644 --- a/packages/pynumaflow/pynumaflow/_metadata.py +++ b/packages/pynumaflow/pynumaflow/_metadata.py @@ -69,13 +69,11 @@ def groups(self) -> list[str]: """ return list(self._data.keys()) - def keys(self, group: str) -> Optional[list[str]]: + def keys(self, group: str) -> list[str]: """ Returns the list of keys for a given group. """ - keys = self._data.get(group) - if keys is None: - return None + keys = self._data.get(group) or {} return list(keys.keys()) def __contains__(self, group: str) -> bool: @@ -120,7 +118,7 @@ def value(self, group: str, key: str) -> Optional[bytes]: return None return value.get(key) - def add(self, group: str, key: str, value: bytes): + def add_key(self, group: str, key: str, value: bytes): """ Adds the value for a given group and key. 
""" @@ -132,7 +130,7 @@ def set_group(self, group: str, data: dict[str, bytes]): """ self._data[group] = data - def remove(self, group: str, key: str) -> Optional[bytes]: + def remove_key(self, group: str, key: str) -> Optional[bytes]: """ Removes the key and its value for a given group and returns the value. If this key is the only key in the group, the group will be removed. diff --git a/packages/pynumaflow/tests/test_metadata.py b/packages/pynumaflow/tests/test_metadata.py index 87ceef5d..19cdcee4 100644 --- a/packages/pynumaflow/tests/test_metadata.py +++ b/packages/pynumaflow/tests/test_metadata.py @@ -65,14 +65,14 @@ def test_empty_user_metadata(self): metadata = UserMetadata() assert metadata.groups() == [] assert len(metadata) == 0 - assert metadata.keys("any_group") is None + assert metadata.keys("any_group") == [] assert metadata.value("any_group", "any_key") is None def test_user_metadata_groups(self): """Test groups() method""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group2", "key2", b"value2") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group2", "key2", b"value2") groups = metadata.groups() assert len(groups) == 2 assert "group1" in groups @@ -81,8 +81,8 @@ def test_user_metadata_groups(self): def test_user_metadata_keys_existing_group(self): """Test keys() method with existing group""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group1", "key2", b"value2") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group1", "key2", b"value2") keys = metadata.keys("group1") assert keys is not None assert len(keys) == 2 @@ -92,21 +92,21 @@ def test_user_metadata_keys_existing_group(self): def test_user_metadata_keys_nonexistent_group(self): """Test keys() method with non-existent group returns None""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - assert metadata.keys("nonexistent") is None + 
metadata.add_key("group1", "key1", b"value1") + assert metadata.keys("nonexistent") == [] def test_user_metadata_contains(self): """Test __contains__ method""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") + metadata.add_key("group1", "key1", b"value1") assert "group1" in metadata assert "nonexistent" not in metadata def test_user_metadata_getitem_existing(self): """Test __getitem__ with existing group""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group1", "key2", b"value2") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group1", "key2", b"value2") group_data = metadata["group1"] assert group_data == {"key1": b"value1", "key2": b"value2"} @@ -134,8 +134,8 @@ def test_user_metadata_setitem_overwrite(self): def test_user_metadata_delitem_existing(self): """Test __delitem__ with existing group""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group2", "key2", b"value2") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group2", "key2", b"value2") del metadata["group1"] assert "group1" not in metadata assert "group2" in metadata @@ -151,29 +151,29 @@ def test_user_metadata_len(self): """Test __len__ method""" metadata = UserMetadata() assert len(metadata) == 0 - metadata.add("group1", "key1", b"value1") + metadata.add_key("group1", "key1", b"value1") assert len(metadata) == 1 - metadata.add("group2", "key2", b"value2") + metadata.add_key("group2", "key2", b"value2") assert len(metadata) == 2 - metadata.add("group1", "key3", b"value3") + metadata.add_key("group1", "key3", b"value3") assert len(metadata) == 2 # Still 2 groups def test_user_metadata_value_existing(self): """Test value() method with existing group and key""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") + metadata.add_key("group1", "key1", b"value1") assert metadata.value("group1", "key1") == b"value1" def 
test_user_metadata_value_nonexistent_group(self): """Test value() method with non-existent group returns None""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") + metadata.add_key("group1", "key1", b"value1") assert metadata.value("nonexistent", "key1") is None def test_user_metadata_value_nonexistent_key(self): """Test value() method with non-existent key returns None""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") + metadata.add_key("group1", "key1", b"value1") assert metadata.value("group1", "nonexistent") is None def test_user_metadata_value_nonexistent_both(self): @@ -184,15 +184,15 @@ def test_user_metadata_value_nonexistent_both(self): def test_user_metadata_add_new_group(self): """Test add() method creates new group""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") + metadata.add_key("group1", "key1", b"value1") assert "group1" in metadata assert metadata.value("group1", "key1") == b"value1" def test_user_metadata_add_to_existing_group(self): """Test add() method adds to existing group""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group1", "key2", b"value2") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group1", "key2", b"value2") assert metadata.value("group1", "key1") == b"value1" assert metadata.value("group1", "key2") == b"value2" assert len(metadata["group1"]) == 2 @@ -200,8 +200,8 @@ def test_user_metadata_add_to_existing_group(self): def test_user_metadata_add_overwrites_existing_key(self): """Test add() method overwrites existing key""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group1", "key1", b"new_value") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group1", "key1", b"new_value") assert metadata.value("group1", "key1") == b"new_value" assert len(metadata["group1"]) == 1 @@ -222,9 +222,9 @@ def test_user_metadata_set_group_overwrites(self): def 
test_user_metadata_remove_existing_key(self): """Test remove() method with existing key""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group1", "key2", b"value2") - removed_value = metadata.remove("group1", "key1") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group1", "key2", b"value2") + removed_value = metadata.remove_key("group1", "key1") assert removed_value == b"value1" assert metadata.value("group1", "key1") is None assert metadata.value("group1", "key2") == b"value2" @@ -233,24 +233,24 @@ def test_user_metadata_remove_existing_key(self): def test_user_metadata_remove_last_key_removes_group(self): """Test remove() removes group when last key is removed""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - removed_value = metadata.remove("group1", "key1") + metadata.add_key("group1", "key1", b"value1") + removed_value = metadata.remove_key("group1", "key1") assert removed_value == b"value1" assert "group1" not in metadata def test_user_metadata_remove_nonexistent_group(self): """Test remove() with non-existent group returns None""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - removed_value = metadata.remove("nonexistent", "key1") + metadata.add_key("group1", "key1", b"value1") + removed_value = metadata.remove_key("nonexistent", "key1") assert removed_value is None def test_user_metadata_remove_nonexistent_key(self): """Test remove() with non-existent key returns None and keeps group""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group1", "key2", b"value2") - removed_value = metadata.remove("group1", "nonexistent") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group1", "key2", b"value2") + removed_value = metadata.remove_key("group1", "nonexistent") assert removed_value is None # Group remains because it still has other keys assert "group1" in metadata @@ -260,8 +260,8 @@ def 
test_user_metadata_remove_nonexistent_key(self): def test_user_metadata_remove_nonexistent_key_single_key_group(self): """Test remove() with non-existent key on single-key group keeps the group""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - removed_value = metadata.remove("group1", "nonexistent") + metadata.add_key("group1", "key1", b"value1") + removed_value = metadata.remove_key("group1", "nonexistent") assert removed_value is None # Group remains even though it only has one key and we tried to remove a different one assert "group1" in metadata @@ -270,15 +270,15 @@ def test_user_metadata_remove_nonexistent_key_single_key_group(self): def test_user_metadata_remove_nonexistent_both(self): """Test remove() with non-existent group and key returns None""" metadata = UserMetadata() - removed_value = metadata.remove("nonexistent", "nonexistent") + removed_value = metadata.remove_key("nonexistent", "nonexistent") assert removed_value is None def test_user_metadata_remove_group_existing(self): """Test remove_group() with existing group""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group1", "key2", b"value2") - metadata.add("group2", "key3", b"value3") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group1", "key2", b"value2") + metadata.add_key("group2", "key3", b"value3") removed_data = metadata.remove_group("group1") assert removed_data == {"key1": b"value1", "key2": b"value2"} assert "group1" not in metadata @@ -287,7 +287,7 @@ def test_user_metadata_remove_group_existing(self): def test_user_metadata_remove_group_nonexistent(self): """Test remove_group() with non-existent group returns None""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") + metadata.add_key("group1", "key1", b"value1") removed_data = metadata.remove_group("nonexistent") assert removed_data is None assert "group1" in metadata @@ -295,9 +295,9 @@ def 
test_user_metadata_remove_group_nonexistent(self): def test_user_metadata_clear(self): """Test clear() method""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group2", "key2", b"value2") - metadata.add("group3", "key3", b"value3") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group2", "key2", b"value2") + metadata.add_key("group3", "key3", b"value3") assert len(metadata) == 3 metadata.clear() assert len(metadata) == 0 @@ -312,9 +312,9 @@ def test_user_metadata_clear_empty(self): def test_user_metadata_to_proto(self): """Test _to_proto() method""" metadata = UserMetadata() - metadata.add("group1", "key1", b"value1") - metadata.add("group1", "key2", b"value2") - metadata.add("group2", "key3", b"value3") + metadata.add_key("group1", "key1", b"value1") + metadata.add_key("group1", "key2", b"value2") + metadata.add_key("group2", "key3", b"value3") proto = metadata._to_proto() assert len(proto.user_metadata) == 2 assert "group1" in proto.user_metadata @@ -334,17 +334,17 @@ def test_user_metadata_complex_scenario(self): metadata = UserMetadata() # Add multiple groups - metadata.add("headers", "content-type", b"application/json") - metadata.add("headers", "authorization", b"Bearer token123") - metadata.add("metrics", "counter", b"42") - metadata.add("metrics", "timestamp", b"1234567890") + metadata.add_key("headers", "content-type", b"application/json") + metadata.add_key("headers", "authorization", b"Bearer token123") + metadata.add_key("metrics", "counter", b"42") + metadata.add_key("metrics", "timestamp", b"1234567890") assert len(metadata) == 2 assert len(metadata["headers"]) == 2 assert len(metadata["metrics"]) == 2 # Remove a key - metadata.remove("headers", "authorization") + metadata.remove_key("headers", "authorization") assert len(metadata["headers"]) == 1 assert metadata.value("headers", "authorization") is None @@ -368,36 +368,36 @@ class TestUserMetadataEdgeCases: def test_empty_group_name(self): 
"""Test with empty string as group name""" metadata = UserMetadata() - metadata.add("", "key1", b"value1") + metadata.add_key("", "key1", b"value1") assert "" in metadata assert metadata.value("", "key1") == b"value1" def test_empty_key_name(self): """Test with empty string as key name""" metadata = UserMetadata() - metadata.add("group1", "", b"value1") + metadata.add_key("group1", "", b"value1") assert metadata.value("group1", "") == b"value1" def test_empty_value(self): """Test with empty bytes as value""" metadata = UserMetadata() - metadata.add("group1", "key1", b"") + metadata.add_key("group1", "key1", b"") assert metadata.value("group1", "key1") == b"" def test_multiple_groups_with_same_keys(self): """Test different groups can have the same key names""" metadata = UserMetadata() - metadata.add("group1", "key", b"value1") - metadata.add("group2", "key", b"value2") + metadata.add_key("group1", "key", b"value1") + metadata.add_key("group2", "key", b"value2") assert metadata.value("group1", "key") == b"value1" assert metadata.value("group2", "key") == b"value2" def test_special_characters_in_names(self): """Test special characters in group and key names""" metadata = UserMetadata() - metadata.add("group-1", "key_1", b"value1") - metadata.add("group.2", "key:2", b"value2") - metadata.add("group@3", "key#3", b"value3") + metadata.add_key("group-1", "key_1", b"value1") + metadata.add_key("group.2", "key:2", b"value2") + metadata.add_key("group@3", "key#3", b"value3") assert len(metadata) == 3 assert metadata.value("group-1", "key_1") == b"value1" assert metadata.value("group.2", "key:2") == b"value2" @@ -407,14 +407,14 @@ def test_large_values(self): """Test with large byte values""" metadata = UserMetadata() large_value = b"x" * 10000 - metadata.add("group1", "large_key", large_value) + metadata.add_key("group1", "large_key", large_value) assert metadata.value("group1", "large_key") == large_value def test_many_groups(self): """Test with many groups""" metadata = 
UserMetadata() for i in range(100): - metadata.add(f"group{i}", f"key{i}", f"value{i}".encode()) + metadata.add_key(f"group{i}", f"key{i}", f"value{i}".encode()) assert len(metadata) == 100 assert metadata.value("group50", "key50") == b"value50" @@ -422,6 +422,6 @@ def test_many_keys_in_group(self): """Test with many keys in a single group""" metadata = UserMetadata() for i in range(100): - metadata.add("group1", f"key{i}", f"value{i}".encode()) + metadata.add_key("group1", f"key{i}", f"value{i}".encode()) assert len(metadata["group1"]) == 100 assert metadata.value("group1", "key50") == b"value50" From c3a757c37146bd1c995115f648506c1ebe0bae71 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Fri, 7 Nov 2025 09:00:47 +0530 Subject: [PATCH 22/23] remove set_group method Signed-off-by: Sreekanth --- packages/pynumaflow/pynumaflow/_metadata.py | 6 ------ packages/pynumaflow/tests/test_metadata.py | 14 -------------- 2 files changed, 20 deletions(-) diff --git a/packages/pynumaflow/pynumaflow/_metadata.py b/packages/pynumaflow/pynumaflow/_metadata.py index 39dcfe1b..d64eb402 100644 --- a/packages/pynumaflow/pynumaflow/_metadata.py +++ b/packages/pynumaflow/pynumaflow/_metadata.py @@ -124,12 +124,6 @@ def add_key(self, group: str, key: str, value: bytes): """ self._data.setdefault(group, {})[key] = value - def set_group(self, group: str, data: dict[str, bytes]): - """ - Sets the data for a given group. - """ - self._data[group] = data - def remove_key(self, group: str, key: str) -> Optional[bytes]: """ Removes the key and its value for a given group and returns the value. 
diff --git a/packages/pynumaflow/tests/test_metadata.py b/packages/pynumaflow/tests/test_metadata.py index 19cdcee4..049c7d6e 100644 --- a/packages/pynumaflow/tests/test_metadata.py +++ b/packages/pynumaflow/tests/test_metadata.py @@ -205,20 +205,6 @@ def test_user_metadata_add_overwrites_existing_key(self): assert metadata.value("group1", "key1") == b"new_value" assert len(metadata["group1"]) == 1 - def test_user_metadata_set_group(self): - """Test set_group() method""" - metadata = UserMetadata() - metadata.set_group("group1", {"key1": b"value1", "key2": b"value2"}) - assert metadata["group1"] == {"key1": b"value1", "key2": b"value2"} - - def test_user_metadata_set_group_overwrites(self): - """Test set_group() overwrites existing group""" - metadata = UserMetadata() - metadata.set_group("group1", {"key1": b"value1"}) - metadata.set_group("group1", {"key2": b"value2"}) - assert metadata["group1"] == {"key2": b"value2"} - assert "key1" not in metadata["group1"] - def test_user_metadata_remove_existing_key(self): """Test remove() method with existing key""" metadata = UserMetadata() From 876c78a0260f087c4720a2bf61dc60b227980e7d Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Fri, 7 Nov 2025 09:36:18 +0530 Subject: [PATCH 23/23] fix tests Signed-off-by: Sreekanth --- packages/pynumaflow/tests/source/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/pynumaflow/tests/source/utils.py b/packages/pynumaflow/tests/source/utils.py index ff5cbc11..04daaa61 100644 --- a/packages/pynumaflow/tests/source/utils.py +++ b/packages/pynumaflow/tests/source/utils.py @@ -29,9 +29,9 @@ async def read_handler(self, datum: ReadRequest, output: NonBlockingIterator): offset = mock_offset() event_time = mock_event_time() metadata = UserMetadata() - metadata.add("custom_info", "custom_key", b"custom_value") - metadata.add("custom_info", "custom_key2", b"custom_value2") - metadata.add("test_info", "test_key", b"test_value") + 
metadata.add_key("custom_info", "custom_key", b"custom_value") + metadata.add_key("custom_info", "custom_key2", b"custom_value2") + metadata.add_key("test_info", "test_key", b"test_value") for i in range(10): await output.put( Message(