Skip to content

Commit d39d5bb

Browse files
committed
backport test fixes
1 parent 0abf737 commit d39d5bb

File tree

4 files changed

+17
-22
lines changed


justfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,14 @@ syncMinMaxFlag := if sync == "min" {
7777
# sync project dependencies - set sync=false to skip in other target deps
7878
sync:
7979
[ "{{ sync }}" != "false" ] && \
80-
uv sync --extra=google --extra=anthropic --extra=cohere {{ syncMinMaxFlag }} || true
80+
uv sync --extra=google --extra=anthropic --extra=cohere --extra=mcp {{ syncMinMaxFlag }} || true
8181

8282
alias sync-local := sync
8383

8484
# sync project dependencies related to fenic cloud
8585
sync-cloud:
8686
[ "{{ sync }}" != "false" ] && \
87-
uv sync --extra=cloud --extra=google --extra=anthropic --extra=cohere {{ syncMinMaxFlag }} || true
87+
uv sync --extra=cloud --extra=google --extra=anthropic --extra=cohere --extra=mcp {{ syncMinMaxFlag }} || true
8888

8989
# sync rust changes (via maturin)
9090
sync-rust:

src/fenic/api/mcp/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def create_mcp_server(
4343
automated_tool_generation.table_names,
4444
session,
4545
tool_group_name=automated_tool_generation.tool_group_name,
46-
sql_max_rows=automated_tool_generation.max_result_rows)
46+
max_result_limit=automated_tool_generation.max_result_rows)
4747
)
4848
if not (parameterized_tools or dynamic_tools):
4949
raise ConfigurationError("No tools provided. Either provide tools or set generate_automated_tools=True and provide datasets.")

src/fenic/api/mcp/tool_generation.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def auto_generate_core_tools_from_tables(
9191
session: Session,
9292
*,
9393
tool_group_name: str,
94-
sql_max_rows: int = 100,
94+
max_result_limit: int = 100,
9595
) -> List[DynamicToolDefinition]:
9696
"""Generate Schema/Profile/Read/Search/Analyze tools from catalog tables.
9797
@@ -102,7 +102,7 @@ def auto_generate_core_tools_from_tables(
102102
datasets,
103103
session,
104104
tool_group_name=tool_group_name,
105-
sql_max_rows=sql_max_rows,
105+
max_result_limit=max_result_limit,
106106
)
107107

108108

@@ -217,15 +217,12 @@ def _auto_generate_read_tool(
217217
result_limit: int = 50,
218218
) -> DynamicToolDefinition:
219219
"""Create a read tool over one or many datasets."""
220-
# avoid import issue from __init__
221-
from fastmcp.server.context import Context
222220
if len(datasets) == 0:
223221
raise ConfigurationError("Cannot create read tool: no datasets provided.")
224222

225223
name_to_df: Dict[str, DataFrame] = {d.table_name: d.df for d in datasets}
226224

227225
async def _validate_columns(
228-
ctx: Context,
229226
available_columns: List[str],
230227
original_columns: List[str],
231228
filtered_columns: List[str],
@@ -234,10 +231,9 @@ async def _validate_columns(
234231
raise ValidationError(f"Column(s) {original_columns} not found. Available: {', '.join(available_columns)}")
235232
if len(filtered_columns) != len(original_columns):
236233
invalid_columns = [c for c in original_columns if c not in filtered_columns]
237-
await ctx.warning(f"Column(s) {invalid_columns} not found. Available: {', '.join(available_columns)}")
234+
raise ValidationError(f"Column(s) {invalid_columns} not found. Available: {', '.join(available_columns)}")
238235

239236
async def read_func(
240-
ctx: Context, # MCP server context allows us to log warnings back to the client.
241237
df_name: Annotated[str, "Dataset name to read rows from."],
242238
limit: Annotated[Optional[int], "Max rows to read within a page"] = result_limit,
243239
offset: Annotated[Optional[int], "Row offset to start from (requires order_by)"] = None,
@@ -259,11 +255,11 @@ async def read_func(
259255
exclude_columns = [c.strip() for c in exclude_columns.split(",") if c.strip()] if exclude_columns else None
260256
if include_columns:
261257
filtered_columns = [c for c in include_columns if c in available_columns]
262-
await _validate_columns(ctx, available_columns, include_columns, filtered_columns)
258+
await _validate_columns(available_columns, include_columns, filtered_columns)
263259
df = df.select(*filtered_columns)
264260
if exclude_columns:
265261
filtered_columns = [c for c in available_columns if c not in exclude_columns]
266-
await _validate_columns(ctx, available_columns, exclude_columns, filtered_columns)
262+
await _validate_columns(available_columns, exclude_columns, filtered_columns)
267263
df = df.select(*filtered_columns)
268264
# Apply paging (handles offset+order_by via SQL and optional limit)
269265
return _apply_paging(
@@ -466,15 +462,15 @@ def _auto_generate_sql_tool(
466462
"""Create an Analyze tool that executes DuckDB SELECT SQL across datasets.
467463
468464
- JOINs between the provided datasets are allowed.
469-
- DDL/DML, CTEs, subqueries, UNION, and multiple top-level queries are not allowed (enforced upstream).
465+
- DDL/DML and multiple top-level queries are not allowed (enforced in `session.sql()`).
470466
- The callable returns a LogicalPlan gathered later by the MCP server.
471467
"""
472468
if len(datasets) == 0:
473469
raise ConfigurationError("Cannot create SQL tool: no datasets provided.")
474470

475471
async def analyze_func(
476472
full_sql: Annotated[
477-
str, "Full SELECT SQL. Refer to DataFrames by name in braces, e.g., `SELECT * FROM {orders}`. JOINs between the provided datasets are allowed. SQL dialect: DuckDB. DDL/DML, CTEs, subqueries, UNION, and multiple top-level queries are not allowed"]
473+
str, "Full SELECT SQL. Refer to DataFrames by name in braces, e.g., `SELECT * FROM {orders}`. JOINs between the provided datasets are allowed. SQL dialect: DuckDB. DDL/DML and multiple top-level queries are not allowed"]
478474
) -> LogicalPlan:
479475
return session.sql(full_sql.strip(), **{spec.table_name: spec.df for spec in datasets})._logical_plan
480476

@@ -809,7 +805,7 @@ def _auto_generate_core_tools(
809805
session: Session,
810806
*,
811807
tool_group_name: str,
812-
sql_max_rows: int = 100,
808+
max_result_limit: int = 100,
813809
) -> List[DynamicToolDefinition]:
814810
"""Generate core tools spanning all datasets: Schema, Profile, Analyze.
815811
@@ -858,7 +854,7 @@ def _auto_generate_core_tools(
858854
"Available datasets:",
859855
group_desc,
860856
]),
861-
result_limit=sql_max_rows,
857+
result_limit=max_result_limit,
862858
)
863859

864860
search_summary_tool = _auto_generate_search_summary_tool(
@@ -880,7 +876,7 @@ def _auto_generate_core_tools(
880876
"Available datasets:",
881877
group_desc,
882878
]),
883-
result_limit=sql_max_rows,
879+
result_limit=max_result_limit,
884880
)
885881

886882
analyze_tool = _auto_generate_sql_tool(
@@ -889,14 +885,14 @@ def _auto_generate_core_tools(
889885
tool_name=f"{tool_group_name} - Analyze",
890886
tool_description="\n\n".join([
891887
"Execute Read-Only (SELECT) SQL over the provided datasets using fenic's SQL support.",
892-
"DDL/DML, CTEs, subqueries, UNION, and multiple top-level queries are not allowed (enforced upstream).",
888+
"DDL/DML and multiple top-level queries are not allowed.",
893889
"For text search, prefer regular expressions (REGEXP_MATCHES()/REGEXP_EXTRACT()).",
894890
"Paging: use ORDER BY to define row order, then LIMIT and OFFSET for pages.",
895891
"JOINs between datasets are allowed. Refer to datasets by name in braces, e.g., {orders}.",
896892
"Below, the available datasets are listed, by name and description.",
897893
group_desc,
898894
]),
899-
result_limit=sql_max_rows,
895+
result_limit=max_result_limit,
900896
)
901897

902898
return [schema_tool, profile_tool, read_tool, search_summary_tool, search_content_tool, analyze_tool]

tests/api/mcp/test_tool_generation.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33

44
import pytest
55

6-
pytest.importorskip("fastmcp")
7-
86
from fenic.api.mcp.tool_generation import (
97
auto_generate_core_tools_from_tables,
108
fenic_tool,
@@ -27,6 +25,7 @@ def test_auto_generate_core_tools_from_tables_requires_descriptions(local_sessio
2725

2826

2927
def test_auto_generate_core_tools_from_tables_builds_tools(local_session):
28+
pytest.importorskip("fastmcp")
3029
create_table_with_rows(local_session, "t1", [1, 2, 3], description="table one")
3130
create_table_with_rows(local_session, "t2", [10, 20], description="table two")
3231

@@ -53,7 +52,7 @@ def test_auto_generate_core_tools_from_tables_builds_tools(local_session):
5352

5453
# Sanity check: the Schema tool's callable returns a LogicalPlan we can collect
5554
schema_tool = next(t for t in tools if t.name.endswith("Schema"))
56-
plan = schema_tool.func() # type: ignore[call-arg]
55+
plan = asyncio.run(schema_tool.func()) # type: ignore[call-arg]
5756
pl_df, _ = local_session._session_state.execution.collect(plan)
5857
assert set(pl_df.columns) == {"dataset", "schema"}
5958
assert sorted(pl_df.get_column("dataset").to_list()) == ["t1", "t2"]

0 commit comments

Comments (0)