Skip to content

Commit d39d5bb

Browse files
committed
backport test fixes
1 parent 0abf737 commit d39d5bb

File tree

4 files changed

+17
-22
lines changed


justfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,14 @@ syncMinMaxFlag := if sync == "min" {
7777
# sync project dependencies - set sync=false to skip in other target deps
7878
sync:
7979
[ "{{ sync }}" != "false" ] && \
80-
uv sync --extra=google --extra=anthropic --extra=cohere {{ syncMinMaxFlag }} || true
80+
uv sync --extra=google --extra=anthropic --extra=cohere --extra=mcp {{ syncMinMaxFlag }} || true
8181

8282
alias sync-local := sync
8383

8484
# sync project dependencies related to fenic cloud
8585
sync-cloud:
8686
[ "{{ sync }}" != "false" ] && \
87-
uv sync --extra=cloud --extra=google --extra=anthropic --extra=cohere {{ syncMinMaxFlag }} || true
87+
uv sync --extra=cloud --extra=google --extra=anthropic --extra=cohere --extra=mcp {{ syncMinMaxFlag }} || true
8888

8989
# sync rust changes (via maturin)
9090
sync-rust:

src/fenic/api/mcp/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def create_mcp_server(
4343
automated_tool_generation.table_names,
4444
session,
4545
tool_group_name=automated_tool_generation.tool_group_name,
46-
sql_max_rows=automated_tool_generation.max_result_rows)
46+
max_result_limit=automated_tool_generation.max_result_rows)
4747
)
4848
if not (parameterized_tools or dynamic_tools):
4949
raise ConfigurationError("No tools provided. Either provide tools or set generate_automated_tools=True and provide datasets.")

src/fenic/api/mcp/tool_generation.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def auto_generate_core_tools_from_tables(
9191
session: Session,
9292
*,
9393
tool_group_name: str,
94-
sql_max_rows: int = 100,
94+
max_result_limit: int = 100,
9595
) -> List[DynamicToolDefinition]:
9696
"""Generate Schema/Profile/Read/Search/Analyze tools from catalog tables.
9797
@@ -102,7 +102,7 @@ def auto_generate_core_tools_from_tables(
102102
datasets,
103103
session,
104104
tool_group_name=tool_group_name,
105-
sql_max_rows=sql_max_rows,
105+
max_result_limit=max_result_limit,
106106
)
107107

108108

@@ -217,15 +217,12 @@ def _auto_generate_read_tool(
217217
result_limit: int = 50,
218218
) -> DynamicToolDefinition:
219219
"""Create a read tool over one or many datasets."""
220-
# avoid import issue from __init__
221-
from fastmcp.server.context import Context
222220
if len(datasets) == 0:
223221
raise ConfigurationError("Cannot create read tool: no datasets provided.")
224222

225223
name_to_df: Dict[str, DataFrame] = {d.table_name: d.df for d in datasets}
226224

227225
async def _validate_columns(
228-
ctx: Context,
229226
available_columns: List[str],
230227
original_columns: List[str],
231228
filtered_columns: List[str],
@@ -234,10 +231,9 @@ async def _validate_columns(
234231
raise ValidationError(f"Column(s) {original_columns} not found. Available: {', '.join(available_columns)}")
235232
if len(filtered_columns) != len(original_columns):
236233
invalid_columns = [c for c in original_columns if c not in filtered_columns]
237-
await ctx.warning(f"Column(s) {invalid_columns} not found. Available: {', '.join(available_columns)}")
234+
raise ValidationError(f"Column(s) {invalid_columns} not found. Available: {', '.join(available_columns)}")
238235

239236
async def read_func(
240-
ctx: Context, # MCP server context allows us to log warnings back to the client.
241237
df_name: Annotated[str, "Dataset name to read rows from."],
242238
limit: Annotated[Optional[int], "Max rows to read within a page"] = result_limit,
243239
offset: Annotated[Optional[int], "Row offset to start from (requires order_by)"] = None,
@@ -259,11 +255,11 @@ async def read_func(
259255
exclude_columns = [c.strip() for c in exclude_columns.split(",") if c.strip()] if exclude_columns else None
260256
if include_columns:
261257
filtered_columns = [c for c in include_columns if c in available_columns]
262-
await _validate_columns(ctx, available_columns, include_columns, filtered_columns)
258+
await _validate_columns(available_columns, include_columns, filtered_columns)
263259
df = df.select(*filtered_columns)
264260
if exclude_columns:
265261
filtered_columns = [c for c in available_columns if c not in exclude_columns]
266-
await _validate_columns(ctx, available_columns, exclude_columns, filtered_columns)
262+
await _validate_columns(available_columns, exclude_columns, filtered_columns)
267263
df = df.select(*filtered_columns)
268264
# Apply paging (handles offset+order_by via SQL and optional limit)
269265
return _apply_paging(
@@ -466,15 +462,15 @@ def _auto_generate_sql_tool(
466462
"""Create an Analyze tool that executes DuckDB SELECT SQL across datasets.
467463
468464
- JOINs between the provided datasets are allowed.
469-
- DDL/DML, CTEs, subqueries, UNION, and multiple top-level queries are not allowed (enforced upstream).
465+
- DDL/DML and multiple top-level queries are not allowed (enforced in `session.sql()`).
470466
- The callable returns a LogicalPlan gathered later by the MCP server.
471467
"""
472468
if len(datasets) == 0:
473469
raise ConfigurationError("Cannot create SQL tool: no datasets provided.")
474470

475471
async def analyze_func(
476472
full_sql: Annotated[
477-
str, "Full SELECT SQL. Refer to DataFrames by name in braces, e.g., `SELECT * FROM {orders}`. JOINs between the provided datasets are allowed. SQL dialect: DuckDB. DDL/DML, CTEs, subqueries, UNION, and multiple top-level queries are not allowed"]
473+
str, "Full SELECT SQL. Refer to DataFrames by name in braces, e.g., `SELECT * FROM {orders}`. JOINs between the provided datasets are allowed. SQL dialect: DuckDB. DDL/DML and multiple top-level queries are not allowed"]
478474
) -> LogicalPlan:
479475
return session.sql(full_sql.strip(), **{spec.table_name: spec.df for spec in datasets})._logical_plan
480476

@@ -809,7 +805,7 @@ def _auto_generate_core_tools(
809805
session: Session,
810806
*,
811807
tool_group_name: str,
812-
sql_max_rows: int = 100,
808+
max_result_limit: int = 100,
813809
) -> List[DynamicToolDefinition]:
814810
"""Generate core tools spanning all datasets: Schema, Profile, Analyze.
815811
@@ -858,7 +854,7 @@ def _auto_generate_core_tools(
858854
"Available datasets:",
859855
group_desc,
860856
]),
861-
result_limit=sql_max_rows,
857+
result_limit=max_result_limit,
862858
)
863859

864860
search_summary_tool = _auto_generate_search_summary_tool(
@@ -880,7 +876,7 @@ def _auto_generate_core_tools(
880876
"Available datasets:",
881877
group_desc,
882878
]),
883-
result_limit=sql_max_rows,
879+
result_limit=max_result_limit,
884880
)
885881

886882
analyze_tool = _auto_generate_sql_tool(
@@ -889,14 +885,14 @@ def _auto_generate_core_tools(
889885
tool_name=f"{tool_group_name} - Analyze",
890886
tool_description="\n\n".join([
891887
"Execute Read-Only (SELECT) SQL over the provided datasets using fenic's SQL support.",
892-
"DDL/DML, CTEs, subqueries, UNION, and multiple top-level queries are not allowed (enforced upstream).",
888+
"DDL/DML and multiple top-level queries are not allowed.",
893889
"For text search, prefer regular expressions (REGEXP_MATCHES()/REGEXP_EXTRACT()).",
894890
"Paging: use ORDER BY to define row order, then LIMIT and OFFSET for pages.",
895891
"JOINs between datasets are allowed. Refer to datasets by name in braces, e.g., {orders}.",
896892
"Below, the available datasets are listed, by name and description.",
897893
group_desc,
898894
]),
899-
result_limit=sql_max_rows,
895+
result_limit=max_result_limit,
900896
)
901897

902898
return [schema_tool, profile_tool, read_tool, search_summary_tool, search_content_tool, analyze_tool]

tests/api/mcp/test_tool_generation.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33

44
import pytest
55

6-
pytest.importorskip("fastmcp")
7-
86
from fenic.api.mcp.tool_generation import (
97
auto_generate_core_tools_from_tables,
108
fenic_tool,
@@ -27,6 +25,7 @@ def test_auto_generate_core_tools_from_tables_requires_descriptions(local_sessio
2725

2826

2927
def test_auto_generate_core_tools_from_tables_builds_tools(local_session):
28+
pytest.importorskip("fastmcp")
3029
create_table_with_rows(local_session, "t1", [1, 2, 3], description="table one")
3130
create_table_with_rows(local_session, "t2", [10, 20], description="table two")
3231

@@ -53,7 +52,7 @@ def test_auto_generate_core_tools_from_tables_builds_tools(local_session):
5352

5453
# Sanity check: the Schema tool's callable returns a LogicalPlan we can collect
5554
schema_tool = next(t for t in tools if t.name.endswith("Schema"))
56-
plan = schema_tool.func() # type: ignore[call-arg]
55+
plan = asyncio.run(schema_tool.func()) # type: ignore[call-arg]
5756
pl_df, _ = local_session._session_state.execution.collect(plan)
5857
assert set(pl_df.columns) == {"dataset", "schema"}
5958
assert sorted(pl_df.get_column("dataset").to_list()) == ["t1", "t2"]

0 commit comments

Comments (0)