From 623411db9a5060883b8dfc613a55c475021aa0b7 Mon Sep 17 00:00:00 2001 From: mpak123 Date: Wed, 22 Oct 2025 12:31:28 -0400 Subject: [PATCH 1/7] Completed implementation --- pandas/core/generic.py | 2 + pandas/io/sql.py | 110 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6d703c398f055..ae3c54d60bb3b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2798,6 +2798,7 @@ def to_sql( chunksize: int | None = None, dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, + hints: dict[str, str | list[str]] | None = None, ) -> int | None: """ Write records stored in a DataFrame to a SQL database. @@ -3044,6 +3045,7 @@ def to_sql( chunksize=chunksize, dtype=dtype, method=method, + hints=hints, ) @final diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 7a8ba2e146bcf..65b984db101f5 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -18,7 +18,6 @@ datetime, time, ) -from functools import partial import re from typing import ( TYPE_CHECKING, @@ -232,6 +231,41 @@ def _wrap_result_adbc( return df +def _process_sql_hints( + hints: dict[str, str | list[str]] | None, dialect_name: str +) -> str | None: + if hints is None or not hints: + return None + + dialect_name = dialect_name.lower() + + hint_value = None + for key, value in hints.items(): + if key.lower() == dialect_name: + hint_value = value + break + + if hint_value is None: + return None + + if isinstance(hint_value, list): + hint_str = " ".join(hint_value) + else: + hint_str = str(hint_value) + + if hint_str.strip().startswith("/*+") and hint_str.strip().endswith("*/"): + return hint_str.strip() + + if dialect_name == "oracle": + return f"/*+ {hint_str} */" + elif dialect_name == "mysql": + return hint_str + elif dialect_name == "mssql": + return hint_str + else: + return f"/*+ {hint_str} */" + + # ----------------------------------------------------------------------------- # -- Read and write to DataFrames @@ -750,6 +784,7 @@ def to_sql( dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, engine: str = "auto", + hints: dict[str, str | list[str]] | None = None, **engine_kwargs, ) -> int | None: """ @@ -852,6 +887,7 @@ def to_sql( dtype=dtype, method=method, engine=engine, + hints=hints, **engine_kwargs, ) @@ -998,7 +1034,13 @@ def create(self) -> None: else: self._execute_create() - def _execute_insert(self, conn, keys: list[str], data_iter) -> int: + def _execute_insert( + self, + conn, + keys: list[str], + data_iter, + hint_str: str | None = None, + ) -> int: """ Execute SQL statement inserting data @@ -1010,11 +1052,23 @@ def _execute_insert(self, conn, keys: list[str], data_iter) -> int: data_iter : generator of list Each item contains a list of values to be inserted """ - data = [dict(zip(keys, row, strict=True)) for row in data_iter] - result = self.pd_sql.execute(self.table.insert(), data) + data = [dict(zip(keys, row, strict=False)) for row in data_iter] + + if hint_str: + stmt = self.table.insert().prefix_with(hint_str) + else: + stmt = self.table.insert() + + result = self.pd_sql.execute(stmt, data) return result.rowcount - def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int: + def _execute_insert_multi( + self, + conn, + keys: list[str], + data_iter, + hint_str: str | None = None, + ) -> int: """ Alternative to _execute_insert for DBs support multi-value INSERT. @@ -1023,11 +1077,15 @@ def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int: but performance degrades quickly with increase of columns. """ - from sqlalchemy import insert - data = [dict(zip(keys, row, strict=True)) for row in data_iter] - stmt = insert(self.table).values(data) + data = [dict(zip(keys, row, strict=False)) for row in data_iter] + + if hint_str: + stmt = insert(self.table).values(data).prefix_with(hint_str) + else: + stmt = insert(self.table).values(data) + result = self.pd_sql.execute(stmt) return result.rowcount @@ -1084,6 +1142,8 @@ def insert( self, chunksize: int | None = None, method: Literal["multi"] | Callable | None = None, + hints: dict[str, str | list[str]] | None = None, + dialect_name: str | None = None, ) -> int | None: # set insert method if method is None: @@ -1091,7 +1151,11 @@ def insert( elif method == "multi": exec_insert = self._execute_insert_multi elif callable(method): - exec_insert = partial(method, self) + + def callable_wrapper(conn, keys, data_iter, hint_str=None): + return method(self, conn, keys, data_iter) + + exec_insert = callable_wrapper else: raise ValueError(f"Invalid parameter `method`: {method}") @@ -1108,6 +1172,9 @@ def insert( raise ValueError("chunksize argument should be non-zero") chunks = (nrows // chunksize) + 1 + + hint_str = _process_sql_hints(hints, dialect_name) if dialect_name else None + total_inserted = None with self.pd_sql.run_transaction() as conn: for i in range(chunks): @@ -1119,7 +1186,7 @@ def insert( chunk_iter = zip( *(arr[start_i:end_i] for arr in data_list), strict=True ) - num_inserted = exec_insert(conn, keys, chunk_iter) + num_inserted = exec_insert(conn, keys, chunk_iter, hint_str) # GH 46891 if num_inserted is not None: if total_inserted is None: @@ -1503,6 +1570,7 @@ def to_sql( chunksize: int | None = None, dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, + hints: dict[str, str | list[str]] | None = None, engine: str = "auto", **engine_kwargs, ) -> int | None: @@ -1539,6 +1607,8 @@ def insert_records( schema=None, chunksize: int | None = None, method=None, + hints: dict[str, str | list[str]] | None = None, + dialect_name: str | None = None, **engine_kwargs, ) -> int | None: """ @@ -1563,6 +1633,8 @@ def insert_records( schema=None, chunksize: int | None = None, method=None, + hints: dict[str, str | list[str]] | None = None, + dialect_name: str | None = None, **engine_kwargs, ) -> int | None: from sqlalchemy import exc @@ -1975,6 +2047,7 @@ def to_sql( dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, engine: str = "auto", + hints: dict[str, str | list[str]] | None = None, **engine_kwargs, ) -> int | None: """ @@ -2047,6 +2120,8 @@ def to_sql( schema=schema, chunksize=chunksize, method=method, + hints=hints, + dialect_name=self.con.dialect.name, **engine_kwargs, ) @@ -2339,6 +2414,7 @@ def to_sql( dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, engine: str = "auto", + hints: dict[str, str | list[str]] | None = None, **engine_kwargs, ) -> int | None: """ @@ -2388,6 +2464,8 @@ def to_sql( raise NotImplementedError( "engine != 'auto' not implemented for ADBC drivers" ) + if hints: + raise NotImplementedError("'hints' is not implemented for ADBC drivers") if schema: table_name = f"{schema}.{name}" @@ -2569,7 +2647,7 @@ def insert_statement(self, *, num_rows: int) -> str: ) return insert_statement - def _execute_insert(self, conn, keys, data_iter) -> int: + def _execute_insert(self, conn, keys, data_iter, hints) -> int: from sqlite3 import Error data_list = list(data_iter) @@ -2579,7 +2657,7 @@ def _execute_insert(self, conn, keys, data_iter) -> int: raise DatabaseError("Execution failed") from exc return conn.rowcount - def _execute_insert_multi(self, conn, keys, data_iter) -> int: + def _execute_insert_multi(self, conn, keys, data_iter, hints) -> int: data_list = list(data_iter) flattened_data = [x for row in data_list for x in row] conn.execute(self.insert_statement(num_rows=len(data_list)), flattened_data) @@ -2816,6 +2894,7 @@ def to_sql( dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, engine: str = "auto", + hints: dict[str, str | list[str]] | None = None, **engine_kwargs, ) -> int | None: """ @@ -2857,6 +2936,13 @@ def to_sql( Details and a sample callable implementation can be found in the section :ref:`insert method `. """ + if hints: + warnings.warn( + "SQL hints are not supported for SQLite and will be ignored.", + UserWarning, + stacklevel=find_stack_level(), + ) + if dtype: if not is_dict_like(dtype): # error: Value expression in dictionary comprehension has incompatible From 787f1fa869fc10787a009a1f48e7566c2c562c7d Mon Sep 17 00:00:00 2001 From: mpak123 Date: Wed, 22 Oct 2025 12:48:59 -0400 Subject: [PATCH 2/7] Added tests --- pandas/tests/io/test_sql.py | 182 ++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 5865c46b4031e..a3619623912c9 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -4398,3 +4398,185 @@ def test_xsqlite_if_exists(sqlite_buildin): (5, "E"), ] drop_table(table_name, sqlite_buildin) + + +# ----------------------------------------------------------------------------- +# -- Testing SQL Hints + + +class TestProcessSQLHints: + """Tests for _process_sql_hints helper function.""" + + def test_process_sql_hints_oracle_list(self): + """Test hint processing with Oracle dialect and list input.""" + hints = {"oracle": ["APPEND", "PARALLEL"]} + result = sql._process_sql_hints(hints, "oracle") + assert result == "/*+ APPEND PARALLEL */" + + def test_process_sql_hints_oracle_string(self): + """Test hint processing with Oracle dialect and string input.""" + hints = {"oracle": "APPEND PARALLEL"} + result = sql._process_sql_hints(hints, "oracle") + assert result == "/*+ APPEND PARALLEL */" + + def test_process_sql_hints_preformatted(self): + """Test that pre-formatted hints are returned as-is.""" + hints = {"oracle": "/*+ APPEND PARALLEL */"} + result = sql._process_sql_hints(hints, "oracle") + assert result == "/*+ APPEND PARALLEL */" + + def test_process_sql_hints_case_insensitive(self): + """Test that dialect names are case-insensitive.""" + hints = {"ORACLE": ["APPEND"]} + result = sql._process_sql_hints(hints, "oracle") + assert result == "/*+ APPEND */" + + hints = {"oracle": ["APPEND"]} + result = sql._process_sql_hints(hints, "ORACLE") + assert result == "/*+ APPEND */" + + def test_process_sql_hints_no_match(self): + """Test that None is returned when dialect doesn't match.""" + hints = {"mysql": "HIGH_PRIORITY"} + result = sql._process_sql_hints(hints, "oracle") + assert result is None + + def test_process_sql_hints_none(self): + """Test that None input returns None.""" + result = sql._process_sql_hints(None, "oracle") + assert result is None + + def test_process_sql_hints_empty_dict(self): + """Test that empty dict returns None.""" + result = sql._process_sql_hints({}, "oracle") + assert result is None + + def test_process_sql_hints_mysql(self): + """Test hint processing for MySQL dialect.""" + hints = {"mysql": "HIGH_PRIORITY"} + result = sql._process_sql_hints(hints, "mysql") + assert result == "HIGH_PRIORITY" + + def test_process_sql_hints_mssql(self): + """Test hint processing for SQL Server dialect.""" + hints = {"mssql": "TABLOCK"} + result = sql._process_sql_hints(hints, "mssql") + assert result == "TABLOCK" + + +@pytest.mark.parametrize("conn", sqlalchemy_connectable) +def test_to_sql_with_hints_parameter(conn, test_frame1, request): + """Test that to_sql accepts hints parameter without error.""" + conn = request.getfixturevalue(conn) + + with pandasSQL_builder(conn, need_transaction=True) as pandasSQL: + pandasSQL.to_sql( + test_frame1, "test_hints", hints={"oracle": ["APPEND"]}, if_exists="replace" + ) + assert pandasSQL.has_table("test_hints") + assert count_rows(conn, "test_hints") == len(test_frame1) + + +@pytest.mark.parametrize("conn", sqlalchemy_connectable) +def test_to_sql_hints_none_default(conn, test_frame1, request): + """Test that hints defaults to None and doesn't break existing code.""" + conn = request.getfixturevalue(conn) + + with pandasSQL_builder(conn, need_transaction=True) as pandasSQL: + pandasSQL.to_sql(test_frame1, "test_no_hints") + assert pandasSQL.has_table("test_no_hints") + assert count_rows(conn, "test_no_hints") == len(test_frame1) + + +@pytest.mark.parametrize("conn", sqlalchemy_connectable) +def test_to_sql_hints_with_method(conn, test_frame1, request): + """Test that hints work alongside custom method parameter.""" + conn = request.getfixturevalue(conn) + + check = [] + + def sample(pd_table, conn, keys, data_iter): + check.append(1) + data = [dict(zip(keys, row)) for row in data_iter] + conn.execute(pd_table.table.insert(), data) + + with pandasSQL_builder(conn, need_transaction=True) as pandasSQL: + pandasSQL.to_sql( + test_frame1, + "test_hints_method", + method=sample, + hints={"oracle": ["APPEND"]}, + ) + assert pandasSQL.has_table("test_hints_method") + + assert check == [1] + assert count_rows(conn, "test_hints_method") == len(test_frame1) + + +@pytest.mark.parametrize("conn", sqlalchemy_connectable) +@pytest.mark.parametrize("method", [None, "multi"]) +def test_to_sql_hints_with_different_methods(conn, method, test_frame1, request): + """Test hints work with different insertion methods.""" + conn = request.getfixturevalue(conn) + + with pandasSQL_builder(conn, need_transaction=True) as pandasSQL: + pandasSQL.to_sql( + test_frame1, + "test_hints_methods", + method=method, + hints={"oracle": ["APPEND", "PARALLEL"]}, + if_exists="replace", + ) + assert pandasSQL.has_table("test_hints_methods") + + assert count_rows(conn, "test_hints_methods") == len(test_frame1) + + +@pytest.mark.parametrize("conn", sqlalchemy_connectable) +def test_to_sql_hints_multidb_dict(conn, test_frame1, request): + """Test that multi-database hints dict works (only matching dialect used).""" + conn = request.getfixturevalue(conn) + + hints = { + "oracle": ["APPEND", "PARALLEL"], + "mysql": "HIGH_PRIORITY", + "postgresql": "some_pg_hint", + "sqlite": "ignored", + } + + with pandasSQL_builder(conn, need_transaction=True) as pandasSQL: + pandasSQL.to_sql( + test_frame1, "test_multidb_hints", hints=hints, if_exists="replace" + ) + assert pandasSQL.has_table("test_multidb_hints") + + assert count_rows(conn, "test_multidb_hints") == len(test_frame1) + + +def test_to_sql_hints_adbc_not_supported(sqlite_adbc_conn, test_frame1): + """Test that ADBC connections raise NotImplementedError for hints.""" + pytest.importorskip("adbc_driver_manager.dbapi") + + df = test_frame1.copy() + msg = "'hints' is not implemented for ADBC drivers" + + with pytest.raises(NotImplementedError, match=msg): + df.to_sql("test", sqlite_adbc_conn, hints={"oracle": ["APPEND"]}) + + +def test_to_sql_hints_sqlite_builtin(sqlite_buildin, test_frame1): + """Test that sqlite builtin connection handles hints gracefully.""" + df = test_frame1.copy() + + msg = "SQL hints are not supported for SQLite and will be ignored." + with tm.assert_produces_warning(UserWarning, match=msg): + result = df.to_sql( + "test_sqlite_hints", + sqlite_buildin, + if_exists="replace", + hints={"sqlite": "IGNORED"}, + ) + + assert result == len(test_frame1) + result_df = pd.read_sql("SELECT * FROM test_sqlite_hints", sqlite_buildin) + assert len(result_df) == len(test_frame1) From 520567b09b707079c0ea1963d4053ea37b92bffd Mon Sep 17 00:00:00 2001 From: mpak123 Date: Wed, 22 Oct 2025 15:58:39 -0400 Subject: [PATCH 3/7] Modified implementation to take in user input as string, not list --- pandas/io/sql.py | 64 +++++++++++++++++-------------------- pandas/tests/io/test_sql.py | 54 ++++++++++++++++++------------- 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 65b984db101f5..af44b1b8ac1bc 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -231,39 +231,16 @@ def _wrap_result_adbc( return df -def _process_sql_hints( - hints: dict[str, str | list[str]] | None, dialect_name: str -) -> str | None: - if hints is None or not hints: +def _process_sql_hints(hints: dict[str, str] | None, dialect_name: str) -> str | None: + if hints is None: return None dialect_name = dialect_name.lower() - - hint_value = None for key, value in hints.items(): if key.lower() == dialect_name: - hint_value = value - break - - if hint_value is None: - return None + return value - if isinstance(hint_value, list): - hint_str = " ".join(hint_value) - else: - hint_str = str(hint_value) - - if hint_str.strip().startswith("/*+") and hint_str.strip().endswith("*/"): - return hint_str.strip() - - if dialect_name == "oracle": - return f"/*+ {hint_str} */" - elif dialect_name == "mysql": - return hint_str - elif dialect_name == "mssql": - return hint_str - else: - return f"/*+ {hint_str} */" + return None # ----------------------------------------------------------------------------- @@ -784,7 +761,7 @@ def to_sql( dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, engine: str = "auto", - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, **engine_kwargs, ) -> int | None: """ @@ -845,6 +822,23 @@ def to_sql( .. versionadded:: 1.3.0 + hints : dict[str, str], optional + SQL hints to optimize insertion performance, keyed by database dialect name. + Each hint should be a complete string formatted exactly as required by the + target database. The user is responsible for constructing dialect-specific + syntax. + + Examples: ``{'oracle': '/*+ APPEND PARALLEL(4) */'}`` + ``{'mysql': 'DELAYED'}`` + ``{'mssql': 'WITH (TABLOCK)'}`` + + .. note:: + - Hints are database-specific and will be ignored for unsupported dialects + - SQLite will raise a UserWarning (hints not supported) + - ADBC connections will raise NotImplementedError + + .. versionadded:: + **engine_kwargs Any additional kwargs are passed to the engine. @@ -1142,7 +1136,7 @@ def insert( self, chunksize: int | None = None, method: Literal["multi"] | Callable | None = None, - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, dialect_name: str | None = None, ) -> int | None: # set insert method @@ -1570,7 +1564,7 @@ def to_sql( chunksize: int | None = None, dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, engine: str = "auto", **engine_kwargs, ) -> int | None: @@ -1607,7 +1601,7 @@ def insert_records( schema=None, chunksize: int | None = None, method=None, - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, dialect_name: str | None = None, **engine_kwargs, ) -> int | None: @@ -1633,7 +1627,7 @@ def insert_records( schema=None, chunksize: int | None = None, method=None, - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, dialect_name: str | None = None, **engine_kwargs, ) -> int | None: @@ -2047,7 +2041,7 @@ def to_sql( dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, engine: str = "auto", - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, **engine_kwargs, ) -> int | None: """ @@ -2414,7 +2408,7 @@ def to_sql( dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, engine: str = "auto", - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, **engine_kwargs, ) -> int | None: """ @@ -2894,7 +2888,7 @@ def to_sql( dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, engine: str = "auto", - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, **engine_kwargs, ) -> int | None: """ diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a3619623912c9..21ac722246836 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -4407,31 +4407,25 @@ def test_xsqlite_if_exists(sqlite_buildin): class TestProcessSQLHints: """Tests for _process_sql_hints helper function.""" - def test_process_sql_hints_oracle_list(self): - """Test hint processing with Oracle dialect and list input.""" - hints = {"oracle": ["APPEND", "PARALLEL"]} - result = sql._process_sql_hints(hints, "oracle") - assert result == "/*+ APPEND PARALLEL */" - def test_process_sql_hints_oracle_string(self): - """Test hint processing with Oracle dialect and string input.""" - hints = {"oracle": "APPEND PARALLEL"} + """Test hint processing with Oracle dialect - user provides complete string.""" + hints = {"oracle": "/*+ APPEND PARALLEL */"} result = sql._process_sql_hints(hints, "oracle") assert result == "/*+ APPEND PARALLEL */" - def test_process_sql_hints_preformatted(self): - """Test that pre-formatted hints are returned as-is.""" - hints = {"oracle": "/*+ APPEND PARALLEL */"} + def test_process_sql_hints_oracle_simple(self): + """Test hint processing with simple Oracle hint string.""" + hints = {"oracle": "/*+ PARALLEL */"} result = sql._process_sql_hints(hints, "oracle") - assert result == "/*+ APPEND PARALLEL */" + assert result == "/*+ PARALLEL */" def test_process_sql_hints_case_insensitive(self): """Test that dialect names are case-insensitive.""" - hints = {"ORACLE": ["APPEND"]} + hints = {"ORACLE": "/*+ APPEND */"} result = sql._process_sql_hints(hints, "oracle") assert result == "/*+ APPEND */" - hints = {"oracle": ["APPEND"]} + hints = {"oracle": "/*+ APPEND */"} result = sql._process_sql_hints(hints, "ORACLE") assert result == "/*+ APPEND */" @@ -4459,9 +4453,20 @@ def test_process_sql_hints_mysql(self): def test_process_sql_hints_mssql(self): """Test hint processing for SQL Server dialect.""" - hints = {"mssql": "TABLOCK"} + hints = {"mssql": "WITH (TABLOCK)"} result = sql._process_sql_hints(hints, "mssql") - assert result == "TABLOCK" + assert result == "WITH (TABLOCK)" + + def test_process_sql_hints_multiple_dialects(self): + """Test extraction from dict with multiple dialects.""" + hints = { + "oracle": "/*+ PARALLEL */", + "mysql": "DELAYED", + "postgresql": "/* comment */", + } + assert sql._process_sql_hints(hints, "oracle") == "/*+ PARALLEL */" + assert sql._process_sql_hints(hints, "mysql") == "DELAYED" + assert sql._process_sql_hints(hints, "postgresql") == "/* comment */" @pytest.mark.parametrize("conn", sqlalchemy_connectable) @@ -4471,7 +4476,10 @@ def test_to_sql_with_hints_parameter(conn, test_frame1, request): with pandasSQL_builder(conn, need_transaction=True) as pandasSQL: pandasSQL.to_sql( - test_frame1, "test_hints", hints={"oracle": ["APPEND"]}, if_exists="replace" + test_frame1, + "test_hints", + hints={"oracle": "/*+ APPEND */"}, + if_exists="replace", ) assert pandasSQL.has_table("test_hints") assert count_rows(conn, "test_hints") == len(test_frame1) @@ -4505,7 +4513,7 @@ def sample(pd_table, conn, keys, data_iter): test_frame1, "test_hints_method", method=sample, - hints={"oracle": ["APPEND"]}, + hints={"oracle": "/*+ APPEND */"}, ) assert pandasSQL.has_table("test_hints_method") @@ -4524,7 +4532,7 @@ def test_to_sql_hints_with_different_methods(conn, method, test_frame1, request) test_frame1, "test_hints_methods", method=method, - hints={"oracle": ["APPEND", "PARALLEL"]}, + hints={"oracle": "/*+ APPEND PARALLEL */"}, if_exists="replace", ) assert pandasSQL.has_table("test_hints_methods") @@ -4538,10 +4546,10 @@ def test_to_sql_hints_multidb_dict(conn, test_frame1, request): conn = request.getfixturevalue(conn) hints = { - "oracle": ["APPEND", "PARALLEL"], + "oracle": "/*+ APPEND PARALLEL */", "mysql": "HIGH_PRIORITY", - "postgresql": "some_pg_hint", - "sqlite": "ignored", + "postgresql": "/* pg hint */", + "sqlite": "IGNORED", } with pandasSQL_builder(conn, need_transaction=True) as pandasSQL: @@ -4561,7 +4569,7 @@ def test_to_sql_hints_adbc_not_supported(sqlite_adbc_conn, test_frame1): msg = "'hints' is not implemented for ADBC drivers" with pytest.raises(NotImplementedError, match=msg): - df.to_sql("test", sqlite_adbc_conn, hints={"oracle": ["APPEND"]}) + df.to_sql("test", sqlite_adbc_conn, hints={"mysql": "SOME_HINT"}) def test_to_sql_hints_sqlite_builtin(sqlite_buildin, test_frame1): From 8561808e9bd2e5431c9921ab8a9461ca5ef7e081 Mon Sep 17 00:00:00 2001 From: mpak123 Date: Wed, 22 Oct 2025 16:10:46 -0400 Subject: [PATCH 4/7] Added change to latest rst file --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index eb938a7140e29..11c95f686c000 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -156,6 +156,7 @@ All warnings for upcoming changes in pandas will have the base class :class:`pan Other enhancements ^^^^^^^^^^^^^^^^^^ +- :func:`DataFrame.to_sql` now accepts a ``hints`` parameter to pass database-specific query hints for optimizing insert performance. The hints are specified as a dictionary mapping dialect names to hint strings (e.g., ``{'oracle': '/*+ APPEND PARALLEL(4) */', 'mysql': 'DELAYED'}``). Users are responsible for providing correctly formatted hint strings for their target database (:issue:`XXXXX`) - :func:`pandas.merge` propagates the ``attrs`` attribute to the result if all inputs have identical ``attrs``, as has so far already been the case for :func:`pandas.concat`. From 52a748ed990ccdb6b9fee93ed26c7f3f95c2487b Mon Sep 17 00:00:00 2001 From: mpak123 Date: Wed, 22 Oct 2025 16:13:27 -0400 Subject: [PATCH 5/7] Added appropriate numbers --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/io/sql.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 11c95f686c000..4f71a991f5af3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -156,7 +156,7 @@ All warnings for upcoming changes in pandas will have the base class :class:`pan Other enhancements ^^^^^^^^^^^^^^^^^^ -- :func:`DataFrame.to_sql` now accepts a ``hints`` parameter to pass database-specific query hints for optimizing insert performance. The hints are specified as a dictionary mapping dialect names to hint strings (e.g., ``{'oracle': '/*+ APPEND PARALLEL(4) */', 'mysql': 'DELAYED'}``). Users are responsible for providing correctly formatted hint strings for their target database (:issue:`XXXXX`) +- :func:`DataFrame.to_sql` now accepts a ``hints`` parameter to pass database-specific query hints for optimizing insert performance. The hints are specified as a dictionary mapping dialect names to hint strings (e.g., ``{'oracle': '/*+ APPEND PARALLEL(4) */', 'mysql': 'DELAYED'}``). Users are responsible for providing correctly formatted hint strings for their target database (:issue:`61370`) - :func:`pandas.merge` propagates the ``attrs`` attribute to the result if all inputs have identical ``attrs``, as has so far already been the case for :func:`pandas.concat`. diff --git a/pandas/io/sql.py b/pandas/io/sql.py index af44b1b8ac1bc..5aaab687d22ce 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -837,7 +837,7 @@ def to_sql( - SQLite will raise a UserWarning (hints not supported) - ADBC connections will raise NotImplementedError - .. versionadded:: + .. versionadded:: 3.0.0 **engine_kwargs Any additional kwargs are passed to the engine. From 6a5f365bbd29a41a978ac6295a3389884e3eb0b7 Mon Sep 17 00:00:00 2001 From: mpak123 Date: Wed, 22 Oct 2025 16:43:01 -0400 Subject: [PATCH 6/7] Fix mypy issues --- pandas/core/generic.py | 2 +- pandas/io/sql.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ae3c54d60bb3b..064a041f96358 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2798,7 +2798,7 @@ def to_sql( chunksize: int | None = None, dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, - hints: dict[str, str | list[str]] | None = None, + hints: dict[str, str] | None = None, ) -> int | None: """ Write records stored in a DataFrame to a SQL database. diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5aaab687d22ce..1d059bc05535c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2040,8 +2040,8 @@ def to_sql( chunksize: int | None = None, dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, - engine: str = "auto", hints: dict[str, str] | None = None, + engine: str = "auto", **engine_kwargs, ) -> int | None: """ @@ -2407,8 +2407,8 @@ def to_sql( chunksize: int | None = None, dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, - engine: str = "auto", hints: dict[str, str] | None = None, + engine: str = "auto", **engine_kwargs, ) -> int | None: """ @@ -2641,7 +2641,9 @@ def insert_statement(self, *, num_rows: int) -> str: ) return insert_statement - def _execute_insert(self, conn, keys, data_iter, hints) -> int: + def _execute_insert( + self, conn, keys: list[str], data_iter, hint_str: str | None = None + ) -> int: from sqlite3 import Error data_list = list(data_iter) @@ -2651,7 +2653,9 @@ def _execute_insert(self, conn, keys, data_iter, hints) -> int: raise DatabaseError("Execution failed") from exc return conn.rowcount - def _execute_insert_multi(self, conn, keys, data_iter, hints) -> int: + def _execute_insert_multi( + self, conn, keys: list[str], data_iter, hint_str: str | None = None + ) -> int: data_list = list(data_iter) flattened_data = [x for row in data_list for x in row] conn.execute(self.insert_statement(num_rows=len(data_list)), flattened_data) @@ -2887,8 +2891,8 @@ def to_sql( chunksize: int | None = None, dtype: DtypeArg | None = None, method: Literal["multi"] | Callable | None = None, - engine: str = "auto", hints: dict[str, str] | None = None, + engine: str = "auto", **engine_kwargs, ) -> int | None: """ From 33783ff15d03e974d3c2e33c3354a3f2342dac53 Mon Sep 17 00:00:00 2001 From: mpak123 Date: Wed, 22 Oct 2025 16:57:56 -0400 Subject: [PATCH 7/7] added stuff to docstrings --- pandas/core/generic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 064a041f96358..0225e7994fd06 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2862,6 +2862,21 @@ def to_sql( Details and a sample callable implementation can be found in the section :ref:`insert method `. + hints : dict[str, str], optional + Dictionary of SQL hints to optimize insertion performance, keyed by + database dialect name (e.g., 'oracle', 'mysql', 'postgresql', 'mssql'). + Each value should be a complete hint string formatted exactly as required + by the target database. The user is responsible for providing correctly + formatted hint strings. + + Examples: ``{'oracle': '/*+ APPEND PARALLEL(4) */', 'mysql': 'DELAYED'}`` + + .. note:: + - Hints are database-specific and ignored for unsupported dialects. + - SQLite raises a ``UserWarning`` (hints not supported). + - ADBC connections raise ``NotImplementedError``. + + .. versionadded:: 3.0.0 Returns -------