diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py
index c3a03823cb7e..736eb46e8539 100644
--- a/ibis/backends/sql/__init__.py
+++ b/ibis/backends/sql/__init__.py
@@ -458,21 +458,23 @@ def _build_insert_from_table(
 ):
     compiler = self.compiler
     quoted = compiler.quoted
-    # Compare the columns between the target table and the object to be inserted
-    # If source is a subset of target, use source columns for insert list
-    # Otherwise, assume auto-generated column names and use positional ordering.
-    target_cols = self.get_schema(target, catalog=catalog, database=db).keys()
-
-    columns = (
-        source_cols
-        if (source_cols := source.schema().keys()) <= target_cols
-        else target_cols
-    )
+
+    target_col_names = self.get_schema(target, catalog=catalog, database=db).keys()
+    source_col_names = source.schema().keys()
+    # Error on unknown columns.
+    # We DO allow missing columns (they will be filled with NULLs or defaults).
+    unknown_cols = set(source_col_names) - set(target_col_names)
+    if unknown_cols:
+        raise exc.IbisTypeError(
+            f"Cannot insert into table {target} because the following "
+            f"columns are not present in the target table: "
+            f"{', '.join(sorted(unknown_cols))}"
+        )
 
     query = sge.insert(
         expression=self.compile(source),
         into=sg.table(target, db=db, catalog=catalog, quoted=quoted),
-        columns=[sg.to_identifier(col, quoted=quoted) for col in columns],
+        columns=[sg.to_identifier(col, quoted=quoted) for col in source_col_names],
         dialect=compiler.dialect,
     )
     return query
diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py
index 45a8d1af74b5..d68515a23b76 100644
--- a/ibis/backends/tests/test_client.py
+++ b/ibis/backends/tests/test_client.py
@@ -15,6 +15,7 @@
 import pytest
 import rich.console
+import sqlglot
 import sqlglot as sg
 import toolz
 from packaging.version import parse as vparse
@@ -42,6 +43,7 @@
 if TYPE_CHECKING:
     from ibis.backends import BaseBackend
+    from ibis.backends.sql import SQLBackend
 
 np = pytest.importorskip("numpy")
@@ -1578,6 +1580,17 @@ def test_schema_with_caching(alltypes):
     assert pt2.schema() == t2.schema()
 
 
+@contextlib.contextmanager
+def temp_table(con: BaseBackend):
+    # Ideally we'd use a temp table for these tests, but several backends don't
+    # support them, and it's nice to know that data are being inserted correctly.
+    table_name = gen_name("temp_table")
+    try:
+        yield table_name
+    finally:
+        con.drop_table(table_name)
+
+
 @pytest.mark.notyet(
     ["druid"], raises=NotImplementedError, reason="doesn't support create_table"
 )
@@ -1593,33 +1606,61 @@ def test_schema_with_caching(alltypes):
     [
         param([{"a": 1, "b": 2}], [{"b": 22, "a": 11}], id="column order reversed"),
         param([{"a": 1, "b": 2}], [{"a": 11, "b": 22}], id="column order matching"),
-        param(
-            [{"a": 1, "b": 2}],
-            [(11, 22)],
-            marks=[
-                pytest.mark.notimpl(
-                    ["impala"],
-                    reason="Impala DDL has strict validation checks on schema",
-                )
-            ],
-            id="auto generated cols",
-        ),
     ],
 )
 def test_insert_using_col_name_not_position(con, first_row, second_row, monkeypatch):
     monkeypatch.setattr(ibis.options, "default_backend", con)
-    table_name = gen_name("table")
-    con.create_table(table_name, first_row)
-    con.insert(table_name, second_row)
+    with temp_table(con) as table_name:
+        con.create_table(table_name, first_row)
+        con.insert(table_name, second_row)
 
-    result = con.table(table_name).order_by("a").to_pyarrow()
-    expected_result = pa.table({"a": [1, 11], "b": [2, 22]})
+        result = con.table(table_name).order_by("a").to_pyarrow()
+        expected_result = pa.table({"a": [1, 11], "b": [2, 22]})
 
-    assert result.equals(expected_result)
+        assert result.equals(expected_result)
 
-    # Ideally we'd use a temp table for this test, but several backends don't
-    # support them and it's nice to know that data are being inserted correctly.
-    con.drop_table(table_name)
+
+@pytest.mark.notyet(
+    ["druid"], raises=NotImplementedError, reason="doesn't support create_table"
+)
+@pytest.mark.notyet(["polars"], reason="Doesn't support insert")
+@pytest.mark.notyet(
+    ["datafusion"], reason="Doesn't support table creation from records"
+)
+@pytest.mark.notimpl(
+    ["flink"], reason="Temp tables are implemented as views, which don't support insert"
+)
+def test_insert_errors_on_unknown_columns(con, monkeypatch):
+    monkeypatch.setattr(ibis.options, "default_backend", con)
+    with temp_table(con) as table_name:
+        con.create_table(table_name, [{"a": 1, "b": 2}])
+        with pytest.raises(com.IbisTypeError):
+            con.insert(table_name, [{"a": 11, "c": 22}])
+        with pytest.raises(com.IbisTypeError):
+            con.insert(table_name, [{"a": 11, "b": 22, "c": 33}])
+
+
+@pytest.mark.notyet(
+    ["druid"], raises=NotImplementedError, reason="doesn't support create_table"
+)
+@pytest.mark.notyet(["polars"], reason="Doesn't support insert")
+@pytest.mark.notimpl(
+    ["flink"], reason="Temp tables are implemented as views, which don't support insert"
+)
+def test_insert_works_for_missing_columns(con: SQLBackend, monkeypatch):
+    monkeypatch.setattr(ibis.options, "default_backend", con)
+    with temp_table(con) as table_name:
+        duckdb_create_table_sql = (
+            f"CREATE TABLE {table_name} (a INTEGER, b INTEGER DEFAULT 42);"
+        )
+        backend_create_table_sql = sqlglot.transpile(
+            duckdb_create_table_sql, read="duckdb", write=con.dialect
+        )[0]
+        con.raw_sql(backend_create_table_sql)
+        con.insert(table_name, [{"a": 11}])
+        result = con.table(table_name).to_pyarrow().to_pydict()
+        expected_result = {"a": [11], "b": [42]}
+        assert result == expected_result
 
 
 CON_ATTR = {"bigquery": "client", "flink": "_table_env", "pyspark": "_session"}
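For a sense of the user-facing change, here is a minimal sketch of the new insert behavior (assuming an in-memory DuckDB connection; the table name "t" is arbitrary):

    import ibis

    con = ibis.duckdb.connect()
    con.create_table("t", [{"a": 1, "b": 2}])

    # Missing columns are allowed; "b" is filled with NULL (or its default).
    con.insert("t", [{"a": 11}])

    # Unknown columns now raise instead of silently falling back to
    # positional ordering.
    con.insert("t", [{"a": 11, "c": 22}])  # raises IbisTypeError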