From 2ed4fd822ef7ef03d79548f07791bcf9ae4a7153 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Fri, 3 Oct 2025 11:56:26 +0800 Subject: [PATCH 1/5] fix bug --- wren-ai-service/eval/data_curation/utils.py | 4 ++-- wren-ai-service/eval/utils.py | 8 ++++---- wren-ai-service/tools/dev/.env | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/wren-ai-service/eval/data_curation/utils.py b/wren-ai-service/eval/data_curation/utils.py index 86332ebec9..0bd44931f8 100644 --- a/wren-ai-service/eval/data_curation/utils.py +++ b/wren-ai-service/eval/data_curation/utils.py @@ -44,8 +44,8 @@ async def is_sql_valid( timeout: float = TIMEOUT_SECONDS, ) -> Tuple[bool, str]: sql = sql.rstrip(";") if sql.endswith(";") else sql - quoted_sql, no_error = add_quotes(sql) - assert no_error, f"Error in quoting SQL: {sql}" + quoted_sql, error = add_quotes(sql) + assert not error, f"Error in quoting SQL: {sql}, error: {error}" if data_source == "duckdb": async with aiohttp.request( diff --git a/wren-ai-service/eval/utils.py b/wren-ai-service/eval/utils.py index c81b7acedc..006a8079fd 100644 --- a/wren-ai-service/eval/utils.py +++ b/wren-ai-service/eval/utils.py @@ -34,7 +34,7 @@ async def get_data_from_wren_engine( limit: Optional[int] = None, ): quoted_sql, error = add_quotes(sql) - assert not error, f"Error in quoting SQL: {sql}" + assert not error, f"Error in quoting SQL: {sql}, error: {error}" if data_source == "duckdb": async with aiohttp.request( @@ -157,9 +157,9 @@ async def _get_sql_analysis( timeout: float = 300, ) -> List[dict]: sql = sql.rstrip(";") if sql.endswith(";") else sql - quoted_sql, no_error = add_quotes(sql) - if not no_error: - print(f"Error in quoting SQL: {sql}") + quoted_sql, error = add_quotes(sql) + if error: + print(f"Error in quoting SQL: {sql}, error: {error}") quoted_sql = sql manifest_str = base64.b64encode(orjson.dumps(mdl_json)).decode() diff --git a/wren-ai-service/tools/dev/.env b/wren-ai-service/tools/dev/.env index 9ba8fda536..7cc3c1573e 100644 --- a/wren-ai-service/tools/dev/.env +++ b/wren-ai-service/tools/dev/.env @@ -11,10 +11,10 @@ IBIS_SERVER_PORT=8000 # version # CHANGE THIS TO THE LATEST VERSION WREN_PRODUCT_VERSION=development -WREN_ENGINE_VERSION=0.17.1 -WREN_AI_SERVICE_VERSION=0.24.3 -IBIS_SERVER_VERSION=0.17.1 -WREN_UI_VERSION=0.30.0 +WREN_ENGINE_VERSION=0.20.2 +WREN_AI_SERVICE_VERSION=0.27.14 +IBIS_SERVER_VERSION=0.20.2 +WREN_UI_VERSION=0.31.2 WREN_BOOTSTRAP_VERSION=0.1.5 LAUNCH_CLI_PATH=./launch-cli.sh From e0b978ad8aa64b25df43ba426a6e0bce94d3b5f7 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Fri, 3 Oct 2025 11:59:37 +0800 Subject: [PATCH 2/5] update --- wren-ai-service/eval/preparation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-ai-service/eval/preparation.py b/wren-ai-service/eval/preparation.py index a139a2a30a..9753a42b1b 100644 --- a/wren-ai-service/eval/preparation.py +++ b/wren-ai-service/eval/preparation.py @@ -165,7 +165,7 @@ def _get_columns_by_table_index(columns, table_index): "primaryKey": ( tables_info["column_names_original"][primary_key_column_index][-1] if primary_key_column_index - else "", + else "" ), "columns": _build_mdl_columns( tables_info, i, database_info.get(table, None) From c06c96859fe60d7b3beb4f11df81bcc30202836f Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Fri, 3 Oct 2025 13:16:03 +0800 Subject: [PATCH 3/5] update --- wren-ai-service/eval/preparation.py | 11 ++--------- wren-ai-service/eval/utils.py | 15 +++++++++++++-- wren-ai-service/src/core/engine.py | 4 ++-- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/wren-ai-service/eval/preparation.py b/wren-ai-service/eval/preparation.py index 9753a42b1b..16c192f770 100644 --- a/wren-ai-service/eval/preparation.py +++ b/wren-ai-service/eval/preparation.py @@ -410,7 +410,8 @@ def get_mdls_and_question_sql_pairs_by_common_db(mdl_by_db, question_sql_pairs_b get_contexts_from_sql( ground_truth["sql"], values["mdl"], - WREN_ENGINE_API_URL, + args.dataset, + api_endpoint=WREN_ENGINE_API_URL, ) ) @@ -442,14 +443,6 @@ def get_mdls_and_question_sql_pairs_by_common_db(mdl_by_db, question_sql_pairs_b "instructions": instructions, } ) - # else: - # print( - # "Warning: context is empty, ignore this question sql pair as of now..." - # ) - # print(f"database: {db}") - # print(f'question: {ground_truth["question"]}') - # print(f'sql: {ground_truth["sql"]}') - # print() # save eval dataset if candidate_eval_dataset: diff --git a/wren-ai-service/eval/utils.py b/wren-ai-service/eval/utils.py index 006a8079fd..dbf88c2ffd 100644 --- a/wren-ai-service/eval/utils.py +++ b/wren-ai-service/eval/utils.py @@ -80,6 +80,7 @@ async def get_data_from_wren_engine( async def get_contexts_from_sql( sql: str, mdl_json: dict, + dataset: str, api_endpoint: str = WREN_ENGINE_API_URL, timeout: float = 300, **kwargs, @@ -154,10 +155,18 @@ async def _get_sql_analysis( sql: str, mdl_json: dict, api_endpoint: str, + dataset: str, timeout: float = 300, ) -> List[dict]: sql = sql.rstrip(";") if sql.endswith(";") else sql - quoted_sql, error = add_quotes(sql) + sql = sql.replace("`", '"') + if dataset == "spider1.0": + read = "sqlite" + elif dataset == "bird": + read = "postgres" + else: + read = None + quoted_sql, error = add_quotes(sql, read) if error: print(f"Error in quoting SQL: {sql}, error: {error}") quoted_sql = sql @@ -175,7 +184,9 @@ async def _get_sql_analysis( ) as response: return await response.json() - sql_analysis_results = await _get_sql_analysis(sql, mdl_json, api_endpoint, timeout) + sql_analysis_results = await _get_sql_analysis( + sql, mdl_json, api_endpoint, dataset, timeout=timeout + ) contexts = _get_contexts_from_sql_analysis_results(sql_analysis_results) return contexts diff --git a/wren-ai-service/src/core/engine.py b/wren-ai-service/src/core/engine.py index f21630d05e..95022d1d08 100644 --- a/wren-ai-service/src/core/engine.py +++ b/wren-ai-service/src/core/engine.py @@ -49,11 +49,11 @@ def remove_limit_statement(sql: str) -> str: return modified_sql -def add_quotes(sql: str) -> Tuple[str, str]: +def add_quotes(sql: str, read: str = None) -> Tuple[str, str]: try: quoted_sql = sqlglot.transpile( sql, - read=None, + read=read, identify=True, error_level=sqlglot.ErrorLevel.RAISE, unsupported_level=sqlglot.ErrorLevel.RAISE, From cf4ef3a6812c02f14ee45a6beebfaaf12b7ef086 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Fri, 3 Oct 2025 13:49:41 +0800 Subject: [PATCH 4/5] update --- wren-ai-service/eval/preparation.py | 1 - wren-ai-service/eval/utils.py | 13 ++----------- wren-ai-service/src/core/engine.py | 5 +++-- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/wren-ai-service/eval/preparation.py b/wren-ai-service/eval/preparation.py index 16c192f770..c176472379 100644 --- a/wren-ai-service/eval/preparation.py +++ b/wren-ai-service/eval/preparation.py @@ -410,7 +410,6 @@ def get_mdls_and_question_sql_pairs_by_common_db(mdl_by_db, question_sql_pairs_b get_contexts_from_sql( ground_truth["sql"], values["mdl"], - args.dataset, api_endpoint=WREN_ENGINE_API_URL, ) ) diff --git a/wren-ai-service/eval/utils.py b/wren-ai-service/eval/utils.py index dbf88c2ffd..a64e12f3dc 100644 --- a/wren-ai-service/eval/utils.py +++ b/wren-ai-service/eval/utils.py @@ -80,7 +80,6 @@ async def get_data_from_wren_engine( async def get_contexts_from_sql( sql: str, mdl_json: dict, - dataset: str, api_endpoint: str = WREN_ENGINE_API_URL, timeout: float = 300, **kwargs, @@ -155,18 +154,10 @@ async def _get_sql_analysis( sql: str, mdl_json: dict, api_endpoint: str, - dataset: str, timeout: float = 300, ) -> List[dict]: sql = sql.rstrip(";") if sql.endswith(";") else sql - sql = sql.replace("`", '"') - if dataset == "spider1.0": - read = "sqlite" - elif dataset == "bird": - read = "postgres" - else: - read = None - quoted_sql, error = add_quotes(sql, read) + quoted_sql, error = add_quotes(sql) if error: print(f"Error in quoting SQL: {sql}, error: {error}") quoted_sql = sql @@ -185,7 +176,7 @@ async def _get_sql_analysis( return await response.json() sql_analysis_results = await _get_sql_analysis( - sql, mdl_json, api_endpoint, dataset, timeout=timeout + sql, mdl_json, api_endpoint, timeout=timeout ) contexts = _get_contexts_from_sql_analysis_results(sql_analysis_results) return contexts diff --git a/wren-ai-service/src/core/engine.py b/wren-ai-service/src/core/engine.py index 95022d1d08..96ab173e9b 100644 --- a/wren-ai-service/src/core/engine.py +++ b/wren-ai-service/src/core/engine.py @@ -49,11 +49,12 @@ def remove_limit_statement(sql: str) -> str: return modified_sql -def add_quotes(sql: str, read: str = None) -> Tuple[str, str]: +def add_quotes(sql: str) -> Tuple[str, str]: try: + sql = sql.replace("`", '"') quoted_sql = sqlglot.transpile( sql, - read=read, + read=None, identify=True, error_level=sqlglot.ErrorLevel.RAISE, unsupported_level=sqlglot.ErrorLevel.RAISE, From 0c19010478f058d775f470cc8f9fc39093716590 Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Tue, 7 Oct 2025 10:32:45 +0800 Subject: [PATCH 5/5] fix eval issues --- wren-ai-service/eval/prediction.py | 1 + wren-ai-service/eval/utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/wren-ai-service/eval/prediction.py b/wren-ai-service/eval/prediction.py index 3433b9edc2..b640a41f52 100644 --- a/wren-ai-service/eval/prediction.py +++ b/wren-ai-service/eval/prediction.py @@ -109,6 +109,7 @@ def parse_args() -> Tuple[str, str]: db_name = parse_db_name(path) if "spider_" in path: settings.eval_data_db_path = "etc/spider1.0/database" + load_eval_data_db_to_postgres(db_name, settings.eval_data_db_path) elif "bird_" in path: settings.eval_data_db_path = "etc/bird/minidev/MINIDEV/dev_databases" load_eval_data_db_to_postgres(db_name, settings.eval_data_db_path) diff --git a/wren-ai-service/eval/utils.py b/wren-ai-service/eval/utils.py index a64e12f3dc..1989a3fc25 100644 --- a/wren-ai-service/eval/utils.py +++ b/wren-ai-service/eval/utils.py @@ -192,7 +192,7 @@ def parse_db_name(path: str) -> str: r"bird_(.+?)_eval_dataset\.toml|spider_(.+?)_eval_dataset\.toml", path ) if match: - return match.group(1) + return match.group(1) or match.group(2) else: raise ValueError( f"Invalid path format: {path}. Expected format: bird__eval_dataset.toml or spider__eval_dataset.toml"