Skip to content

Commit 196f124

Browse files
authored
Fix set explain regex (#20319)
* Reapply "fix: allow EXPLAIN on multi-statement SQL beginning with SET (#20106)" (#20282) This reverts commit 6486c7f. * Address catastrophic backtracking in EXPLAIN regex Took a bit to pin down why this was happening, but with a fuzzer and some articles about exponential regex performance, I could address the underlying issue without changing the code much. * Fix changelog * Formatting fixes * Pass query_signature down into explain_statement No need to recalculate this, and passing it down will allow trimmed SQL to reuse the original query's signature.
1 parent 0245920 commit 196f124

File tree

7 files changed

+147
-17
lines changed

7 files changed

+147
-17
lines changed

postgres/changelog.d/20319.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow EXPLAIN on multi-statement SQL where one or more SET commands appear before another supported statement type

postgres/datadog_checks/postgres/explain_parameterized_queries.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import psycopg2
99

10-
from datadog_checks.base.utils.db.sql import compute_sql_signature
1110
from datadog_checks.base.utils.tracking import tracked_method
1211
from datadog_checks.postgres.cursor import CommenterDictCursor
1312

@@ -73,7 +72,7 @@ def __init__(self, check, config, explain_function):
7372
self._explain_function = explain_function
7473

7574
@tracked_method(agent_check_getter=agent_check_getter)
76-
def explain_statement(self, dbname, statement, obfuscated_statement):
75+
def explain_statement(self, dbname, statement, obfuscated_statement, query_signature):
7776
if self._check.version < V12:
7877
# if pg version < 12, skip explaining parameterized queries because
7978
# plan_cache_mode is not supported
@@ -85,7 +84,6 @@ def explain_statement(self, dbname, statement, obfuscated_statement):
8584
return None, DBExplainError.parameterized_query, '{}'.format(type(e))
8685
self._set_plan_cache_mode(dbname)
8786

88-
query_signature = compute_sql_signature(obfuscated_statement)
8987
try:
9088
self._create_prepared_statement(dbname, statement, obfuscated_statement, query_signature)
9189
except psycopg2.errors.IndeterminateDatatype as e:

postgres/datadog_checks/postgres/statement_samples.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from datadog_checks.base.utils.tracking import tracked_method
3333
from datadog_checks.postgres.explain_parameterized_queries import ExplainParameterizedQueries
3434

35-
from .util import DatabaseConfigurationError, DBExplainError, warning_with_tags
35+
from .util import DatabaseConfigurationError, DBExplainError, trim_leading_set_stmts, warning_with_tags
3636
from .version_utils import V9_6, V10
3737

3838
# according to https://unicodebook.readthedocs.io/unicode_encodings.html, the max supported size of a UTF-8 encoded
@@ -749,15 +749,22 @@ def _run_and_track_explain(self, dbname, statement, obfuscated_statement, query_
749749
@tracked_method(agent_check_getter=agent_check_getter)
750750
def _run_explain_safe(self, dbname, statement, obfuscated_statement, query_signature):
751751
# type: (str, str, str, str) -> Tuple[Optional[Dict], Optional[DBExplainError], Optional[str]]
752+
753+
orig_statement = statement
754+
755+
# remove leading SET statements from our SQL
756+
if obfuscated_statement[:3].lower() == "set":
757+
statement = trim_leading_set_stmts(statement)
758+
obfuscated_statement = trim_leading_set_stmts(obfuscated_statement)
759+
752760
if not self._can_explain_statement(obfuscated_statement):
753761
return None, DBExplainError.no_plans_possible, None
754762

755763
track_activity_query_size = self._get_track_activity_query_size()
756764

757-
if (
758-
self._get_truncation_state(track_activity_query_size, statement, query_signature)
759-
== StatementTruncationState.truncated
760-
):
765+
# truncation check is on the original query, not the trimmed version
766+
stmt_trunc = self._get_truncation_state(track_activity_query_size, orig_statement, query_signature)
767+
if stmt_trunc == StatementTruncationState.truncated:
761768
return (
762769
None,
763770
DBExplainError.query_truncated,
@@ -779,7 +786,7 @@ def _run_explain_safe(self, dbname, statement, obfuscated_statement, query_signa
779786
if self._explain_parameterized_queries._is_parameterized_query(statement):
780787
if is_affirmative(self._config.statement_samples_config.get('explain_parameterized_queries', True)):
781788
return self._explain_parameterized_queries.explain_statement(
782-
dbname, statement, obfuscated_statement
789+
dbname, statement, obfuscated_statement, query_signature
783790
)
784791
e = psycopg2.errors.UndefinedParameter("Unable to explain parameterized query")
785792
self._log.debug(

postgres/datadog_checks/postgres/util.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# (C) Datadog, Inc. 2019-present
22
# All rights reserved
33
# Licensed under Simplified BSD License (see LICENSE)
4+
import re
45
import string
56
from enum import Enum
67
from typing import Any, List, Tuple # noqa: F401
@@ -130,6 +131,40 @@ def get_list_chunks(lst, n):
130131
yield lst[i : i + n]
131132

132133

134+
SET_TRIM_PATTERN = re.compile(
135+
r"""
136+
^
137+
(?:
138+
\s*
139+
# match one leading comment
140+
(?:
141+
/\*
142+
.*
143+
\*/
144+
\s*
145+
)?
146+
147+
# match leading SET commands
148+
SET\b
149+
(?:
150+
[^';] | # keywords, integer literals, etc.
151+
'[^']*' # single-quoted strings
152+
)+
153+
;
154+
)+
155+
""",
156+
flags=(re.I | re.X),
157+
)
158+
159+
160+
# Expects one or more SQL statements in a string. If the string
161+
# begins with any SET statements, they are removed and the rest
162+
# of the string is returned. Otherwise, the string is returned
163+
# as it was received.
164+
def trim_leading_set_stmts(sql):
165+
return SET_TRIM_PATTERN.sub('', sql, 1).lstrip()
166+
167+
133168
fmt = PartialFormatter()
134169

135170
AWS_RDS_HOSTNAME_SUFFIX = ".rds.amazonaws.com"

postgres/tests/test_explain_parameterized_queries.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ def test_explain_parameterized_queries(integration_check, dbm_instance, query, e
5151
if check.version < V12:
5252
return
5353

54-
plan_dict, explain_err_code, err = check.statement_samples._run_and_track_explain(DB_NAME, query, query, query)
54+
plan_dict, explain_err_code, err = check.statement_samples._run_and_track_explain(
55+
DB_NAME, query, query, "7231596c8b5536d1"
56+
)
5557
assert plan_dict is not None
5658
assert explain_err_code == expected_explain_err_code
5759
assert err is None
@@ -111,7 +113,10 @@ def test_explain_parameterized_queries_version_below_12(integration_check, dbm_i
111113
return
112114

113115
plan_dict, explain_err_code, err = check.statement_samples._run_and_track_explain(
114-
DB_NAME, "SELECT * FROM pg_settings WHERE name = $1", "SELECT * FROM pg_settings WHERE name = $1", ""
116+
DB_NAME,
117+
"SELECT * FROM pg_settings WHERE name = $1",
118+
"SELECT * FROM pg_settings WHERE name = $1",
119+
"7231596c8b5536d1",
115120
)
116121
assert plan_dict is None
117122
assert explain_err_code == DBExplainError.parameterized_query
@@ -133,7 +138,10 @@ def test_explain_parameterized_queries_create_prepared_statement_exception(integ
133138
side_effect=psycopg2.errors.DatabaseError("unexpected exception"),
134139
):
135140
plan_dict, explain_err_code, err = check.statement_samples._run_and_track_explain(
136-
DB_NAME, "SELECT * FROM pg_settings WHERE name = $1", "SELECT * FROM pg_settings WHERE name = $1", ""
141+
DB_NAME,
142+
"SELECT * FROM pg_settings WHERE name = $1",
143+
"SELECT * FROM pg_settings WHERE name = $1",
144+
"7231596c8b5536d1",
137145
)
138146
assert plan_dict is None
139147
assert explain_err_code == DBExplainError.failed_to_explain_with_prepared_statement
@@ -155,7 +163,9 @@ def test_explain_parameterized_queries_explain_prepared_statement_exception(inte
155163
side_effect=psycopg2.errors.DatabaseError("unexpected exception"),
156164
):
157165
query = "SELECT * FROM pg_settings WHERE name = $1"
158-
plan_dict, explain_err_code, err = check.statement_samples._run_and_track_explain(DB_NAME, query, query, "")
166+
plan_dict, explain_err_code, err = check.statement_samples._run_and_track_explain(
167+
DB_NAME, query, query, "7231596c8b5536d1"
168+
)
159169
assert plan_dict is None
160170
assert explain_err_code == DBExplainError.failed_to_explain_with_prepared_statement
161171
assert err is not None
@@ -184,7 +194,10 @@ def test_explain_parameterized_queries_explain_prepared_statement_no_plan_return
184194
return_value=None,
185195
):
186196
plan_dict, explain_err_code, err = check.statement_samples._run_and_track_explain(
187-
DB_NAME, "SELECT * FROM pg_settings WHERE name = $1", "SELECT * FROM pg_settings WHERE name = $1", ""
197+
DB_NAME,
198+
"SELECT * FROM pg_settings WHERE name = $1",
199+
"SELECT * FROM pg_settings WHERE name = $1",
200+
"7231596c8b5536d1",
188201
)
189202
assert plan_dict is None
190203
assert explain_err_code == DBExplainError.no_plan_returned_with_prepared_statement

postgres/tests/test_statements.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,44 @@ def test_get_db_explain_setup_state(integration_check, dbm_instance, dbname, exp
583583
failed_explain_test_repeat_count = 5
584584

585585

586+
@pytest.mark.parametrize(
587+
"query",
588+
[
589+
"SELECT * FROM pg_class",
590+
"SET LOCAL datestyle TO postgres; SELECT * FROM pg_class",
591+
],
592+
)
593+
def test_successful_explain(
594+
integration_check,
595+
dbm_instance,
596+
aggregator,
597+
query,
598+
):
599+
dbname = "datadog_test"
600+
# Don't need metrics for this one
601+
dbm_instance['query_metrics']['enabled'] = False
602+
dbm_instance['query_samples']['explain_parameterized_queries'] = False
603+
check = integration_check(dbm_instance)
604+
check._connect()
605+
606+
# run check so all internal state is correctly initialized
607+
run_one_check(check)
608+
609+
# clear out contents of aggregator so we measure only the metrics generated during this specific part of the test
610+
aggregator.reset()
611+
612+
db_explain_error, err = check.statement_samples._get_db_explain_setup_state(dbname)
613+
assert db_explain_error is None
614+
assert err is None
615+
616+
plan, *rest = check.statement_samples._run_and_track_explain(dbname, query, query, "7231596c8b5536d1")
617+
assert plan is not None
618+
619+
plan = plan['Plan']
620+
assert plan['Node Type'] == 'Seq Scan'
621+
assert plan['Relation Name'] == 'pg_class'
622+
623+
586624
@pytest.mark.parametrize(
587625
"query,expected_error_tag,explain_function_override,expected_fail_count,skip_on_versions",
588626
[
@@ -663,7 +701,7 @@ def test_failed_explain_handling(
663701
assert err is None
664702

665703
for _ in range(failed_explain_test_repeat_count):
666-
check.statement_samples._run_and_track_explain(dbname, query, query, query)
704+
check.statement_samples._run_and_track_explain(dbname, query, query, "7231596c8b5536d1")
667705

668706
expected_tags = _get_expected_tags(
669707
check, dbm_instance, with_host=False, with_db=True, agent_hostname='stubbed.hostname'
@@ -1480,7 +1518,9 @@ def test_statement_run_explain_errors(
14801518
check._connect()
14811519

14821520
run_one_check(check)
1483-
_, explain_err_code, err = check.statement_samples._run_and_track_explain("datadog_test", query, query, query)
1521+
_, explain_err_code, err = check.statement_samples._run_and_track_explain(
1522+
"datadog_test", query, query, "7231596c8b5536d1"
1523+
)
14841524
run_one_check(check)
14851525

14861526
assert explain_err_code == expected_explain_err_code
@@ -1534,7 +1574,9 @@ def test_statement_run_explain_parameterized_queries(
15341574
return
15351575

15361576
run_one_check(check)
1537-
_, explain_err_code, err = check.statement_samples._run_and_track_explain("datadog_test", query, query, query)
1577+
_, explain_err_code, err = check.statement_samples._run_and_track_explain(
1578+
"datadog_test", query, query, "7231596c8b5536d1"
1579+
)
15381580
run_one_check(check)
15391581

15401582
assert explain_err_code == expected_explain_err_code

postgres/tests/test_unit.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,37 @@ def test_database_identifier(pg_instance, template, expected, tags):
201201
pg_instance['tags'] = tags
202202
check = PostgreSql('postgres', {}, [pg_instance])
203203
assert check.database_identifier == expected
204+
205+
206+
@pytest.mark.unit
207+
@pytest.mark.parametrize(
208+
"query,expected_trimmed_query",
209+
[
210+
("SELECT * FROM pg_settings WHERE name = $1", "SELECT * FROM pg_settings WHERE name = $1"),
211+
("SELECT * FROM pg_settings; DELETE FROM pg_settings;", "SELECT * FROM pg_settings; DELETE FROM pg_settings;"),
212+
("SET search_path TO 'my_schema', public; SELECT * FROM pg_settings", "SELECT * FROM pg_settings"),
213+
("SET TIME ZONE 'Europe/Rome'; SELECT * FROM pg_settings", "SELECT * FROM pg_settings"),
214+
(
215+
"SET LOCAL request_id = 1234; SET LOCAL hostname TO 'Bob''s Laptop'; SELECT * FROM pg_settings",
216+
"SELECT * FROM pg_settings",
217+
),
218+
("SET LONG;" * 1024 + "SELECT *;", "SELECT *;"),
219+
("SET " + "'quotable'" * 1024 + "; SELECT *;", "SELECT *;"),
220+
("SET 'l" + "o" * 1024 + "ng'; SELECT *;", "SELECT *;"),
221+
(" /** pl/pgsql **/ SET 'comment'; SELECT *;", "SELECT *;"),
222+
("this isn't SQL", "this isn't SQL"),
223+
(
224+
"SET SESSION min_wal_size = 14400; "
225+
+ "SET LOCAL wal_buffers TO 2048; "
226+
+ "/* testing id 1234 */ set send_abort_for_kill TO 'stderr'; "
227+
+ "set id = case when (false) and ((((cast(null as box) ~= cast(null as box)) "
228+
+ "or (cast(null as point) <@ cast(null as line))) or (public.my table",
229+
"set id = case when (false) and ((((cast(null as box) ~= cast(null as box)) "
230+
+ "or (cast(null as point) <@ cast(null as line))) or (public.my table",
231+
),
232+
("", ""),
233+
],
234+
)
235+
def test_trim_set_stmts(query, expected_trimmed_query):
236+
trimmed_query = util.trim_leading_set_stmts(query)
237+
assert trimmed_query == expected_trimmed_query

0 commit comments

Comments
 (0)