diff --git a/src/builtins/regex.rs b/src/builtins/regex.rs index 46f8d7bb..54ae276f 100644 --- a/src/builtins/regex.rs +++ b/src/builtins/regex.rs @@ -10,7 +10,7 @@ use crate::value::Value; use crate::*; use anyhow::{bail, Result}; -use regex::Regex; +use regex::{Regex, RegexBuilder}; // --------------------------------------------------------------------------- // Compiled-regex cache (feature = "cache") @@ -21,6 +21,21 @@ use regex::Regex; // via regorus::cache::configure(). // --------------------------------------------------------------------------- +/// Maximum compiled NFA size (in bytes) for a regex pattern. +/// This bounds both compilation time and match-time cost by limiting the +/// automaton's structural complexity. At 100 KiB, every real-world policy +/// pattern (IPv4, hostname, semver, UUID, image-digest, CIDR, etc.) compiles +/// comfortably, while adversarial patterns that would otherwise cause +/// expensive DFA construction are rejected at compile time. +const REGEX_SIZE_LIMIT: usize = 100 * 1024; + +/// Compile a regex pattern with a size limit to bound resource consumption. +fn compile_regex(pattern: &str) -> core::result::Result { + RegexBuilder::new(pattern) + .size_limit(REGEX_SIZE_LIMIT) + .build() +} + /// Compile a regex pattern, using the cache when the `cache` feature /// is enabled and falling back to direct compilation otherwise. fn get_or_compile_regex(pattern: &str) -> core::result::Result { @@ -32,7 +47,7 @@ fn get_or_compile_regex(pattern: &str) -> core::result::Result core::result::Result Result { + get_or_compile_regex(pattern).map_err(|e| match e { + regex::Error::CompiledTooBig(_) => { + anyhow::Error::new(crate::utils::limits::LimitError::RegexSizeLimitExceeded { + limit: REGEX_SIZE_LIMIT, + }) + } + _ => anyhow::anyhow!(span.error("invalid regex")), + }) +} + pub fn register(m: &mut builtins::BuiltinsMap<&'static str, builtins::BuiltinFcn>) { m.insert( "regex.find_all_string_submatch_n", @@ -72,8 +104,7 @@ fn find_all_string_submatch_n( let value = ensure_string(name, ¶ms[1], &args[1])?; let n = ensure_numeric(name, ¶ms[2], &args[2])?; - let re = get_or_compile_regex(&pattern) - .or_else(|_| bail!(params[0].span().error("invalid regex")))?; + let re = compile_regex_for_builtin(params[0].span(), &pattern)?; if !n.is_integer() { bail!(params[2].span().error("n must be an integer")); @@ -118,8 +149,7 @@ fn find_n(span: &Span, params: &[Ref], args: &[Value], _strict: bool) -> R let value = ensure_string(name, ¶ms[1], &args[1])?; let n = ensure_numeric(name, ¶ms[2], &args[2])?; - let re = get_or_compile_regex(&pattern) - .or_else(|_| bail!(params[0].span().error("invalid regex")))?; + let re = compile_regex_for_builtin(params[0].span(), &pattern)?; if !n.is_integer() { bail!(params[2].span().error("n must be an integer")); @@ -147,11 +177,21 @@ fn find_n(span: &Span, params: &[Ref], args: &[Value], _strict: bool) -> R fn is_valid(span: &Span, params: &[Ref], args: &[Value], _strict: bool) -> Result { let name = "regex.is_valid"; ensure_args_count(span, name, params, args, 1)?; - Ok( - ensure_string(name, ¶ms[0], &args[0]).map_or(Value::Bool(false), |p| { - Value::Bool(get_or_compile_regex(&p).is_ok()) - }), - ) + let pattern = match ensure_string(name, ¶ms[0], &args[0]) { + Ok(p) => p, + Err(_) => return Ok(Value::Bool(false)), + }; + match get_or_compile_regex(&pattern) { + Ok(_) => Ok(Value::Bool(true)), + // Size-limit exceeded is a resource-limit violation; propagate as hard error. + Err(regex::Error::CompiledTooBig(_)) => Err(anyhow::Error::new( + crate::utils::limits::LimitError::RegexSizeLimitExceeded { + limit: REGEX_SIZE_LIMIT, + }, + )), + // Syntax errors mean the pattern is genuinely invalid. + Err(_) => Ok(Value::Bool(false)), + } } pub fn regex_match( @@ -165,8 +205,7 @@ pub fn regex_match( let pattern = ensure_string(name, ¶ms[0], &args[0])?; let value = ensure_string(name, ¶ms[1], &args[1])?; - let re = get_or_compile_regex(&pattern) - .or_else(|_| bail!(params[0].span().error("invalid regex")))?; + let re = compile_regex_for_builtin(params[0].span(), &pattern)?; Ok(Value::Bool(re.is_match(&value))) } @@ -185,6 +224,13 @@ fn regex_replace( let re = match get_or_compile_regex(&pattern) { Ok(p) => p, + Err(regex::Error::CompiledTooBig(_)) => { + return Err(anyhow::Error::new( + crate::utils::limits::LimitError::RegexSizeLimitExceeded { + limit: REGEX_SIZE_LIMIT, + }, + )); + } // TODO: This behavior is due to OPA test not raising error. Should we raise error? _ => return Ok(Value::Undefined), }; @@ -198,8 +244,7 @@ fn regex_split(span: &Span, params: &[Ref], args: &[Value], _strict: bool) let pattern = ensure_string(name, ¶ms[0], &args[0])?; let value = ensure_string(name, ¶ms[1], &args[1])?; - let re = get_or_compile_regex(&pattern) - .or_else(|_| bail!(params[0].span().error("invalid regex")))?; + let re = compile_regex_for_builtin(params[0].span(), &pattern)?; Ok(Value::from_array( re.split(&value) .map(|s| { @@ -242,8 +287,10 @@ fn regex_template_match( } // Fetch pattern, excluding delimiters. - let re = get_or_compile_regex(&template[start + delimiter_start.len()..end]) - .or_else(|_| bail!(params[0].span().error("invalid regex")))?; + let re = compile_regex_for_builtin( + params[0].span(), + &template[start + delimiter_start.len()..end], + )?; // Skip preceding literal in value. value = &value[start..]; diff --git a/src/interpreter.rs b/src/interpreter.rs index 04b0eb14..e44490a4 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -2405,8 +2405,14 @@ impl Interpreter { self.compiled_policy.strict_builtin_errors, ) { Ok(v) => v, - // Ignore errors if we are not evaluating in strict mode. - Err(_) if !self.compiled_policy.strict_builtin_errors => return Ok(Value::Undefined), + // Resource-limit errors must always propagate, even in non-strict + // mode, to prevent `not builtin(...)` from silently flipping to true. + Err(e) if !self.compiled_policy.strict_builtin_errors => { + if e.downcast_ref::().is_some() { + return Err(e); + } + return Ok(Value::Undefined); + } Err(e) => Err(e)?, }; diff --git a/src/rvm/vm/errors.rs b/src/rvm/vm/errors.rs index ff027940..4688c022 100644 --- a/src/rvm/vm/errors.rs +++ b/src/rvm/vm/errors.rs @@ -28,6 +28,9 @@ pub enum VmError { #[error("Execution exceeded memory limit (usage={usage} bytes, limit={limit} bytes, pc={pc})")] MemoryLimitExceeded { usage: u64, limit: u64, pc: usize }, + #[error("Compiled regex exceeded size limit ({limit} bytes, pc={pc})")] + RegexSizeLimitExceeded { limit: usize, pc: usize }, + #[error("Literal index {index} out of bounds (pc={pc})")] LiteralIndexOutOfBounds { index: u16, pc: usize }, @@ -298,6 +301,32 @@ pub enum VmError { impl From for VmError { fn from(err: anyhow::Error) -> Self { + // Preserve LimitError identity so that resource-limit violations are + // never silently swallowed to Undefined in non-strict mode. + // Note: pc is set to 0 because this conversion lacks instruction context. + // The error message itself (which includes the limit value) provides + // sufficient diagnostic information for users. + if let Some(limit_err) = err.downcast_ref::() { + return match *limit_err { + crate::LimitError::TimeLimitExceeded { elapsed, limit } => { + VmError::TimeLimitExceeded { + elapsed, + limit, + pc: 0, + } + } + crate::LimitError::MemoryLimitExceeded { usage, limit } => { + VmError::MemoryLimitExceeded { + usage, + limit, + pc: 0, + } + } + crate::LimitError::RegexSizeLimitExceeded { limit } => { + VmError::RegexSizeLimitExceeded { limit, pc: 0 } + } + }; + } VmError::ArithmeticError { message: alloc::format!("{}", err), pc: 0, diff --git a/src/rvm/vm/execution.rs b/src/rvm/vm/execution.rs index c4b66996..04de7ea5 100644 --- a/src/rvm/vm/execution.rs +++ b/src/rvm/vm/execution.rs @@ -570,7 +570,15 @@ impl RegoVM { Ok(()) } - fn handle_instruction_error(&mut self, _err: VmError, last_result: &mut Value) -> Result { + fn handle_instruction_error(&mut self, err: VmError, last_result: &mut Value) -> Result { + // Resource-limit errors must never be absorbed by rule evaluation. + // They represent engine-level constraints, not rule-level failures. + // Returning Ok(false) causes the caller to clear execution_stack and + // propagate the error, terminating the evaluation entirely. + if RegoVM::is_fatal_vm_error(&err) { + return Ok(false); + } + if let Some(frame) = self.execution_stack.pop() { match frame.kind { FrameKind::Rule(mut data) => { diff --git a/src/rvm/vm/functions.rs b/src/rvm/vm/functions.rs index 9a4f9aa0..6f79e620 100644 --- a/src/rvm/vm/functions.rs +++ b/src/rvm/vm/functions.rs @@ -122,7 +122,16 @@ impl RegoVM { self.strict_builtin_errors, ) { Ok(value) => value, - Err(_) if !self.strict_builtin_errors => Value::Undefined, + // Resource-limit errors must always propagate, even in non-strict + // mode, to prevent `not builtin(...)` from silently flipping to true. + Err(e) if !self.strict_builtin_errors => { + if e.downcast_ref::().is_some() { + self.dummy_exprs = dummy_exprs; + self.cached_builtin_args = args; + return Err(e.into()); + } + Value::Undefined + } Err(err) => { self.dummy_exprs = dummy_exprs; self.cached_builtin_args = args; diff --git a/src/rvm/vm/machine.rs b/src/rvm/vm/machine.rs index a0bb5ebd..48a07860 100644 --- a/src/rvm/vm/machine.rs +++ b/src/rvm/vm/machine.rs @@ -443,6 +443,9 @@ impl RegoVM { limit, pc: self.pc, }, + LimitError::RegexSizeLimitExceeded { limit } => { + VmError::RegexSizeLimitExceeded { limit, pc: self.pc } + } }) } diff --git a/src/rvm/vm/rules.rs b/src/rvm/vm/rules.rs index 9858367b..4ecf5354 100644 --- a/src/rvm/vm/rules.rs +++ b/src/rvm/vm/rules.rs @@ -17,6 +17,36 @@ use super::execution_model::{ use super::machine::RegoVM; impl RegoVM { + /// Returns true if the error represents a resource-limit violation that + /// must never be silently absorbed by rule evaluation. + pub(super) const fn is_fatal_vm_error(err: &VmError) -> bool { + matches!( + err, + VmError::TimeLimitExceeded { .. } + | VmError::MemoryLimitExceeded { .. } + | VmError::RegexSizeLimitExceeded { .. } + | VmError::InstructionLimitExceeded { .. } + ) + } + + /// Restore VM state that was swapped out for rule execution. + /// Must be called before returning an error from `execute_rule_definitions_common` + /// to avoid leaving the VM in an inconsistent state. + fn restore_rule_state( + &mut self, + previous_loop_stack: &mut Vec, + previous_comprehension_stack: &mut Vec, + ) { + if let Some(restored_registers) = self.register_stack.pop() { + let mut current_register_window = Vec::default(); + mem::swap(&mut current_register_window, &mut self.registers); + self.return_register_window(current_register_window); + self.registers = restored_registers; + } + mem::swap(&mut self.loop_stack, previous_loop_stack); + mem::swap(&mut self.comprehension_stack, previous_comprehension_stack); + } + pub(super) fn execute_rule_definitions_common( &mut self, rule_definitions: &[Vec], @@ -79,6 +109,13 @@ impl RegoVM { { match self.jump_to(destructuring_entry_point) { Ok(_result) => {} + Err(e) if Self::is_fatal_vm_error(&e) => { + self.restore_rule_state( + &mut previous_loop_stack, + &mut previous_comprehension_stack, + ); + return Err(e); + } Err(_e) => { continue 'outer; } @@ -111,6 +148,13 @@ impl RegoVM { // are treated as else-branches and must not be evaluated. break; } + Err(e) if Self::is_fatal_vm_error(&e) => { + self.restore_rule_state( + &mut previous_loop_stack, + &mut previous_comprehension_stack, + ); + return Err(e); + } Err(_e) => {} } } diff --git a/src/utils/limits/error.rs b/src/utils/limits/error.rs index c10cae7b..14ac37c1 100644 --- a/src/utils/limits/error.rs +++ b/src/utils/limits/error.rs @@ -23,6 +23,11 @@ pub enum LimitError { /// Configured memory ceiling in bytes. limit: u64, }, + /// Reported when a compiled regex NFA exceeds the configured size limit. + RegexSizeLimitExceeded { + /// Configured compiled-NFA size ceiling in bytes. + limit: usize, + }, } impl fmt::Debug for LimitError { @@ -38,6 +43,10 @@ impl fmt::Debug for LimitError { .field("usage", usage) .field("limit", limit) .finish(), + Self::RegexSizeLimitExceeded { limit } => f + .debug_struct("RegexSizeLimitExceeded") + .field("limit", limit) + .finish(), } } } @@ -61,6 +70,9 @@ impl fmt::Display for LimitError { usage, limit ) } + Self::RegexSizeLimitExceeded { limit } => { + write!(f, "compiled regex exceeded size limit ({} bytes)", limit) + } } } } diff --git a/tests/rvm/rego/cases/regex_size_limit.yaml b/tests/rvm/rego/cases/regex_size_limit.yaml new file mode 100644 index 00000000..7eac2278 --- /dev/null +++ b/tests/rvm/rego/cases/regex_size_limit.yaml @@ -0,0 +1,333 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +# Tests that regex compilation enforces a size limit on the compiled NFA. +# Patterns that produce an NFA exceeding the limit are rejected at compile +# time. This prevents adversarial patterns from consuming excessive CPU +# during DFA construction or matching. + +cases: + # ------------------------------------------------------------------------- + # Legitimate policy patterns — must continue to work + # ------------------------------------------------------------------------- + - note: legit-ipv4 + data: {} + modules: + - | + package test + main := regex.match(`^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$`, "192.168.1.1") + query: data.test.main + want_result: true + + - note: legit-hostname + data: {} + modules: + - | + package test + main := regex.match(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`, "my-host.example.com") + query: data.test.main + want_result: true + + - note: legit-semver + data: {} + modules: + - | + package test + main := regex.match(`^v?\d+\.\d+\.\d+(-[a-zA-Z0-9.]+)?(\+[a-zA-Z0-9.]+)?$`, "v1.2.3") + query: data.test.main + want_result: true + + - note: legit-uuid + data: {} + modules: + - | + package test + main := regex.match(`^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`, "550e8400-e29b-41d4-a716-446655440000") + query: data.test.main + want_result: true + + - note: legit-email + data: {} + modules: + - | + package test + main := regex.match(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`, "user@example.com") + query: data.test.main + want_result: true + + - note: legit-cidr + data: {} + modules: + - | + package test + main := regex.match(`^(\d{1,3}\.){3}\d{1,3}/\d{1,2}$`, "10.0.0.0/8") + query: data.test.main + want_result: true + + - note: legit-image-digest + data: {} + modules: + - | + package test + main := regex.match(`^[a-z0-9]+([._-][a-z0-9]+)*(/[a-z0-9]+([._-][a-z0-9]+)*)*@sha256:[a-f0-9]{64}$`, "myregistry/myimage@sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef") + query: data.test.main + want_result: true + + - note: legit-rfc1123-label + data: {} + modules: + - | + package test + main := regex.match(`^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$`, "my-resource-name") + query: data.test.main + want_result: true + + - note: legit-tag-value + data: {} + modules: + - | + package test + main := regex.match(`^[a-zA-Z0-9 _.:/=+@-]+$`, "env:production/v2") + query: data.test.main + want_result: true + + - note: legit-naming-convention + data: {} + modules: + - | + package test + main := regex.match(`^[a-z][a-z0-9-]{2,62}$`, "my-resource-group") + query: data.test.main + want_result: true + + - note: legit-is-valid + data: {} + modules: + - | + package test + main := regex.is_valid(`^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$`) + query: data.test.main + want_result: true + + - note: legit-find-n + data: {} + modules: + - | + package test + main := regex.find_n(`[0-9]+`, "abc123def456", 2) + query: data.test.main + want_result: + - "123" + - "456" + + - note: legit-split + data: {} + modules: + - | + package test + main := regex.split(`[,;]+`, "a,b;;c,d") + query: data.test.main + want_result: + - "a" + - "b" + - "c" + - "d" + + - note: legit-replace + data: {} + modules: + - | + package test + main := regex.replace("hello-world_foo", `[-_]`, ".") + query: data.test.main + want_result: "hello.world.foo" + + - note: is-valid-syntax-error + data: {} + modules: + - | + package test + main := regex.is_valid(`[invalid`) + query: data.test.main + want_result: false + + # ------------------------------------------------------------------------- + # Adversarial patterns — must be rejected as hard errors. + # + # These patterns produce compiled NFAs exceeding the 100 KiB size limit. + # The error must propagate (not be swallowed to undefined) so that + # `not regex.match(...)` does not silently flip to true. + # ------------------------------------------------------------------------- + + - note: blocked-ascending-overlap-46 + data: {} + modules: + - | + package test + main := regex.match(`[a-f]{1,64}[a-g]{1,64}[a-h]{1,64}[a-i]{1,64}[a-j]{1,64}[a-k]{1,64}[a-l]{1,64}[a-m]{1,64}[a-n]{1,64}[a-o]{1,64}[a-p]{1,64}[a-q]{1,64}[a-r]{1,64}[a-s]{1,64}[a-t]{1,64}[a-u]{1,64}[a-v]{1,64}[a-w]{1,64}[a-x]{1,64}[a-y]{1,64}[a-z]{1,64}[a-f]{1,64}[a-g]{1,64}[a-h]{1,64}[a-i]{1,64}[a-j]{1,64}[a-k]{1,64}[a-l]{1,64}[a-m]{1,64}[a-n]{1,64}[a-o]{1,64}[a-p]{1,64}[a-q]{1,64}[a-r]{1,64}[a-s]{1,64}[a-t]{1,64}[a-u]{1,64}[a-v]{1,64}[a-f]{1,64}[d-k]{1,64}[h-p]{1,64}[m-u]{1,64}[r-z]{1,64}[a-h]{1,64}[e-n]{1,64}[k-z]{1,64}`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-lower-shift-16 + data: {} + modules: + - | + package test + main := regex.match(`[a-p]{1,64}[b-q]{1,64}[c-r]{1,64}[d-s]{1,64}[e-t]{1,64}[f-u]{1,64}[g-v]{1,64}[h-w]{1,64}[i-x]{1,64}[j-y]{1,64}[k-z]{1,64}[a-l]{1,64}[a-m]{1,64}[a-n]{1,64}[a-o]{1,64}[a-z]{1,64}`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-case-window-16 + data: {} + modules: + - | + package test + main := regex.match(`[A-Pa-p]{1,64}[B-Qb-q]{1,64}[C-Rc-r]{1,64}[D-Sd-s]{1,64}[E-Te-t]{1,64}[F-Uf-u]{1,64}[G-Vg-v]{1,64}[H-Wh-w]{1,64}[I-Xi-x]{1,64}[J-Yj-y]{1,64}[K-Zk-z]{1,64}[A-La-l]{1,64}[A-Ma-m]{1,64}[A-Na-n]{1,64}[A-Oa-o]{1,64}[A-Za-z]{1,64}`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-ascii-range-ladder-16 + data: {} + modules: + - | + package test + main := regex.match(`[ -~]{1,64}[!-~]{1,64}[ -}]{1,64}[!-}]{1,64}[ -z]{1,64}[!-z]{1,64}[#-~]{1,64}[$-}]{1,64}[%-|]{1,64}[&-{]{1,64}['-z]{1,64}[(~)]{1,64}[)-~]{1,64}[*-}]{1,64}[+-|]{1,64}[,-{]{1,64}`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-digit-letter-overlap-16 + data: {} + modules: + - | + package test + main := regex.match(`[A-Za-z0-9]{1,64}[A-Fa-f0-9]{1,64}[D-Kd-k3-9]{1,64}[H-Ph-p0-5]{1,64}[M-Tm-t2-8]{1,64}[Q-Zq-z1-7]{1,64}[A-Ma-m4-9]{1,64}[G-Zg-z0-6]{1,64}[A-Za-m0-9]{1,64}[A-Mn-z0-9]{1,64}[N-Za-z0-9]{1,64}[A-Zn-z0-9]{1,64}[A-Za-z_]{1,64}[A-Za-z-]{1,64}[A-Za-z.]{1,64}[A-Za-z0-9]{1,64}`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-ascii-opt-branch-16 + data: {} + modules: + - | + package test + main := regex.match(`(?:[A-p]{1,64}|)(?:[B-q]{1,64}|)(?:[C-r]{1,64}|)(?:[D-s]{1,64}|)(?:[E-t]{1,64}|)(?:[F-u]{1,64}|)(?:[G-v]{1,64}|)(?:[H-w]{1,64}|)(?:[I-x]{1,64}|)(?:[J-y]{1,64}|)(?:[K-z]{1,64}|)(?:[L-{]{1,64}|)(?:[M-|]{1,64}|)(?:[N-}]{1,64}|)(?:[O-~]{1,64}|)(?:[P-\x7f]{1,64}|)`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-ascii-two-way-16 + data: {} + modules: + - | + package test + main := regex.match(`(?:[A-p]{1,64}|[B-q]{1,64})(?:[B-q]{1,64}|[C-r]{1,64})(?:[C-r]{1,64}|[D-s]{1,64})(?:[D-s]{1,64}|[E-t]{1,64})(?:[E-t]{1,64}|[F-u]{1,64})(?:[F-u]{1,64}|[G-v]{1,64})(?:[G-v]{1,64}|[H-w]{1,64})(?:[H-w]{1,64}|[I-x]{1,64})(?:[I-x]{1,64}|[J-y]{1,64})(?:[J-y]{1,64}|[K-z]{1,64})(?:[K-z]{1,64}|[L-{]{1,64})(?:[L-{]{1,64}|[M-|]{1,64})(?:[M-|]{1,64}|[N-}]{1,64})(?:[N-}]{1,64}|[O-~]{1,64})(?:[O-~]{1,64}|[P-\x7f]{1,64})(?:[P-\x7f]{1,64}|[Q-\x80]{1,64})`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-paired-range-branches + data: {} + modules: + - | + package test + main := regex.match(`(?:[A-p]{1,64}[B-q]{1,64}|[C-r]{1,64}[D-s]{1,64})(?:[E-t]{1,64}[F-u]{1,64}|[G-v]{1,64}[H-w]{1,64})(?:[I-x]{1,64}[J-y]{1,64}|[K-z]{1,64}[L-z]{1,64})(?:[M-z]{1,64}[N-z]{1,64}|[O-z]{1,64}[P-z]{1,64})(?:[A-p]{1,64}[B-q]{1,64}|[C-r]{1,64}[D-s]{1,64})(?:[E-t]{1,64}[F-u]{1,64}|[G-v]{1,64}[H-w]{1,64})(?:[I-x]{1,64}[J-y]{1,64}|[K-z]{1,64}[L-z]{1,64})(?:[M-z]{1,64}[N-z]{1,64}|[O-z]{1,64}[P-z]{1,64})`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-baseline-az-16 + data: {} + modules: + - | + package test + main := regex.match(`[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-nested-quantifiers + data: {} + modules: + - | + package test + main := regex.match(`(((\w{1,8}\d{1,8}){1,8}\s{1,8}){1,4}\D{1,4})+`, "test") + query: data.test.main + want_error: "size limit" + + - note: blocked-is-valid-oversized + data: {} + modules: + - | + package test + main := regex.is_valid(`[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}`) + query: data.test.main + want_error: "size limit" + + # ------------------------------------------------------------------------- + # Negation — `not regex.match(...)` must error, not silently become true + # ------------------------------------------------------------------------- + - note: blocked-not-regex-match-oversized + data: {} + modules: + - | + package test + main if { + not regex.match(`([a-z]{1,64}){1,64}`, "test") + } + query: data.test.main + want_error: "size limit" + + # ------------------------------------------------------------------------- + # regex.replace with oversized pattern must error + # ------------------------------------------------------------------------- + - note: blocked-regex-replace-oversized + data: {} + modules: + - | + package test + main := regex.replace("hello", `([a-z]{1,64}){1,64}`, "x") + query: data.test.main + want_error: "size limit" + + # ------------------------------------------------------------------------- + # regex.find_all_string_submatch_n with oversized pattern must error + # ------------------------------------------------------------------------- + - note: blocked-find-submatch-oversized + data: {} + modules: + - | + package test + main := regex.find_all_string_submatch_n(`([a-z]{1,64}){1,64}`, "hello", 1) + query: data.test.main + want_error: "size limit" + + # ------------------------------------------------------------------------- + # regex.find_n with oversized pattern must error + # ------------------------------------------------------------------------- + - note: blocked-find-n-oversized + data: {} + modules: + - | + package test + main := regex.find_n(`([a-z]{1,64}){1,64}`, "hello", 1) + query: data.test.main + want_error: "size limit" + + # ------------------------------------------------------------------------- + # regex.split with oversized pattern must error + # ------------------------------------------------------------------------- + - note: blocked-split-oversized + data: {} + modules: + - | + package test + main := regex.split(`([a-z]{1,64}){1,64}`, "hello-world") + query: data.test.main + want_error: "size limit" + + # ------------------------------------------------------------------------- + # regex.template_match with oversized embedded pattern must error + # ------------------------------------------------------------------------- + - note: blocked-template-match-oversized + data: {} + modules: + - | + package test + main := regex.template_match(`<[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}[a-z]{1,64}>`, "hello", "<", ">") + query: data.test.main + want_error: "size limit"