Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 65 additions & 18 deletions src/builtins/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::value::Value;
use crate::*;

use anyhow::{bail, Result};
use regex::Regex;
use regex::{Regex, RegexBuilder};

// ---------------------------------------------------------------------------
// Compiled-regex cache (feature = "cache")
Expand All @@ -21,6 +21,21 @@ use regex::Regex;
// via regorus::cache::configure().
// ---------------------------------------------------------------------------

/// Maximum compiled NFA size (in bytes) for a regex pattern.
/// This bounds both compilation time and match-time cost by limiting the
/// automaton's structural complexity. At 100 KiB, every real-world policy
/// pattern (IPv4, hostname, semver, UUID, image-digest, CIDR, etc.) compiles
/// comfortably, while adversarial patterns that would otherwise cause
/// expensive DFA construction are rejected at compile time.
const REGEX_SIZE_LIMIT: usize = 100 * 1024;

/// Compile a regex pattern with a size limit to bound resource consumption.
fn compile_regex(pattern: &str) -> core::result::Result<Regex, regex::Error> {
RegexBuilder::new(pattern)
.size_limit(REGEX_SIZE_LIMIT)
.build()
}
Comment thread
anakrish marked this conversation as resolved.

/// Compile a regex pattern, using the cache when the `cache` feature
/// is enabled and falling back to direct compilation otherwise.
fn get_or_compile_regex(pattern: &str) -> core::result::Result<Regex, regex::Error> {
Expand All @@ -32,7 +47,7 @@ fn get_or_compile_regex(pattern: &str) -> core::result::Result<Regex, regex::Err
return Ok(re.clone());
}
}
let re = Regex::new(pattern)?;
let re = compile_regex(pattern)?;
{
let mut cache = crate::cache::REGEX_CACHE.lock();
cache.put(alloc::string::String::from(pattern), re.clone());
Expand All @@ -41,10 +56,27 @@ fn get_or_compile_regex(pattern: &str) -> core::result::Result<Regex, regex::Err
}
#[cfg(not(feature = "cache"))]
{
Regex::new(pattern)
compile_regex(pattern)
}
}

/// Compile a regex for use in a builtin function.
///
/// - `CompiledTooBig` is raised as [`LimitError::RegexSizeLimitExceeded`] so
/// that it propagates as a hard error even in non-strict mode.
/// - Syntax errors produce a span-attached "invalid regex" error that the
/// evaluator may swallow to `Undefined` in non-strict mode (OPA-compatible).
fn compile_regex_for_builtin(span: &Span, pattern: &str) -> Result<Regex> {
get_or_compile_regex(pattern).map_err(|e| match e {
regex::Error::CompiledTooBig(_) => {
anyhow::Error::new(crate::utils::limits::LimitError::RegexSizeLimitExceeded {
limit: REGEX_SIZE_LIMIT,
})
}
_ => anyhow::anyhow!(span.error("invalid regex")),
})
Comment thread
anakrish marked this conversation as resolved.
}
Comment thread
anakrish marked this conversation as resolved.

pub fn register(m: &mut builtins::BuiltinsMap<&'static str, builtins::BuiltinFcn>) {
m.insert(
"regex.find_all_string_submatch_n",
Expand Down Expand Up @@ -72,8 +104,7 @@ fn find_all_string_submatch_n(
let value = ensure_string(name, &params[1], &args[1])?;
let n = ensure_numeric(name, &params[2], &args[2])?;

let re = get_or_compile_regex(&pattern)
.or_else(|_| bail!(params[0].span().error("invalid regex")))?;
let re = compile_regex_for_builtin(params[0].span(), &pattern)?;
Comment thread
anakrish marked this conversation as resolved.

if !n.is_integer() {
bail!(params[2].span().error("n must be an integer"));
Expand Down Expand Up @@ -118,8 +149,7 @@ fn find_n(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool) -> R
let value = ensure_string(name, &params[1], &args[1])?;
let n = ensure_numeric(name, &params[2], &args[2])?;

let re = get_or_compile_regex(&pattern)
.or_else(|_| bail!(params[0].span().error("invalid regex")))?;
let re = compile_regex_for_builtin(params[0].span(), &pattern)?;
Comment thread
anakrish marked this conversation as resolved.

if !n.is_integer() {
bail!(params[2].span().error("n must be an integer"));
Expand Down Expand Up @@ -147,11 +177,21 @@ fn find_n(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool) -> R
fn is_valid(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool) -> Result<Value> {
let name = "regex.is_valid";
ensure_args_count(span, name, params, args, 1)?;
Ok(
ensure_string(name, &params[0], &args[0]).map_or(Value::Bool(false), |p| {
Value::Bool(get_or_compile_regex(&p).is_ok())
}),
)
let pattern = match ensure_string(name, &params[0], &args[0]) {
Ok(p) => p,
Err(_) => return Ok(Value::Bool(false)),
};
match get_or_compile_regex(&pattern) {
Ok(_) => Ok(Value::Bool(true)),
// Size-limit exceeded is a resource-limit violation; propagate as hard error.
Err(regex::Error::CompiledTooBig(_)) => Err(anyhow::Error::new(
crate::utils::limits::LimitError::RegexSizeLimitExceeded {
limit: REGEX_SIZE_LIMIT,
},
)),
Comment thread
anakrish marked this conversation as resolved.
// Syntax errors mean the pattern is genuinely invalid.
Comment thread
anakrish marked this conversation as resolved.
Err(_) => Ok(Value::Bool(false)),
}
Comment thread
anakrish marked this conversation as resolved.
}

pub fn regex_match(
Expand All @@ -165,8 +205,7 @@ pub fn regex_match(
let pattern = ensure_string(name, &params[0], &args[0])?;
let value = ensure_string(name, &params[1], &args[1])?;

let re = get_or_compile_regex(&pattern)
.or_else(|_| bail!(params[0].span().error("invalid regex")))?;
let re = compile_regex_for_builtin(params[0].span(), &pattern)?;
Ok(Value::Bool(re.is_match(&value)))
}

Expand All @@ -185,6 +224,13 @@ fn regex_replace(

let re = match get_or_compile_regex(&pattern) {
Ok(p) => p,
Err(regex::Error::CompiledTooBig(_)) => {
return Err(anyhow::Error::new(
crate::utils::limits::LimitError::RegexSizeLimitExceeded {
limit: REGEX_SIZE_LIMIT,
},
));
}
Comment thread
anakrish marked this conversation as resolved.
Comment thread
anakrish marked this conversation as resolved.
// TODO: This behavior is due to OPA test not raising error. Should we raise error?
_ => return Ok(Value::Undefined),
Comment thread
anakrish marked this conversation as resolved.
};
Expand All @@ -198,8 +244,7 @@ fn regex_split(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool)
let pattern = ensure_string(name, &params[0], &args[0])?;
let value = ensure_string(name, &params[1], &args[1])?;

let re = get_or_compile_regex(&pattern)
.or_else(|_| bail!(params[0].span().error("invalid regex")))?;
let re = compile_regex_for_builtin(params[0].span(), &pattern)?;
Comment thread
anakrish marked this conversation as resolved.
Ok(Value::from_array(
re.split(&value)
.map(|s| {
Expand Down Expand Up @@ -242,8 +287,10 @@ fn regex_template_match(
}

// Fetch pattern, excluding delimiters.
let re = get_or_compile_regex(&template[start + delimiter_start.len()..end])
.or_else(|_| bail!(params[0].span().error("invalid regex")))?;
let re = compile_regex_for_builtin(
params[0].span(),
&template[start + delimiter_start.len()..end],
)?;
Comment thread
anakrish marked this conversation as resolved.

// Skip preceding literal in value.
value = &value[start..];
Expand Down
10 changes: 8 additions & 2 deletions src/interpreter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2405,8 +2405,14 @@ impl Interpreter {
self.compiled_policy.strict_builtin_errors,
) {
Ok(v) => v,
// Ignore errors if we are not evaluating in strict mode.
Err(_) if !self.compiled_policy.strict_builtin_errors => return Ok(Value::Undefined),
// Resource-limit errors must always propagate, even in non-strict
// mode, to prevent `not builtin(...)` from silently flipping to true.
Err(e) if !self.compiled_policy.strict_builtin_errors => {
if e.downcast_ref::<crate::LimitError>().is_some() {
return Err(e);
}
return Ok(Value::Undefined);
}
Comment thread
anakrish marked this conversation as resolved.
Comment thread
anakrish marked this conversation as resolved.
Err(e) => Err(e)?,
};

Expand Down
29 changes: 29 additions & 0 deletions src/rvm/vm/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ pub enum VmError {
#[error("Execution exceeded memory limit (usage={usage} bytes, limit={limit} bytes, pc={pc})")]
MemoryLimitExceeded { usage: u64, limit: u64, pc: usize },

#[error("Compiled regex exceeded size limit ({limit} bytes, pc={pc})")]
RegexSizeLimitExceeded { limit: usize, pc: usize },

#[error("Literal index {index} out of bounds (pc={pc})")]
LiteralIndexOutOfBounds { index: u16, pc: usize },

Expand Down Expand Up @@ -298,6 +301,32 @@ pub enum VmError {

impl From<anyhow::Error> for VmError {
fn from(err: anyhow::Error) -> Self {
// Preserve LimitError identity so that resource-limit violations are
// never silently swallowed to Undefined in non-strict mode.
// Note: pc is set to 0 because this conversion lacks instruction context.
// The error message itself (which includes the limit value) provides
// sufficient diagnostic information for users.
if let Some(limit_err) = err.downcast_ref::<crate::LimitError>() {
return match *limit_err {
crate::LimitError::TimeLimitExceeded { elapsed, limit } => {
VmError::TimeLimitExceeded {
elapsed,
limit,
pc: 0,
}
}
crate::LimitError::MemoryLimitExceeded { usage, limit } => {
VmError::MemoryLimitExceeded {
usage,
limit,
pc: 0,
}
}
crate::LimitError::RegexSizeLimitExceeded { limit } => {
VmError::RegexSizeLimitExceeded { limit, pc: 0 }
}
};
Comment thread
anakrish marked this conversation as resolved.
}
VmError::ArithmeticError {
message: alloc::format!("{}", err),
pc: 0,
Expand Down
10 changes: 9 additions & 1 deletion src/rvm/vm/execution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,15 @@ impl RegoVM {
Ok(())
}

fn handle_instruction_error(&mut self, _err: VmError, last_result: &mut Value) -> Result<bool> {
fn handle_instruction_error(&mut self, err: VmError, last_result: &mut Value) -> Result<bool> {
// Resource-limit errors must never be absorbed by rule evaluation.
// They represent engine-level constraints, not rule-level failures.
// Returning Ok(false) causes the caller to clear execution_stack and
// propagate the error, terminating the evaluation entirely.
if RegoVM::is_fatal_vm_error(&err) {
return Ok(false);
Comment thread
anakrish marked this conversation as resolved.
}

if let Some(frame) = self.execution_stack.pop() {
match frame.kind {
FrameKind::Rule(mut data) => {
Expand Down
11 changes: 10 additions & 1 deletion src/rvm/vm/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,16 @@ impl RegoVM {
self.strict_builtin_errors,
) {
Ok(value) => value,
Err(_) if !self.strict_builtin_errors => Value::Undefined,
// Resource-limit errors must always propagate, even in non-strict
// mode, to prevent `not builtin(...)` from silently flipping to true.
Err(e) if !self.strict_builtin_errors => {
if e.downcast_ref::<crate::LimitError>().is_some() {
self.dummy_exprs = dummy_exprs;
self.cached_builtin_args = args;
return Err(e.into());
}
Comment thread
anakrish marked this conversation as resolved.
Value::Undefined
}
Comment thread
anakrish marked this conversation as resolved.
Comment thread
anakrish marked this conversation as resolved.
Err(err) => {
self.dummy_exprs = dummy_exprs;
self.cached_builtin_args = args;
Expand Down
3 changes: 3 additions & 0 deletions src/rvm/vm/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,9 @@ impl RegoVM {
limit,
pc: self.pc,
},
LimitError::RegexSizeLimitExceeded { limit } => {
VmError::RegexSizeLimitExceeded { limit, pc: self.pc }
}
})
}

Expand Down
44 changes: 44 additions & 0 deletions src/rvm/vm/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,36 @@ use super::execution_model::{
use super::machine::RegoVM;

impl RegoVM {
/// Returns true if the error represents a resource-limit violation that
/// must never be silently absorbed by rule evaluation.
pub(super) const fn is_fatal_vm_error(err: &VmError) -> bool {
matches!(
err,
VmError::TimeLimitExceeded { .. }
| VmError::MemoryLimitExceeded { .. }
| VmError::RegexSizeLimitExceeded { .. }
| VmError::InstructionLimitExceeded { .. }
)
}

/// Restore VM state that was swapped out for rule execution.
/// Must be called before returning an error from `execute_rule_definitions_common`
/// to avoid leaving the VM in an inconsistent state.
fn restore_rule_state(
&mut self,
previous_loop_stack: &mut Vec<super::context::LoopContext>,
previous_comprehension_stack: &mut Vec<super::context::ComprehensionContext>,
) {
if let Some(restored_registers) = self.register_stack.pop() {
let mut current_register_window = Vec::default();
mem::swap(&mut current_register_window, &mut self.registers);
self.return_register_window(current_register_window);
self.registers = restored_registers;
}
mem::swap(&mut self.loop_stack, previous_loop_stack);
mem::swap(&mut self.comprehension_stack, previous_comprehension_stack);
}

pub(super) fn execute_rule_definitions_common(
&mut self,
rule_definitions: &[Vec<u32>],
Expand Down Expand Up @@ -79,6 +109,13 @@ impl RegoVM {
{
match self.jump_to(destructuring_entry_point) {
Ok(_result) => {}
Err(e) if Self::is_fatal_vm_error(&e) => {
self.restore_rule_state(
&mut previous_loop_stack,
&mut previous_comprehension_stack,
);
return Err(e);
}
Comment thread
anakrish marked this conversation as resolved.
Err(_e) => {
Comment thread
anakrish marked this conversation as resolved.
continue 'outer;
}
Expand Down Expand Up @@ -111,6 +148,13 @@ impl RegoVM {
// are treated as else-branches and must not be evaluated.
break;
}
Err(e) if Self::is_fatal_vm_error(&e) => {
self.restore_rule_state(
&mut previous_loop_stack,
&mut previous_comprehension_stack,
);
return Err(e);
}
Err(_e) => {}
}
}
Expand Down
12 changes: 12 additions & 0 deletions src/utils/limits/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ pub enum LimitError {
/// Configured memory ceiling in bytes.
limit: u64,
},
/// Reported when a compiled regex NFA exceeds the configured size limit.
RegexSizeLimitExceeded {
/// Configured compiled-NFA size ceiling in bytes.
limit: usize,
},
}

impl fmt::Debug for LimitError {
Expand All @@ -38,6 +43,10 @@ impl fmt::Debug for LimitError {
.field("usage", usage)
.field("limit", limit)
.finish(),
Self::RegexSizeLimitExceeded { limit } => f
.debug_struct("RegexSizeLimitExceeded")
.field("limit", limit)
.finish(),
}
}
}
Expand All @@ -61,6 +70,9 @@ impl fmt::Display for LimitError {
usage, limit
)
}
Self::RegexSizeLimitExceeded { limit } => {
write!(f, "compiled regex exceeded size limit ({} bytes)", limit)
Comment thread
anakrish marked this conversation as resolved.
}
}
}
}
Expand Down
Loading
Loading