Skip to content

Commit 94970f7

Browse files
committed
revert: go back to the regex implementation, but switch to regex-lite
1 parent e93a573 commit 94970f7

File tree

9 files changed

+75
-148
lines changed

9 files changed

+75
-148
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datadog-remote-config/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ serde_with = "3"
4949

5050
# Test feature
5151
hyper-util = { workspace = true, features = ["service"], optional = true }
52+
regex-lite = "0.1.9"
5253

5354
[dev-dependencies]
5455
futures = "0.3"

datadog-remote-config/src/config/agent_task.rs

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,9 @@ use serde::Deserialize;
66
#[cfg(feature = "test")]
77
use serde::Serialize;
88

9+
use regex_lite::Regex;
910
use serde::de::{self, Deserializer};
1011

11-
fn is_valid_suffixed_case_id(s: &str) -> bool {
12-
let Some(rest) = s
13-
.strip_suffix("-with-debug")
14-
.or_else(|| s.strip_suffix("-with-content"))
15-
else {
16-
return false;
17-
};
18-
!rest.is_empty() && rest.bytes().all(|b| b.is_ascii_digit())
19-
}
20-
2112
fn deserialize_case_id<'de, D>(deserializer: D) -> Result<String, D::Error>
2213
where
2314
D: Deserializer<'de>,
@@ -32,7 +23,9 @@ where
3223
if s.chars().all(|c| c.is_ascii_digit()) {
3324
return Ok(s);
3425
}
35-
if is_valid_suffixed_case_id(&s) {
26+
let re = Regex::new(r"^\d+-(with-debug|with-content)$")
27+
.map_err(|_| de::Error::custom("Invalid case_id format"))?;
28+
if re.is_match(&s) {
3629
return Ok(s);
3730
}
3831
Err(de::Error::custom(

libdd-common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ static_assertions = "1.1.0"
4747
libc = "0.2"
4848
const_format = "0.2.34"
4949
nix = { version = "0.29", features = ["process"] }
50+
regex-lite = "0.1.9"
5051
[target.'cfg(windows)'.dependencies.windows-sys]
5152
version = "0.52"
5253
features = [

libdd-common/src/azure_app_services.rs

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
33

4+
use regex_lite::Regex;
45
use std::env;
56
use std::sync::LazyLock;
67

@@ -103,23 +104,13 @@ impl AzureMetadata {
103104
}
104105

105106
fn extract_resource_group(s: Option<String>) -> Option<String> {
106-
// /.+\+(.+)-.+webspace(-Linux)?/
107-
let text = s.as_ref()?;
108-
let (before_plus, after_plus) = text.rsplit_once('+')?;
109-
if before_plus.is_empty() {
110-
return None;
111-
}
112-
let webspace_pos = after_plus.rfind("webspace")?;
113-
let before_webspace = &after_plus[..webspace_pos];
114-
let dash_pos = before_webspace.rfind('-')?;
115-
if dash_pos + 1 >= before_webspace.len() {
116-
return None;
117-
}
118-
let resource_group = &before_webspace[..dash_pos];
119-
if resource_group.is_empty() {
120-
return None;
121-
}
122-
Some(resource_group.to_string())
107+
#[allow(clippy::unwrap_used)]
108+
let re: Regex = Regex::new(r".+\+(.+)-.+webspace(-Linux)?").unwrap();
109+
110+
s.as_ref().and_then(|text| {
111+
re.captures(text)
112+
.and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()))
113+
})
123114
}
124115

125116
/*

libdd-common/src/entity_id/unix/container_id.rs

Lines changed: 26 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -3,85 +3,37 @@
33

44
//! This module provides functions to parse the container id from the cgroup file
55
use super::CgroupFileParsingError;
6+
use regex_lite::Regex;
67
use std::fs::File;
78
use std::io::{BufRead, BufReader};
89
use std::path::Path;
10+
use std::sync::LazyLock;
11+
12+
const UUID_SOURCE: &str =
13+
r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}";
14+
const CONTAINER_SOURCE: &str = r"[0-9a-f]{64}";
15+
const TASK_SOURCE: &str = r"[0-9a-f]{32}-\d+";
16+
17+
pub(crate) static LINE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
18+
#[allow(clippy::unwrap_used)]
19+
Regex::new(r"^\d+:[^:]*:(.+)$").unwrap()
20+
});
21+
22+
pub(crate) static CONTAINER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
23+
#[allow(clippy::unwrap_used)]
24+
Regex::new(&format!(
25+
r"({UUID_SOURCE}|{CONTAINER_SOURCE}|{TASK_SOURCE})(?:.scope)? *$"
26+
))
27+
.unwrap()
28+
});
929

10-
fn is_lowercase_hex(b: u8) -> bool {
11-
matches!(b, b'0'..=b'9' | b'a'..=b'f')
12-
}
13-
14-
/// Try to match `[0-9a-f]{64}` at the end of `s`.
15-
fn try_match_hex64(s: &str) -> Option<&str> {
16-
if s.len() < 64 {
17-
return None;
18-
}
19-
20-
let candidate = &s[s.len() - 64..];
21-
candidate.bytes().all(is_lowercase_hex).then_some(candidate)
22-
}
23-
24-
/// Try to match a UUID `[0-9a-f]{8}[-_][0-9a-f]{4}[-_]..[-_][0-9a-f]{12}` (36 chars) at the end.
25-
fn try_match_uuid(s: &str) -> Option<&str> {
26-
if s.len() < 36 {
27-
return None;
28-
}
29-
30-
let candidate = &s[s.len() - 36..];
31-
const TEMPLATE: &[u8; 36] = b"hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh";
32-
candidate
33-
.as_bytes()
34-
.iter()
35-
.zip(TEMPLATE)
36-
.all(|(&c, &t)| match t {
37-
b'h' => is_lowercase_hex(c),
38-
b'-' => matches!(c, b'-' | b'_'),
39-
_ => false,
40-
})
41-
.then_some(candidate)
42-
}
43-
44-
/// Try to match `[0-9a-f]{32}-\d+` at the end of `s`.
45-
fn try_match_task_id(s: &str) -> Option<&str> {
46-
let (prefix, digits) = s.rsplit_once('-')?;
47-
if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) || prefix.len() < 32 {
48-
return None;
49-
}
50-
51-
let hex_start = prefix.len() - 32;
52-
prefix[hex_start..]
53-
.bytes()
54-
.all(is_lowercase_hex)
55-
.then_some(&s[hex_start..])
56-
}
57-
58-
/// Extract a container ID from a cgroup path, matching the pattern
59-
/// `(UUID|HEX64|TASK_ID)(?:.scope)? *$`
60-
pub(super) fn extract_container_id_from_path(path: &str) -> Option<&str> {
61-
let path = {
62-
let trimmed = path.trim_end();
63-
trimmed.strip_suffix(".scope").unwrap_or(trimmed)
64-
};
65-
66-
try_match_hex64(path)
67-
.or_else(|| try_match_uuid(path))
68-
.or_else(|| try_match_task_id(path))
69-
}
70-
71-
/// Parse a cgroup line (`^\d+:[^:]*:(.+)$`) and extract a container ID from the path component.
7230
fn parse_line(line: &str) -> Option<&str> {
73-
let mut parts = line.splitn(3, ':');
74-
let hierarchy_id = parts.next()?;
75-
let _controllers = parts.next()?;
76-
let path = parts.next()?;
77-
78-
if hierarchy_id.is_empty()
79-
|| !hierarchy_id.bytes().all(|b| b.is_ascii_digit())
80-
|| path.is_empty()
81-
{
82-
return None;
83-
}
84-
extract_container_id_from_path(path)
31+
// unwrap is OK: capture group 1 is mandatory in both regexes, so it exists whenever they match
32+
#[allow(clippy::unwrap_used)]
33+
LINE_REGEX
34+
.captures(line)
35+
.and_then(|captures| CONTAINER_REGEX.captures(captures.get(1).unwrap().as_str()))
36+
.map(|captures| captures.get(1).unwrap().as_str())
8537
}
8638

8739
/// Extract container id contained in the cgroup file located at `cgroup_path`

libdd-common/src/entity_id/unix/mod.rs

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -104,27 +104,15 @@ pub static ENTITY_ID: LazyLock<Option<&'static str>> = LazyLock::new(|| {
104104
#[cfg(test)]
105105
mod tests {
106106
use super::*;
107+
use regex_lite::Regex;
107108

108-
enum EntityIdKind {
109-
Inode,
110-
ContainerId,
111-
}
112-
113-
fn matches_entity_id_kind(entity_id: &str, kind: &EntityIdKind) -> bool {
114-
match kind {
115-
EntityIdKind::Inode => {
116-
entity_id.starts_with("in-")
117-
&& entity_id[3..].bytes().all(|b| b.is_ascii_digit())
118-
&& entity_id.len() > 3
119-
}
120-
EntityIdKind::ContainerId => {
121-
entity_id.starts_with("ci-")
122-
&& container_id::extract_container_id_from_path(&entity_id[3..]).is_some()
123-
}
124-
}
125-
}
109+
static IN_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"in-\d+").unwrap());
110+
static CI_REGEX: LazyLock<Regex> = LazyLock::new(|| {
111+
Regex::new(&format!(r"ci-{}", container_id::CONTAINER_REGEX.as_str())).unwrap()
112+
});
126113

127-
fn test_entity_id(filename: &str, expected_kind: Option<EntityIdKind>) {
114+
/// The following test can only be run in isolation because of caching behaviour
115+
fn test_entity_id(filename: &str, expected_result: Option<&Regex>) {
128116
let test_root_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/tests"));
129117

130118
let entity_id = compute_entity_id(
@@ -133,11 +121,12 @@ mod tests {
133121
test_root_dir.join("cgroup").as_path(),
134122
);
135123

136-
if let Some(kind) = expected_kind {
137-
let id = entity_id.as_deref().unwrap();
124+
if let Some(regex) = expected_result {
138125
assert!(
139-
matches_entity_id_kind(id, &kind),
140-
"testing get_entity_id with file {filename}: {id} did not match expected format",
126+
regex.is_match(entity_id.as_deref().unwrap()),
127+
"testing get_entity_id with file {}: {} is not matching the expected regex",
128+
filename,
129+
entity_id.as_deref().unwrap_or("None")
141130
);
142131
} else {
143132
assert_eq!(
@@ -150,19 +139,19 @@ mod tests {
150139
#[cfg_attr(miri, ignore)]
151140
#[test]
152141
fn test_entity_id_for_v2() {
153-
test_entity_id("cgroup.v2", Some(EntityIdKind::Inode))
142+
test_entity_id("cgroup.v2", Some(&*IN_REGEX))
154143
}
155144

156145
#[cfg_attr(miri, ignore)]
157146
#[test]
158147
fn test_entity_id_for_v1() {
159-
test_entity_id("cgroup.linux", Some(EntityIdKind::Inode))
148+
test_entity_id("cgroup.linux", Some(&*IN_REGEX))
160149
}
161150

162151
#[cfg_attr(miri, ignore)]
163152
#[test]
164153
fn test_entity_id_for_container_id() {
165-
test_entity_id("cgroup.docker", Some(EntityIdKind::ContainerId))
154+
test_entity_id("cgroup.docker", Some(&*CI_REGEX))
166155
}
167156

168157
#[cfg_attr(miri, ignore)]

libdd-trace-obfuscation/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ log = "0.4"
2020
libdd-trace-protobuf = { version = "1.1.0", path = "../libdd-trace-protobuf" }
2121
libdd-trace-utils = { version = "2.0.0", path = "../libdd-trace-utils" }
2222
libdd-common = { version = "2.0.0", path = "../libdd-common" }
23+
regex-lite = "0.1.9"
2324

2425
[dev-dependencies]
2526
duplicate = "0.4.1"

libdd-trace-obfuscation/src/ip_address.rs

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use std::{borrow::Cow, collections::HashSet, net::Ipv6Addr};
4+
use regex_lite::Regex;
5+
use std::{borrow::Cow, collections::HashSet, net::Ipv6Addr, sync::LazyLock};
56

67
const ALLOWED_IP_ADDRESSES: [&str; 5] = [
78
// localhost
@@ -14,21 +15,11 @@ const ALLOWED_IP_ADDRESSES: [&str; 5] = [
1415
"169.254.170.2",
1516
];
1617

17-
const PROTOCOL_PREFIXES: &[&str] = &["dnspoll", "ftp", "file", "http", "https"];
18-
19-
fn find_protocol_prefix(s: &str) -> Option<usize> {
20-
for &proto in PROTOCOL_PREFIXES {
21-
if let Some(rest) = s.strip_prefix(proto) {
22-
if rest.starts_with(":///") {
23-
return Some(proto.len() + 4);
24-
}
25-
if rest.starts_with("://") {
26-
return Some(proto.len() + 3);
27-
}
28-
}
29-
}
30-
None
31-
}
18+
const PREFIX_REGEX_LITERAL: &str = r"^((?:dnspoll|ftp|file|http|https):/{2,3})";
19+
static PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| {
20+
#[allow(clippy::unwrap_used)]
21+
Regex::new(PREFIX_REGEX_LITERAL).unwrap()
22+
});
3223

3324
/// Quantizes a comma separated list of hosts.
3425
///
@@ -96,10 +87,11 @@ fn quantize_ip(s: &str) -> Option<String> {
9687

9788
/// Split the ip prefix, can be either a provider specific prefix or a protocol
9889
fn split_prefix(s: &str) -> (&str, &str) {
90+
#[allow(clippy::unwrap_used)]
9991
if let Some(tail) = s.strip_prefix("ip-") {
10092
("ip-", tail)
101-
} else if let Some(end) = find_protocol_prefix(s) {
102-
s.split_at(end)
93+
} else if let Some(protocol) = PREFIX_REGEX.find(s) {
94+
s.split_at(protocol.end())
10395
} else {
10496
("", s)
10597
}
@@ -112,8 +104,12 @@ fn parse_ip(s: &str) -> Option<(&str, &str)> {
112104
match ch {
113105
'0'..='9' => continue,
114106
'.' | '-' | '_' => return parse_ip_v4(s, ch),
115-
':' | 'A'..='F' | 'a'..='f' if s.parse::<Ipv6Addr>().is_ok() => {
116-
return Some((s, ""));
107+
':' | 'A'..='F' | 'a'..='f' => {
108+
if s.parse::<Ipv6Addr>().is_ok() {
109+
return Some((s, ""));
110+
} else {
111+
return None;
112+
}
117113
}
118114
'[' => {
119115
// Parse IPv6 in [host]:port format

0 commit comments

Comments
 (0)