Skip to content

Commit

Permalink
Adding 'single_word_only' option to obfuscate processor (#4476) (#4550)
Browse files Browse the repository at this point in the history
Adding 'single_word_only' option to obfuscate processor

Signed-off-by: Utkarsh Agarwal <[email protected]>
(cherry picked from commit 6d48efb)

Co-authored-by: Utkarsh Agarwal <[email protected]>
  • Loading branch information
opensearch-trigger-bot[bot] and Utkarsh-Aga authored May 16, 2024
1 parent 738936a commit 10b94f1
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 3 deletions.
2 changes: 2 additions & 0 deletions data-prepper-plugins/obfuscate-processor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,10 @@ Below are the list of configuration options.
the source field will be updated with obfuscated value.
* `patterns` - (optional) - A list of regex patterns. You can define multiple patterns for the same field. Only the
parts that match the regex patterns are obfuscated. If not provided, the full field is obfuscated.
* `single_word_only` - (optional) - When set to `true`, a word boundary (`\b`) is added to the start and end of each pattern, so obfuscation is applied only to matches that stand alone as whole words in the input text. Defaults to `false`, in which case the patterns are applied to all occurrences.
* `action` - (optional) - Obfuscation action; defaults to `mask`. Currently, `mask` is the only supported action.


### Configuration - Mask Action

There are some additional configuration options for Mask action.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class ObfuscationProcessor extends AbstractProcessor<Record<Event>, Recor

private final String source;
private final String target;
private final boolean singleWordOnly;

private final List<Pattern> patterns;
private final ObfuscationAction action;
Expand All @@ -60,6 +61,7 @@ public ObfuscationProcessor(final PluginMetrics pluginMetrics,
this.patterns = new ArrayList<>();
this.expressionEvaluator = expressionEvaluator;
this.obfuscationProcessorConfig = config;
this.singleWordOnly = config.getSingleWordOnly();

config.validateObfuscateWhen(expressionEvaluator);

Expand Down Expand Up @@ -90,6 +92,9 @@ public ObfuscationProcessor(final PluginMetrics pluginMetrics,
throw new InvalidPluginConfigurationException("Unable to find a predefined pattern for \"" + rawPattern + "\".");
}
}
if (singleWordOnly) {
    // Anchor the pattern on word boundaries so it only matches standalone words.
    // The pattern is wrapped in a non-capturing group first: alternation has the lowest
    // precedence in a regex, so "\\b" + "cat|dog" + "\\b" would parse as (\bcat)|(dog\b)
    // and still match "cat" inside "category". A non-capturing group is used so that any
    // capture-group numbering inside rawPattern is left unchanged.
    rawPattern = "\\b(?:" + rawPattern + ")\\b";
}
try {
Pattern p = Pattern.compile(rawPattern);
patterns.add(p);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,19 @@ public class ObfuscationProcessorConfig {
@JsonProperty("tags_on_match_failure")
private List<String> tagsOnMatchFailure;

@JsonProperty("single_word_only")
private boolean singleWordOnly = false;

/**
 * Creates a configuration in which every field keeps its declared default
 * ({@code single_word_only} is {@code false}).
 *
 * NOTE(review): presumably required so the {@code @JsonProperty}-annotated fields can be
 * populated by a JSON/YAML deserializer after construction; confirm against the plugin loader.
 */
public ObfuscationProcessorConfig() {
}

public ObfuscationProcessorConfig(String source, List<String> patterns, String target, PluginModel action, List<String> tagsOnMatchFailure) {
/**
 * Creates a configuration with every constructor-settable option supplied explicitly.
 * All arguments are stored as given; no validation or copying is performed here.
 *
 * @param source             value returned by {@link #getSource()}
 * @param patterns           regex patterns to obfuscate; may be {@code null}
 * @param target             value exposed through the target getter; may be {@code null}
 * @param action             obfuscation action plugin model; may be {@code null}
 * @param tagsOnMatchFailure value returned by {@link #getTagsOnMatchFailure()}; may be {@code null}
 * @param singleWordOnly     value returned by {@link #getSingleWordOnly()}
 */
public ObfuscationProcessorConfig(String source, List<String> patterns, String target, PluginModel action, List<String> tagsOnMatchFailure, boolean singleWordOnly) {
    // Matching behaviour.
    this.singleWordOnly = singleWordOnly;
    this.patterns = patterns;

    // Where to read from / write to.
    this.source = source;
    this.target = target;

    // What to do on match / on failure.
    this.action = action;
    this.tagsOnMatchFailure = tagsOnMatchFailure;
}

public String getSource() {
Expand All @@ -71,6 +75,10 @@ public List<String> getTagsOnMatchFailure() {
return tagsOnMatchFailure;
}

/**
 * Returns the value of the {@code single_word_only} option.
 *
 * @return {@code true} when the option is enabled; {@code false} otherwise (the default)
 */
public boolean getSingleWordOnly() {
    return this.singleWordOnly;
}

void validateObfuscateWhen(final ExpressionEvaluator expressionEvaluator) {
if (obfuscateWhen != null && !expressionEvaluator.isValidExpressionStatement(obfuscateWhen)) {
throw new InvalidPluginConfigurationException(String.format("obfuscate_when value %s is not a valid Data Prepper expression statement", obfuscateWhen));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,14 @@ private Record<Event> createRecord(String message) {

/**
 * Builds the processor under test before each test case from the mocked config.
 * Most mocked getters return the values of a default config whose source is "message";
 * obfuscate_when is stubbed to null and tags_on_match_failure to a single random UUID string.
 */
@BeforeEach
void setup() {
// NOTE(review): the next two lines are the before/after forms of the same statement as shown
// in the commit diff; only the six-argument call matches the constructor added in this change.
final ObfuscationProcessorConfig defaultConfig = new ObfuscationProcessorConfig("message", null, null, null, null);
final ObfuscationProcessorConfig defaultConfig = new ObfuscationProcessorConfig("message", null, null, null, null, false);
// Stubs are lenient — presumably so tests that never read a given getter are not failed
// for unused stubbing; confirm against the Mockito settings of this test class.
lenient().when(mockConfig.getSource()).thenReturn(defaultConfig.getSource());
lenient().when(mockConfig.getAction()).thenReturn(defaultConfig.getAction());
lenient().when(mockConfig.getPatterns()).thenReturn(defaultConfig.getPatterns());
lenient().when(mockConfig.getTarget()).thenReturn(defaultConfig.getTarget());
lenient().when(mockConfig.getObfuscateWhen()).thenReturn(null);
lenient().when(mockConfig.getTagsOnMatchFailure()).thenReturn(List.of(UUID.randomUUID().toString()));
// single_word_only defaults to false here; individual tests re-stub it to true when needed.
lenient().when(mockConfig.getSingleWordOnly()).thenReturn(defaultConfig.getSingleWordOnly());
obfuscationProcessor = new ObfuscationProcessor(pluginMetrics, mockConfig, mockFactory, expressionEvaluator);
}

Expand Down Expand Up @@ -368,7 +369,7 @@ void testProcessorWithBaseNumberPattern(String message, String expected) {
Event data = editedRecords.get(0).getData();
assertThat(data.get("message", String.class), equalTo(expected));
}

@ParameterizedTest
@CsvSource({
"My email is [email protected],My email is ***",
Expand All @@ -388,6 +389,49 @@ void testProcessorWithMultiplePatterns(String message, String expected) {
assertThat(data.get("message", String.class), equalTo(expected));
}

/**
 * Checks a single pattern with single_word_only enabled: a match is masked only when it
 * stands alone (e.g. "test" is masked, but not inside "testing"; a card number glued to
 * "visa" or embedded in a longer id is left untouched).
 */
@ParameterizedTest
@CsvSource({
        "My email is [email protected],%{EMAIL_ADDRESS},My email is ***",
        "testing this functionality, test, testing this functionality",
        "test this functionality, test, *** this functionality",
        "My IP is 1.1.1.1,%{IP_ADDRESS_V4},My IP is ***",
        "fd55555069-e7a9-11ee4111111111111111,%{CREDIT_CARD_NUMBER},fd55555069-e7a9-11ee4111111111111111",
        "4111111111111111,%{CREDIT_CARD_NUMBER},***",
        "visa4111111111111111,%{CREDIT_CARD_NUMBER},visa4111111111111111"
})
void testProcessorWithSingleWordOnly(String message, String pattern, String expected) {
    // Re-create the processor so the constructor sees the overridden stubs.
    when(mockConfig.getPatterns()).thenReturn(List.of(pattern));
    when(mockConfig.getSingleWordOnly()).thenReturn(true);
    obfuscationProcessor = new ObfuscationProcessor(pluginMetrics, mockConfig, mockFactory, expressionEvaluator);

    final List<Record<Event>> processed =
            (List<Record<Event>>) obfuscationProcessor.doExecute(Collections.singletonList(createRecord(message)));

    assertThat(processed.size(), equalTo(1));
    final Event event = processed.get(0).getData();
    assertThat(event.get("message", String.class), equalTo(expected));
}

/**
 * Checks several predefined patterns (email, IPv4, credit card) applied together with
 * single_word_only enabled: standalone matches are masked, while a card number that is
 * part of a longer token (tracking id, "visa"-prefixed) is left untouched.
 */
@ParameterizedTest
@CsvSource({
        "My email is [email protected],My email is ***",
        "My IP is 1.1.1.1,My IP is ***",
        "My IP is 1.1.1.1 and tracking id is fd55555069-e7a9-11ee4111111111111111,My IP is *** and tracking id is fd55555069-e7a9-11ee4111111111111111",
        "My IP is 1.1.1.1 and credit card number is 4111111111111111,My IP is *** and credit card number is ***",
        "My IP is 1.1.1.1 and credit card number is visa4111111111111111,My IP is *** and credit card number is visa4111111111111111"
})
void testProcessorWithMultiplePatternsWithSingleWordOnly(String message, String expected) {
    // Re-create the processor so the constructor sees the overridden stubs.
    when(mockConfig.getPatterns()).thenReturn(List.of("%{EMAIL_ADDRESS}", "%{IP_ADDRESS_V4}", "%{CREDIT_CARD_NUMBER}"));
    when(mockConfig.getSingleWordOnly()).thenReturn(true);
    obfuscationProcessor = new ObfuscationProcessor(pluginMetrics, mockConfig, mockFactory, expressionEvaluator);

    final List<Record<Event>> processed =
            (List<Record<Event>>) obfuscationProcessor.doExecute(Collections.singletonList(createRecord(message)));

    assertThat(processed.size(), equalTo(1));
    final Event event = processed.get(0).getData();
    assertThat(event.get("message", String.class), equalTo(expected));
}

@Test
void testIsReadyForShutdown() {
Expand Down

0 comments on commit 10b94f1

Please sign in to comment.