From 10b94f1313349e20393312ea4f11a6271ba57a7c Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 16 May 2024 09:37:36 -0700 Subject: [PATCH] Adding 'single_word_only' option to obfuscate processor (#4476) (#4550) Adding 'single_word_only' option to obfuscate processor Signed-off-by: Utkarsh Agarwal (cherry picked from commit 6d48efba0d71ae0e0674b65ef33ce492b8b215ee) Co-authored-by: Utkarsh Agarwal <126544832+Utkarsh-Aga@users.noreply.github.com> --- .../obfuscate-processor/README.md | 2 + .../obfuscation/ObfuscationProcessor.java | 5 ++ .../ObfuscationProcessorConfig.java | 10 +++- .../obfuscation/ObfuscationProcessorTest.java | 48 ++++++++++++++++++- 4 files changed, 62 insertions(+), 3 deletions(-) diff --git a/data-prepper-plugins/obfuscate-processor/README.md b/data-prepper-plugins/obfuscate-processor/README.md index e365acfea1..8e48582cf1 100644 --- a/data-prepper-plugins/obfuscate-processor/README.md +++ b/data-prepper-plugins/obfuscate-processor/README.md @@ -63,8 +63,10 @@ Below are the list of configuration options. the source field will be updated with obfuscated value. * `patterns` - (optional) - A list of Regex patterns. You can define multiple patterns for the same field. Only the parts that matched the Regex patterns to be obfuscated. If not provided, the full field will be obfuscated. +* `single_word_only` - (optional) - When set to `true`, a word boundary `\b` is added to both ends of each pattern, so that obfuscation is applied only to matches that stand alone as whole words in the input text. Defaults to `false`, meaning the patterns are applied to all occurrences. * `action` - (optional) - Obfuscation action, default to `mask`. Currently, `mask` is the only supported action. + ### Configuration - Mask Action There are some additional configuration options for Mask action. 
diff --git a/data-prepper-plugins/obfuscate-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessor.java b/data-prepper-plugins/obfuscate-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessor.java index 21167bc747..bbb1a1600a 100644 --- a/data-prepper-plugins/obfuscate-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessor.java +++ b/data-prepper-plugins/obfuscate-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessor.java @@ -42,6 +42,7 @@ public class ObfuscationProcessor extends AbstractProcessor, Recor private final String source; private final String target; + private final boolean singleWordOnly; private final List patterns; private final ObfuscationAction action; @@ -60,6 +61,7 @@ public ObfuscationProcessor(final PluginMetrics pluginMetrics, this.patterns = new ArrayList<>(); this.expressionEvaluator = expressionEvaluator; this.obfuscationProcessorConfig = config; + this.singleWordOnly = config.getSingleWordOnly(); config.validateObfuscateWhen(expressionEvaluator); @@ -90,6 +92,9 @@ public ObfuscationProcessor(final PluginMetrics pluginMetrics, throw new InvalidPluginConfigurationException("Unable to find a predefined pattern for \"" + rawPattern + "\"."); } } + if (singleWordOnly) { + rawPattern = "\\b(?:" + rawPattern + ")\\b"; /* non-capturing group keeps both word boundaries bound to the whole pattern, even with top-level alternation such as a|b */ + } try { Pattern p = Pattern.compile(rawPattern); patterns.add(p); diff --git a/data-prepper-plugins/obfuscate-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessorConfig.java b/data-prepper-plugins/obfuscate-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessorConfig.java index 56defb6baf..b99753bc9f 100644 --- a/data-prepper-plugins/obfuscate-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessorConfig.java 
+++ b/data-prepper-plugins/obfuscate-processor/src/main/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessorConfig.java @@ -36,15 +36,19 @@ public class ObfuscationProcessorConfig { @JsonProperty("tags_on_match_failure") private List tagsOnMatchFailure; + @JsonProperty("single_word_only") + private boolean singleWordOnly = false; + public ObfuscationProcessorConfig() { } - public ObfuscationProcessorConfig(String source, List patterns, String target, PluginModel action, List tagsOnMatchFailure) { + public ObfuscationProcessorConfig(String source, List patterns, String target, PluginModel action, List tagsOnMatchFailure, boolean singleWordOnly) { this.source = source; this.patterns = patterns; this.target = target; this.action = action; this.tagsOnMatchFailure = tagsOnMatchFailure; + this.singleWordOnly = singleWordOnly; } public String getSource() { @@ -71,6 +75,10 @@ public List getTagsOnMatchFailure() { return tagsOnMatchFailure; } + public boolean getSingleWordOnly() { + return singleWordOnly; + } + void validateObfuscateWhen(final ExpressionEvaluator expressionEvaluator) { if (obfuscateWhen != null && !expressionEvaluator.isValidExpressionStatement(obfuscateWhen)) { throw new InvalidPluginConfigurationException(String.format("obfuscate_when value %s is not a valid Data Prepper expression statement", obfuscateWhen)); diff --git a/data-prepper-plugins/obfuscate-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessorTest.java b/data-prepper-plugins/obfuscate-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessorTest.java index b29ad3b0f4..be35b2cf01 100644 --- a/data-prepper-plugins/obfuscate-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessorTest.java +++ 
b/data-prepper-plugins/obfuscate-processor/src/test/java/org/opensearch/dataprepper/plugins/processor/obfuscation/ObfuscationProcessorTest.java @@ -76,13 +76,14 @@ private Record createRecord(String message) { @BeforeEach void setup() { - final ObfuscationProcessorConfig defaultConfig = new ObfuscationProcessorConfig("message", null, null, null, null); + final ObfuscationProcessorConfig defaultConfig = new ObfuscationProcessorConfig("message", null, null, null, null, false); lenient().when(mockConfig.getSource()).thenReturn(defaultConfig.getSource()); lenient().when(mockConfig.getAction()).thenReturn(defaultConfig.getAction()); lenient().when(mockConfig.getPatterns()).thenReturn(defaultConfig.getPatterns()); lenient().when(mockConfig.getTarget()).thenReturn(defaultConfig.getTarget()); lenient().when(mockConfig.getObfuscateWhen()).thenReturn(null); lenient().when(mockConfig.getTagsOnMatchFailure()).thenReturn(List.of(UUID.randomUUID().toString())); + lenient().when(mockConfig.getSingleWordOnly()).thenReturn(defaultConfig.getSingleWordOnly()); obfuscationProcessor = new ObfuscationProcessor(pluginMetrics, mockConfig, mockFactory, expressionEvaluator); } @@ -368,7 +369,7 @@ void testProcessorWithBaseNumberPattern(String message, String expected) { Event data = editedRecords.get(0).getData(); assertThat(data.get("message", String.class), equalTo(expected)); } - + @ParameterizedTest @CsvSource({ "My email is abc@test.com,My email is ***", @@ -388,6 +389,49 @@ void testProcessorWithMultiplePatterns(String message, String expected) { assertThat(data.get("message", String.class), equalTo(expected)); } + @ParameterizedTest + @CsvSource({ + "My email is abc@test.com,%{EMAIL_ADDRESS},My email is ***", + "testing this functionality, test, testing this functionality", + "test this functionality, test, *** this functionality", + "My IP is 1.1.1.1,%{IP_ADDRESS_V4},My IP is ***", + "fd55555069-e7a9-11ee4111111111111111,%{CREDIT_CARD_NUMBER},fd55555069-e7a9-11ee4111111111111111", + 
"4111111111111111,%{CREDIT_CARD_NUMBER},***", + "visa4111111111111111,%{CREDIT_CARD_NUMBER},visa4111111111111111" + }) + void testProcessorWithSingleWordOnly(String message, String pattern, String expected) { + when(mockConfig.getSingleWordOnly()).thenReturn(true); + when(mockConfig.getPatterns()).thenReturn(List.of(pattern)); + obfuscationProcessor = new ObfuscationProcessor(pluginMetrics, mockConfig, mockFactory, expressionEvaluator); + + final Record record = createRecord(message); + final List> editedRecords = (List>) obfuscationProcessor.doExecute(Collections.singletonList(record)); + + assertThat(editedRecords.size(), equalTo(1)); + Event data = editedRecords.get(0).getData(); + assertThat(data.get("message", String.class), equalTo(expected)); + } + + @ParameterizedTest + @CsvSource({ + "My email is abc@test.com,My email is ***", + "My IP is 1.1.1.1,My IP is ***", + "My IP is 1.1.1.1 and tracking id is fd55555069-e7a9-11ee4111111111111111,My IP is *** and tracking id is fd55555069-e7a9-11ee4111111111111111", + "My IP is 1.1.1.1 and credit card number is 4111111111111111,My IP is *** and credit card number is ***", + "My IP is 1.1.1.1 and credit card number is visa4111111111111111,My IP is *** and credit card number is visa4111111111111111" + }) + void testProcessorWithMultiplePatternsWithSingleWordOnly(String message, String expected) { + when(mockConfig.getSingleWordOnly()).thenReturn(true); + when(mockConfig.getPatterns()).thenReturn(List.of("%{EMAIL_ADDRESS}", "%{IP_ADDRESS_V4}", "%{CREDIT_CARD_NUMBER}")); + obfuscationProcessor = new ObfuscationProcessor(pluginMetrics, mockConfig, mockFactory, expressionEvaluator); + + final Record record = createRecord(message); + final List> editedRecords = (List>) obfuscationProcessor.doExecute(Collections.singletonList(record)); + + assertThat(editedRecords.size(), equalTo(1)); + Event data = editedRecords.get(0).getData(); + assertThat(data.get("message", String.class), equalTo(expected)); + } @Test void 
testIsReadyForShutdown() {