Skip to content

Commit

Permalink
phone-search analyzer: don't emit sip/tel prefix
Browse files Browse the repository at this point in the history
In line with the previous two commits, the `tel:`/`sip:` prefix is something else
the search analyzer shouldn't emit: otherwise, searching for any number with
such a prefix would match _any_ document with the same prefix.

Signed-off-by: Ralph Ursprung <[email protected]>
  • Loading branch information
rursprung committed Jan 10, 2025
1 parent ff3c8da commit ed0014f
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 4 deletions.
3 changes: 1 addition & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Always use `constant_score` query for `match_only_text` field ([#16964](https://github.com/opensearch-project/OpenSearch/pull/16964))
- Fix Shallow copy snapshot failures on closed index ([#16868](https://github.com/opensearch-project/OpenSearch/pull/16868))
- Fix multi-value sort for unsigned long ([#16732](https://github.com/opensearch-project/OpenSearch/pull/16732))
- The `phone-search` analyzer no longer emits the international calling code as a token ([#16993](https://github.com/opensearch-project/OpenSearch/pull/16993))
- The `phone-search` analyzer no longer emits extension numbers and unformatted input as a token ([#16993](https://github.com/opensearch-project/OpenSearch/pull/16993))
- The `phone-search` analyzer no longer emits the `tel:`/`sip:` prefix, international calling code, extension numbers and unformatted input as tokens ([#16993](https://github.com/opensearch-project/OpenSearch/pull/16993))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ private Set<String> getTokens() throws IOException {

// Rip off the "tel:" or "sip:" prefix
if (input.indexOf("tel:") == 0 || input.indexOf("sip:") == 0) {
tokens.add(input.substring(0, 4));
if (addNgrams) {
tokens.add(input.substring(0, 4));
}
input = input.substring(4);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public void testEuropeDetailled() throws IOException {
* Test for all tokens which are emitted by the "phone-search" analyzer.
*/
public void testEuropeDetailledSearch() throws IOException {
// Checks the tokens phoneSearchAnalyzer produces for a UK number carrying a "tel:" prefix.
// NOTE(review): this method is shown as a rendered diff hunk. The two assertions below differ
// only in the standalone "tel:" token, so they look like the removed and the added side of the
// same line; presumably only the second one (without "tel:") exists in the committed file —
// confirm against the repository before relying on this listing.
assertTokensAreInAnyOrder(phoneSearchAnalyzer, "tel:+441344840400", Arrays.asList("tel:+441344840400", "tel:", "441344840400"));
assertTokensAreInAnyOrder(phoneSearchAnalyzer, "tel:+441344840400", Arrays.asList("tel:+441344840400", "441344840400"));
}

public void testEurope() throws IOException {
Expand Down Expand Up @@ -162,6 +162,10 @@ public void testTelPrefix() throws IOException {
assertTokensInclude("tel:+1228", Arrays.asList("1228", "122", "228"));
}

/**
 * Asserts that the bare number "1228" is among the tokens produced for the input "tel:+1228".
 * Judging by its name, this is meant to cover the search-time ("phone-search") analyzer.
 *
 * NOTE(review): unlike testEuropeDetailledSearch, no analyzer is passed here; the call has the
 * same shape as the one in testTelPrefix, so it presumably runs whatever default analyzer the
 * two-argument assertTokensInclude helper uses rather than phoneSearchAnalyzer — confirm which
 * overload this resolves to and pass phoneSearchAnalyzer explicitly if one accepting it exists.
 *
 * NOTE(review): if assertTokensInclude only checks containment (as its name suggests), this test
 * cannot detect a regression where the "tel:" token is emitted again; an exact-set assertion
 * would be needed for that.
 */
public void testTelPrefixSearch() throws IOException {
assertTokensInclude("tel:+1228", Arrays.asList("1228"));
}

/**
 * Asserts that the tokens produced for "+1228" (a number with a leading "+" and no "tel:" or
 * "sip:" prefix) include the full digit string "1228" and the shorter pieces "122" and "228".
 *
 * NOTE(review): no analyzer is passed; which analyzer the two-argument assertTokensInclude helper
 * uses is not visible here — presumably the index-time one, since shorter pieces are expected.
 */
public void testNumberPrefix() throws IOException {
assertTokensInclude("+1228", Arrays.asList("1228", "122", "228"));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@
index: test
id: 5
body: { "phone": "+1-888-280-4331", "phone-ch": "+1-888-280-4331" }
- do:
index:
index: test
id: 6
body: { "phone": "tel:+441344840400", "phone-ch": "tel:+441344840400" }
- do:
indices.refresh: {}

Expand Down Expand Up @@ -155,3 +160,36 @@
match:
"phone": "888 280 4331"
- match: { hits.total: 0 }

# document & search have a tel: prefix
- do:
search:
rest_total_hits_as_int: true
index: test
body:
query:
match:
"phone": "tel:+441344840400"
- match: { hits.total: 1 }

# only document has a tel: prefix
- do:
search:
rest_total_hits_as_int: true
index: test
body:
query:
match:
"phone": "+441344840400"
- match: { hits.total: 1 }

# only search has a tel: prefix
- do:
search:
rest_total_hits_as_int: true
index: test
body:
query:
match:
"phone": "tel:+1 888 280 4331"
- match: { hits.total: 1 }

0 comments on commit ed0014f

Please sign in to comment.