Handle apostrophes and other punctuation when muting words (#2344)

Support muted words with apostrophes and other punctuation
bluesky-social · Mar 19, 2024 · abc6f82
1 parent b434672
commit abc6f82
Show file tree

Hide file tree

Showing 4 changed files with 100 additions and 32 deletions.
diff --git a/.changeset/big-houses-talk.md b/.changeset/big-houses-talk.md
@@ -0,0 +1,5 @@
+---
+'@atproto/api': patch
+---
+
+Support muting words that contain apostrophes and other punctuation
diff --git a/packages/api/src/moderation/mutewords.ts b/packages/api/src/moderation/mutewords.ts
@@ -82,38 +82,16 @@ export function hasMutedWord({
       if (mutedWord === wordTrimmedPunctuation) return true
       if (mutedWord.length > wordTrimmedPunctuation.length) continue
 
-      // handle hyphenated, slash separated words, etc
-      if (REGEX.SEPARATORS.test(wordTrimmedPunctuation)) {
-        // check against full normalized phrase
-        const wordNormalizedSeparators = wordTrimmedPunctuation.replace(
-          REGEX.SEPARATORS,
-          ' ',
-        )
-        const mutedWordNormalizedSeparators = mutedWord.replace(
-          REGEX.SEPARATORS,
-          ' ',
-        )
-        // hyphenated (or other sep) to spaced words
-        if (wordNormalizedSeparators === mutedWordNormalizedSeparators)
-          return true
+      if (/\p{P}+/u.test(wordTrimmedPunctuation)) {
+        const spacedWord = wordTrimmedPunctuation.replace(/\p{P}+/gu, ' ')
+        if (spacedWord === mutedWord) return true
 
-        /* Disabled for now e.g. `super-cool` to `supercool`
-        const wordNormalizedCompressed = wordNormalizedSeparators.replace(
-          REGEX.WORD_BOUNDARY,
-          '',
-        )
-        const mutedWordNormalizedCompressed =
-          mutedWordNormalizedSeparators.replace(/\s+?/g, '')
-        // hyphenated (or other sep) to non-hyphenated contiguous word
-        if (mutedWordNormalizedCompressed === wordNormalizedCompressed)
-          return true
-        */
+        const contiguousWord = spacedWord.replace(/\s/gu, '')
+        if (contiguousWord === mutedWord) return true
 
-        // then individual parts of separated phrases/words
-        const wordParts = wordTrimmedPunctuation.split(REGEX.SEPARATORS)
-        for (const wp of wordParts) {
-          // still retain internal punctuation
-          if (wp === mutedWord) return true
+        const wordParts = wordTrimmedPunctuation.split(/\p{P}+/u)
+        for (const wordPart of wordParts) {
+          if (wordPart === mutedWord) return true
         }
       }
     }

diff --git a/packages/api/tests/bsky-agent.test.ts b/packages/api/tests/bsky-agent.test.ts
@@ -1582,6 +1582,13 @@ describe('agent', () => {
         expect(end.mutedWords.find((m) => m.value === '##️⃣')).toBeFalsy()
       })
 
+      it(`apostrophe: Bluesky's`, async () => {
+        await agent.upsertMutedWords([{ value: `Bluesky's`, targets: [] }])
+        const { mutedWords } = (await agent.getPreferences()).moderationPrefs
+
+        expect(mutedWords.find((m) => m.value === `Bluesky's`)).toBeTruthy()
+      })
+
       describe(`invalid characters`, () => {
         it('zero width space', async () => {
           const prev = (await agent.getPreferences()).moderationPrefs

diff --git a/packages/api/tests/moderation-mutewords.test.ts b/packages/api/tests/moderation-mutewords.test.ts
@@ -89,6 +89,22 @@ describe(`hasMutedWord`, () => {
       expect(match).toBe(true)
     })
 
+    it(`match: single char with length > 1 ☠︎`, () => {
+      const rt = new RichText({
+        text: `Idk why ☠︎ but maybe`,
+      })
+      rt.detectFacetsWithoutResolution()
+
+      const match = hasMutedWord({
+        mutedWords: [{ value: '☠︎', targets: ['content'] }],
+        text: rt.text,
+        facets: rt.facets,
+        outlineTags: [],
+      })
+
+      expect(match).toBe(true)
+    })
+
     it(`no match: long muted word, short post`, () => {
       const rt = new RichText({
         text: `hey`,
@@ -248,6 +264,57 @@ describe(`hasMutedWord`, () => {
       })
     })
 
+    describe(`apostrophes: Bluesky's`, () => {
+      const rt = new RichText({
+        text: `Yay, Bluesky's mutewords work`,
+      })
+      rt.detectFacetsWithoutResolution()
+
+      it(`match: Bluesky's`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: `Bluesky's`, targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+
+      it(`match: Bluesky`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: 'Bluesky', targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+
+      it(`match: bluesky`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: 'bluesky', targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+
+      it(`match: blueskys`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: 'blueskys', targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+    })
+
     describe(`Why so S@assy?`, () => {
       const rt = new RichText({
         text: `Why so S@assy?`,
@@ -398,6 +465,17 @@ describe(`hasMutedWord`, () => {
         expect(match).toBe(true)
       })
 
+      it(`match: bad`, () => {
+        const match = hasMutedWord({
+          mutedWords: [{ value: `bad`, targets: ['content'] }],
+          text: rt.text,
+          facets: rt.facets,
+          outlineTags: [],
+        })
+
+        expect(match).toBe(true)
+      })
+
       it(`match: super bad`, () => {
         const match = hasMutedWord({
           mutedWords: [{ value: `super bad`, targets: ['content'] }],
@@ -417,7 +495,7 @@ describe(`hasMutedWord`, () => {
           outlineTags: [],
         })
 
-        expect(match).toBe(false)
+        expect(match).toBe(true)
       })
     })
 
@@ -474,7 +552,7 @@ describe(`hasMutedWord`, () => {
           outlineTags: [],
         })
 
-        expect(match).toBe(false)
+        expect(match).toBe(true)
       })
     })