Skip to content

Commit

Permalink
Handle apostrophes and other punctuation when muting words (#2344)
Browse files Browse the repository at this point in the history
Support muted words with apostrophes/punct
  • Loading branch information
estrattonbailey authored Mar 19, 2024
1 parent b434672 commit abc6f82
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 32 deletions.
5 changes: 5 additions & 0 deletions .changeset/big-houses-talk.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@atproto/api': patch
---

Support muting words that contain apostrophes and other punctuation
38 changes: 8 additions & 30 deletions packages/api/src/moderation/mutewords.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,38 +82,16 @@ export function hasMutedWord({
if (mutedWord === wordTrimmedPunctuation) return true
if (mutedWord.length > wordTrimmedPunctuation.length) continue

// handle hyphenated, slash separated words, etc
if (REGEX.SEPARATORS.test(wordTrimmedPunctuation)) {
// check against full normalized phrase
const wordNormalizedSeparators = wordTrimmedPunctuation.replace(
REGEX.SEPARATORS,
' ',
)
const mutedWordNormalizedSeparators = mutedWord.replace(
REGEX.SEPARATORS,
' ',
)
// hyphenated (or other sep) to spaced words
if (wordNormalizedSeparators === mutedWordNormalizedSeparators)
return true
if (/\p{P}+/u.test(wordTrimmedPunctuation)) {
const spacedWord = wordTrimmedPunctuation.replace(/\p{P}+/gu, ' ')
if (spacedWord === mutedWord) return true

/* Disabled for now e.g. `super-cool` to `supercool`
const wordNormalizedCompressed = wordNormalizedSeparators.replace(
REGEX.WORD_BOUNDARY,
'',
)
const mutedWordNormalizedCompressed =
mutedWordNormalizedSeparators.replace(/\s+?/g, '')
// hyphenated (or other sep) to non-hyphenated contiguous word
if (mutedWordNormalizedCompressed === wordNormalizedCompressed)
return true
*/
const contiguousWord = spacedWord.replace(/\s/gu, '')
if (contiguousWord === mutedWord) return true

// then individual parts of separated phrases/words
const wordParts = wordTrimmedPunctuation.split(REGEX.SEPARATORS)
for (const wp of wordParts) {
// still retain internal punctuation
if (wp === mutedWord) return true
const wordParts = wordTrimmedPunctuation.split(/\p{P}+/u)
for (const wordPart of wordParts) {
if (wordPart === mutedWord) return true
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions packages/api/tests/bsky-agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1582,6 +1582,13 @@ describe('agent', () => {
expect(end.mutedWords.find((m) => m.value === '##️⃣')).toBeFalsy()
})

// Round-trip check: a muted word containing an apostrophe is written with
// upsertMutedWords and must then be present in the moderation preferences.
it(`apostrophe: Bluesky's`, async () => {
  const value = `Bluesky's`
  await agent.upsertMutedWords([{ value, targets: [] }])

  const prefs = await agent.getPreferences()
  const saved = prefs.moderationPrefs.mutedWords.find((m) => m.value === value)

  expect(saved).toBeTruthy()
})

describe(`invalid characters`, () => {
it('zero width space', async () => {
const prev = (await agent.getPreferences()).moderationPrefs
Expand Down
82 changes: 80 additions & 2 deletions packages/api/tests/moderation-mutewords.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,22 @@ describe(`hasMutedWord`, () => {
expect(match).toBe(true)
})

// The muted value is one visible symbol whose string length is greater than 1
// (per the test title); it is expected to match when it appears in the post.
it(`match: single char with length > 1 ☠︎`, () => {
  const post = new RichText({ text: `Idk why ☠︎ but maybe` })
  post.detectFacetsWithoutResolution()

  const { text, facets } = post
  expect(
    hasMutedWord({
      mutedWords: [{ value: '☠︎', targets: ['content'] }],
      text,
      facets,
      outlineTags: [],
    }),
  ).toBe(true)
})

it(`no match: long muted word, short post`, () => {
const rt = new RichText({
text: `hey`,
Expand Down Expand Up @@ -248,6 +264,57 @@ describe(`hasMutedWord`, () => {
})
})

// Post text containing the possessive `Bluesky's`. Every case below expects a
// match for a content-targeted muted word: the exact possessive, the word
// without the possessive, its lowercase form, and the form with the
// apostrophe dropped.
describe(`apostrophes: Bluesky's`, () => {
  const post = new RichText({ text: `Yay, Bluesky's mutewords work` })
  post.detectFacetsWithoutResolution()

  // Checks the shared post against a single content-targeted muted word.
  const isMuted = (value: string) =>
    hasMutedWord({
      mutedWords: [{ value, targets: ['content'] }],
      text: post.text,
      facets: post.facets,
      outlineTags: [],
    })

  it(`match: Bluesky's`, () => {
    expect(isMuted(`Bluesky's`)).toBe(true)
  })

  it(`match: Bluesky`, () => {
    expect(isMuted('Bluesky')).toBe(true)
  })

  it(`match: bluesky`, () => {
    expect(isMuted('bluesky')).toBe(true)
  })

  it(`match: blueskys`, () => {
    expect(isMuted('blueskys')).toBe(true)
  })
})

describe(`Why so S@assy?`, () => {
const rt = new RichText({
text: `Why so S@assy?`,
Expand Down Expand Up @@ -398,6 +465,17 @@ describe(`hasMutedWord`, () => {
expect(match).toBe(true)
})

// Expects the single word `bad` to match the post held in `rt`, which is
// defined by the enclosing describe block (not visible in this hunk).
it(`match: bad`, () => {
  const { text, facets } = rt
  expect(
    hasMutedWord({
      mutedWords: [{ value: `bad`, targets: ['content'] }],
      text,
      facets,
      outlineTags: [],
    }),
  ).toBe(true)
})

it(`match: super bad`, () => {
const match = hasMutedWord({
mutedWords: [{ value: `super bad`, targets: ['content'] }],
Expand All @@ -417,7 +495,7 @@ describe(`hasMutedWord`, () => {
outlineTags: [],
})

expect(match).toBe(false)
expect(match).toBe(true)
})
})

Expand Down Expand Up @@ -474,7 +552,7 @@ describe(`hasMutedWord`, () => {
outlineTags: [],
})

expect(match).toBe(false)
expect(match).toBe(true)
})
})

Expand Down

0 comments on commit abc6f82

Please sign in to comment.