diff --git a/.changeset/witty-ads-visit.md b/.changeset/witty-ads-visit.md
new file mode 100644
index 00000000000..31df7813edc
--- /dev/null
+++ b/.changeset/witty-ads-visit.md
@@ -0,0 +1,5 @@
+---
+'@rocket.chat/meteor': patch
+---
+
+Adds support for filtering bad words from messages for languages other than English
diff --git a/apps/meteor/server/services/messages/hooks/BeforeSaveBadWords.ts b/apps/meteor/server/services/messages/hooks/BeforeSaveBadWords.ts
index 92e0a57ac7f..b9240634023 100644
--- a/apps/meteor/server/services/messages/hooks/BeforeSaveBadWords.ts
+++ b/apps/meteor/server/services/messages/hooks/BeforeSaveBadWords.ts
@@ -1,18 +1,29 @@
 import { type IMessage } from '@rocket.chat/core-typings';
+import { Logger } from '@rocket.chat/logger';
 import type BadWordsFilter from 'bad-words';
 
 export class BeforeSaveBadWords {
 	badWords: BadWordsFilter | null = null;
 
+	/** Matcher for the custom bad words list; `null` when disabled, when the list is empty, or when the pattern failed to compile. */
+	badWordsRegex: RegExp | null = null;
+
+	protected logger: Logger;
+
+	constructor() {
+		this.logger = new Logger('BadWordsFilter');
+	}
+
 	async configure(badWordsList?: string, goodWordsList?: string) {
 		const { default: Filter } = await import('bad-words');
 
+		const badWords =
+			badWordsList
+				?.split(',')
+				.map((word) => word.trim())
+				.filter(Boolean) || [];
 		const options = {
-			list:
-				badWordsList
-					?.split(',')
-					.map((word) => word.trim())
-					.filter(Boolean) || [],
+			list: badWords,
 			// library definition does not allow optional definition
 			exclude: undefined,
 			splitRegex: undefined,
@@ -24,6 +35,23 @@ export class BeforeSaveBadWords {
 
 		this.badWords = new Filter(options);
 
+		// Keep the regex consistent with the library filter: allow-listed words are dropped, and regex
+		// metacharacters are escaped so every configured word is matched literally.
+		const goodWords = new Set(goodWordsList?.split(',').map((word) => word.trim().toLowerCase()));
+		const regexWords = badWords
+			.filter((word) => !goodWords.has(word.toLowerCase()))
+			.map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
+
+		try {
+			// An empty alternation could only produce zero-length matches, so no regex is built for an empty list.
+			this.badWordsRegex = regexWords.length
+				? new RegExp(`(?<=^|[\\p{P}\\p{Z}])(${regexWords.join('|')})(?=$|[\\p{P}\\p{Z}])`, 'gmiu')
+				: null;
+		} catch (error) {
+			this.badWordsRegex = null;
+			this.logger.error('Error when initializing bad words filter', error);
+		}
+
 		if (goodWordsList?.length) {
 			this.badWords.removeWords(...goodWordsList.split(',').map((word) => word.trim()));
 		}
@@ -31,6 +59,7 @@ export class BeforeSaveBadWords {
 
 	disable() {
 		this.badWords = null;
+		this.badWordsRegex = null;
 	}
 
 	async filterBadWords({ message }: { message: IMessage }): Promise<IMessage> {
@@ -42,6 +71,11 @@ export class BeforeSaveBadWords {
 			message.msg = this.badWords.clean(message.msg);
 		} catch (error) {
 			// ignore
+		} finally {
+			// Runs even if the library filter threw: masks each configured word delimited by punctuation, separators or line boundaries.
+			if (this.badWordsRegex) {
+				message.msg = message.msg.replace(this.badWordsRegex, (match) => '*'.repeat(match.length));
+			}
 		}
 
 		return message;
diff --git a/apps/meteor/tests/unit/server/services/messages/hooks/BeforeSaveBadWords.tests.ts b/apps/meteor/tests/unit/server/services/messages/hooks/BeforeSaveBadWords.tests.ts
index adaecc6cd2e..384a451ae05 100644
--- a/apps/meteor/tests/unit/server/services/messages/hooks/BeforeSaveBadWords.tests.ts
+++ b/apps/meteor/tests/unit/server/services/messages/hooks/BeforeSaveBadWords.tests.ts
@@ -80,4 +80,70 @@ describe('Filter bad words before saving message', () => {
 
 		return expect(result.msg).to.be.equal('hell');
 	});
+
+	it('should filter non ascii bad words', async () => {
+		const badWords = new BeforeSaveBadWords();
+		await badWords.configure('バカ');
+
+		const message = createMessage('hell is バカ');
+
+		const result = await badWords.filterBadWords({ message });
+
+		return expect(result.msg).to.be.equal('**** is **');
+	});
+
+	it('should filter just the non ascii bad words', async () => {
+		const badWords = new BeforeSaveBadWords();
+		await badWords.configure('バカ');
+
+		const message = createMessage('バカ');
+
+		const result = await badWords.filterBadWords({ message });
+
+		return expect(result.msg).to.be.equal('**');
+	});
+
+	it('should filter non ascii bad words with punctuation', async () => {
+		const badWords = new BeforeSaveBadWords();
+		await badWords.configure('バカ');
+
+		const message = createMessage('バカ.');
+
+		const result = await badWords.filterBadWords({ message });
+
+		return expect(result.msg).to.be.equal('**.');
+	});
+
+	it('should not filter non ascii bad words, if part of another word', async () => {
+		const badWords = new BeforeSaveBadWords();
+		await badWords.configure('バカ');
+
+		const message = createMessage('TESTバカTEST');
+
+		const result = await badWords.filterBadWords({ message });
+
+		return expect(result.msg).to.be.equal('TESTバカTEST');
+	});
+
+	it('should match bad words containing regex metacharacters literally', async () => {
+		const badWords = new BeforeSaveBadWords();
+		await badWords.configure('a.c');
+
+		const message = createMessage('abc a.c');
+
+		const result = await badWords.filterBadWords({ message });
+
+		return expect(result.msg).to.be.equal('abc ***');
+	});
+
+	it('should not filter non ascii bad words present in the allow list', async () => {
+		const badWords = new BeforeSaveBadWords();
+		await badWords.configure('バカ, アホ', 'アホ');
+
+		const message = createMessage('バカ アホ');
+
+		const result = await badWords.filterBadWords({ message });
+
+		return expect(result.msg).to.be.equal('** アホ');
+	});
 });