fix: Filtering non ascii bad words (#35418)

pull/35545/head^2
Yash Rajpal 10 months ago committed by GitHub
parent 721c132ba1
commit b9edd4837d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 5
      .changeset/witty-ads-visit.md
  2. 32
      apps/meteor/server/services/messages/hooks/BeforeSaveBadWords.ts
  3. 44
      apps/meteor/tests/unit/server/services/messages/hooks/BeforeSaveBadWords.tests.ts

@ -0,0 +1,5 @@
---
'@rocket.chat/meteor': patch
---
Adds support for filtering bad words from messages for languages other than English

@ -1,18 +1,28 @@
import { type IMessage } from '@rocket.chat/core-typings';
import { Logger } from '@rocket.chat/logger';
import type BadWordsFilter from 'bad-words';
export class BeforeSaveBadWords {
badWords: BadWordsFilter | null = null;
badWordsRegex: RegExp | null = null;
protected logger: Logger;
/** Initializes the instance logger, created with the 'BadWordsFilter' identifier. */
constructor() {
this.logger = new Logger('BadWordsFilter');
}
async configure(badWordsList?: string, goodWordsList?: string) {
const { default: Filter } = await import('bad-words');
const badWords =
badWordsList
?.split(',')
.map((word) => word.trim())
.filter(Boolean) || [];
const options = {
list:
badWordsList
?.split(',')
.map((word) => word.trim())
.filter(Boolean) || [],
list: badWords,
// library definition does not allow optional definition
exclude: undefined,
splitRegex: undefined,
@ -24,6 +34,13 @@ export class BeforeSaveBadWords {
this.badWords = new Filter(options);
try {
	// Configured words are plain text, not patterns: escape regex syntax characters so an
	// entry such as "c++" or "a.b" is matched literally instead of throwing (which would
	// disable the whole regex filter) or over-matching. Only syntax characters are escaped,
	// because the `u` flag rejects identity escapes of any other character.
	const escapedWords = badWords.map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

	// With no configured words there is nothing to match; skip building a pattern whose
	// empty capture group would only ever match empty strings.
	// The lookbehind/lookahead require each word to be delimited by the start/end of a line
	// or by a Unicode punctuation/separator character, so words embedded in other text are
	// left untouched.
	this.badWordsRegex = escapedWords.length
		? new RegExp(`(?<=^|[\\p{P}\\p{Z}])(${escapedWords.join('|')})(?=$|[\\p{P}\\p{Z}])`, 'gmiu')
		: null;
} catch (error) {
	// Fall back to no regex-based filtering rather than failing configuration.
	this.badWordsRegex = null;
	this.logger.error('Error when initializing bad words filter', error);
}
if (goodWordsList?.length) {
this.badWords.removeWords(...goodWordsList.split(',').map((word) => word.trim()));
}
@ -31,6 +48,7 @@ export class BeforeSaveBadWords {
/**
 * Clears the filter state by resetting both the compiled regular expression
 * and the library filter instance to null.
 */
disable() {
	this.badWordsRegex = null;
	this.badWords = null;
}
async filterBadWords({ message }: { message: IMessage }): Promise<IMessage> {
@ -42,6 +60,10 @@ export class BeforeSaveBadWords {
message.msg = this.badWords.clean(message.msg);
} catch (error) {
// ignore
} finally {
if (this.badWordsRegex) {
message.msg = message.msg.replace(this.badWordsRegex, (match) => '*'.repeat(match.length));
}
}
return message;

@ -80,4 +80,48 @@ describe('Filter bad words before saving message', () => {
return expect(result.msg).to.be.equal('hell');
});
// A configured non-ASCII word must be masked with one '*' per character.
// NOTE(review): 'hell' is also expected to be masked although only 'バカ' is configured —
// presumably by the filter library's built-in list; confirm.
it('should filter non ascii bad words', async () => {
	const hook = new BeforeSaveBadWords();
	await hook.configure('バカ');

	const { msg } = await hook.filterBadWords({ message: createMessage('hell is バカ') });

	return expect(msg).to.be.equal('**** is **');
});
// A message consisting solely of the configured non-ASCII word is fully masked.
it('should filter just the non ascii bad words', async () => {
	const hook = new BeforeSaveBadWords();
	await hook.configure('バカ');

	const { msg } = await hook.filterBadWords({ message: createMessage('バカ') });

	return expect(msg).to.be.equal('**');
});
// Trailing punctuation acts as a word delimiter: the word is masked, the '.' is kept.
it('should filter non ascii bad words with punctuation', async () => {
	const hook = new BeforeSaveBadWords();
	await hook.configure('バカ');

	const { msg } = await hook.filterBadWords({ message: createMessage('バカ.') });

	return expect(msg).to.be.equal('**.');
});
// The configured word embedded between other letters must be left untouched.
it('should not filter non ascii bad words, if part of another word ', async () => {
	const hook = new BeforeSaveBadWords();
	await hook.configure('バカ');

	const { msg } = await hook.filterBadWords({ message: createMessage('TESTバカTEST') });

	return expect(msg).to.be.equal('TESTバカTEST');
});
});

Loading…
Cancel
Save