From b252d69909bc899507ee201407cb3c44d7c2d969 Mon Sep 17 00:00:00 2001 From: gabriellsh <40830821+gabriellsh@users.noreply.github.com> Date: Mon, 2 Oct 2023 11:12:36 -0300 Subject: [PATCH] refactor: Oembed backend (#30228) --- apps/meteor/app/oembed/server/providers.ts | 56 ++---- apps/meteor/app/oembed/server/server.ts | 191 +++++++++------------ apps/meteor/lib/callbacks.ts | 20 +-- packages/core-typings/src/IOembed.ts | 9 +- 4 files changed, 107 insertions(+), 169 deletions(-) diff --git a/apps/meteor/app/oembed/server/providers.ts b/apps/meteor/app/oembed/server/providers.ts index e80e456c679..d2d0f85d19c 100644 --- a/apps/meteor/app/oembed/server/providers.ts +++ b/apps/meteor/app/oembed/server/providers.ts @@ -1,9 +1,5 @@ -import QueryString from 'querystring'; -import URL from 'url'; - -import type { OEmbedMeta, OEmbedUrlContent, ParsedUrl, OEmbedProvider } from '@rocket.chat/core-typings'; +import type { OEmbedMeta, OEmbedUrlContent, OEmbedProvider } from '@rocket.chat/core-typings'; import { camelCase } from 'change-case'; -import _ from 'underscore'; import { callbacks } from '../../../lib/callbacks'; import { SystemLogger } from '../../../server/lib/logger/system'; @@ -16,10 +12,10 @@ class Providers { } static getConsumerUrl(provider: OEmbedProvider, url: string): string { - const urlObj = new URL.URL(provider.endPoint); + const urlObj = new URL(provider.endPoint); urlObj.searchParams.set('url', url); - return URL.format(urlObj); + return urlObj.toString(); } registerProvider(provider: OEmbedProvider): number { @@ -95,25 +91,20 @@ providers.registerProvider({ callbacks.add( 'oembed:beforeGetUrlContent', (data) => { - if (data.parsedUrl != null) { - const url = URL.format(data.parsedUrl); - const provider = providers.getProviderForUrl(url); - if (provider != null) { - const consumerUrl = Providers.getConsumerUrl(provider, url); - - const parsedConsumerUrl = URL.parse(consumerUrl, true); - _.extend(data.parsedUrl, parsedConsumerUrl); - - data.urlObj.port = parsedConsumerUrl.port; - data.urlObj.hostname = parsedConsumerUrl.hostname; - data.urlObj.pathname = parsedConsumerUrl.pathname; - data.urlObj.query = parsedConsumerUrl.query; - - delete data.urlObj.search; - delete data.urlObj.host; - } + if (!data.urlObj) { + return data; } - return data; + + const url = data.urlObj.toString(); + const provider = providers.getProviderForUrl(url); + + if (!provider) { + return data; + } + + const consumerUrl = Providers.getConsumerUrl(provider, url); + + return { ...data, urlObj: new URL(consumerUrl) }; }, callbacks.priority.MEDIUM, 'oembed-providers-before', @@ -123,13 +114,11 @@ const cleanupOembed = (data: { url: string; meta: OEmbedMeta; headers: { [k: string]: string }; - parsedUrl: ParsedUrl; content: OEmbedUrlContent; }): { url: string; meta: Omit; headers: { [k: string]: string }; - parsedUrl: ParsedUrl; content: OEmbedUrlContent; } => { if (!data?.meta) { @@ -148,24 +137,17 @@ const cleanupOembed = (data: { callbacks.add( 'oembed:afterParseContent', (data) => { - if (!data?.url || !data.content?.body || !data.parsedUrl?.query) { + if (!data?.url || !data.content?.body) { return cleanupOembed(data); } - const queryString = typeof data.parsedUrl.query === 'string' ? QueryString.parse(data.parsedUrl.query) : data.parsedUrl.query; - - if (!queryString.url) { - return cleanupOembed(data); - } + const provider = providers.getProviderForUrl(data.url); - const { url: originalUrl } = data; - const provider = providers.getProviderForUrl(originalUrl); if (!provider) { return cleanupOembed(data); } - const { url } = queryString; - data.meta.oembedUrl = url; + data.meta.oembedUrl = data.url; try { const metas = JSON.parse(data.content.body); diff --git a/apps/meteor/app/oembed/server/server.ts b/apps/meteor/app/oembed/server/server.ts index 256722cdd3d..79de0402043 100644 --- a/apps/meteor/app/oembed/server/server.ts +++ b/apps/meteor/app/oembed/server/server.ts @@ -1,6 +1,3 @@ -import querystring from 'querystring'; -import URL from 'url'; - import type { OEmbedUrlContentResult, OEmbedUrlWithMetadata, IMessage, MessageAttachment, OEmbedMeta } from '@rocket.chat/core-typings'; import { isOEmbedUrlContentResult, isOEmbedUrlWithMetadata } from '@rocket.chat/core-typings'; import { Logger } from '@rocket.chat/logger'; @@ -11,7 +8,6 @@ import he from 'he'; import iconv from 'iconv-lite'; import ipRangeCheck from 'ip-range-check'; import jschardet from 'jschardet'; -import _ from 'underscore'; import { callbacks } from '../../../lib/callbacks'; import { isURL } from '../../../lib/utils/isURL'; @@ -62,14 +58,7 @@ const toUtf8 = function (contentType: string, body: Buffer): string { return iconv.decode(body, getCharset(contentType, body)); }; -const getUrlContent = async function (urlObjStr: string | URL.UrlWithStringQuery, redirectCount = 5): Promise { - let urlObj: URL.UrlWithStringQuery; - if (typeof urlObjStr === 'string') { - urlObj = URL.parse(urlObjStr); - } else { - urlObj = urlObjStr; - } - +const getUrlContent = async (urlObj: URL, redirectCount = 5): Promise => { const portsProtocol = new Map( Object.entries({ 80: 'http:', @@ -78,34 +67,28 @@ const getUrlContent = async function (urlObjStr: string | URL.UrlWithStringQuery }), ); - const parsedUrl = _.pick(urlObj, ['host', 'hash', 'pathname', 'protocol', 'port', 'query', 'search', 'hostname']); const ignoredHosts = settings.get('API_EmbedIgnoredHosts').replace(/\s/g, '').split(',') || []; - if (parsedUrl.hostname && (ignoredHosts.includes(parsedUrl.hostname) || ipRangeCheck(parsedUrl.hostname, ignoredHosts))) { + if (urlObj.hostname && (ignoredHosts.includes(urlObj.hostname) || ipRangeCheck(urlObj.hostname, ignoredHosts))) { throw new Error('invalid host'); } const safePorts = settings.get('API_EmbedSafePorts').replace(/\s/g, '').split(',') || []; - if (safePorts.length > 0 && parsedUrl.port && !safePorts.includes(parsedUrl.port)) { + // checks if the URL port is in the safe ports list + if (safePorts.length > 0 && urlObj.port && !safePorts.includes(urlObj.port)) { throw new Error('invalid/unsafe port'); } - if (safePorts.length > 0 && !parsedUrl.port && !safePorts.some((port) => portsProtocol.get(port) === parsedUrl.protocol)) { + // if port is not detected, use protocol to verify instead + if (safePorts.length > 0 && !urlObj.port && !safePorts.some((port) => portsProtocol.get(port) === urlObj.protocol)) { throw new Error('invalid/unsafe port'); } const data = await callbacks.run('oembed:beforeGetUrlContent', { urlObj, - parsedUrl, }); - /* This prop is neither passed or returned by the callback, so I'll just comment it for now - if (data.attachments != null) { - return data; - } */ - - const url = URL.format(data.urlObj); - + const url = data.urlObj.toString(); const sizeLimit = 250000; log.debug(`Fetching ${url} following redirects ${redirectCount} times`); @@ -137,10 +120,10 @@ const getUrlContent = async function (urlObjStr: string | URL.UrlWithStringQuery log.debug('Obtained response from server with length of', totalSize); const buffer = Buffer.concat(chunks); + return { headers: Object.fromEntries(response.headers), body: toUtf8(response.headers.get('content-type') || 'text/plain', buffer), - parsedUrl, statusCode: response.status, }; }; @@ -150,19 +133,13 @@ const getUrlMeta = async function ( withFragment?: boolean, ): Promise { log.debug('Obtaining metadata for URL', url); - const urlObj = URL.parse(url); - if (withFragment != null) { - const queryStringObj = querystring.parse(urlObj.query || ''); - queryStringObj._escaped_fragment_ = ''; - urlObj.query = querystring.stringify(queryStringObj); - let path = urlObj.pathname; - if (urlObj.query != null) { - path += `?${urlObj.query}`; - urlObj.search = `?${urlObj.query}`; - } - urlObj.path = path; + const urlObj = new URL(url); + + if (withFragment) { + urlObj.searchParams.set('_escaped_fragment_', ''); } - log.debug('Fetching url content', urlObj.path); + + log.debug('Fetching url content', urlObj.toString()); let content: OEmbedUrlContentResult | undefined; try { content = await getUrlContent(urlObj, 5); @@ -174,7 +151,7 @@ const getUrlMeta = async function ( return; } - if (content.attachments != null) { + if (content.attachments) { return content; } @@ -221,7 +198,6 @@ const getUrlMeta = async function ( url, meta: metas, headers, - parsedUrl: content.parsedUrl, content, }); }; @@ -233,38 +209,25 @@ const getUrlMetaWithCache = async function ( log.debug('Getting oembed metadata for', url); const cache = await OEmbedCache.findOneById(url); - if (cache != null) { + if (cache) { log.debug('Found oembed metadata in cache for', url); return cache.data; } + const data = await getUrlMeta(url, withFragment); - if (data != null) { - try { - log.debug('Saving oembed metadata in cache for', url); - await OEmbedCache.createWithIdAndData(url, data); - } catch (_error) { - log.error({ msg: 'OEmbed duplicated record', url }); - } - return data; - } -}; -const hasOnlyContentLength = (obj: any): obj is { contentLength: string } => 'contentLength' in obj && Object.keys(obj).length === 1; -const hasOnlyContentType = (obj: any): obj is { contentType: string } => 'contentType' in obj && Object.keys(obj).length === 1; -const hasContentLengthAndContentType = (obj: any): obj is { contentLength: string; contentType: string } => - 'contentLength' in obj && 'contentType' in obj && Object.keys(obj).length === 2; - -const getRelevantHeaders = function (headersObj: { - [key: string]: string; -}): { contentLength: string } | { contentType: string } | { contentLength: string; contentType: string } | void { - const headers = { - ...(headersObj.contentLength && { contentLength: headersObj.contentLength }), - ...(headersObj.contentType && { contentType: headersObj.contentType }), - }; + if (!data) { + return; + } - if (hasOnlyContentLength(headers) || hasOnlyContentType(headers) || hasContentLengthAndContentType(headers)) { - return headers; + try { + log.debug('Saving oembed metadata in cache for', url); + await OEmbedCache.createWithIdAndData(url, data); + } catch (_error) { + log.error({ msg: 'OEmbed duplicated record', url }); } + + return data; }; const getRelevantMetaTags = function (metaObj: OEmbedMeta): Record | void { @@ -286,57 +249,71 @@ const insertMaxWidthInOembedHtml = (oembedHtml?: string): string | undefined => const rocketUrlParser = async function (message: IMessage): Promise { log.debug('Parsing message URLs'); - if (Array.isArray(message.urls)) { - log.debug('URLs found', message.urls.length); - - if ( - (message.attachments && message.attachments.length > 0) || - message.urls.filter((item) => !item.url.includes(settings.get('Site_Url'))).length > MAX_EXTERNAL_URL_PREVIEWS - ) { - log.debug('All URL ignored'); - return message; + + if (!Array.isArray(message.urls)) { + return message; + } + + log.debug('URLs found', message.urls.length); + + if ( + (message.attachments && message.attachments.length > 0) || + message.urls.filter((item) => !item.url.includes(settings.get('Site_Url'))).length > MAX_EXTERNAL_URL_PREVIEWS + ) { + log.debug('All URL ignored'); + return message; + } + + const attachments: MessageAttachment[] = []; + + let changed = false; + for await (const item of message.urls) { + if (item.ignoreParse === true) { + log.debug('URL ignored', item.url); + continue; } - const attachments: MessageAttachment[] = []; + if (!isURL(item.url)) { + continue; + } - let changed = false; - for await (const item of message.urls) { - if (item.ignoreParse === true) { - log.debug('URL ignored', item.url); - continue; - } - if (!isURL(item.url)) { - continue; - } - const data = await getUrlMetaWithCache(item.url); - if (data != null) { - if (isOEmbedUrlContentResult(data) && data.attachments) { - attachments.push(...data.attachments); - break; - } - if (isOEmbedUrlWithMetadata(data) && data.meta != null) { - item.meta = getRelevantMetaTags(data.meta) || {}; - if (item.meta?.oembedHtml) { - item.meta.oembedHtml = insertMaxWidthInOembedHtml(item.meta.oembedHtml) || ''; - } - } - if (data.headers != null) { - const headers = getRelevantHeaders(data.headers); - if (headers) { - item.headers = headers; - } - } - item.parsedUrl = data.parsedUrl; - changed = true; + const data = await getUrlMetaWithCache(item.url); + + if (!data) { + continue; + } + + if (isOEmbedUrlContentResult(data) && data.attachments) { + attachments.push(...data.attachments); + break; + } + + if (isOEmbedUrlWithMetadata(data) && data.meta) { + item.meta = getRelevantMetaTags(data.meta) || {}; + if (item.meta?.oembedHtml) { + item.meta.oembedHtml = insertMaxWidthInOembedHtml(item.meta.oembedHtml) || ''; } } - if (attachments.length > 0) { - await Messages.setMessageAttachments(message._id, attachments); + + if (data.headers?.contentLength) { + item.headers = { ...item.headers, contentLength: data.headers.contentLength }; } - if (changed === true) { - await Messages.setUrlsById(message._id, message.urls); + + if (data.headers?.contentType) { + item.headers = { ...item.headers, contentType: data.headers.contentType }; } + + changed = true; } + + if (attachments.length) { + await Messages.setMessageAttachments(message._id, attachments); + } + + if (changed === true) { + await Messages.setUrlsById(message._id, message.urls); + } + return message; }; diff --git a/apps/meteor/lib/callbacks.ts b/apps/meteor/lib/callbacks.ts index 46a27357f54..9c7333a355b 100644 --- a/apps/meteor/lib/callbacks.ts +++ b/apps/meteor/lib/callbacks.ts @@ -1,5 +1,3 @@ -import type { UrlWithParsedQuery } from 'url'; - import type { IMessage, IRoom, @@ -10,7 +8,6 @@ import type { ILivechatInquiryRecord, ILivechatVisitor, VideoConference, - ParsedUrl, OEmbedMeta, OEmbedUrlContent, Username, @@ -167,24 +164,13 @@ type ChainedCallbackSignatures = { BusinessHourBehaviorClass: { new (): IBusinessHourBehavior }; }; 'renderMessage': (message: T) => T; - 'oembed:beforeGetUrlContent': (data: { - urlObj: Omit & { host?: unknown; search?: unknown }; - parsedUrl: ParsedUrl; - }) => { - urlObj: UrlWithParsedQuery; - parsedUrl: ParsedUrl; + 'oembed:beforeGetUrlContent': (data: { urlObj: URL }) => { + urlObj: URL; }; - 'oembed:afterParseContent': (data: { - url: string; - meta: OEmbedMeta; - headers: { [k: string]: string }; - parsedUrl: ParsedUrl; - content: OEmbedUrlContent; - }) => { + 'oembed:afterParseContent': (data: { url: string; meta: OEmbedMeta; headers: { [k: string]: string }; content: OEmbedUrlContent }) => { url: string; meta: OEmbedMeta; headers: { [k: string]: string }; - parsedUrl: ParsedUrl; content: OEmbedUrlContent; }; 'livechat.beforeListTags': () => ILivechatTag[]; diff --git a/packages/core-typings/src/IOembed.ts b/packages/core-typings/src/IOembed.ts index c540cb89381..0b781aa07fc 100644 --- a/packages/core-typings/src/IOembed.ts +++ b/packages/core-typings/src/IOembed.ts @@ -1,9 +1,5 @@ -import type Url from 'url'; - import type { MessageAttachment } from './IMessage'; -export type ParsedUrl = Pick; - export type OEmbedMeta = { [key: string]: string; } & { @@ -12,8 +8,7 @@ export type OEmbedMeta = { }; export type OEmbedUrlContent = { - urlObj: Url.UrlWithParsedQuery; - parsedUrl: ParsedUrl; + urlObj: URL; headers: { [k: string]: string }; body: string; statusCode: number; @@ -27,7 +22,6 @@ export type OEmbedProvider = { export type OEmbedUrlContentResult = { headers: { [key: string]: string }; body: string; - parsedUrl: Pick; statusCode: number; attachments?: MessageAttachment[]; }; @@ -38,7 +32,6 @@ export type OEmbedUrlWithMetadata = { url: string; meta: OEmbedMeta; headers: { [k: string]: string }; - parsedUrl: Pick; content: OEmbedUrlContent; };