refactor: Oembed backend (#30228)

pull/30531/head^2
gabriellsh 2 years ago committed by GitHub
parent 2f74b2c99a
commit b252d69909
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 56
      apps/meteor/app/oembed/server/providers.ts
  2. 191
      apps/meteor/app/oembed/server/server.ts
  3. 20
      apps/meteor/lib/callbacks.ts
  4. 9
      packages/core-typings/src/IOembed.ts

@ -1,9 +1,5 @@
import QueryString from 'querystring';
import URL from 'url';
import type { OEmbedMeta, OEmbedUrlContent, ParsedUrl, OEmbedProvider } from '@rocket.chat/core-typings';
import type { OEmbedMeta, OEmbedUrlContent, OEmbedProvider } from '@rocket.chat/core-typings';
import { camelCase } from 'change-case';
import _ from 'underscore';
import { callbacks } from '../../../lib/callbacks';
import { SystemLogger } from '../../../server/lib/logger/system';
@ -16,10 +12,10 @@ class Providers {
}
static getConsumerUrl(provider: OEmbedProvider, url: string): string {
const urlObj = new URL.URL(provider.endPoint);
const urlObj = new URL(provider.endPoint);
urlObj.searchParams.set('url', url);
return URL.format(urlObj);
return urlObj.toString();
}
registerProvider(provider: OEmbedProvider): number {
@ -95,25 +91,20 @@ providers.registerProvider({
callbacks.add(
'oembed:beforeGetUrlContent',
(data) => {
if (data.parsedUrl != null) {
const url = URL.format(data.parsedUrl);
const provider = providers.getProviderForUrl(url);
if (provider != null) {
const consumerUrl = Providers.getConsumerUrl(provider, url);
const parsedConsumerUrl = URL.parse(consumerUrl, true);
_.extend(data.parsedUrl, parsedConsumerUrl);
data.urlObj.port = parsedConsumerUrl.port;
data.urlObj.hostname = parsedConsumerUrl.hostname;
data.urlObj.pathname = parsedConsumerUrl.pathname;
data.urlObj.query = parsedConsumerUrl.query;
delete data.urlObj.search;
delete data.urlObj.host;
}
if (!data.urlObj) {
return data;
}
return data;
const url = data.urlObj.toString();
const provider = providers.getProviderForUrl(url);
if (!provider) {
return data;
}
const consumerUrl = Providers.getConsumerUrl(provider, url);
return { ...data, urlObj: new URL(consumerUrl) };
},
callbacks.priority.MEDIUM,
'oembed-providers-before',
@ -123,13 +114,11 @@ const cleanupOembed = (data: {
url: string;
meta: OEmbedMeta;
headers: { [k: string]: string };
parsedUrl: ParsedUrl;
content: OEmbedUrlContent;
}): {
url: string;
meta: Omit<OEmbedMeta, 'oembedHtml'>;
headers: { [k: string]: string };
parsedUrl: ParsedUrl;
content: OEmbedUrlContent;
} => {
if (!data?.meta) {
@ -148,24 +137,17 @@ const cleanupOembed = (data: {
callbacks.add(
'oembed:afterParseContent',
(data) => {
if (!data?.url || !data.content?.body || !data.parsedUrl?.query) {
if (!data?.url || !data.content?.body) {
return cleanupOembed(data);
}
const queryString = typeof data.parsedUrl.query === 'string' ? QueryString.parse(data.parsedUrl.query) : data.parsedUrl.query;
if (!queryString.url) {
return cleanupOembed(data);
}
const provider = providers.getProviderForUrl(data.url);
const { url: originalUrl } = data;
const provider = providers.getProviderForUrl(originalUrl);
if (!provider) {
return cleanupOembed(data);
}
const { url } = queryString;
data.meta.oembedUrl = url;
data.meta.oembedUrl = data.url;
try {
const metas = JSON.parse(data.content.body);

@ -1,6 +1,3 @@
import querystring from 'querystring';
import URL from 'url';
import type { OEmbedUrlContentResult, OEmbedUrlWithMetadata, IMessage, MessageAttachment, OEmbedMeta } from '@rocket.chat/core-typings';
import { isOEmbedUrlContentResult, isOEmbedUrlWithMetadata } from '@rocket.chat/core-typings';
import { Logger } from '@rocket.chat/logger';
@ -11,7 +8,6 @@ import he from 'he';
import iconv from 'iconv-lite';
import ipRangeCheck from 'ip-range-check';
import jschardet from 'jschardet';
import _ from 'underscore';
import { callbacks } from '../../../lib/callbacks';
import { isURL } from '../../../lib/utils/isURL';
@ -62,14 +58,7 @@ const toUtf8 = function (contentType: string, body: Buffer): string {
return iconv.decode(body, getCharset(contentType, body));
};
const getUrlContent = async function (urlObjStr: string | URL.UrlWithStringQuery, redirectCount = 5): Promise<OEmbedUrlContentResult> {
let urlObj: URL.UrlWithStringQuery;
if (typeof urlObjStr === 'string') {
urlObj = URL.parse(urlObjStr);
} else {
urlObj = urlObjStr;
}
const getUrlContent = async (urlObj: URL, redirectCount = 5): Promise<OEmbedUrlContentResult> => {
const portsProtocol = new Map<string, string>(
Object.entries({
80: 'http:',
@ -78,34 +67,28 @@ const getUrlContent = async function (urlObjStr: string | URL.UrlWithStringQuery
}),
);
const parsedUrl = _.pick(urlObj, ['host', 'hash', 'pathname', 'protocol', 'port', 'query', 'search', 'hostname']);
const ignoredHosts = settings.get<string>('API_EmbedIgnoredHosts').replace(/\s/g, '').split(',') || [];
if (parsedUrl.hostname && (ignoredHosts.includes(parsedUrl.hostname) || ipRangeCheck(parsedUrl.hostname, ignoredHosts))) {
if (urlObj.hostname && (ignoredHosts.includes(urlObj.hostname) || ipRangeCheck(urlObj.hostname, ignoredHosts))) {
throw new Error('invalid host');
}
const safePorts = settings.get<string>('API_EmbedSafePorts').replace(/\s/g, '').split(',') || [];
if (safePorts.length > 0 && parsedUrl.port && !safePorts.includes(parsedUrl.port)) {
// checks if the URL port is in the safe ports list
if (safePorts.length > 0 && urlObj.port && !safePorts.includes(urlObj.port)) {
throw new Error('invalid/unsafe port');
}
if (safePorts.length > 0 && !parsedUrl.port && !safePorts.some((port) => portsProtocol.get(port) === parsedUrl.protocol)) {
// if port is not detected, use protocol to verify instead
if (safePorts.length > 0 && !urlObj.port && !safePorts.some((port) => portsProtocol.get(port) === urlObj.protocol)) {
throw new Error('invalid/unsafe port');
}
const data = await callbacks.run('oembed:beforeGetUrlContent', {
urlObj,
parsedUrl,
});
/* This prop is neither passed to nor returned by the callback, so I'll just comment it out for now
if (data.attachments != null) {
return data;
} */
const url = URL.format(data.urlObj);
const url = data.urlObj.toString();
const sizeLimit = 250000;
log.debug(`Fetching ${url} following redirects ${redirectCount} times`);
@ -137,10 +120,10 @@ const getUrlContent = async function (urlObjStr: string | URL.UrlWithStringQuery
log.debug('Obtained response from server with length of', totalSize);
const buffer = Buffer.concat(chunks);
return {
headers: Object.fromEntries(response.headers),
body: toUtf8(response.headers.get('content-type') || 'text/plain', buffer),
parsedUrl,
statusCode: response.status,
};
};
@ -150,19 +133,13 @@ const getUrlMeta = async function (
withFragment?: boolean,
): Promise<OEmbedUrlWithMetadata | OEmbedUrlContentResult | undefined> {
log.debug('Obtaining metadata for URL', url);
const urlObj = URL.parse(url);
if (withFragment != null) {
const queryStringObj = querystring.parse(urlObj.query || '');
queryStringObj._escaped_fragment_ = '';
urlObj.query = querystring.stringify(queryStringObj);
let path = urlObj.pathname;
if (urlObj.query != null) {
path += `?${urlObj.query}`;
urlObj.search = `?${urlObj.query}`;
}
urlObj.path = path;
const urlObj = new URL(url);
if (withFragment) {
urlObj.searchParams.set('_escaped_fragment_', '');
}
log.debug('Fetching url content', urlObj.path);
log.debug('Fetching url content', urlObj.toString());
let content: OEmbedUrlContentResult | undefined;
try {
content = await getUrlContent(urlObj, 5);
@ -174,7 +151,7 @@ const getUrlMeta = async function (
return;
}
if (content.attachments != null) {
if (content.attachments) {
return content;
}
@ -221,7 +198,6 @@ const getUrlMeta = async function (
url,
meta: metas,
headers,
parsedUrl: content.parsedUrl,
content,
});
};
@ -233,38 +209,25 @@ const getUrlMetaWithCache = async function (
log.debug('Getting oembed metadata for', url);
const cache = await OEmbedCache.findOneById(url);
if (cache != null) {
if (cache) {
log.debug('Found oembed metadata in cache for', url);
return cache.data;
}
const data = await getUrlMeta(url, withFragment);
if (data != null) {
try {
log.debug('Saving oembed metadata in cache for', url);
await OEmbedCache.createWithIdAndData(url, data);
} catch (_error) {
log.error({ msg: 'OEmbed duplicated record', url });
}
return data;
}
};
const hasOnlyContentLength = (obj: any): obj is { contentLength: string } => 'contentLength' in obj && Object.keys(obj).length === 1;
const hasOnlyContentType = (obj: any): obj is { contentType: string } => 'contentType' in obj && Object.keys(obj).length === 1;
const hasContentLengthAndContentType = (obj: any): obj is { contentLength: string; contentType: string } =>
'contentLength' in obj && 'contentType' in obj && Object.keys(obj).length === 2;
const getRelevantHeaders = function (headersObj: {
[key: string]: string;
}): { contentLength: string } | { contentType: string } | { contentLength: string; contentType: string } | void {
const headers = {
...(headersObj.contentLength && { contentLength: headersObj.contentLength }),
...(headersObj.contentType && { contentType: headersObj.contentType }),
};
if (!data) {
return;
}
if (hasOnlyContentLength(headers) || hasOnlyContentType(headers) || hasContentLengthAndContentType(headers)) {
return headers;
try {
log.debug('Saving oembed metadata in cache for', url);
await OEmbedCache.createWithIdAndData(url, data);
} catch (_error) {
log.error({ msg: 'OEmbed duplicated record', url });
}
return data;
};
const getRelevantMetaTags = function (metaObj: OEmbedMeta): Record<string, string> | void {
@ -286,57 +249,71 @@ const insertMaxWidthInOembedHtml = (oembedHtml?: string): string | undefined =>
const rocketUrlParser = async function (message: IMessage): Promise<IMessage> {
log.debug('Parsing message URLs');
if (Array.isArray(message.urls)) {
log.debug('URLs found', message.urls.length);
if (
(message.attachments && message.attachments.length > 0) ||
message.urls.filter((item) => !item.url.includes(settings.get('Site_Url'))).length > MAX_EXTERNAL_URL_PREVIEWS
) {
log.debug('All URL ignored');
return message;
if (!Array.isArray(message.urls)) {
return message;
}
log.debug('URLs found', message.urls.length);
if (
(message.attachments && message.attachments.length > 0) ||
message.urls.filter((item) => !item.url.includes(settings.get('Site_Url'))).length > MAX_EXTERNAL_URL_PREVIEWS
) {
log.debug('All URL ignored');
return message;
}
const attachments: MessageAttachment[] = [];
let changed = false;
for await (const item of message.urls) {
if (item.ignoreParse === true) {
log.debug('URL ignored', item.url);
continue;
}
const attachments: MessageAttachment[] = [];
if (!isURL(item.url)) {
continue;
}
let changed = false;
for await (const item of message.urls) {
if (item.ignoreParse === true) {
log.debug('URL ignored', item.url);
continue;
}
if (!isURL(item.url)) {
continue;
}
const data = await getUrlMetaWithCache(item.url);
if (data != null) {
if (isOEmbedUrlContentResult(data) && data.attachments) {
attachments.push(...data.attachments);
break;
}
if (isOEmbedUrlWithMetadata(data) && data.meta != null) {
item.meta = getRelevantMetaTags(data.meta) || {};
if (item.meta?.oembedHtml) {
item.meta.oembedHtml = insertMaxWidthInOembedHtml(item.meta.oembedHtml) || '';
}
}
if (data.headers != null) {
const headers = getRelevantHeaders(data.headers);
if (headers) {
item.headers = headers;
}
}
item.parsedUrl = data.parsedUrl;
changed = true;
const data = await getUrlMetaWithCache(item.url);
if (!data) {
continue;
}
if (isOEmbedUrlContentResult(data) && data.attachments) {
attachments.push(...data.attachments);
break;
}
if (isOEmbedUrlWithMetadata(data) && data.meta) {
item.meta = getRelevantMetaTags(data.meta) || {};
if (item.meta?.oembedHtml) {
item.meta.oembedHtml = insertMaxWidthInOembedHtml(item.meta.oembedHtml) || '';
}
}
if (attachments.length > 0) {
await Messages.setMessageAttachments(message._id, attachments);
if (data.headers?.contentLength) {
item.headers = { ...item.headers, contentLength: data.headers.contentLength };
}
if (changed === true) {
await Messages.setUrlsById(message._id, message.urls);
if (data.headers?.contentType) {
item.headers = { ...item.headers, contentType: data.headers.contentType };
}
changed = true;
}
if (attachments.length) {
await Messages.setMessageAttachments(message._id, attachments);
}
if (changed === true) {
await Messages.setUrlsById(message._id, message.urls);
}
return message;
};

@ -1,5 +1,3 @@
import type { UrlWithParsedQuery } from 'url';
import type {
IMessage,
IRoom,
@ -10,7 +8,6 @@ import type {
ILivechatInquiryRecord,
ILivechatVisitor,
VideoConference,
ParsedUrl,
OEmbedMeta,
OEmbedUrlContent,
Username,
@ -167,24 +164,13 @@ type ChainedCallbackSignatures = {
BusinessHourBehaviorClass: { new (): IBusinessHourBehavior };
};
'renderMessage': <T extends IMessage & { html: string }>(message: T) => T;
'oembed:beforeGetUrlContent': (data: {
urlObj: Omit<UrlWithParsedQuery, 'host' | 'search'> & { host?: unknown; search?: unknown };
parsedUrl: ParsedUrl;
}) => {
urlObj: UrlWithParsedQuery;
parsedUrl: ParsedUrl;
'oembed:beforeGetUrlContent': (data: { urlObj: URL }) => {
urlObj: URL;
};
'oembed:afterParseContent': (data: {
url: string;
meta: OEmbedMeta;
headers: { [k: string]: string };
parsedUrl: ParsedUrl;
content: OEmbedUrlContent;
}) => {
'oembed:afterParseContent': (data: { url: string; meta: OEmbedMeta; headers: { [k: string]: string }; content: OEmbedUrlContent }) => {
url: string;
meta: OEmbedMeta;
headers: { [k: string]: string };
parsedUrl: ParsedUrl;
content: OEmbedUrlContent;
};
'livechat.beforeListTags': () => ILivechatTag[];

@ -1,9 +1,5 @@
import type Url from 'url';
import type { MessageAttachment } from './IMessage';
export type ParsedUrl = Pick<Url.UrlWithParsedQuery, 'host' | 'hash' | 'pathname' | 'protocol' | 'port' | 'query' | 'search' | 'hostname'>;
export type OEmbedMeta = {
[key: string]: string;
} & {
@ -12,8 +8,7 @@ export type OEmbedMeta = {
};
export type OEmbedUrlContent = {
urlObj: Url.UrlWithParsedQuery;
parsedUrl: ParsedUrl;
urlObj: URL;
headers: { [k: string]: string };
body: string;
statusCode: number;
@ -27,7 +22,6 @@ export type OEmbedProvider = {
export type OEmbedUrlContentResult = {
headers: { [key: string]: string };
body: string;
parsedUrl: Pick<Url.UrlWithStringQuery, 'host' | 'hash' | 'pathname' | 'protocol' | 'port' | 'query' | 'search' | 'hostname'>;
statusCode: number;
attachments?: MessageAttachment[];
};
@ -38,7 +32,6 @@ export type OEmbedUrlWithMetadata = {
url: string;
meta: OEmbedMeta;
headers: { [k: string]: string };
parsedUrl: Pick<Url.UrlWithStringQuery, 'host' | 'hash' | 'pathname' | 'protocol' | 'port' | 'query' | 'search' | 'hostname'>;
content: OEmbedUrlContent;
};

Loading…
Cancel
Save