import URL from 'url'; import querystring from 'querystring'; import { Meteor } from 'meteor/meteor'; import { HTTPInternals } from 'meteor/http'; import { changeCase } from 'meteor/konecty:change-case'; import _ from 'underscore'; import iconv from 'iconv-lite'; import ipRangeCheck from 'ip-range-check'; import he from 'he'; import jschardet from 'jschardet'; import { OEmbedCache, Messages } from '../../models'; import { callbacks } from '../../callbacks'; import { settings } from '../../settings'; import { isURL } from '../../utils/lib/isURL'; const request = HTTPInternals.NpmModules.request.module; const OEmbed = {}; // Detect encoding // Priority: // Detected == HTTP Header > Detected == HTML meta > HTTP Header > HTML meta > Detected > Default (utf-8) // See also: https://www.w3.org/International/questions/qa-html-encoding-declarations.en#quickanswer const getCharset = function(contentType, body) { let detectedCharset; let httpHeaderCharset; let htmlMetaCharset; let result; contentType = contentType || ''; const binary = body.toString('binary'); const detected = jschardet.detect(binary); if (detected.confidence > 0.8) { detectedCharset = detected.encoding.toLowerCase(); } const m1 = contentType.match(/charset=([\w\-]+)/i); if (m1) { httpHeaderCharset = m1[1].toLowerCase(); } const m2 = binary.match(/]*charset=["']?([\w\-]+)/i); if (m2) { htmlMetaCharset = m2[1].toLowerCase(); } if (detectedCharset) { if (detectedCharset === httpHeaderCharset) { result = httpHeaderCharset; } else if (detectedCharset === htmlMetaCharset) { result = htmlMetaCharset; } } if (!result) { result = httpHeaderCharset || htmlMetaCharset || detectedCharset; } return result || 'utf-8'; }; const toUtf8 = function(contentType, body) { return iconv.decode(body, getCharset(contentType, body)); }; const getUrlContent = function(urlObj, redirectCount = 5, callback) { if (_.isString(urlObj)) { urlObj = URL.parse(urlObj); } const parsedUrl = _.pick(urlObj, ['host', 'hash', 'pathname', 'protocol', 'port', 'query', 'search', 'hostname']); const ignoredHosts = settings.get('API_EmbedIgnoredHosts').replace(/\s/g, '').split(',') || []; if (ignoredHosts.includes(parsedUrl.hostname) || ipRangeCheck(parsedUrl.hostname, ignoredHosts)) { return callback(); } const safePorts = settings.get('API_EmbedSafePorts').replace(/\s/g, '').split(',') || []; if (parsedUrl.port && safePorts.length > 0 && !safePorts.includes(parsedUrl.port)) { return callback(); } const data = callbacks.run('oembed:beforeGetUrlContent', { urlObj, parsedUrl, }); if (data.attachments != null) { return callback(null, data); } const url = URL.format(data.urlObj); const opts = { url, strictSSL: !settings.get('Allow_Invalid_SelfSigned_Certs'), gzip: true, maxRedirects: redirectCount, headers: { 'User-Agent': settings.get('API_Embed_UserAgent'), }, }; let headers = null; let statusCode = null; let error = null; const chunks = []; let chunksTotalLength = 0; const stream = request(opts); stream.on('response', function(response) { statusCode = response.statusCode; headers = response.headers; if (response.statusCode !== 200) { return stream.abort(); } }); stream.on('data', function(chunk) { chunks.push(chunk); chunksTotalLength += chunk.length; if (chunksTotalLength > 250000) { return stream.abort(); } }); stream.on('end', Meteor.bindEnvironment(function() { if (error != null) { return callback(null, { error, parsedUrl, }); } const buffer = Buffer.concat(chunks); return callback(null, { headers, body: toUtf8(headers['content-type'], buffer), parsedUrl, statusCode, }); })); return stream.on('error', function(err) { error = err; }); }; OEmbed.getUrlMeta = function(url, withFragment) { const getUrlContentSync = Meteor.wrapAsync(getUrlContent); const urlObj = URL.parse(url); if (withFragment != null) { const queryStringObj = querystring.parse(urlObj.query); queryStringObj._escaped_fragment_ = ''; urlObj.query = querystring.stringify(queryStringObj); let path = urlObj.pathname; if (urlObj.query != null) { path += `?${ urlObj.query }`; urlObj.search = `?${ urlObj.query }`; } urlObj.path = path; } const content = getUrlContentSync(urlObj, 5); if (!content) { return; } if (content.attachments != null) { return content; } let metas = undefined; if (content && content.body) { metas = {}; const escapeMeta = (name, value) => { metas[name] = metas[name] || he.unescape(value); return metas[name]; }; content.body.replace(/