mirror of https://github.com/jitsi/jitsi-meet
pull/4716/head
jitsi-meet_4022
parent
11d3a343e5
commit
761ac6a730
@ -0,0 +1,44 @@ |
|||||||
|
// @flow
|
||||||
|
|
||||||
|
import { getJitsiMeetGlobalNS, loadScript } from '../base/util'; |
||||||
|
|
||||||
|
let loadRnnoisePromise; |
||||||
|
|
||||||
|
/** |
||||||
|
* Returns promise that resolves with a RnnoiseProcessor instance. |
||||||
|
* |
||||||
|
* @returns {Promise<RnnoiseProcessor>} - Resolves with the blur effect instance. |
||||||
|
*/ |
||||||
|
export function createRnnoiseProcessorPromise() { |
||||||
|
// Subsequent calls should not attempt to load the script multiple times.
|
||||||
|
if (!loadRnnoisePromise) { |
||||||
|
loadRnnoisePromise = loadScript('libs/rnnoise-processor.min.js'); |
||||||
|
} |
||||||
|
|
||||||
|
return loadRnnoisePromise.then(() => { |
||||||
|
const ns = getJitsiMeetGlobalNS(); |
||||||
|
|
||||||
|
if (ns?.effects?.rnnoise?.createRnnoiseProcessor) { |
||||||
|
return ns.effects.rnnoise.createRnnoiseProcessor(); |
||||||
|
} |
||||||
|
|
||||||
|
throw new Error('Rnnoise module binding createRnnoiseProcessor not found!'); |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Get the accepted sample length for the rnnoise library. We might want to expose it with flow libdefs. |
||||||
|
* |
||||||
|
* @returns {number} |
||||||
|
*/ |
||||||
|
export function getSampleLength() { |
||||||
|
const ns = getJitsiMeetGlobalNS(); |
||||||
|
|
||||||
|
const rnnoiseSample = ns?.effects?.rnnoise?.RNNOISE_SAMPLE_LENGTH; |
||||||
|
|
||||||
|
if (!rnnoiseSample) { |
||||||
|
throw new Error('Please call createRnnoiseProcessorPromise first or wait for promise to resolve!'); |
||||||
|
} |
||||||
|
|
||||||
|
return rnnoiseSample; |
||||||
|
} |
@ -0,0 +1,2 @@ |
|||||||
|
|
||||||
|
export * from './functions'; |
@ -0,0 +1,174 @@ |
|||||||
|
// @flow
|
||||||
|
|
||||||
|
/** |
||||||
|
* Constant. Rnnoise default sample size, samples of different size won't work. |
||||||
|
*/ |
||||||
|
export const RNNOISE_SAMPLE_LENGTH: number = 480; |
||||||
|
|
||||||
|
/** |
||||||
|
* Constant. Rnnoise only takes inputs of 480 PCM float32 samples thus 480*4. |
||||||
|
*/ |
||||||
|
const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4; |
||||||
|
|
||||||
|
/** |
||||||
|
* Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly |
||||||
|
* memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity |
||||||
|
* detection) scores. |
||||||
|
*/ |
||||||
|
export default class RnnoiseProcessor { |
||||||
|
/** |
||||||
|
* Rnnoise context object needed to perform the audio processing. |
||||||
|
*/ |
||||||
|
_context: ?Object; |
||||||
|
|
||||||
|
/** |
||||||
|
* State flag, check if the instance was destroyed. |
||||||
|
*/ |
||||||
|
_destroyed: boolean = false; |
||||||
|
|
||||||
|
/** |
||||||
|
* WASM interface through which calls to rnnoise are made. |
||||||
|
*/ |
||||||
|
_wasmInterface: Object; |
||||||
|
|
||||||
|
/** |
||||||
|
* WASM dynamic memory buffer used as input for rnnoise processing method. |
||||||
|
*/ |
||||||
|
_wasmPcmInput: Object; |
||||||
|
|
||||||
|
/** |
||||||
|
* The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer. |
||||||
|
*/ |
||||||
|
_wasmPcmInputF32Index: number; |
||||||
|
|
||||||
|
/** |
||||||
|
* WASM dynamic memory buffer used as output for rnnoise processing method. |
||||||
|
*/ |
||||||
|
_wasmPcmOutput: Object; |
||||||
|
|
||||||
|
/** |
||||||
|
* Constructor. |
||||||
|
* |
||||||
|
* @class |
||||||
|
* @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality. |
||||||
|
*/ |
||||||
|
constructor(wasmInterface: Object) { |
||||||
|
// Considering that we deal with dynamic allocated memory employ exception safety strong guarantee
|
||||||
|
// i.e. in case of exception there are no side effects.
|
||||||
|
try { |
||||||
|
this._wasmInterface = wasmInterface; |
||||||
|
|
||||||
|
// For VAD score purposes only allocate the buffers once and reuse them
|
||||||
|
this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE); |
||||||
|
|
||||||
|
if (!this._wasmPcmInput) { |
||||||
|
throw Error('Failed to create wasm input memory buffer!'); |
||||||
|
} |
||||||
|
|
||||||
|
this._wasmPcmOutput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE); |
||||||
|
|
||||||
|
if (!this._wasmPcmOutput) { |
||||||
|
wasmInterface._free(this._wasmPcmInput); |
||||||
|
throw Error('Failed to create wasm output memory buffer!'); |
||||||
|
} |
||||||
|
|
||||||
|
// The HEAPF32.set function requires an index relative to a Float32 array view of the wasm memory model
|
||||||
|
// which is an array of bytes. This means we have to divide it by the size of a float to get the index
|
||||||
|
// relative to a Float32 Array.
|
||||||
|
this._wasmPcmInputF32Index = this._wasmPcmInput / 4; |
||||||
|
|
||||||
|
this._context = this._wasmInterface._rnnoise_create(); |
||||||
|
} catch (error) { |
||||||
|
// release can be called even if not all the components were initialized.
|
||||||
|
this._releaseWasmResources(); |
||||||
|
throw error; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Copy the input PCM Audio Sample to the wasm input buffer. |
||||||
|
* |
||||||
|
* @param {Float32Array} pcmSample - Array containing 16 bit format PCM sample stored in 32 Floats . |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_copyPCMSampleToWasmBuffer(pcmSample: Float32Array) { |
||||||
|
this._wasmInterface.HEAPF32.set(pcmSample, this._wasmPcmInputF32Index); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Convert 32 bit Float PCM samples to 16 bit Float PCM samples and store them in 32 bit Floats. |
||||||
|
* |
||||||
|
* @param {Float32Array} f32Array - Array containing 32 bit PCM samples. |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_convertTo16BitPCM(f32Array: Float32Array) { |
||||||
|
for (const [ index, value ] of f32Array.entries()) { |
||||||
|
f32Array[index] = value * 0x7fff; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Release resources associated with the wasm context. If something goes downhill here |
||||||
|
* i.e. Exception is thrown, there is nothing much we can do. |
||||||
|
* |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_releaseWasmResources() { |
||||||
|
// For VAD score purposes only allocate the buffers once and reuse them
|
||||||
|
if (this._wasmPcmInput) { |
||||||
|
this._wasmInterface._free(this._wasmPcmInput); |
||||||
|
this._wasmPcmInput = null; |
||||||
|
} |
||||||
|
|
||||||
|
if (this._wasmPcmOutput) { |
||||||
|
this._wasmInterface._free(this._wasmPcmOutput); |
||||||
|
this._wasmPcmOutput = null; |
||||||
|
} |
||||||
|
|
||||||
|
if (this._context) { |
||||||
|
this._wasmInterface._rnnoise_destroy(this._context); |
||||||
|
this._context = null; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Release any resources required by the rnnoise context this needs to be called |
||||||
|
* before destroying any context that uses the processor. |
||||||
|
* |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
destroy() { |
||||||
|
// Attempting to release a non initialized processor, do nothing.
|
||||||
|
if (this._destroyed) { |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
this._releaseWasmResources(); |
||||||
|
|
||||||
|
this._destroyed = true; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Calculate the Voice Activity Detection for a raw Float32 PCM sample Array. |
||||||
|
* The size of the array must be of exactly 480 samples, this constraint comes from the rnnoise library. |
||||||
|
* |
||||||
|
* @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples. |
||||||
|
* @returns {Float} Contains VAD score in the interval 0 - 1 i.e. 0.90 . |
||||||
|
*/ |
||||||
|
calculateAudioFrameVAD(pcmFrame: Float32Array) { |
||||||
|
if (this._destroyed) { |
||||||
|
throw new Error('RnnoiseProcessor instance is destroyed, please create another one!'); |
||||||
|
} |
||||||
|
|
||||||
|
const pcmFrameLength = pcmFrame.length; |
||||||
|
|
||||||
|
if (pcmFrameLength !== RNNOISE_SAMPLE_LENGTH) { |
||||||
|
throw new Error(`Rnnoise can only process PCM frames of 480 samples! Input sample was:${pcmFrameLength}`); |
||||||
|
} |
||||||
|
|
||||||
|
this._convertTo16BitPCM(pcmFrame); |
||||||
|
this._copyPCMSampleToWasmBuffer(pcmFrame); |
||||||
|
|
||||||
|
return this._wasmInterface._rnnoise_process_frame(this._context, this._wasmPcmOutput, this._wasmPcmInput); |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,36 @@ |
|||||||
|
// @flow
|
||||||
|
|
||||||
|
// Script expects to find rnnoise webassembly binary in the same public path root, otherwise it won't load
|
||||||
|
// During the build phase this needs to be taken care of manually
|
||||||
|
import rnnoiseWasmInit from 'rnnoise-wasm'; |
||||||
|
import RnnoiseProcessor from './RnnoiseProcessor'; |
||||||
|
|
||||||
|
export { RNNOISE_SAMPLE_LENGTH } from './RnnoiseProcessor'; |
||||||
|
export type { RnnoiseProcessor }; |
||||||
|
|
||||||
|
let rnnoiseWasmInterface; |
||||||
|
let initializePromise; |
||||||
|
|
||||||
|
/** |
||||||
|
* Creates a new instance of RnnoiseProcessor. |
||||||
|
* |
||||||
|
* @returns {Promise<RnnoiseProcessor>} |
||||||
|
*/ |
||||||
|
export function createRnnoiseProcessor() { |
||||||
|
if (!initializePromise) { |
||||||
|
initializePromise = new Promise((resolve, reject) => { |
||||||
|
rnnoiseWasmInterface = rnnoiseWasmInit({ |
||||||
|
onRuntimeInitialized() { |
||||||
|
resolve(); |
||||||
|
}, |
||||||
|
onAbort(reason) { |
||||||
|
reject(reason); |
||||||
|
} |
||||||
|
}); |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
return initializePromise.then( |
||||||
|
() => new RnnoiseProcessor(rnnoiseWasmInterface) |
||||||
|
); |
||||||
|
} |
@ -0,0 +1,258 @@ |
|||||||
|
// @flow
|
||||||
|
|
||||||
|
import { createRnnoiseProcessorPromise, getSampleLength } from '../rnnoise/'; |
||||||
|
import EventEmitter from 'events'; |
||||||
|
import JitsiMeetJS from '../base/lib-jitsi-meet'; |
||||||
|
import logger from './logger'; |
||||||
|
import { VAD_SCORE_PUBLISHED } from './VADEvents'; |
||||||
|
|
||||||
|
/** |
||||||
|
* The structure used by TrackVADEmitter to relay a score |
||||||
|
*/ |
||||||
|
export type VADScore = { |
||||||
|
|
||||||
|
/** |
||||||
|
* Device ID associated with the VAD score |
||||||
|
*/ |
||||||
|
deviceId: string, |
||||||
|
|
||||||
|
/** |
||||||
|
* The PCM score from 0 - 1 i.e. 0.60 |
||||||
|
*/ |
||||||
|
score: number, |
||||||
|
|
||||||
|
/** |
||||||
|
* Epoch time at which PCM was recorded |
||||||
|
*/ |
||||||
|
timestamp: number |
||||||
|
|
||||||
|
}; |
||||||
|
|
||||||
|
/** |
||||||
|
* Connects an audio JitsiLocalTrack to a RnnoiseProcessor using WebAudio ScriptProcessorNode. |
||||||
|
* Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM. |
||||||
|
* The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained, the |
||||||
|
* score is published to consumers via an EventEmitter. |
||||||
|
* After work is done with this service the destroy method needs to be called for a proper cleanup. |
||||||
|
*/ |
||||||
|
export default class TrackVADEmitter extends EventEmitter { |
||||||
|
/** |
||||||
|
* The AudioContext instance. |
||||||
|
*/ |
||||||
|
_audioContext: AudioContext; |
||||||
|
|
||||||
|
/** |
||||||
|
* The MediaStreamAudioSourceNode instance. |
||||||
|
*/ |
||||||
|
_audioSource: MediaStreamAudioSourceNode; |
||||||
|
|
||||||
|
/** |
||||||
|
* The ScriptProcessorNode instance. |
||||||
|
*/ |
||||||
|
_audioProcessingNode: ScriptProcessorNode; |
||||||
|
|
||||||
|
/** |
||||||
|
* Buffer to hold residue PCM resulting after a ScriptProcessorNode callback |
||||||
|
*/ |
||||||
|
_bufferResidue: Float32Array; |
||||||
|
|
||||||
|
/** |
||||||
|
* State flag, check if the instance was destroyed |
||||||
|
*/ |
||||||
|
_destroyed: boolean = false; |
||||||
|
|
||||||
|
/** |
||||||
|
* The JitsiLocalTrack instance. |
||||||
|
*/ |
||||||
|
_localTrack: Object; |
||||||
|
|
||||||
|
/** |
||||||
|
* Device ID of the target microphone. |
||||||
|
*/ |
||||||
|
_micDeviceId: string; |
||||||
|
|
||||||
|
/** |
||||||
|
* Callback function that will be called by the ScriptProcessNode with raw PCM data, depending on the set sample |
||||||
|
* rate. |
||||||
|
*/ |
||||||
|
_onAudioProcess: (audioEvent: Object) => void; |
||||||
|
|
||||||
|
/** |
||||||
|
* Sample rate of the ScriptProcessorNode. |
||||||
|
*/ |
||||||
|
_procNodeSampleRate: number; |
||||||
|
|
||||||
|
/** |
||||||
|
* Rnnoise adapter that allows us to calculate VAD score for PCM samples |
||||||
|
*/ |
||||||
|
_rnnoiseProcessor: Object; |
||||||
|
|
||||||
|
/** |
||||||
|
* PCM Sample size expected by the RnnoiseProcessor instance. |
||||||
|
*/ |
||||||
|
_rnnoiseSampleSize: number; |
||||||
|
|
||||||
|
/** |
||||||
|
* Constructor. |
||||||
|
* |
||||||
|
* @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values 256, 512, 1024, |
||||||
|
* 2048, 4096, 8192, 16384. Passing other values will default to closes neighbor. |
||||||
|
* @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate VAD score |
||||||
|
* for PCM samples. |
||||||
|
* @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId. |
||||||
|
*/ |
||||||
|
constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) { |
||||||
|
super(); |
||||||
|
this._procNodeSampleRate = procNodeSampleRate; |
||||||
|
this._rnnoiseProcessor = rnnoiseProcessor; |
||||||
|
this._localTrack = jitsiLocalTrack; |
||||||
|
this._micDeviceId = jitsiLocalTrack.getDeviceId(); |
||||||
|
this._bufferResidue = new Float32Array([]); |
||||||
|
this._audioContext = new AudioContext(); |
||||||
|
this._rnnoiseSampleSize = getSampleLength(); |
||||||
|
this._onAudioProcess = this._onAudioProcess.bind(this); |
||||||
|
|
||||||
|
this._initializeAudioContext(); |
||||||
|
this._connectAudioGraph(); |
||||||
|
|
||||||
|
logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Factory method that sets up all the necessary components for the creation of the TrackVADEmitter. |
||||||
|
* |
||||||
|
* @param {string} micDeviceId - Target microphone device id. |
||||||
|
* @param {number} procNodeSampleRate - Sample rate of the proc node. |
||||||
|
* @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter. |
||||||
|
*/ |
||||||
|
static async create(micDeviceId: string, procNodeSampleRate: number) { |
||||||
|
let rnnoiseProcessor = null; |
||||||
|
let localTrack = null; |
||||||
|
|
||||||
|
try { |
||||||
|
logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`); |
||||||
|
|
||||||
|
rnnoiseProcessor = await createRnnoiseProcessorPromise(); |
||||||
|
localTrack = await JitsiMeetJS.createLocalTracks({ |
||||||
|
devices: [ 'audio' ], |
||||||
|
micDeviceId |
||||||
|
}); |
||||||
|
|
||||||
|
// We only expect one audio track when specifying a device id.
|
||||||
|
if (!localTrack[0]) { |
||||||
|
throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`); |
||||||
|
} |
||||||
|
|
||||||
|
return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]); |
||||||
|
} catch (error) { |
||||||
|
logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`); |
||||||
|
|
||||||
|
if (rnnoiseProcessor) { |
||||||
|
rnnoiseProcessor.destroy(); |
||||||
|
} |
||||||
|
|
||||||
|
if (localTrack) { |
||||||
|
localTrack.stopStream(); |
||||||
|
} |
||||||
|
|
||||||
|
throw error; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets up the audio graph in the AudioContext. |
||||||
|
* |
||||||
|
* @returns {Promise<void>} |
||||||
|
*/ |
||||||
|
_initializeAudioContext() { |
||||||
|
this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream); |
||||||
|
|
||||||
|
// TODO AudioProcessingNode is deprecated check and replace with alternative.
|
||||||
|
// We don't need stereo for determining the VAD score so we create a single chanel processing node.
|
||||||
|
this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1); |
||||||
|
this._audioProcessingNode.onaudioprocess = this._onAudioProcess; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* ScriptProcessorNode callback, the input parameters contains the PCM audio that is then sent to rnnoise. |
||||||
|
* Rnnoise only accepts PCM samples of 480 bytes whereas the webaudio processor node can't sample at a multiple |
||||||
|
* of 480 thus after each _onAudioProcess callback there will remain and PCM buffer residue equal |
||||||
|
* to _procNodeSampleRate / 480 which will be added to the next sample buffer and so on. |
||||||
|
* |
||||||
|
* @param {AudioProcessingEvent} audioEvent - Audio event. |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_onAudioProcess(audioEvent: Object) { |
||||||
|
// Prepend the residue PCM buffer from the previous process callback.
|
||||||
|
const inData = audioEvent.inputBuffer.getChannelData(0); |
||||||
|
const completeInData = [ ...this._bufferResidue, ...inData ]; |
||||||
|
const sampleTimestamp = Date.now(); |
||||||
|
|
||||||
|
let i = 0; |
||||||
|
|
||||||
|
for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) { |
||||||
|
const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize); |
||||||
|
const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample); |
||||||
|
|
||||||
|
this.emit(VAD_SCORE_PUBLISHED, { |
||||||
|
timestamp: sampleTimestamp, |
||||||
|
score: vadScore, |
||||||
|
deviceId: this._micDeviceId |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
this._bufferResidue = completeInData.slice(i, completeInData.length); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Connects the nodes in the AudioContext to start the flow of audio data. |
||||||
|
* |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_connectAudioGraph() { |
||||||
|
this._audioSource.connect(this._audioProcessingNode); |
||||||
|
this._audioProcessingNode.connect(this._audioContext.destination); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Disconnects the nodes in the AudioContext. |
||||||
|
* |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_disconnectAudioGraph() { |
||||||
|
// Even thought we disconnect the processing node it seems that some callbacks remain queued,
|
||||||
|
// resulting in calls with and uninitialized context.
|
||||||
|
// eslint-disable-next-line no-empty-function
|
||||||
|
this._audioProcessingNode.onaudioprocess = () => {}; |
||||||
|
this._audioProcessingNode.disconnect(); |
||||||
|
this._audioSource.disconnect(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Cleanup potentially acquired resources. |
||||||
|
* |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_cleanupResources() { |
||||||
|
logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`); |
||||||
|
|
||||||
|
this._disconnectAudioGraph(); |
||||||
|
this._localTrack.stopStream(); |
||||||
|
this._rnnoiseProcessor.destroy(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Destroy TrackVADEmitter instance (release resources and stop callbacks). |
||||||
|
* |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
destroy() { |
||||||
|
if (this._destroyed) { |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`); |
||||||
|
this._cleanupResources(); |
||||||
|
this._destroyed = true; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,7 @@ |
|||||||
|
// Event generated by a TrackVADEmitter when it emits a VAD score from rnnoise.
|
||||||
|
// The generated objects are of type VADScore
|
||||||
|
export const VAD_SCORE_PUBLISHED = 'vad-score-published'; |
||||||
|
|
||||||
|
// Event generated by VADReportingService when if finishes creating a VAD report for the monitored devices.
|
||||||
|
// The generated objects are of type Array<VADReportScore>, one score for each monitored device.
|
||||||
|
export const VAD_REPORT_PUBLISHED = 'vad-report-published'; |
@ -0,0 +1,284 @@ |
|||||||
|
// @flow
|
||||||
|
|
||||||
|
import EventEmitter from 'events'; |
||||||
|
import logger from './logger'; |
||||||
|
import TrackVADEmitter from './TrackVADEmitter'; |
||||||
|
import { VAD_SCORE_PUBLISHED, VAD_REPORT_PUBLISHED } from './VADEvents'; |
||||||
|
import type { VADScore } from './TrackVADEmitter'; |
||||||
|
export type { VADScore }; |
||||||
|
|
||||||
|
/** |
||||||
|
* Sample rate used by TrackVADEmitter, this value determines how often the ScriptProcessorNode is going to call the |
||||||
|
* process audio function and with what sample size. |
||||||
|
* Basically lower values mean more callbacks with lower processing times bigger values less callbacks with longer |
||||||
|
* processing times. This value is somewhere in the middle, so we strike a balance between flooding with callbacks |
||||||
|
* and processing time. Possible values 256, 512, 1024, 2048, 4096, 8192, 16384. Passing other values will default |
||||||
|
* to closes neighbor. |
||||||
|
*/ |
||||||
|
const SCRIPT_NODE_SAMPLE_RATE = 4096; |
||||||
|
|
||||||
|
/** |
||||||
|
* Context that contains the emitter and additional information about the device. |
||||||
|
*/ |
||||||
|
type VADDeviceContext = { |
||||||
|
|
||||||
|
/** |
||||||
|
* MediaDeviceInfo for associated context |
||||||
|
*/ |
||||||
|
deviceInfo: MediaDeviceInfo, |
||||||
|
|
||||||
|
/** |
||||||
|
* Array with VAD scores publish from the emitter. |
||||||
|
*/ |
||||||
|
scoreArray: Array<VADScore>, |
||||||
|
|
||||||
|
/** |
||||||
|
* TrackVADEmitter associated with media device |
||||||
|
*/ |
||||||
|
vadEmitter: TrackVADEmitter |
||||||
|
}; |
||||||
|
|
||||||
|
/** |
||||||
|
* The structure used by VADReportingService to relay a score report |
||||||
|
*/ |
||||||
|
export type VADReportScore = { |
||||||
|
|
||||||
|
/** |
||||||
|
* Device ID associated with the VAD score |
||||||
|
*/ |
||||||
|
deviceId: string, |
||||||
|
|
||||||
|
/** |
||||||
|
* The PCM score from 0 - 1 i.e. 0.60 |
||||||
|
*/ |
||||||
|
score: number, |
||||||
|
|
||||||
|
/** |
||||||
|
* Epoch time at which PCM was recorded |
||||||
|
*/ |
||||||
|
timestamp: number |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Voice activity detection reporting service. The service create TrackVADEmitters for the provided devices and |
||||||
|
* publishes an average of their VAD score over the specified interval via EventEmitter. |
||||||
|
* The service is not reusable if destroyed a new one needs to be created, i.e. when a new device is added to the system |
||||||
|
* a new service needs to be created and the old discarded. |
||||||
|
*/ |
||||||
|
export default class VADReportingService extends EventEmitter { |
||||||
|
/** |
||||||
|
* Map containing context for devices currently being monitored by the reporting service. |
||||||
|
*/ |
||||||
|
_contextMap: Map<string, VADDeviceContext>; |
||||||
|
|
||||||
|
/** |
||||||
|
* State flag, check if the instance was destroyed. |
||||||
|
*/ |
||||||
|
_destroyed: boolean = false; |
||||||
|
|
||||||
|
/** |
||||||
|
* Delay at which to publish VAD score for monitored devices. |
||||||
|
*/ |
||||||
|
_intervalDelay: number; |
||||||
|
|
||||||
|
/** |
||||||
|
* Identifier for the interval publishing stats on the set interval. |
||||||
|
*/ |
||||||
|
_intervalId: ?IntervalID; |
||||||
|
|
||||||
|
/** |
||||||
|
* Constructor. |
||||||
|
* |
||||||
|
* @param {number} intervalDelay - Delay at which to publish VAD score for monitored devices. |
||||||
|
* @param {Function} publishScoreCallBack - Function called on the specific interval with the calculated VAD score. |
||||||
|
*/ |
||||||
|
constructor(intervalDelay: number) { |
||||||
|
super(); |
||||||
|
this._contextMap = new Map(); |
||||||
|
this._intervalDelay = intervalDelay; |
||||||
|
|
||||||
|
logger.log(`Constructed VADReportingService with publish interval of: ${intervalDelay}`); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Factory methods that creates the TrackVADEmitters for the associated array of devices and instantiates |
||||||
|
* a VADReportingService. |
||||||
|
* |
||||||
|
* @param {Array<MediaDeviceInfo>} micDeviceList - Device list that is monitored inside the service. |
||||||
|
* @param {number} intervalDelay - Delay at which to publish VAD score for monitored devices. |
||||||
|
* @param {Function} publishScoreCallBack - Function called on the specific interval with the calculated VAD score. |
||||||
|
* |
||||||
|
* @returns {Promise<VADReportingService>} |
||||||
|
*/ |
||||||
|
static create(micDeviceList: Array<MediaDeviceInfo>, intervalDelay: number) { |
||||||
|
const vadReportingService = new VADReportingService(intervalDelay); |
||||||
|
const emitterPromiseArray = []; |
||||||
|
|
||||||
|
// Create a TrackVADEmitter for each provided audioinput device.
|
||||||
|
for (const micDevice of micDeviceList) { |
||||||
|
if (micDevice.kind !== 'audioinput') { |
||||||
|
logger.warn(`Provided device ${micDevice.label} -> ${micDevice.deviceId}, is not audioinput ignoring!`); |
||||||
|
|
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
logger.log(`Initializing VAD context for mic: ${micDevice.label} -> ${micDevice.deviceId}`); |
||||||
|
|
||||||
|
const emitterPromise = TrackVADEmitter.create(micDevice.deviceId, SCRIPT_NODE_SAMPLE_RATE).then(emitter => { |
||||||
|
emitter.on(VAD_SCORE_PUBLISHED, vadReportingService._devicePublishVADScore.bind(vadReportingService)); |
||||||
|
|
||||||
|
return { |
||||||
|
vadEmitter: emitter, |
||||||
|
deviceInfo: micDevice, |
||||||
|
scoreArray: [] |
||||||
|
}; |
||||||
|
}); |
||||||
|
|
||||||
|
emitterPromiseArray.push(emitterPromise); |
||||||
|
} |
||||||
|
|
||||||
|
// Once all the TrackVADEmitter promises are resolved check if all of them resolved properly if not reject
|
||||||
|
// the promise and clear the already created emitters.
|
||||||
|
// $FlowFixMe - allSettled is not part of flow prototype even though it's a valid Promise function
|
||||||
|
return Promise.allSettled(emitterPromiseArray).then(outcomeArray => { |
||||||
|
const vadContextArray = []; |
||||||
|
const rejectedEmitterPromiseArray = []; |
||||||
|
|
||||||
|
for (const outcome of outcomeArray) { |
||||||
|
if (outcome.status === 'fulfilled') { |
||||||
|
vadContextArray.push(outcome.value); |
||||||
|
} else { |
||||||
|
// Promise was rejected.
|
||||||
|
logger.error(`Create TrackVADEmitter promise failed with ${outcome.reason}`); |
||||||
|
|
||||||
|
rejectedEmitterPromiseArray.push(outcome); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Check if there were any rejected promises and clear the already created ones list.
|
||||||
|
if (rejectedEmitterPromiseArray.length > 0) { |
||||||
|
logger.error('Cleaning up remaining VADDeviceContext, due to create fail!'); |
||||||
|
|
||||||
|
for (const context of vadContextArray) { |
||||||
|
context.vadEmitter.destroy(); |
||||||
|
} |
||||||
|
|
||||||
|
// Reject create promise if one emitter failed to instantiate, we might one just ignore it,
|
||||||
|
// leaving it like this for now
|
||||||
|
throw new Error('Create VADReportingService failed due to TrackVADEmitter creation issues!'); |
||||||
|
} |
||||||
|
|
||||||
|
vadReportingService._setVADContextArray(vadContextArray); |
||||||
|
vadReportingService._startPublish(); |
||||||
|
|
||||||
|
return vadReportingService; |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Destroy TrackVADEmitters and clear the context map. |
||||||
|
* |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_clearContextMap() { |
||||||
|
for (const vadContext of this._contextMap.values()) { |
||||||
|
vadContext.vadEmitter.destroy(); |
||||||
|
} |
||||||
|
this._contextMap.clear(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Set the watched device contexts. |
||||||
|
* |
||||||
|
* @param {Array<VADDeviceContext>} vadContextArray - List of mics. |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_setVADContextArray(vadContextArray: Array<VADDeviceContext>): void { |
||||||
|
for (const vadContext of vadContextArray) { |
||||||
|
this._contextMap.set(vadContext.deviceInfo.deviceId, vadContext); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Start the setInterval reporting process. |
||||||
|
* |
||||||
|
* @returns {void}. |
||||||
|
*/ |
||||||
|
_startPublish() { |
||||||
|
logger.log('VADReportingService started publishing.'); |
||||||
|
this._intervalId = setInterval(() => { |
||||||
|
this._reportVadScore(); |
||||||
|
}, this._intervalDelay); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Function called at set interval with selected compute. The result will be published on the set callback. |
||||||
|
* |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_reportVadScore() { |
||||||
|
const vadComputeScoreArray = []; |
||||||
|
const computeTimestamp = Date.now(); |
||||||
|
|
||||||
|
// Go through each device and compute cumulated VAD score.
|
||||||
|
|
||||||
|
for (const [ deviceId, vadContext ] of this._contextMap) { |
||||||
|
const nrOfVADScores = vadContext.scoreArray.length; |
||||||
|
let vadSum = 0; |
||||||
|
|
||||||
|
vadContext.scoreArray.forEach(vadScore => { |
||||||
|
vadSum += vadScore.score; |
||||||
|
}); |
||||||
|
|
||||||
|
// TODO For now we just calculate the average score for each device, more compute algorithms will be added.
|
||||||
|
const avgVAD = vadSum / nrOfVADScores; |
||||||
|
|
||||||
|
vadContext.scoreArray = []; |
||||||
|
|
||||||
|
vadComputeScoreArray.push({ |
||||||
|
timestamp: computeTimestamp, |
||||||
|
score: avgVAD, |
||||||
|
deviceId |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
this.emit(VAD_REPORT_PUBLISHED, vadComputeScoreArray); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Callback method passed to vad emitters in order to publish their score. |
||||||
|
* |
||||||
|
* @param {VADScore} vadScore - Mic publishing the score. |
||||||
|
* @returns {void} |
||||||
|
*/ |
||||||
|
_devicePublishVADScore(vadScore: VADScore) { |
||||||
|
const context = this._contextMap.get(vadScore.deviceId); |
||||||
|
|
||||||
|
if (context) { |
||||||
|
context.scoreArray.push(vadScore); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Destroy the VADReportingService, stops the setInterval reporting, destroys the emitters and clears the map. |
||||||
|
* After this call the instance is no longer usable. |
||||||
|
* |
||||||
|
* @returns {void}. |
||||||
|
*/ |
||||||
|
destroy() { |
||||||
|
if (this._destroyed) { |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
logger.log('Destroying VADReportingService.'); |
||||||
|
|
||||||
|
if (this._intervalId) { |
||||||
|
clearInterval(this._intervalId); |
||||||
|
this._intervalId = null; |
||||||
|
} |
||||||
|
this._clearContextMap(); |
||||||
|
this._destroyed = true; |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,5 @@ |
|||||||
|
// @flow
|
||||||
|
|
||||||
|
import { getLogger } from '../base/logging/functions'; |
||||||
|
|
||||||
|
export default getLogger('features/vad-reporter'); |
Loading…
Reference in new issue