mirror of https://github.com/jitsi/jitsi-meet
feat(face-landmarks) merge face expressions and face centering (#11283)
* feat(face-landmarks) merge face expressions and face centering
* code review
* code review
* code review

pull/11311/head jitsi-meet_7156
parent 4b84f71021
commit 2c165d4313
@@ -1,39 +0,0 @@
/**
 * Redux action type dispatched in order to set the time interval in which
 * the message to the face centering worker will be sent.
 *
 * {
 *     type: SET_DETECTION_TIME_INTERVAL,
 *     time: number
 * }
 */
export const SET_DETECTION_TIME_INTERVAL = 'SET_DETECTION_TIME_INTERVAL';

/**
 * Redux action type dispatched in order to set recognition active in the state.
 *
 * {
 *     type: START_FACE_RECOGNITION
 * }
 */
export const START_FACE_RECOGNITION = 'START_FACE_RECOGNITION';

/**
 * Redux action type dispatched in order to set recognition inactive in the state.
 *
 * {
 *     type: STOP_FACE_RECOGNITION
 * }
 */
export const STOP_FACE_RECOGNITION = 'STOP_FACE_RECOGNITION';

/**
 * Redux action type dispatched in order to update coordinates of a detected face.
 *
 * {
 *     type: UPDATE_FACE_COORDINATES,
 *     faceBox: Object({ left, bottom, right, top }),
 *     participantId: string
 * }
 */
export const UPDATE_FACE_COORDINATES = 'UPDATE_FACE_COORDINATES';
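The hunk above is the removed action-types module of the face-centering feature. Note that the JSDoc for UPDATE_FACE_COORDINATES describes a participantId field, while the actions and reducer later in this diff dispatch the key as id. A small standalone sketch of what a dispatched UPDATE_FACE_COORDINATES action looks like (the action creator itself is hypothetical; the feature builds the object inline):

// Hypothetical standalone illustration, not part of the diff: building the
// UPDATE_FACE_COORDINATES action the rest of the feature consumes.
const UPDATE_FACE_COORDINATES = 'UPDATE_FACE_COORDINATES';

// faceBox values are percentages of the video width/height (see the worker below).
function updateFaceCoordinates(id, faceBox) {
    return {
        type: UPDATE_FACE_COORDINATES,
        faceBox,
        id
    };
}

// Example payload: a face roughly centered in the frame.
console.log(updateFaceCoordinates('participant-1', {
    left: 40,
    right: 60,
    top: 35,
    bottom: 65
}));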
@@ -1,139 +0,0 @@
import 'image-capture';

import { getCurrentConference } from '../base/conference';
import { getLocalParticipant, getParticipantCount } from '../base/participants';
import { getLocalVideoTrack } from '../base/tracks';
import { getBaseUrl } from '../base/util';
import '../facial-recognition/createImageBitmap';

import {
    START_FACE_RECOGNITION,
    STOP_FACE_RECOGNITION,
    UPDATE_FACE_COORDINATES
} from './actionTypes';
import {
    FACE_BOX_MESSAGE,
    SEND_IMAGE_INTERVAL_MS
} from './constants';
import { sendDataToWorker, sendFaceBoxToParticipants } from './functions';
import logger from './logger';

/**
 * Interval object for sending new image data to worker.
 */
let interval;

/**
 * Object containing an image capture of the local track.
 */
let imageCapture;

/**
 * Object where the face centering worker is stored.
 */
let worker;

/**
 * Loads the worker.
 *
 * @returns {Function}
 */
export function loadWorker() {
    return async function(dispatch: Function, getState: Function) {
        if (navigator.product === 'ReactNative') {
            logger.warn('Unsupported environment for face centering');

            return;
        }

        const baseUrl = getBaseUrl();
        let workerUrl = `${baseUrl}libs/face-centering-worker.min.js`;

        const workerBlob = new Blob([ `importScripts("${workerUrl}");` ], { type: 'application/javascript' });

        workerUrl = window.URL.createObjectURL(workerBlob);
        worker = new Worker(workerUrl, { name: 'Face Centering Worker' });
        worker.onmessage = function(e: Object) {
            const { type, value } = e.data;

            // receives a message with the face(s) bounding box.
            if (type === FACE_BOX_MESSAGE) {
                const state = getState();
                const conference = getCurrentConference(state);
                const localParticipant = getLocalParticipant(state);

                if (getParticipantCount(state) > 1) {
                    sendFaceBoxToParticipants(conference, value);
                }

                dispatch({
                    type: UPDATE_FACE_COORDINATES,
                    faceBox: value,
                    id: localParticipant.id
                });
            }
        };

        dispatch(startFaceRecognition());
    };
}

/**
 * Starts the recognition and detection of face position.
 *
 * @param {Track | undefined} track - Track for which to start detecting faces.
 *
 * @returns {Function}
 */
export function startFaceRecognition(track) {
    return async function(dispatch: Function, getState: Function) {
        if (!worker) {
            return;
        }
        const state = getState();
        const { recognitionActive } = state['features/face-centering'];

        if (recognitionActive) {
            logger.log('Face centering already active.');

            return;
        }

        const localVideoTrack = track || getLocalVideoTrack(state['features/base/tracks']);

        if (!localVideoTrack) {
            logger.warn('Face centering is disabled due to missing local track.');

            return;
        }

        dispatch({ type: START_FACE_RECOGNITION });
        logger.log('Start face recognition');

        const stream = localVideoTrack.jitsiTrack.getOriginalStream();
        const firstVideoTrack = stream.getVideoTracks()[0];

        imageCapture = new ImageCapture(firstVideoTrack);
        const { disableLocalVideoFlip, faceCoordinatesSharing } = state['features/base/config'];

        interval = setInterval(() => {
            sendDataToWorker(worker, imageCapture, faceCoordinatesSharing?.threshold, !disableLocalVideoFlip);
        }, faceCoordinatesSharing?.captureInterval || SEND_IMAGE_INTERVAL_MS);
    };
}

/**
 * Stops the recognition and detection of face position.
 *
 * @returns {Function}
 */
export function stopFaceRecognition() {
    return function(dispatch: Function) {
        clearInterval(interval);
        interval = null;
        imageCapture = null;

        dispatch({ type: STOP_FACE_RECOGNITION });
        logger.log('Stop face recognition');
    };
}
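The removed actions module above drives the whole pipeline: loadWorker() creates the worker from a Blob that simply calls importScripts() on the bundled worker file served under libs/, startFaceRecognition() grabs the local camera track through ImageCapture and ships a frame to the worker on every interval tick, and stopFaceRecognition() tears the interval down. A minimal, generic sketch of the Blob plus importScripts worker-loading pattern follows (the helper name and URL are illustrative, not part of the diff):

// Minimal sketch of the worker-loading pattern used by loadWorker(), assuming a
// worker script hosted at `${baseUrl}libs/face-centering-worker.min.js`.
function createWorkerFromUrl(scriptUrl, name) {
    // Wrapping the remote script in importScripts() lets the Worker be created
    // from a same-origin blob: URL even when the script itself is served from
    // a CDN or another origin, which the Worker constructor would reject.
    const blob = new Blob([ `importScripts("${scriptUrl}");` ], { type: 'application/javascript' });
    const blobUrl = window.URL.createObjectURL(blob);

    return new Worker(blobUrl, { name });
}

// Usage (hypothetical deployment URL):
// const worker = createWorkerFromUrl(
//     'https://meet.example.com/libs/face-centering-worker.min.js',
//     'Face Centering Worker');
// worker.onmessage = e => console.log(e.data);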
@@ -1,20 +0,0 @@
/**
 * Type of message sent from the main thread to the worker that contains image data and
 * will trigger a response message from the worker containing the detected face(s) bounding box if any.
 */
export const DETECT_FACE_BOX = 'DETECT_FACE_BOX';

/**
 * Type of event sent on the data channel.
 */
export const FACE_BOX_EVENT_TYPE = 'face-box';

/**
 * Type of message sent from the worker to the main thread that contains a face box or undefined.
 */
export const FACE_BOX_MESSAGE = 'face-box';

/**
 * Milliseconds interval value for sending new image data to the worker.
 */
export const SEND_IMAGE_INTERVAL_MS = 100;
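Taken together, these constants define a two-message protocol: the main thread posts a DETECT_FACE_BOX request carrying a frame roughly every SEND_IMAGE_INTERVAL_MS (100 ms), and the worker answers with a FACE_BOX_MESSAGE whose value holds percentage coordinates. A compressed, illustrative sketch of that round trip (worker creation and frame capture omitted; constants inlined instead of imported):

// Illustrative round trip between the main thread and the worker, using the
// message types defined above.
const DETECT_FACE_BOX = 'DETECT_FACE_BOX';
const FACE_BOX_MESSAGE = 'face-box';

// Main thread: ask the worker to run detection on one captured frame.
function requestDetection(worker, image, baseUrl) {
    worker.postMessage({
        id: DETECT_FACE_BOX,
        baseUrl,
        image,
        threshold: 10,
        isHorizontallyFlipped: true
    });
}

// Main thread: handle the worker's reply.
function handleWorkerMessage(e) {
    const { type, value } = e.data;

    if (type === FACE_BOX_MESSAGE) {
        // value is { left, right, top, bottom } in percentages of the frame.
        console.log('face box', value);
    }
}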
@@ -1,107 +0,0 @@
import * as blazeface from '@tensorflow-models/blazeface';
import { setWasmPaths } from '@tensorflow/tfjs-backend-wasm';
import * as tf from '@tensorflow/tfjs-core';

import { FACE_BOX_MESSAGE, DETECT_FACE_BOX } from './constants';

/**
 * Indicates whether an init error occurred.
 */
let initError = false;

/**
 * The blazeface model.
 */
let model;

/**
 * A flag that indicates whether the tensorflow backend is set or not.
 */
let backendSet = false;

/**
 * Flag for indicating whether an init operation (e.g. setting the tf backend) is in progress.
 */
let initInProgress = false;

/**
 * Callbacks queue for avoiding overlapping executions of face detection.
 */
const queue = [];

/**
 * Contains the last valid face bounding box (passes threshold validation) which was sent to the main process.
 */
let lastValidFaceBox;

const detect = async message => {
    const { baseUrl, image, isHorizontallyFlipped, threshold } = message.data;

    if (initInProgress || initError) {
        return;
    }

    if (!backendSet) {
        initInProgress = true;
        setWasmPaths(`${baseUrl}libs/`);

        try {
            await tf.setBackend('wasm');
        } catch (err) {
            initError = true;

            return;
        }

        backendSet = true;
        initInProgress = false;
    }

    // load face detection model
    if (!model) {
        try {
            model = await blazeface.load();
        } catch (err) {
            initError = true;

            return;
        }
    }

    tf.engine().startScope();

    const imageTensor = tf.browser.fromPixels(image);
    const detections = await model.estimateFaces(imageTensor, false, isHorizontallyFlipped, false);

    tf.engine().endScope();

    let faceBox;

    if (detections.length) {
        faceBox = {
            // normalize to percentage based
            left: Math.round(Math.min(...detections.map(d => d.topLeft[0])) * 100 / image.width),
            right: Math.round(Math.max(...detections.map(d => d.bottomRight[0])) * 100 / image.width),
            top: Math.round(Math.min(...detections.map(d => d.topLeft[1])) * 100 / image.height),
            bottom: Math.round(Math.max(...detections.map(d => d.bottomRight[1])) * 100 / image.height)
        };

        if (lastValidFaceBox && Math.abs(lastValidFaceBox.left - faceBox.left) < threshold) {
            return;
        }

        lastValidFaceBox = faceBox;

        self.postMessage({
            type: FACE_BOX_MESSAGE,
            value: faceBox
        });
    }
};

onmessage = function(message) {
    if (message.data.id === DETECT_FACE_BOX) {
        queue.push(() => detect(message));
        queue.shift()();
    }
};
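The removed worker above lazily initializes the tfjs WASM backend and the blazeface model, then, for each frame, merges all detections into a single bounding box, converts it from pixels to percentages of the frame, and only posts it back when the left edge has moved by at least threshold percent since the last accepted box. (The push/shift pair in onmessage runs each detection immediately rather than queueing across frames.) Below is a standalone, worked version of just the normalization step, assuming blazeface-style detections with topLeft and bottomRight pixel pairs:

// Standalone sketch of the normalization done in detect(): merge all detections
// into one box and express its edges as percentages of the frame size.
function normalizeFaceBox(detections, width, height) {
    const left = Math.round(Math.min(...detections.map(d => d.topLeft[0])) * 100 / width);
    const right = Math.round(Math.max(...detections.map(d => d.bottomRight[0])) * 100 / width);
    const top = Math.round(Math.min(...detections.map(d => d.topLeft[1])) * 100 / height);
    const bottom = Math.round(Math.max(...detections.map(d => d.bottomRight[1])) * 100 / height);

    return { left, right, top, bottom };
}

// A 1280x720 frame with one face detected between x 400-700 and y 150-500
// yields { left: 31, right: 55, top: 21, bottom: 69 }.
console.log(normalizeFaceBox(
    [ { topLeft: [ 400, 150 ], bottomRight: [ 700, 500 ] } ], 1280, 720));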
@@ -1,112 +0,0 @@
import { getBaseUrl } from '../base/util';

import { FACE_BOX_EVENT_TYPE, DETECT_FACE_BOX } from './constants';
import logger from './logger';

/**
 * Sends the face box to all the other participants.
 *
 * @param {Object} conference - The current conference.
 * @param {Object} faceBox - Face box to be sent.
 * @returns {void}
 */
export function sendFaceBoxToParticipants(
        conference: Object,
        faceBox: Object
): void {
    try {
        conference.sendEndpointMessage('', {
            type: FACE_BOX_EVENT_TYPE,
            faceBox
        });
    } catch (err) {
        logger.warn('Could not broadcast the face box to the other participants', err);
    }
}

/**
 * Sends the image data from the track in the image capture (via a canvas when needed) to the face centering worker.
 *
 * @param {Worker} worker - Face centering worker.
 * @param {Object} imageCapture - Image capture that contains the current track.
 * @param {number} threshold - Movement threshold as percentage for sharing face coordinates.
 * @param {boolean} isHorizontallyFlipped - Indicates whether the image is horizontally flipped.
 * @returns {Promise<void>}
 */
export async function sendDataToWorker(
        worker: Worker,
        imageCapture: Object,
        threshold: number = 10,
        isHorizontallyFlipped = true
): Promise<void> {
    if (imageCapture === null || imageCapture === undefined) {
        return;
    }

    let imageBitmap;
    let image;

    try {
        imageBitmap = await imageCapture.grabFrame();
    } catch (err) {
        logger.warn(err);

        return;
    }

    if (typeof OffscreenCanvas === 'undefined') {
        const canvas = document.createElement('canvas');
        const context = canvas.getContext('2d');

        canvas.width = imageBitmap.width;
        canvas.height = imageBitmap.height;
        context.drawImage(imageBitmap, 0, 0);

        image = context.getImageData(0, 0, imageBitmap.width, imageBitmap.height);
    } else {
        image = imageBitmap;
    }

    worker.postMessage({
        id: DETECT_FACE_BOX,
        baseUrl: getBaseUrl(),
        image,
        threshold,
        isHorizontallyFlipped
    });

    imageBitmap.close();
}

/**
 * Gets the face box for a participant id.
 *
 * @param {string} id - The participant id.
 * @param {Object} state - The redux state.
 * @returns {Object}
 */
export function getFaceBoxForId(id: string, state: Object) {
    return state['features/face-centering'].faceBoxes[id];
}

/**
 * Gets the video object position for a participant id.
 *
 * @param {Object} state - The redux state.
 * @param {string} id - The participant id.
 * @returns {string} - CSS object-position in the shape of '{horizontalPercentage}% {verticalPercentage}%'.
 */
export function getVideoObjectPosition(state: Object, id: string) {
    const faceBox = getFaceBoxForId(id, state);

    if (faceBox) {
        const { left, right, top, bottom } = faceBox;

        const horizontalPos = 100 - Math.round((left + right) / 2, 100);
        const verticalPos = 100 - Math.round((top + bottom) / 2, 100);

        return `${horizontalPos}% ${verticalPos}%`;
    }

    return '50% 50%';
}
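getVideoObjectPosition() above turns a shared face box into a CSS object-position value so the rendered video is panned until the face midpoint sits at the center of the tile. Note that Math.round() accepts a single argument, so the trailing ", 100" in those calls has no effect on the result. An illustrative variant of the same calculation with an explicit clamp (a sketch, not the shipped code):

// Illustrative variant of getVideoObjectPosition() with an explicit 0-100 clamp,
// since Math.round() takes one argument and silently ignores the `, 100`.
function faceBoxToObjectPosition(faceBox) {
    if (!faceBox) {
        return '50% 50%';
    }

    const clamp = value => Math.min(Math.max(value, 0), 100);
    const horizontalPos = 100 - clamp(Math.round((faceBox.left + faceBox.right) / 2));
    const verticalPos = 100 - clamp(Math.round((faceBox.top + faceBox.bottom) / 2));

    return `${horizontalPos}% ${verticalPos}%`;
}

// A face box centered at 50%/50% keeps the default centering:
console.log(faceBoxToObjectPosition({ left: 40, right: 60, top: 35, bottom: 65 })); // '50% 50%'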
@@ -1,3 +0,0 @@
import { getLogger } from '../base/logging/functions';

export default getLogger('features/face-centering');
@@ -1,103 +0,0 @@
import {
    CONFERENCE_JOINED,
    CONFERENCE_WILL_LEAVE,
    getCurrentConference
} from '../base/conference';
import { JitsiConferenceEvents } from '../base/lib-jitsi-meet';
import { MiddlewareRegistry } from '../base/redux';
import { TRACK_UPDATED, TRACK_REMOVED, TRACK_ADDED } from '../base/tracks';

import { UPDATE_FACE_COORDINATES } from './actionTypes';
import {
    loadWorker,
    stopFaceRecognition,
    startFaceRecognition
} from './actions';
import { FACE_BOX_EVENT_TYPE } from './constants';

MiddlewareRegistry.register(({ dispatch, getState }) => next => action => {
    const state = getState();
    const { faceCoordinatesSharing } = state['features/base/config'];

    if (!getCurrentConference(state)) {
        return next(action);
    }

    if (action.type === CONFERENCE_JOINED) {
        if (faceCoordinatesSharing?.enabled) {
            dispatch(loadWorker());
        }

        // allow using remote face centering data when local face centering is not enabled
        action.conference.on(
            JitsiConferenceEvents.ENDPOINT_MESSAGE_RECEIVED,
            (participant, eventData) => {
                if (!participant || !eventData) {
                    return;
                }

                if (eventData.type === FACE_BOX_EVENT_TYPE) {
                    dispatch({
                        type: UPDATE_FACE_COORDINATES,
                        faceBox: eventData.faceBox,
                        id: participant.getId()
                    });
                }
            });

        return next(action);
    }

    if (!faceCoordinatesSharing?.enabled) {
        return next(action);
    }

    switch (action.type) {
    case CONFERENCE_WILL_LEAVE : {
        dispatch(stopFaceRecognition());

        return next(action);
    }
    case TRACK_ADDED: {
        const { jitsiTrack: { isLocal, videoType } } = action.track;

        if (videoType === 'camera' && isLocal()) {
            // need to pass this since the track is not yet added in the store
            dispatch(startFaceRecognition(action.track));
        }

        return next(action);
    }
    case TRACK_UPDATED: {
        const { jitsiTrack: { isLocal, videoType } } = action.track;

        if (videoType !== 'camera' || !isLocal()) {
            return next(action);
        }

        const { muted } = action.track;

        if (muted !== undefined) {
            // addresses video mute state changes
            if (muted) {
                dispatch(stopFaceRecognition());
            } else {
                dispatch(startFaceRecognition());
            }
        }

        return next(action);
    }
    case TRACK_REMOVED: {
        const { jitsiTrack: { isLocal, videoType } } = action.track;

        if (videoType === 'camera' && isLocal()) {
            dispatch(stopFaceRecognition());
        }

        return next(action);
    }
    }

    return next(action);
});
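The middleware above gates everything on the faceCoordinatesSharing section of the config: the worker is only loaded when it is enabled, its threshold and captureInterval values feed the capture loop, and track mute/add/remove changes start and stop detection. A hypothetical config.js fragment showing the knobs this diff reads (field names come from the code above; the values are examples, not documented defaults):

// Hypothetical config.js fragment enabling face centering.
var config = {
    // When true, the local self-view is not mirrored; the actions pass the
    // inverse of this flag to the worker as isHorizontallyFlipped.
    disableLocalVideoFlip: false,

    faceCoordinatesSharing: {
        // Master switch checked by the middleware before loading the worker.
        enabled: true,

        // Minimum horizontal movement (percent of the frame width) of the face
        // box before a new box is broadcast; forwarded to the worker as threshold.
        threshold: 10,

        // How often (ms) a frame is captured and sent to the worker; when unset
        // the code falls back to SEND_IMAGE_INTERVAL_MS (100 ms).
        captureInterval: 100
    }
};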
@@ -1,55 +0,0 @@
import { ReducerRegistry } from '../base/redux';

import {
    START_FACE_RECOGNITION,
    STOP_FACE_RECOGNITION,
    UPDATE_FACE_COORDINATES
} from './actionTypes';

/**
 * The default state object.
 */
const defaultState = {
    /**
     * Map of participant ids containing their respective face box as left, right, bottom and top percentages.
     * The percentages indicate the distance of the detected face starting edge (top or left) to the
     * corresponding edge.
     *
     * Examples:
     * 70% left indicates a 70% distance from the left edge of the video to the left edge of the detected face.
     * 70% right indicates a 70% distance from the right edge of the video to the left edge of the detected face.
     * 30% top indicates a 30% distance from the top edge of the video to the top edge of the detected face.
     * 30% bottom indicates a 30% distance from the bottom edge of the video to the top edge of the detected face.
     */
    faceBoxes: {},

    /**
     * Flag indicating whether face recognition is currently running.
     */
    recognitionActive: false
};

ReducerRegistry.register('features/face-centering', (state = defaultState, action) => {
    switch (action.type) {
    case UPDATE_FACE_COORDINATES: {
        return {
            ...state,
            faceBoxes: {
                ...state.faceBoxes,
                [action.id]: action.faceBox
            }
        };
    }
    case START_FACE_RECOGNITION: {
        return {
            ...state,
            recognitionActive: true
        };
    }

    case STOP_FACE_RECOGNITION: {
        return defaultState;
    }
    }

    return state;
});
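Finally, the removed reducer keeps one face box per participant id and resets the whole slice on STOP_FACE_RECOGNITION. A short standalone trace of those transitions, using a plain function in place of ReducerRegistry (illustrative only):

// Standalone trace of the reducer's behaviour; plain function, no ReducerRegistry.
const defaultState = { faceBoxes: {}, recognitionActive: false };

function faceCenteringReducer(state = defaultState, action) {
    switch (action.type) {
    case 'UPDATE_FACE_COORDINATES':
        return { ...state, faceBoxes: { ...state.faceBoxes, [action.id]: action.faceBox } };
    case 'START_FACE_RECOGNITION':
        return { ...state, recognitionActive: true };
    case 'STOP_FACE_RECOGNITION':
        return defaultState;
    default:
        return state;
    }
}

let state = faceCenteringReducer(undefined, { type: 'START_FACE_RECOGNITION' });

state = faceCenteringReducer(state, {
    type: 'UPDATE_FACE_COORDINATES',
    id: 'participant-1',
    faceBox: { left: 40, right: 60, top: 35, bottom: 65 }
});
console.log(state.faceBoxes['participant-1']); // { left: 40, right: 60, top: 35, bottom: 65 }

state = faceCenteringReducer(state, { type: 'STOP_FACE_RECOGNITION' });
console.log(state); // back to the default state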