feat(SpeechToText): Add SpeechToText provider API

Signed-off-by: Marcel Klehr <mklehr@gmx.net>
pull/37674/head
Marcel Klehr 3 years ago
parent c6645cbc46
commit 317521b607
  1. 22
      lib/private/AppFramework/Bootstrap/RegistrationContext.php
  2. 4
      lib/private/Server.php
  3. 107
      lib/private/SpeechToText/SpeechToTextManager.php
  4. 47
      lib/private/SpeechToText/TranscriptionJob.php
  5. 11
      lib/public/AppFramework/Bootstrap/IRegistrationContext.php
  6. 74
      lib/public/SpeechToText/Events/TranscriptionFinishedEvent.php
  7. 62
      lib/public/SpeechToText/ISpeechToTextManager.php
  8. 45
      lib/public/SpeechToText/ISpeechToTextProvider.php

@ -33,6 +33,7 @@ use Closure;
use OCP\Calendar\Resource\IBackend as IResourceBackend;
use OCP\Calendar\Room\IBackend as IRoomBackend;
use OCP\Collaboration\Reference\IReferenceProvider;
use OCP\SpeechToText\ISpeechToTextProvider;
use OCP\Talk\ITalkBackend;
use OCP\Translation\ITranslationProvider;
use RuntimeException;
@ -111,6 +112,9 @@ class RegistrationContext {
/** @var ServiceRegistration<IHandler>[] */
private $wellKnownHandlers = [];
/** @var ServiceRegistration<ISpeechToTextProvider>[] */
private $speechToTextProviders = [];
/** @var ServiceRegistration<ICustomTemplateProvider>[] */
private $templateProviders = [];
@ -252,6 +256,13 @@ class RegistrationContext {
);
}
/**
 * Hands the provider class over to the shared registration context,
 * tagged with the id of the app this wrapper was created for.
 */
public function registerSpeechToTextProvider(string $providerClass): void {
	$this->context->registerSpeechToTextProvider($this->appId, $providerClass);
}
public function registerTemplateProvider(string $providerClass): void {
$this->context->registerTemplateProvider(
$this->appId,
@ -414,6 +425,10 @@ class RegistrationContext {
$this->wellKnownHandlers[] = new ServiceRegistration($appId, $class);
}
/**
 * Remembers a SpeechToText provider class on behalf of the given app.
 *
 * @psalm-param class-string<ISpeechToTextProvider> $class
 */
public function registerSpeechToTextProvider(string $appId, string $class): void {
	$registration = new ServiceRegistration($appId, $class);
	$this->speechToTextProviders[] = $registration;
}
/**
 * Remembers a custom template provider class on behalf of the given app.
 */
public function registerTemplateProvider(string $appId, string $class): void {
	$registration = new ServiceRegistration($appId, $class);
	$this->templateProviders[] = $registration;
}
@ -685,6 +700,13 @@ class RegistrationContext {
return $this->wellKnownHandlers;
}
/**
* Returns the SpeechToText provider registrations collected so far,
* in the order in which they were registered.
*
* @return ServiceRegistration<ISpeechToTextProvider>[]
*/
public function getSpeechToTextProviders(): array {
return $this->speechToTextProviders;
}
/**
* @return ServiceRegistration<ICustomTemplateProvider>[]
*/

@ -148,6 +148,7 @@ use OC\Security\VerificationToken\VerificationToken;
use OC\Session\CryptoWrapper;
use OC\Share20\ProviderFactory;
use OC\Share20\ShareHelper;
use OC\SpeechToText\SpeechToTextManager;
use OC\SystemTag\ManagerFactory as SystemTagManagerFactory;
use OC\Tagging\TagMapper;
use OC\Talk\Broker;
@ -246,6 +247,7 @@ use OCP\Security\ISecureRandom;
use OCP\Security\ITrustedDomainHelper;
use OCP\Security\VerificationToken\IVerificationToken;
use OCP\Share\IShareHelper;
use OCP\SpeechToText\ISpeechToTextManager;
use OCP\SystemTag\ISystemTagManager;
use OCP\SystemTag\ISystemTagObjectMapper;
use OCP\Talk\IBroker;
@ -1457,6 +1459,8 @@ class Server extends ServerContainer implements IServerContainer {
$this->registerAlias(ITranslationManager::class, TranslationManager::class);
$this->registerAlias(ISpeechToTextManager::class, SpeechToTextManager::class);
$this->connectDispatcher();
}

@ -0,0 +1,107 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Julius Härtl <jus@bitgrid.net>
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Julius Härtl <jus@bitgrid.net>
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OC\SpeechToText;
use InvalidArgumentException;
use OC\AppFramework\Bootstrap\Coordinator;
use OCP\BackgroundJob\IJobList;
use OCP\IServerContainer;
use OCP\PreConditionNotMetException;
use OCP\SpeechToText\ISpeechToTextManager;
use OCP\SpeechToText\ISpeechToTextProvider;
use Psr\Container\ContainerExceptionInterface;
use Psr\Container\NotFoundExceptionInterface;
use Psr\Log\LoggerInterface;
use Throwable;
/**
 * Default implementation of the SpeechToText manager.
 *
 * Resolves the provider classes that apps registered during bootstrap and
 * delegates transcription requests to the first provider that could be loaded.
 */
class SpeechToTextManager implements ISpeechToTextManager {
	/** @var ?ISpeechToTextProvider[] Resolved providers keyed by class name; null until they were resolved once */
	private ?array $providers = null;

	public function __construct(
		private IServerContainer $serverContainer,
		private Coordinator $coordinator,
		private LoggerInterface $logger,
		private IJobList $jobList,
	) {
	}

	/**
	 * Instantiates (once) and returns all registered SpeechToText providers.
	 *
	 * While no registration context is available an empty list is returned
	 * and nothing is cached, so a later call can still pick up providers.
	 * Providers that cannot be loaded from the container are logged and skipped.
	 *
	 * @return ISpeechToTextProvider[] Providers keyed by their class name
	 */
	public function getProviders(): array {
		$context = $this->coordinator->getRegistrationContext();
		if ($context === null) {
			return [];
		}

		if ($this->providers !== null) {
			return $this->providers;
		}

		$this->providers = [];

		foreach ($context->getSpeechToTextProviders() as $providerRegistration) {
			$class = $providerRegistration->getService();
			try {
				$this->providers[$class] = $this->serverContainer->get($class);
			} catch (Throwable $e) {
				// Throwable already covers the PSR container exceptions; one broken
				// provider must not prevent the remaining ones from being loaded.
				$this->logger->error('Failed to load SpeechToText provider ' . $class, [
					'exception' => $e
				]);
			}
		}

		return $this->providers;
	}

	/**
	 * Tells whether at least one SpeechToText provider has been registered.
	 *
	 * Only the registrations are inspected; the providers are not instantiated.
	 */
	public function hasProviders(): bool {
		$context = $this->coordinator->getRegistrationContext();
		if ($context === null) {
			return false;
		}

		// Must look at the SpeechToText registrations, not the translation ones.
		return $context->getSpeechToTextProviders() !== [];
	}

	/**
	 * Queues a background job that transcribes the file at $path.
	 *
	 * @param string $path Path of the file to transcribe
	 * @param array $context Caller-defined data, stored with the job arguments
	 * @throws PreConditionNotMetException If no provider has been registered
	 */
	public function scheduleFileTranscription(string $path, array $context): void {
		if (!$this->hasProviders()) {
			throw new PreConditionNotMetException('No SpeechToText providers have been registered');
		}

		$this->jobList->add(TranscriptionJob::class, [ 'path' => $path, 'context' => $context]);
	}

	/**
	 * Transcribes the file at $path synchronously using the first loaded provider.
	 *
	 * @param string $path Path of the file to transcribe
	 * @return string The transcription returned by the provider
	 * @throws PreConditionNotMetException If no provider could be loaded
	 * @throws InvalidArgumentException If the file does not exist
	 */
	public function transcribeFile(string $path): string {
		$providers = $this->getProviders();
		// current() yields false for an empty list
		$provider = current($providers);
		if (!$provider) {
			throw new PreConditionNotMetException('No SpeechToText providers have been registered');
		}

		if (!file_exists($path)) {
			throw new InvalidArgumentException('File does not exist');
		}

		return $provider->transcribeFile($path);
	}
}

@ -0,0 +1,47 @@
<?php
namespace OC\SpeechToText;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\BackgroundJob\QueuedJob;
use OCP\EventDispatcher\IEventDispatcher;
use OCP\PreConditionNotMetException;
use OCP\SpeechToText\Events\TranscriptionFinishedEvent;
use OCP\SpeechToText\ISpeechToTextManager;
/**
 * Queued job that transcribes a single file and reports the outcome
 * through a TranscriptionFinishedEvent.
 */
class TranscriptionJob extends QueuedJob {
	public function __construct(
		ITimeFactory $timeFactory,
		private ISpeechToTextManager $speechToTextManager,
		private IEventDispatcher $eventDispatcher,
	) {
		parent::__construct($timeFactory);
	}

	/**
	 * @inheritDoc
	 */
	protected function run($argument) {
		try {
			$transcription = $this->speechToTextManager->transcribeFile($argument['path']);
			$this->dispatchOutcome(true, $transcription, '', $argument['context']);
		} catch (PreConditionNotMetException|\RuntimeException|\InvalidArgumentException $failure) {
			// Failures are reported to listeners instead of being rethrown
			$this->dispatchOutcome(false, '', $failure->getMessage(), $argument['context']);
		}
	}

	/**
	 * Wraps the outcome in a TranscriptionFinishedEvent and dispatches it.
	 */
	private function dispatchOutcome(bool $successful, string $transcription, string $errorMessage, array $context): void {
		$event = new TranscriptionFinishedEvent(
			$successful,
			$transcription,
			$errorMessage,
			$context
		);
		$this->eventDispatcher->dispatchTyped($event);
	}
}

@ -39,6 +39,7 @@ use OCP\Files\Template\ICustomTemplateProvider;
use OCP\IContainer;
use OCP\Notification\INotifier;
use OCP\Preview\IProviderV2;
use OCP\SpeechToText\ISpeechToTextProvider;
use OCP\Translation\ITranslationProvider;
/**
@ -208,6 +209,16 @@ interface IRegistrationContext {
*/
public function registerWellKnownHandler(string $class): void;
/**
* Register a custom SpeechToText provider class that can provide transcription
* of audio through the OCP\SpeechToText APIs
*
* @param string $providerClass
* @psalm-param class-string<ISpeechToTextProvider> $providerClass
* @since 27.0.0
*/
public function registerSpeechToTextProvider(string $providerClass): void;
/**
* Register a custom template provider class that is able to inject custom templates
* in addition to the user defined ones

@ -0,0 +1,74 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
namespace OCP\SpeechToText\Events;
use OCP\EventDispatcher\Event;
/**
 * Event carrying the outcome of a transcription that was scheduled in the background.
 *
 * @since 27.0.0
 */
class TranscriptionFinishedEvent extends Event {
	/**
	 * @param bool $successful Whether the transcription succeeded
	 * @param string $transcription The transcribed text
	 * @param string $errorMessage Description of what went wrong
	 * @param array $context The context array supplied when the transcription was scheduled
	 * @since 27.0.0
	 */
	public function __construct(
		private bool $successful,
		private string $transcription,
		private string $errorMessage,
		private array $context,
	) {
		parent::__construct();
	}

	/**
	 * @return bool Whether the transcription succeeded
	 * @since 27.0.0
	 */
	public function isSuccessful(): bool {
		return $this->successful;
	}

	/**
	 * @return string The transcribed text
	 * @since 27.0.0
	 */
	public function getTranscription(): string {
		return $this->transcription;
	}

	/**
	 * @return string Description of what went wrong
	 * @since 27.0.0
	 */
	public function getErrorMessage(): string {
		return $this->errorMessage;
	}

	/**
	 * @return array The context array supplied when the transcription was scheduled
	 * @since 27.0.0
	 */
	public function getContext(): array {
		return $this->context;
	}
}

@ -0,0 +1,62 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCP\SpeechToText;
use InvalidArgumentException;
use OCP\PreConditionNotMetException;
use RuntimeException;
/**
* Entry point for apps that want audio files transcribed by a registered
* SpeechToText provider.
*
* @since 27.0.0
*/
interface ISpeechToTextManager {
/**
* Check whether any SpeechToText providers have been registered
*
* @return bool
* @since 27.0.0
*/
public function hasProviders(): bool;
/**
* Will schedule a transcription process in the background. The result will become available
* with the \OCP\SpeechToText\Events\TranscriptionFinishedEvent
* You should add context information to the context array to re-identify the transcription result
* as belonging to your transcription request.
*
* @param string $path Path of the file to transcribe
* @param array $context Context information that is handed back with the finished event
* @since 27.0.0
* @throws PreConditionNotMetException If no provider was registered but this method was still called
* @throws InvalidArgumentException If the file could not be found or is not of a supported type
* @throws RuntimeException If the transcription failed for other reasons
*/
public function scheduleFileTranscription(string $path, array $context): void;
/**
* Transcribe the file at the given path and return the transcription
*
* @param string $path Path of the file to transcribe
* @return string The transcription
* @since 27.0.0
* @throws PreConditionNotMetException If no provider was registered but this method was still called
* @throws InvalidArgumentException If the file could not be found or is not of a supported type
* @throws RuntimeException If the transcription failed for other reasons
*/
public function transcribeFile(string $path) : string;
}

@ -0,0 +1,45 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2022 Marcel Klehr <mklehr@gmx.net>
*
* @author Marcel Klehr <mklehr@gmx.net>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCP\SpeechToText;
use RuntimeException;
/**
* Implemented by apps that can transcribe audio files; register the class
* through IRegistrationContext::registerSpeechToTextProvider().
*
* @since 27.0.0
*/
interface ISpeechToTextProvider {
/**
* The name of this provider
*
* @return string
* @since 27.0.0
*/
public function getName(): string;
/**
* Transcribe the file at the given path
*
* @param string $path Path of the file to transcribe
* @return string The transcription
* @since 27.0.0
* @throws RuntimeException If the text could not be transcribed
*/
public function transcribeFile(string $path): string;
}
Loading…
Cancel
Save