-
-
Notifications
You must be signed in to change notification settings - Fork 86
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(language-detection): add mediapipe language detection
- Loading branch information
Showing
16 changed files
with
189 additions
and
72 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
src/app/modules/translate/language-detection/cld3.service.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import {Injectable} from '@angular/core'; | ||
import {LanguageIdentifier} from 'cld3-asm'; | ||
import {GoogleAnalyticsService} from '../../../core/modules/google-analytics/google-analytics.service'; | ||
import {TranslationService} from '../translate.service'; | ||
import {LanguageDetectionService} from './language-detection.service'; | ||
|
||
/**
 * Spoken-language detection backed by the `cld3-asm` package.
 *
 * The library is imported lazily by `init()`. Until `init()` has resolved,
 * `detectSpokenLanguage()` returns whatever `languageCode(null)` yields.
 * Every step is wrapped in `GoogleAnalyticsService.trace` under the
 * `language-detector` category.
 */
@Injectable({
  providedIn: 'root',
})
export class CLD3LanguageDetectionService extends LanguageDetectionService {
  /** Identifier instance; stays undefined until `init()` has resolved. */
  private cld?: LanguageIdentifier;

  /** In-flight initialisation, shared by overlapping `init()` calls. */
  private pendingInit?: Promise<void>;

  constructor(private ga: GoogleAnalyticsService, translationService: TranslationService) {
    super(translationService);
  }

  /**
   * Loads `cld3-asm` and creates the language identifier.
   *
   * Returns immediately when the identifier already exists. Calls that overlap
   * an initialisation still in progress wait for that same attempt instead of
   * loading the module a second time. A failed attempt rejects for every
   * waiter and is forgotten, so a later call starts a fresh attempt.
   */
  async init(): Promise<void> {
    if (this.cld) {
      return;
    }

    if (!this.pendingInit) {
      this.pendingInit = this.load();
    }
    const pending = this.pendingInit;

    try {
      await pending;
    } finally {
      // Only clear the attempt we waited on; a newer one may already be stored.
      if (this.pendingInit === pending) {
        this.pendingInit = undefined;
      }
    }
  }

  /**
   * Detects the language of `text`.
   *
   * @param text Text to analyse.
   * @returns `languageCode(<detected language>)` when the detector flags the
   *          result as reliable; `languageCode(null)` when the detector has
   *          not been initialised or the result is not reliable.
   */
  async detectSpokenLanguage(text: string): Promise<string> {
    const cld = this.cld;
    if (!cld) {
      return this.languageCode(null);
    }

    const language = await this.ga.trace('language-detector', 'find', () => cld.findLanguage(text));
    if (language.is_reliable) {
      return this.languageCode(language.language);
    }

    return this.languageCode(null);
  }

  /** Imports the library, loads its module and stores a new identifier in `cld`. */
  private async load(): Promise<void> {
    const cld3 = await this.ga.trace(
      'language-detector',
      'import',
      () => import(/* webpackChunkName: "cld3-asm" */ 'cld3-asm')
    );
    const cldFactory = await this.ga.trace('language-detector', 'load', () => cld3.loadModule());
    // NOTE(review): (1, 500) are presumably the min/max byte bounds of the
    // identifier — confirm against the cld3-asm documentation.
    this.cld = await this.ga.trace('language-detector', 'create', () => cldFactory.create(1, 500));
  }
}
23 changes: 23 additions & 0 deletions
23
src/app/modules/translate/language-detection/language-detection.service.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import {Injectable} from '@angular/core'; | ||
import {TranslationService} from '../translate.service'; | ||
|
||
/**
 * Maps obsolete language codes to the code that replaces them.
 * Typed as a string-keyed record so it can be indexed with an arbitrary
 * detector output without an implicit-`any` error.
 */
const OBSOLETE_LANGUAGE_CODES: Readonly<Record<string, string>> = {
  iw: 'he',
};

/** Language code used when no supported language could be determined. */
const DEFAULT_SPOKEN_LANGUAGE = 'en';
|
||
/**
 * Base class for spoken-language detectors.
 *
 * Subclasses supply the detection engine; this class supplies the shared
 * normalisation of detector output to a supported spoken-language code.
 *
 * NOTE(review): the class is abstract yet registered with `providedIn: 'root'`;
 * confirm that a concrete implementation is bound wherever this token is injected.
 */
@Injectable({
  providedIn: 'root',
})
export abstract class LanguageDetectionService {
  constructor(private translationService: TranslationService) {}

  /** Prepares the underlying detector; resolves once it is ready for use. */
  abstract init(): Promise<void>;

  /**
   * Detects the spoken language of `text`.
   *
   * @param text Text to analyse.
   * @returns A spoken-language code.
   */
  abstract detectSpokenLanguage(text: string): Promise<string>;

  /**
   * Normalises a detected language code.
   *
   * @param language Code reported by a detector, or `null` when nothing usable
   *                 was detected.
   * @returns The code, with obsolete codes replaced, when it is listed in
   *          `translationService.spokenLanguages`; otherwise the default
   *          spoken language.
   */
  protected languageCode(language: string | null): string {
    if (language == null) {
      return DEFAULT_SPOKEN_LANGUAGE;
    }

    // A string-keyed view lets the lookup type-check for arbitrary detector output.
    const replacements: Readonly<Record<string, string>> = OBSOLETE_LANGUAGE_CODES;
    const correctedCode = replacements[language] ?? language;
    return this.translationService.spokenLanguages.includes(correctedCode) ? correctedCode : DEFAULT_SPOKEN_LANGUAGE;
  }
}
51 changes: 51 additions & 0 deletions
51
src/app/modules/translate/language-detection/mediapipe.service.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import {Injectable} from '@angular/core'; | ||
import {GoogleAnalyticsService} from '../../../core/modules/google-analytics/google-analytics.service'; | ||
import {TranslationService} from '../translate.service'; | ||
import {LanguageDetectionService} from './language-detection.service'; | ||
import type {LanguageDetector} from '@mediapipe/tasks-text'; | ||
|
||
/**
 * Spoken-language detection backed by `@mediapipe/tasks-text`.
 *
 * The library is imported lazily by `init()`. Until `init()` has resolved,
 * `detectSpokenLanguage()` returns whatever `languageCode(null)` yields.
 * Every step is wrapped in `GoogleAnalyticsService.trace` under the
 * `language-detector` category.
 */
@Injectable({
  providedIn: 'root',
})
export class MediaPipeLanguageDetectionService extends LanguageDetectionService {
  /** Detector instance; stays undefined until `init()` has resolved. */
  private detector?: LanguageDetector;

  /** In-flight initialisation, shared by overlapping `init()` calls. */
  private pendingInit?: Promise<void>;

  constructor(private ga: GoogleAnalyticsService, translationService: TranslationService) {
    super(translationService);
  }

  /**
   * Imports the MediaPipe text tasks and creates the language detector.
   *
   * Returns immediately when the detector already exists. Calls that overlap
   * an initialisation still in progress wait for that same attempt instead of
   * creating a second detector. A failed attempt rejects for every waiter and
   * is forgotten, so a later call starts a fresh attempt.
   */
  async init(): Promise<void> {
    if (this.detector) {
      return;
    }

    if (!this.pendingInit) {
      this.pendingInit = this.load();
    }
    const pending = this.pendingInit;

    try {
      await pending;
    } finally {
      // Only clear the attempt we waited on; a newer one may already be stored.
      if (this.pendingInit === pending) {
        this.pendingInit = undefined;
      }
    }
  }

  /**
   * Detects the language of `text`.
   *
   * @param text Text to analyse.
   * @returns `languageCode(<first reported language>)`; `languageCode(null)`
   *          when the detector has not been initialised or reports no
   *          languages.
   */
  async detectSpokenLanguage(text: string): Promise<string> {
    const detector = this.detector;
    if (!detector) {
      return this.languageCode(null);
    }

    const {languages} = await this.ga.trace('language-detector', 'detect', () => detector.detect(text));

    if (languages.length === 0) {
      // This usually happens when the text is too short.
      return this.languageCode(null);
    }

    return this.languageCode(languages[0].languageCode);
  }

  /** Imports the library, resolves its files and stores a new detector in `detector`. */
  private async load(): Promise<void> {
    const textTasks = await this.ga.trace(
      'language-detector',
      'import',
      () => import(/* webpackChunkName: "@mediapipe/tasks-text" */ '@mediapipe/tasks-text')
    );

    this.detector = await this.ga.trace('language-detector', 'create', async () => {
      const wasmFiles = await textTasks.FilesetResolver.forTextTasks('assets/models/mediapipe-language-detector/');
      return await textTasks.LanguageDetector.createFromModelPath(
        wasmFiles,
        'assets/models/mediapipe-language-detector/model.tflite'
      );
    });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.