Skip to content

Commit

Permalink
fix: cache the detector
Browse files Browse the repository at this point in the history
Signed-off-by: Frost Ming <me@frostming.com>
  • Loading branch information
frostming committed May 22, 2024
1 parent 3022d53 commit d6aa0af
Showing 1 changed file with 15 additions and 4 deletions.
19 changes: 15 additions & 4 deletions xiaogpt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
import re
import socket
from http.cookies import SimpleCookie
from typing import AsyncIterator
from typing import TYPE_CHECKING, AsyncIterator
from urllib.parse import urlparse

from requests.utils import cookiejar_from_dict

if TYPE_CHECKING:
from lingua import LanguageDetector


### HELP FUNCTION ###
def parse_cookie_string(cookie_string):
Expand Down Expand Up @@ -71,11 +74,19 @@ def get_hostname() -> str:
return s.getsockname()[0]


def detect_language(text: str) -> str:
def _get_detector() -> LanguageDetector | None:
try:
from lingua import LanguageDetectorBuilder
except ImportError:
return None
return LanguageDetectorBuilder.from_all_spoken_languages().build()


# Build the detector once at import time so detect_language() can reuse it
# on every call; this is None when lingua is not installed.
_detector = _get_detector()


def detect_language(text: str) -> str:
    """Return the lowercase ISO 639-1 code of the language of *text*.

    Detection uses the module-level ``_detector``, which is built once when
    the module is imported, rather than constructing a new detector on
    every call.

    Args:
        text: The text whose language should be detected.

    Returns:
        The lowercased ISO 639-1 name of the detected language, or ``"zh"``
        when no detector is available or the detector returns ``None``.
    """
    if _detector is None:
        return "zh"  # default to Chinese if the lingua module is not available
    lang = _detector.detect_language_of(text)
    return lang.iso_code_639_1.name.lower() if lang is not None else "zh"

0 comments on commit d6aa0af

Please sign in to comment.