Build the lingua language detector once instead of on every call.

Summary of the change to xiaogpt/utils.py:
  * Adds a private helper `_get_detector()` that imports
    `LanguageDetectorBuilder` from `lingua` and returns
    `LanguageDetectorBuilder.from_all_spoken_languages().build()`, or `None`
    when the import raises `ImportError`.
  * Stores the helper's result in the module-level name `_detector`, so the
    detector is constructed when the module is imported rather than inside
    each `detect_language()` call.
  * `detect_language(text)` keeps its signature. It returns "zh" when
    `_detector` is `None` or when `detect_language_of(text)` returns `None`;
    otherwise it returns the lower-cased name of the detected language's
    ISO 639-1 code.
  * Imports `LanguageDetector` under `typing.TYPE_CHECKING` only, for the
    helper's return annotation.

NOTE(review): `LanguageDetector | None` is used as a return annotation while
`LanguageDetector` is imported only under TYPE_CHECKING. This presumably
relies on `from __future__ import annotations` in the lines above line 5,
which this patch does not show; confirm, because on Python versions that
evaluate annotations eagerly the module would otherwise fail to import with
a NameError.

NOTE(review): the retained inline comment says "langdetect", but the module
imported here is `lingua`.

NOTE(review): line breaks, blank context lines and indentation below were
restored from the hunk line counts; verify the context whitespace against the
target file before applying.

diff --git a/xiaogpt/utils.py b/xiaogpt/utils.py
index 9b7081b7..e0026369 100644
--- a/xiaogpt/utils.py
+++ b/xiaogpt/utils.py
@@ -5,11 +5,14 @@
 import re
 import socket
 from http.cookies import SimpleCookie
-from typing import AsyncIterator
+from typing import TYPE_CHECKING, AsyncIterator
 from urllib.parse import urlparse
 
 from requests.utils import cookiejar_from_dict
 
+if TYPE_CHECKING:
+    from lingua import LanguageDetector
+
 
 ### HELP FUNCTION ###
 def parse_cookie_string(cookie_string):
@@ -71,11 +74,19 @@ def get_hostname() -> str:
         return s.getsockname()[0]
 
 
-def detect_language(text: str) -> str:
+def _get_detector() -> LanguageDetector | None:
     try:
         from lingua import LanguageDetectorBuilder
     except ImportError:
+        return None
+    return LanguageDetectorBuilder.from_all_spoken_languages().build()
+
+
+_detector = _get_detector()
+
+
+def detect_language(text: str) -> str:
+    if _detector is None:
         return "zh"  # default to Chinese if langdetect module is not available
-    detector = LanguageDetectorBuilder.from_all_spoken_languages().build()
-    lang = detector.detect_language_of(text)
+    lang = _detector.detect_language_of(text)
     return lang.iso_code_639_1.name.lower() if lang is not None else "zh"