From 431ac76a3121aa2d427639b6912e54583b0d0cad Mon Sep 17 00:00:00 2001 From: yumoqing Date: Mon, 3 Nov 2025 13:53:53 +0800 Subject: [PATCH] bugfix --- MANIFEST.in | 2 + README.md | 23 +++++ app/{checklang.py => langapp.py} | 18 ++-- checklang/__init__.py | 0 checklang/init.py | 142 +++++++++++++++++++++++++++++++ {app => checklang}/lid.176.ftz | Bin pyproject.toml | 29 +++++++ requirements.txt | 5 -- 8 files changed, 202 insertions(+), 17 deletions(-) create mode 100644 MANIFEST.in rename app/{checklang.py => langapp.py} (65%) create mode 100644 checklang/__init__.py create mode 100644 checklang/init.py rename {app => checklang}/lid.176.ftz (100%) create mode 100644 pyproject.toml delete mode 100644 requirements.txt diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..d058114 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include checklang/lid.176.ftz + diff --git a/README.md b/README.md index ed7e3bc..7716c8d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,25 @@ # checklang +此模块提供自动判断给定文本的语言 + +## Usage + +``` +from checklang.init import LanguageChecker + +lc = LanguageChecker() +lang = lc.checklang(text) +print(lang) +# { +# 'lang':'en' +# } + +``` + +## Dependencies + +``` +pip install fasttext +``` + + diff --git a/app/checklang.py b/app/langapp.py similarity index 65% rename from app/checklang.py rename to app/langapp.py index 26ead17..126d769 100644 --- a/app/checklang.py +++ b/app/langapp.py @@ -3,7 +3,7 @@ from ahserver.webapp import webapp from ahserver.serverenv import ServerEnv from appPublic.registerfunction import RegisterFunction -import fasttext +from checklang.init import load_checklang def docs(request, *args, **kw): return """Check langage for text @@ -22,7 +22,8 @@ response: } """ def checklang(request, params_kw, *args, **kw): - engine = kw.get('engine') + env = request._run_ns + engine = env.language_checker.model text = params_kw.text pred = engine.predict(text) d = pred[0][0][9:] @@ -31,18 +32,11 @@ def checklang(request, 
params_kw, *args, **kw): } def init(): - p = os.path.join(os.path.dirname(__file__), 'lid.176.ftz') - print(f'model path={p}') - model = fasttext.load_model(p) - # 需先下载模型:https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz - env = ServerEnv() - env.engine = model - rf = RegisterFunction() - rf.register('checklang', checklang) - -if __name__ == '__main__': + load_checklang() rf = RegisterFunction() rf.register('checklang', checklang) rf.register('docs', docs) + +if __name__ == '__main__': webapp(init) diff --git a/checklang/__init__.py b/checklang/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/checklang/init.py b/checklang/init.py new file mode 100644 index 0000000..2ad56c1 --- /dev/null +++ b/checklang/init.py @@ -0,0 +1,142 @@ +import os +from ahserver.serverenv import ServerEnv +import fasttext +# The model must be downloaded first: https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz + +class LanguageChecker: + def __init__(self): + p = os.path.join(os.path.dirname(__file__), 'lid.176.ftz') + print(f'model path={p}') + self.model = fasttext.load_model(p) + + def checklang(self, text): + pred = self.model.predict(text.replace('\n', ' ')) + d = pred[0][0][9:] + return { + 'lang':d + } + +def get_languages(): + """ + Return the list of major human languages on Earth. + Return value: [{'value': 'en', 'text': 'English'}, ...] 
+ """ + languages = [ + {"value": "af", "text": "Afrikaans"}, + {"value": "am", "text": "Amharic"}, + {"value": "ar", "text": "Arabic"}, + {"value": "az", "text": "Azerbaijani"}, + {"value": "be", "text": "Belarusian"}, + {"value": "bg", "text": "Bulgarian"}, + {"value": "bn", "text": "Bengali"}, + {"value": "bo", "text": "Tibetan"}, + {"value": "bs", "text": "Bosnian"}, + {"value": "ca", "text": "Catalan"}, + {"value": "ceb", "text": "Cebuano"}, + {"value": "cs", "text": "Czech"}, + {"value": "cy", "text": "Welsh"}, + {"value": "da", "text": "Danish"}, + {"value": "de", "text": "German"}, + {"value": "dv", "text": "Dhivehi"}, + {"value": "el", "text": "Greek"}, + {"value": "en", "text": "English"}, + {"value": "eo", "text": "Esperanto"}, + {"value": "es", "text": "Spanish"}, + {"value": "et", "text": "Estonian"}, + {"value": "eu", "text": "Basque"}, + {"value": "fa", "text": "Persian"}, + {"value": "fi", "text": "Finnish"}, + {"value": "fil", "text": "Filipino"}, + {"value": "fr", "text": "French"}, + {"value": "fy", "text": "Frisian"}, + {"value": "ga", "text": "Irish"}, + {"value": "gd", "text": "Scottish Gaelic"}, + {"value": "gl", "text": "Galician"}, + {"value": "gu", "text": "Gujarati"}, + {"value": "ha", "text": "Hausa"}, + {"value": "haw", "text": "Hawaiian"}, + {"value": "he", "text": "Hebrew"}, + {"value": "hi", "text": "Hindi"}, + {"value": "hmn", "text": "Hmong"}, + {"value": "hr", "text": "Croatian"}, + {"value": "ht", "text": "Haitian Creole"}, + {"value": "hu", "text": "Hungarian"}, + {"value": "hy", "text": "Armenian"}, + {"value": "id", "text": "Indonesian"}, + {"value": "ig", "text": "Igbo"}, + {"value": "is", "text": "Icelandic"}, + {"value": "it", "text": "Italian"}, + {"value": "ja", "text": "Japanese"}, + {"value": "jv", "text": "Javanese"}, + {"value": "ka", "text": "Georgian"}, + {"value": "kk", "text": "Kazakh"}, + {"value": "km", "text": "Khmer"}, + {"value": "kn", "text": "Kannada"}, + {"value": "ko", "text": "Korean"}, + {"value": "ku", 
"text": "Kurdish"}, + {"value": "ky", "text": "Kyrgyz"}, + {"value": "la", "text": "Latin"}, + {"value": "lb", "text": "Luxembourgish"}, + {"value": "lo", "text": "Lao"}, + {"value": "lt", "text": "Lithuanian"}, + {"value": "lv", "text": "Latvian"}, + {"value": "mg", "text": "Malagasy"}, + {"value": "mi", "text": "Maori"}, + {"value": "mk", "text": "Macedonian"}, + {"value": "ml", "text": "Malayalam"}, + {"value": "mn", "text": "Mongolian"}, + {"value": "mr", "text": "Marathi"}, + {"value": "ms", "text": "Malay"}, + {"value": "mt", "text": "Maltese"}, + {"value": "my", "text": "Burmese"}, + {"value": "ne", "text": "Nepali"}, + {"value": "nl", "text": "Dutch"}, + {"value": "no", "text": "Norwegian"}, + {"value": "ny", "text": "Nyanja"}, + {"value": "or", "text": "Odia"}, + {"value": "pa", "text": "Punjabi"}, + {"value": "pl", "text": "Polish"}, + {"value": "ps", "text": "Pashto"}, + {"value": "pt", "text": "Portuguese"}, + {"value": "ro", "text": "Romanian"}, + {"value": "ru", "text": "Russian"}, + {"value": "rw", "text": "Kinyarwanda"}, + {"value": "sd", "text": "Sindhi"}, + {"value": "si", "text": "Sinhala"}, + {"value": "sk", "text": "Slovak"}, + {"value": "sl", "text": "Slovenian"}, + {"value": "sm", "text": "Samoan"}, + {"value": "sn", "text": "Shona"}, + {"value": "so", "text": "Somali"}, + {"value": "sq", "text": "Albanian"}, + {"value": "sr", "text": "Serbian"}, + {"value": "st", "text": "Sesotho"}, + {"value": "su", "text": "Sundanese"}, + {"value": "sv", "text": "Swedish"}, + {"value": "sw", "text": "Swahili"}, + {"value": "ta", "text": "Tamil"}, + {"value": "te", "text": "Telugu"}, + {"value": "tg", "text": "Tajik"}, + {"value": "th", "text": "Thai"}, + {"value": "ti", "text": "Tigrinya"}, + {"value": "tk", "text": "Turkmen"}, + {"value": "tl", "text": "Tagalog"}, + {"value": "tr", "text": "Turkish"}, + {"value": "tt", "text": "Tatar"}, + {"value": "ug", "text": "Uyghur"}, + {"value": "uk", "text": "Ukrainian"}, + {"value": "ur", "text": "Urdu"}, + 
{"value": "uz", "text": "Uzbek"}, + {"value": "vi", "text": "Vietnamese"}, + {"value": "xh", "text": "Xhosa"}, + {"value": "yi", "text": "Yiddish"}, + {"value": "yo", "text": "Yoruba"}, + {"value": "zh", "text": "Chinese"}, + {"value": "zu", "text": "Zulu"} + ] + return languages + +def load_checklang(): + env = ServerEnv() + env.get_languages = get_languages + env.language_checker = LanguageChecker() diff --git a/app/lid.176.ftz b/checklang/lid.176.ftz similarity index 100% rename from app/lid.176.ftz rename to checklang/lid.176.ftz diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..78c3862 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,22 @@ +[build-system] +requires = ["setuptools>=61", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "checklang" +version = "0.1.0" +description = "a language utils module" +authors = [{name = "Yu Moqing", email = "yumoqing@gmail.com"}] +license = {text = "MIT"} +requires-python = ">=3.8" +dependencies = ["fasttext", "ahserver", "sqlor", "appPublic"] + +[tool.setuptools.packages.find] +where = ["."] +include = ["checklang*"] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.package-data] +checklang = ["lid.176.ftz"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 579081d..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -fasttext -git+https://git.kaiyuancloud.cn/yumoqing/apppublic -git+https://git.kaiyuancloud.cn/yumoqing/sqlor -git+https://git.kaiyuancloud.cn/yumoqing/ahserver -