This commit is contained in:
yumoqing 2025-11-03 13:53:53 +08:00
parent a1d9b7e524
commit 431ac76a31
8 changed files with 202 additions and 17 deletions

2
MANIFEST.in Normal file
View File

@ -0,0 +1,2 @@
include checklang/lid.176.ftz

View File

@ -1,2 +1,25 @@
# checklang
此模块提供自动判断给定文本的语言
## Usage
```
from checklang.init import LanguageChecker
lc = LanguageChecker()
lang = lc.checklang(text)
print(lang)
# {
# 'lang':'en'
# }
```
## Dependencies
```
pip install fasttext
```

View File

@ -3,7 +3,7 @@ from ahserver.webapp import webapp
from ahserver.serverenv import ServerEnv
from appPublic.registerfunction import RegisterFunction
import fasttext
from checklang.init import load_checklang
def docs(request, *args, **kw):
return """Check langage for text
@ -22,7 +22,8 @@ response:
}
"""
def checklang(request, params_kw, *args, **kw):
engine = kw.get('engine')
env = request._run_ns
engine = env.language_checker
text = params_kw.text
pred = engine.predict(text)
d = pred[0][0][9:]
@ -31,18 +32,11 @@ def checklang(request, params_kw, *args, **kw):
}
def init():
p = os.path.join(os.path.dirname(__file__), 'lid.176.ftz')
print(f'model path={p}')
model = fasttext.load_model(p)
# 需先下载模型https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz
env = ServerEnv()
env.engine = model
rf = RegisterFunction()
rf.register('checklang', checklang)
if __name__ == '__main__':
load_checklang()
rf = RegisterFunction()
rf.register('checklang', checklang)
rf.register('docs', docs)
if __name__ == '__main__':
webapp(init)

0
checklang/__init__.py Normal file
View File

142
checklang/init.py Normal file
View File

@ -0,0 +1,142 @@
import os
from ahserver.serverenv import ServerEnv
import fasttext
# The model must be downloaded first: https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz
class LanguageChecker:
    """Detect the language of a text with the fastText ``lid.176.ftz`` model.

    The model file is looked up in the same directory as this module.
    """

    # Marker that precedes the language code in the predicted label; it is
    # stripped so callers only see the bare code (e.g. ``en``).
    _LABEL_PREFIX = '__label__'

    def __init__(self):
        """Load the language-identification model shipped beside this module."""
        p = os.path.join(os.path.dirname(__file__), 'lid.176.ftz')
        print(f'model path={p}')
        self.model = fasttext.load_model(p)

    def checklang(self, text):
        """Return ``{'lang': code}`` for the top predicted language of *text*.

        Line breaks are replaced with spaces before prediction, because the
        fastText ``predict`` call handles a single line at a time and raises
        ``ValueError`` when the input contains a newline character.
        """
        single_line = text.replace('\n', ' ')
        pred = self.model.predict(single_line)
        # pred[0] holds the predicted labels; the first one is used here.
        label = pred[0][0]
        if label.startswith(self._LABEL_PREFIX):
            label = label[len(self._LABEL_PREFIX):]
        return {
            'lang': label
        }
def get_languages():
    """Return a list of major human languages.

    Each entry is a dict of the form ``{'value': 'en', 'text': 'English'}``,
    where ``value`` is the language code and ``text`` its English name.
    A new list of new dicts is built on every call.
    """
    code_name_pairs = (
        ("af", "Afrikaans"),
        ("am", "Amharic"),
        ("ar", "Arabic"),
        ("az", "Azerbaijani"),
        ("be", "Belarusian"),
        ("bg", "Bulgarian"),
        ("bn", "Bengali"),
        ("bo", "Tibetan"),
        ("bs", "Bosnian"),
        ("ca", "Catalan"),
        ("ceb", "Cebuano"),
        ("cs", "Czech"),
        ("cy", "Welsh"),
        ("da", "Danish"),
        ("de", "German"),
        ("dv", "Dhivehi"),
        ("el", "Greek"),
        ("en", "English"),
        ("eo", "Esperanto"),
        ("es", "Spanish"),
        ("et", "Estonian"),
        ("eu", "Basque"),
        ("fa", "Persian"),
        ("fi", "Finnish"),
        ("fil", "Filipino"),
        ("fr", "French"),
        ("fy", "Frisian"),
        ("ga", "Irish"),
        ("gd", "Scottish Gaelic"),
        ("gl", "Galician"),
        ("gu", "Gujarati"),
        ("ha", "Hausa"),
        ("haw", "Hawaiian"),
        ("he", "Hebrew"),
        ("hi", "Hindi"),
        ("hmn", "Hmong"),
        ("hr", "Croatian"),
        ("ht", "Haitian Creole"),
        ("hu", "Hungarian"),
        ("hy", "Armenian"),
        ("id", "Indonesian"),
        ("ig", "Igbo"),
        ("is", "Icelandic"),
        ("it", "Italian"),
        ("ja", "Japanese"),
        ("jv", "Javanese"),
        ("ka", "Georgian"),
        ("kk", "Kazakh"),
        ("km", "Khmer"),
        ("kn", "Kannada"),
        ("ko", "Korean"),
        ("ku", "Kurdish"),
        ("ky", "Kyrgyz"),
        ("la", "Latin"),
        ("lb", "Luxembourgish"),
        ("lo", "Lao"),
        ("lt", "Lithuanian"),
        ("lv", "Latvian"),
        ("mg", "Malagasy"),
        ("mi", "Maori"),
        ("mk", "Macedonian"),
        ("ml", "Malayalam"),
        ("mn", "Mongolian"),
        ("mr", "Marathi"),
        ("ms", "Malay"),
        ("mt", "Maltese"),
        ("my", "Burmese"),
        ("ne", "Nepali"),
        ("nl", "Dutch"),
        ("no", "Norwegian"),
        ("ny", "Nyanja"),
        ("or", "Odia"),
        ("pa", "Punjabi"),
        ("pl", "Polish"),
        ("ps", "Pashto"),
        ("pt", "Portuguese"),
        ("ro", "Romanian"),
        ("ru", "Russian"),
        ("rw", "Kinyarwanda"),
        ("sd", "Sindhi"),
        ("si", "Sinhala"),
        ("sk", "Slovak"),
        ("sl", "Slovenian"),
        ("sm", "Samoan"),
        ("sn", "Shona"),
        ("so", "Somali"),
        ("sq", "Albanian"),
        ("sr", "Serbian"),
        ("st", "Sesotho"),
        ("su", "Sundanese"),
        ("sv", "Swedish"),
        ("sw", "Swahili"),
        ("ta", "Tamil"),
        ("te", "Telugu"),
        ("tg", "Tajik"),
        ("th", "Thai"),
        ("ti", "Tigrinya"),
        ("tk", "Turkmen"),
        ("tl", "Tagalog"),
        ("tr", "Turkish"),
        ("tt", "Tatar"),
        ("ug", "Uyghur"),
        ("uk", "Ukrainian"),
        ("ur", "Urdu"),
        ("uz", "Uzbek"),
        ("vi", "Vietnamese"),
        ("xh", "Xhosa"),
        ("yi", "Yiddish"),
        ("yo", "Yoruba"),
        ("zh", "Chinese"),
        ("zu", "Zulu"),
    )
    # Expand the compact table into the dict shape callers expect.
    return [{"value": code, "text": name} for code, name in code_name_pairs]
def load_checklang():
    """Attach the language helpers to a ``ServerEnv`` instance.

    Sets ``get_languages`` to the module's language-list function and
    ``language_checker`` to a newly constructed ``LanguageChecker``.
    """
    server_env = ServerEnv()
    server_env.get_languages = get_languages
    # Constructing the checker loads the model, so it is done last.
    server_env.language_checker = LanguageChecker()

29
pyproject.toml Normal file
View File

@ -0,0 +1,29 @@
[build-system]
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "checklang"
version = "0.1.0"
description = "a language utils module"
authors = [{name = "Yu Moqing", email = "yumoqing@gmail.com"}]
license = {text = "MIT"}
# The Python requirement lives here; a setup.cfg-style [options] table with
# "install_requires =" is not valid TOML and makes the file unparseable.
requires-python = ">=3.8"
dependencies = ["fasttext", "ahserver", "sqlor", "appPublic"]

[tool.setuptools.packages.find]
where = ["."]
include = ["checklang*"]

[tool.setuptools]
include-package-data = true

[tool.setuptools.package-data]
# Ship the fastText model file with the package.
checklang = ["lid.176.ftz"]

View File

@ -1,5 +0,0 @@
fasttext
git+https://git.kaiyuancloud.cn/yumoqing/apppublic
git+https://git.kaiyuancloud.cn/yumoqing/sqlor
git+https://git.kaiyuancloud.cn/yumoqing/ahserver