diff --git a/llmengine/base_m2m.py b/llmengine/base_m2m.py
new file mode 100644
index 0000000..a1f5654
--- /dev/null
+++ b/llmengine/base_m2m.py
@@ -0,0 +1,26 @@
+# m2m/base_m2m.py
+
+
+# Registry mapping a model-key substring to its translator class.
+model_pathMap = {}
+
+
+def llm_register(model_key, Klass):
+    model_pathMap[model_key] = Klass
+
+
+def get_llm_class(model_path):
+    for k, klass in model_pathMap.items():
+        if k in model_path:
+            return klass
+    # No match: dump the registry to help diagnose the miss.
+    print(f'{model_pathMap=}')
+    return None
+
+
+class BaseM2M:
+    def __init__(self, model_id, **kw):
+        self.model_id = model_id
+
+    def m2m(self, text: str, src_lang: str, tgt_lang: str) -> str:
+        raise NotImplementedError
\ No newline at end of file
diff --git a/llmengine/fanyi_m2m.py b/llmengine/fanyi_m2m.py
new file mode 100644
index 0000000..3873b09
--- /dev/null
+++ b/llmengine/fanyi_m2m.py
@@ -0,0 +1,36 @@
+# m2m/fanyi_m2m.py
+import torch
+from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+from .base_m2m import BaseM2M, llm_register
+
+
+class M2M100Translator(BaseM2M):
+    def __init__(self, model_id, max_length=200):
+        super().__init__(model_id)
+        self.max_length = max_length
+        self.tokenizer = M2M100Tokenizer.from_pretrained(model_id)
+        self.model = M2M100ForConditionalGeneration.from_pretrained(
+            model_id, torch_dtype=torch.bfloat16, device_map="cuda"
+        )
+        self.model.eval()
+        self.model_name = model_id.split('/')[-1]
+
+    def m2m(self, text: str, src_lang: str, tgt_lang: str) -> str:
+        """Translate one passage of text from src_lang to tgt_lang."""
+        self.tokenizer.src_lang = src_lang
+        encoded = self.tokenizer(
+            text, return_tensors="pt", truncation=True, max_length=self.max_length
+        )
+        encoded = {k: v.to(self.model.device) for k, v in encoded.items()}
+
+        with torch.no_grad():
+            generated = self.model.generate(
+                **encoded,
+                forced_bos_token_id=self.tokenizer.get_lang_id(tgt_lang),
+                max_length=self.max_length
+            )
+
+        return self.tokenizer.decode(generated[0], skip_special_tokens=True)
+
+# Register the model (any model path containing "m2m100" matches).
+llm_register("m2m100", M2M100Translator)
\ No newline at end of file
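A quick way to sanity-check the two modules above, as a minimal sketch rather than part of the service; it assumes the model path used later in this patch (test/m2m/start.sh) exists locally and a CUDA device is available:

import llmengine.fanyi_m2m  # noqa: F401 - importing runs llm_register("m2m100", ...)
from llmengine.base_m2m import get_llm_class

model_path = "/share/models/facebook/m2m100_1.2B"
Klass = get_llm_class(model_path)   # matched via the "m2m100" substring
translator = Klass(model_path)      # loads tokenizer and model onto CUDA
print(translator.m2m("将图片中的车变为淡粉色。", "zh", "en"))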
diff --git a/llmengine/m2m.py b/llmengine/m2m.py
new file mode 100644
index 0000000..157105d
--- /dev/null
+++ b/llmengine/m2m.py
@@ -0,0 +1,94 @@
+# m2m/m2m.py
+import os
+import argparse
+from appPublic.registerfunction import RegisterFunction
+from appPublic.worker import awaitify
+from appPublic.log import debug
+from ahserver.serverenv import ServerEnv
+from ahserver.webapp import webserver
+
+from . import fanyi_m2m  # noqa: F401 - imported for its llm_register() side effect
+from .base_m2m import get_llm_class
+
+
+helptext = """M2M100 Translation API:
+POST /v1/m2m
+Headers:
+    Content-Type: application/json
+
+Body:
+{
+    "text": "将图片中的车变为淡粉色。",
+    "src_lang": "zh",
+    "tgt_lang": "en"
+}
+
+Response:
+{
+    "data": {
+        "text": "将图片中的车变为淡粉色。",
+        "translation": "The car in the picture turned pink."
+    },
+    "object": "translation.result",
+    "model": "m2m100_1.2B"
+}
+"""
+
+
+def init():
+    rf = RegisterFunction()
+    rf.register('m2m', m2m)
+    rf.register('docs', docs)
+
+
+async def docs(request, params_kw, *params, **kw):
+    return helptext
+
+
+async def m2m(request, params_kw, *params, **kw):
+    debug(f'{params_kw=}')
+    se = ServerEnv()
+    engine = se.engine
+
+    text = params_kw.text
+    src_lang = params_kw.src_lang or "zh"
+    tgt_lang = params_kw.tgt_lang or "en"
+
+    if not text or not isinstance(text, str):
+        raise Exception("`text` must be a non-empty string")
+
+    # engine.m2m is blocking; awaitify runs it off the event loop.
+    f = awaitify(engine.m2m)
+    translation = await f(text, src_lang, tgt_lang)
+
+    ret = {
+        "data": {
+            "text": text,
+            "translation": translation
+        },
+        "object": "translation.result",
+        "model": engine.model_name
+    }
+    return ret
+
+
+def main():
+    parser = argparse.ArgumentParser(prog="M2M100 Translate API")
+    parser.add_argument('-w', '--workdir', default=os.getcwd())
+    parser.add_argument('-p', '--port', type=int, default=8883)
+    parser.add_argument('model_path', help="Path to m2m100 model, e.g. /share/models/facebook/m2m100_1.2B")
+    args = parser.parse_args()
+
+    Klass = get_llm_class(args.model_path)
+    if Klass is None:
+        raise Exception(f"Model {args.model_path} not registered. Path should contain 'm2m100'")
+
+    se = ServerEnv()
+    se.engine = Klass(args.model_path)
+
+    debug(f"Starting M2M100 service on port {args.port}")
+    webserver(init, args.workdir, args.port)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
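With the service running, the endpoint can be exercised over HTTP. A client sketch, assuming the JSON body fields arrive in params_kw as the handler above expects and the default port 8883 from main():

import requests

resp = requests.post(
    "http://127.0.0.1:8883/v1/m2m",
    json={"text": "将图片中的车变为淡粉色。", "src_lang": "zh", "tgt_lang": "en"},
)
resp.raise_for_status()
print(resp.json()["data"]["translation"])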
diff --git a/test/clip/clip.service b/test/clip/clip.service
new file mode 100644
index 0000000..f09b3fd
--- /dev/null
+++ b/test/clip/clip.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=CLIP embedding service
+Wants=systemd-networkd.service
+After=systemd-networkd.service
+
+[Service]
+WorkingDirectory=/share/run/clip
+ExecStart=/share/run/clip/start.sh
+ExecStop=/share/run/clip/stop.sh
+StandardOutput=append:/var/log/clip/clip.log
+StandardError=append:/var/log/clip/clip.log
+SyslogIdentifier=clip
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/test/clip/conf/config.json b/test/clip/conf/config.json
new file mode 100644
index 0000000..d836cd6
--- /dev/null
+++ b/test/clip/conf/config.json
@@ -0,0 +1,51 @@
+{
+    "filesroot":"$[workdir]$/files",
+    "model_name": "/data/ymq/models/laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
+    "logger":{
+        "name":"llmengine",
+        "levelname":"info",
+        "logfile":"$[workdir]$/logs/llmengine.log"
+    },
+    "website":{
+        "paths":[
+            ["$[workdir]$/wwwroot",""]
+        ],
+        "client_max_size":10000,
+        "host":"0.0.0.0",
+        "port":8882,
+        "coding":"utf-8",
+        "indexes":[
+            "index.html",
+            "index.ui"
+        ],
+        "startswiths":[
+            {
+                "leading":"/idfile",
+                "registerfunction":"idfile"
+            },{
+                "leading": "/v1/embed",
+                "registerfunction": "embed"
+            },{
+                "leading": "/docs",
+                "registerfunction": "docs"
+            }
+        ],
+        "processors":[
+            [".tmpl","tmpl"],
+            [".app","app"],
+            [".ui","bui"],
+            [".dspy","dspy"],
+            [".md","md"]
+        ],
+        "rsakey_oops":{
+            "privatekey":"$[workdir]$/conf/rsa_private_key.pem",
+            "publickey":"$[workdir]$/conf/rsa_public_key.pem"
+        },
+        "session_max_time":3000,
+        "session_issue_time":2500,
+        "session_redis_notuse":{
+            "url":"redis://127.0.0.1:6379"
+        }
+    }
+}
+
diff --git a/test/clip/logs/llmengine.log b/test/clip/logs/llmengine.log
new file mode 100644
index 0000000..e69de29
diff --git a/test/clip/start.sh b/test/clip/start.sh
new file mode 100755
index 0000000..7fc2be6
--- /dev/null
+++ b/test/clip/start.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+CUDA_VISIBLE_DEVICES=4 /share/vllm-0.8.5/bin/python -m llmengine.mm_embedding -p 8882
diff --git a/test/clip/stop.sh b/test/clip/stop.sh
new file mode 100755
index 0000000..3fe46ce
--- /dev/null
+++ b/test/clip/stop.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+PORT=8882
+PID=$(lsof -t -i:$PORT)
+
+if [ -n "$PID" ]; then
+    echo "Found process on port $PORT: PID=$PID"
+    kill -9 $PID
+    echo "Killed process on port $PORT"
+else
+    echo "No process found on port $PORT"
+fi
\ No newline at end of file
diff --git a/test/m2m/conf/config.json b/test/m2m/conf/config.json
new file mode 100644
index 0000000..09a49ac
--- /dev/null
+++ b/test/m2m/conf/config.json
@@ -0,0 +1,50 @@
+{
+    "filesroot":"$[workdir]$/files",
+    "logger":{
+        "name":"llmengine",
+        "levelname":"info",
+        "logfile":"$[workdir]$/logs/llmengine.log"
+    },
+    "website":{
+        "paths":[
+            ["$[workdir]$/wwwroot",""]
+        ],
+        "client_max_size":10000,
+        "host":"0.0.0.0",
+        "port":8883,
+        "coding":"utf-8",
+        "indexes":[
+            "index.html",
+            "index.ui"
+        ],
+        "startswiths":[
+            {
+                "leading":"/idfile",
+                "registerfunction":"idfile"
+            },{
+                "leading": "/v1/m2m",
+                "registerfunction": "m2m"
+            },{
+                "leading": "/docs",
+                "registerfunction": "docs"
+            }
+        ],
+        "processors":[
+            [".tmpl","tmpl"],
+            [".app","app"],
+            [".ui","bui"],
+            [".dspy","dspy"],
+            [".md","md"]
+        ],
+        "rsakey_oops":{
+            "privatekey":"$[workdir]$/conf/rsa_private_key.pem",
+            "publickey":"$[workdir]$/conf/rsa_public_key.pem"
+        },
+        "session_max_time":3000,
+        "session_issue_time":2500,
+        "session_redis_notuse":{
+            "url":"redis://127.0.0.1:6379"
+        }
+    }
+}
+
diff --git a/test/m2m/logs/llmengine.log b/test/m2m/logs/llmengine.log
new file mode 100644
index 0000000..e69de29
diff --git a/test/m2m/m2m.service b/test/m2m/m2m.service
new file mode 100644
index 0000000..b25beb8
--- /dev/null
+++ b/test/m2m/m2m.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=M2M100 translation service
+Wants=systemd-networkd.service
+After=systemd-networkd.service
+
+[Service]
+WorkingDirectory=/share/run/m2m
+ExecStart=/share/run/m2m/start.sh
+ExecStop=/share/run/m2m/stop.sh
+StandardOutput=append:/var/log/m2m/m2m.log
+StandardError=append:/var/log/m2m/m2m.log
+SyslogIdentifier=m2m
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/test/m2m/start.sh b/test/m2m/start.sh
new file mode 100755
index 0000000..d0148e4
--- /dev/null
+++ b/test/m2m/start.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+CUDA_VISIBLE_DEVICES=7 /share/vllm-0.8.5/bin/python -m llmengine.m2m -p 8883 /share/models/facebook/m2m100_1.2B
diff --git a/test/m2m/stop.sh b/test/m2m/stop.sh
new file mode 100755
index 0000000..24e0565
--- /dev/null
+++ b/test/m2m/stop.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+PORT=8883
+PID=$(lsof -t -i:$PORT)
+
+if [ -n "$PID" ]; then
+    echo "Found process on port $PORT: PID=$PID"
+    kill -9 $PID
+    echo "Killed process on port $PORT"
+else
+    echo "No process found on port $PORT"
+fi
\ No newline at end of file
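Once both units are installed and started, a liveness probe can poll the /docs route that each conf/config.json registers. A sketch, assuming the services listen on localhost at the ports configured above:

import requests

for name, port in [("clip", 8882), ("m2m", 8883)]:
    try:
        # /docs is registered in each service's startswiths config
        r = requests.get(f"http://127.0.0.1:{port}/docs", timeout=5)
        print(f"{name}: HTTP {r.status_code}")
    except requests.ConnectionError:
        print(f"{name}: not reachable on port {port}")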