在 ModelScope 环境中运行 gte_Qwen2-1.5B-instruct 文本转向量模型
代码示例:
#pip install gevent
#pip install flask
# 导入Flask类库
import os.path
import threading
import uuid
import requests
from flask import Flask, request
from gevent import pywsgi
import os
# Point the ModelScope cache at a local directory.
# NOTE(review): presumably this has to be set before `modelscope` is imported
# below so the download lands in ./model_data — confirm before reordering.
os.environ['MODELSCOPE_CACHE'] = './model_data'
# Embedding model dependencies
from sentence_transformers import SentenceTransformer
from modelscope import snapshot_download
# Fetch the model snapshot (downloaded automatically) and load it from the
# returned local directory.
model_dir = snapshot_download("iic/gte_Qwen2-1.5B-instruct")
model = SentenceTransformer(model_dir, trust_remote_code=True)
# The maximum sequence length is reduced here; a larger alternative that was
# left disabled:
# model.max_seq_length = 8192
model.max_seq_length = 2048
# Create the Flask application instance
app = Flask(__name__)
# View functions (routes)
@app.route("/")
def hello_world():
    """Serve a fixed greeting string at the root URL."""
    greeting = "Hello, World!"
    return greeting
@app.route("/text2vec", methods=["POST"])
def text2vec():
    """Encode one text from a form-encoded POST into an embedding vector.

    Form fields:
        text: The string to embed (required).
        query: When equal to ``"1"`` the text is encoded with
            ``prompt_name="query"``; any other value, or no value, encodes
            it without a prompt name.

    Returns:
        ``{"status": 0, "vec": [...]}`` on success, or
        ``({"status": 1, "msg": ...}, 400)`` when ``text`` is missing.
    """
    # Print the live threads before encoding (debugging aid).
    print("当前线程", threading.enumerate())
    text = request.form.get("text")
    query = request.form.get("query")
    if text is None:
        # Without this guard the model would be asked to encode [None];
        # presumably that fails deep inside the tokenizer, so reject the
        # request up front with a clear client error instead.
        return {"status": 1, "msg": "missing form field 'text'"}, 400

    result = {"status": 0}
    if query == '1':
        print("query vec")
        result["vec"] = model.encode([text], prompt_name="query")[0].tolist()
    else:
        result["vec"] = model.encode([text])[0].tolist()
    return result
@app.route("/texts2vecs", methods=["POST"])
def texts2vecs():
    """Encode a batch of texts from a JSON POST body into embedding vectors.

    Expected body: ``{"texts": ["first text", "second text", ...]}``.
    A missing ``texts`` key is treated as an empty list.

    Returns:
        ``{"status": 0, "vec": [[...], ...]}`` on success, or
        ``({"status": 1, "msg": ...}, 400)`` when the body is not a JSON
        object or ``texts`` is not a list.
    """
    # Print the live threads before encoding (debugging aid).
    print("当前线程", threading.enumerate())
    # silent=True returns None for a missing or unparsable JSON body, so every
    # malformed request is answered by the single check below.
    request_data = request.get_json(silent=True)
    if not isinstance(request_data, dict):
        return {"status": 1, "msg": "request body must be a JSON object"}, 400

    # The payload carries a list of texts to embed.
    data_list = request_data.get('texts', [])
    if not isinstance(data_list, list):
        # A bare string here would be encoded as a single input and change
        # the shape of "vec", so only lists are accepted.
        return {"status": 1, "msg": "'texts' must be a list"}, 400

    result = {"status": 0}
    result["vec"] = model.encode(data_list).tolist()
    return result
# 启动服务
if __name__ == '__main__':
    # Keep non-ASCII characters unescaped in JSON responses. The config key
    # is the legacy switch; newer Flask releases expose the same setting on
    # the app's JSON provider, so set both when the provider exists.
    app.config['JSON_AS_ASCII'] = False
    if hasattr(app, 'json'):
        app.json.ensure_ascii = False
    # Flask reads the upper-case DEBUG key; the previous lower-case 'debug'
    # entry was stored but never consulted.
    app.config['DEBUG'] = False
    # No 'threaded' setting is applied: it is an argument of app.run(), which
    # is not called here because gevent's WSGI server serves the app instead.
    port = 9522
    print("端口:", port)
    server = pywsgi.WSGIServer(('0.0.0.0', port), app)
    server.serve_forever()