import json

import requests

from services.ChatService import ChatService
from deepeval.models import DeepEvalBaseLLM
from deepeval.test_case import LLMTestCase
from deepeval.metrics import (
    FaithfulnessMetric,
    ContextualPrecisionMetric,
    ContextualRecallMetric,
    ContextualRelevancyMetric,
)
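
# Evaluates FastGPT RAG answers with DeepEval's Faithfulness, Contextual
# Precision, Contextual Recall and Contextual Relevancy metrics, using
# qwen-max (reached through a FastGPT-compatible /chat/completions endpoint)
# as the judge model.
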
class QwenModel(DeepEvalBaseLLM):
    """Custom DeepEval judge model backed by qwen-max behind a FastGPT-compatible API."""

    def __init__(self):
        self.api_key = "fastgpt-*******"
        self.base_url = "https://jz-fastgpt-stable.djtest.cn/api/v1"
        self.model_name = "qwen-max"

    def load_model(self):
        return self

    def generate(self, prompt: str) -> str:
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model_name,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0,  # deterministic output for evaluation
        }
        response = requests.post(
            f"{self.base_url}/chat/completions",
            headers=headers,
            data=json.dumps(payload),
        )
        if response.status_code == 200:
            return response.json()["choices"][0]["message"]["content"]
        raise RuntimeError(f"API call failed: {response.status_code}, {response.text}")

    async def a_generate(self, prompt: str) -> str:
        return self.generate(prompt)

    def get_model_name(self):
        return self.model_name
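
# A minimal sanity check for the wrapper above (hypothetical prompt string;
# assumes the FastGPT endpoint and API key configured in QwenModel are valid):
#
#   model = QwenModel()
#   print(model.get_model_name())   # -> "qwen-max"
#   print(model.generate("Reply with the single word: pong"))
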
class EvalService:
    """Runs DeepEval RAG metrics over a FastGPT chat response."""

    def get_faithfulness(self, ques: str, response):
        qwen_model = QwenModel()
        faithfulness_metric = FaithfulnessMetric(model=qwen_model)
        test_case = self.get_test_case(ques, response)
        faithfulness_metric.measure(test_case)
        faithfulness = {
            "score": faithfulness_metric.score,
            "reason": faithfulness_metric.reason,
        }
        print(f"faithfulness:{faithfulness}")
        return faithfulness

    def get_contextprecision(self, ques: str, response):
        qwen_model = QwenModel()
        contextprecision_metric = ContextualPrecisionMetric(model=qwen_model)
        test_case = self.get_test_case(ques, response)
        contextprecision_metric.measure(test_case)
        contextprecision = {
            "score": contextprecision_metric.score,
            "reason": contextprecision_metric.reason,
        }
        print(f"contextprecision:{contextprecision}")
        return contextprecision

    def get_contextrecall(self, ques: str, response):
        qwen_model = QwenModel()
        contextrecall_metric = ContextualRecallMetric(model=qwen_model)
        test_case = self.get_test_case(ques, response)
        contextrecall_metric.measure(test_case)
        contextrecall = {
            "score": contextrecall_metric.score,
            "reason": contextrecall_metric.reason,
        }
        print(f"contextrecall:{contextrecall}")
        return contextrecall

    def get_contextrelevant(self, ques: str, response):
        qwen_model = QwenModel()
        contextrelevant_metric = ContextualRelevancyMetric(model=qwen_model)
        test_case = self.get_test_case(ques, response)
        contextrelevant_metric.measure(test_case)
        contextrelevant = {
            "score": contextrelevant_metric.score,
            "reason": contextrelevant_metric.reason,
        }
        print(f"contextrelevant:{contextrelevant}")
        return contextrelevant
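
    # Note: each metric method above builds a fresh QwenModel and a fresh test
    # case via get_test_case below. expected_output is filled with the model's
    # own answer (no gold reference is used in this flow), so the contextual
    # precision/recall scores are judged against the generated answer rather
    # than a human-written reference.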
    def get_test_case(self, ques: str, result):
        # Retrieved knowledge-base chunks quoted by FastGPT (q/a pairs).
        quote_list = result["responseData"][1]["quoteList"]
        retrieval_context = []
        for quote in quote_list:
            retrieval_context.append(f"{quote['q']}:{quote['a']}")
        # Conversation history preview, used as the test-case context.
        context = []
        historypreview = result["responseData"][2]["historyPreview"]
        for history in historypreview:
            context.append(history["value"])
        answer = result["choices"][0]["message"]["content"]
        res_case = LLMTestCase(
            input=ques,
            actual_output=answer,
            expected_output=answer,  # no gold answer available; reuse the actual output
            context=context,
            retrieval_context=retrieval_context,
        )
        return res_case
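
# Assumed shape of the FastGPT response consumed by get_test_case above
# (inferred from the field access in that method; the positions of the quote
# and history entries inside responseData may differ per app workflow):
#
#   {
#     "choices": [{"message": {"content": "<final answer>"}}],
#     "responseData": [
#       {...},
#       {"quoteList": [{"q": "...", "a": "..."}, ...]},
#       {"historyPreview": [{"value": "..."}, ...]},
#       ...
#     ]
#   }
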
if __name__ == "__main__":
    url = "https://XXXXXX/api/v1/chat/completions"
    key = "fastgpt-XXXXXX"
    cr = ChatService(url, key)
    # Call the AI application and get its raw response.
    result = cr.question_response("XXX怎么收费?")
    es = EvalService()
    es.get_faithfulness("XXX怎么收费?", result)
    es.get_contextprecision("XXX怎么收费?", result)
    es.get_contextrecall("XXX怎么收费?", result)
    es.get_contextrelevant("XXX怎么收费?", result)

# Sample output:
# faithfulness:{'score': 1.0, 'reason': 'The actual output is fully consistent with the retrieval context, with no contradictions, hence the perfect faithfulness score of 1.00.'}
# contextprecision:{'score': 1.0, 'reason': 'The score is 1.00 because the relevant node (the first node) is correctly ranked at the top.'}
# contextrecall:{'score': 0.5, 'reason': 'The score is 0.50 because although the node is mentioned in the retrieval context'}
# contextrelevant:{'score': 0.16666666666666666, 'reason': "The score is 0.17 because most of the retrieved content does not address the XXX question, for example ..."}