Testing the performance of the generate_then_read code under glm-9b.

1. The code is as follows:
```python
from openai import OpenAI
import re
import json
from tqdm import tqdm


class ChatGLM4(object):
    def __init__(self, api_key="EMPTY", base_url="http://x.x.x.x:8000/v1/", model_name="glm-4"):
        self.base_url = base_url
        self.model_name = model_name
        # Initialize the OpenAI client, with error handling
        try:
            self._client = OpenAI(api_key=api_key, base_url=self.base_url)
        except Exception as e:
            raise ValueError(f"Failed to initialize OpenAI client: {e}")

    def call(self, prompt, chat_history=None):
        # Combine chat_history and the current prompt into a single request
        messages = [{"role": "system", "content": "you are a helpful assistant!"}]
        # Append the earlier dialogue turns
        for user_content, assistant_content in (chat_history or []):
            messages.append({"role": "user", "content": user_content})
            messages.append({"role": "assistant", "content": assistant_content})
        # The current user input
        messages.append({"role": "user", "content": prompt})
        # Send the request
        try:
            response = self._client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                # max_tokens=,   # optionally cap the number of returned tokens
                n=1,             # return a single completion
                stop=None,       # optional stop sequences
                temperature=0,   # deterministic output
                top_p=0.1,
            )
            return response
        except Exception as e:
            raise ValueError(f"Failed to get a response from OpenAI: {e}")

    def chat(self, prompt, chat_history=None):
        response = self.call(prompt, chat_history)
        return response.choices[0].message.content


llm = ChatGLM4()

with open("webq_test_gpt_output.json", 'r') as f:
    datas = json.load(f)

prompt = """
Refer to the passage below and answer the following question with just one entity. \n\n Passage: {background} \n\n Question: {query} \n\n The answer is
"""

results = []
for data in tqdm(datas):
    question = data['question']
    ctxs = data['ctxs']
    answer = data['answers']
    string = ""
    for idx, ctx in enumerate(ctxs):
        string += f"\n {idx}: {ctx['text']}"
    final_prompt = prompt.format(background=string, query=question)
    output = llm.chat(final_prompt)
    results.append({"question": question, "answer": answer, "output": output})

with open('results2.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)  # ensure_ascii=False to keep Chinese readable
```
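For completeness, `call()` consumes `chat_history` as (user, assistant) pairs, so a multi-turn invocation of the wrapper would look like this (the example turns are invented):

```python
history = [("Who wrote Hamlet?", "William Shakespeare")]
print(llm.chat("In which century did he live?", chat_history=history))
```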
Running the full script produces results like the following.
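Each record written to results2.json has the shape below (field names come from the script above; the values are invented for illustration):

```python
{
    "question": "what does jamaican people speak?",
    "answer": ["Jamaican Creole English Language", "Jamaican English"],
    "output": "Jamaican English"
}
```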
Convert the output into a list:
```python
import json

results = []
with open("results2.json", 'r') as f:
    datas = json.load(f)

for data in datas:
    question = data['question']
    answer = data['answer']
    # data['output'] is the raw answer string; split it on commas into candidate entities
    output = [ans.strip() for ans in data['output'].split(',')]
    results.append({"question": question, "answer": answer, "output": output})

with open('results_output.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=4)  # ensure_ascii=False to keep Chinese readable
```
This yields results like the following.
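After this step, each `output` field is a list of candidate entities, so a comma-separated model answer splits into several candidates (illustrative values):

```python
raw = "Jamaican English, Jamaican Creole English Language"  # hypothetical model output
print([ans.strip() for ans in raw.split(',')])
# -> ['Jamaican English', 'Jamaican Creole English Language']
```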
Finally, run the eval code for evaluation:
```python
import regex
import json
import string
import unicodedata
from typing import List
import numpy as np
from collections import Counter
from rouge import Rouge


class SimpleTokenizer(object):
    ALPHA_NUM = r'[\p{L}\p{N}\p{M}]+'
    NON_WS = r'[^\p{Z}\p{C}]'

    def __init__(self):
        """
        Args:
            annotators: None or empty set (only tokenizes).
        """
        self._regexp = regex.compile(
            '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )

    def tokenize(self, text, uncased=False):
        matches = [m for m in self._regexp.finditer(text)]
        if uncased:
            tokens = [m.group().lower() for m in matches]
        else:
            tokens = [m.group() for m in matches]
        return tokens


def check_answer(example, tokenizer) -> List[bool]:
    """Search through all the top docs to see if they have any of the answers."""
    answers = example['answers']
    ctxs = example['ctxs']
    hits = []
    for _, doc in enumerate(ctxs):
        text = doc['text']
        if text is None:  # cannot find the document for some reason
            hits.append(False)
            continue
        hits.append(has_answer(answers, text, tokenizer))
    return hits


def has_answer(answers, text, tokenizer=SimpleTokenizer()) -> bool:
    """Check if a document contains an answer string."""
    text = _normalize(text)
    text = tokenizer.tokenize(text, uncased=True)
    for answer in answers:
        answer = _normalize(answer)
        answer = tokenizer.tokenize(answer, uncased=True)
        for i in range(0, len(text) - len(answer) + 1):
            if answer == text[i: i + len(answer)]:
                return True
    return False


def _normalize(text):
    return unicodedata.normalize('NFD', text)


def normalize_answer(s):
    def remove_articles(text):
        return regex.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
def exact_match_score(prediction, ground_truth):
    return normalize_answer(prediction) == normalize_answer(ground_truth)


def ems(prediction, ground_truths):
    return max([exact_match_score(prediction, gt) for gt in ground_truths])


def f1_score(prediction, ground_truth):
    prediction_tokens = normalize_answer(prediction).split()
    ground_truth_tokens = normalize_answer(ground_truth).split()
    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0
    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1


def f1(prediction, ground_truths):
    return max([f1_score(prediction, gt) for gt in ground_truths])


def rougel_score(prediction, ground_truth):
    rouge = Rouge()
    # no normalization
    try:
        scores = rouge.get_scores(prediction, ground_truth, avg=True)
    except ValueError:  # "Hypothesis is empty."
        return 0.0
    return scores["rouge-l"]["f"]


def rl(prediction, ground_truths):
    return max([rougel_score(prediction, gt) for gt in ground_truths])


## file-level evaluation ...

def eval_recall(infile):
    ...

def eval_question_answering(infile):
    ...

def eval_fact_checking(infile):
    ...

def eval_dialogue_system(infile):
    ...
```
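The bodies of the four `eval_*` functions did not survive the paste above. For reference, here is a minimal sketch of what `eval_question_answering` plausibly computes, given that it returns an exact-match score and an answer length; the record format follows the results_output.json produced above, and this is an assumption, not the repository's actual implementation:

```python
def eval_question_answering_sketch(infile):
    # Assumed record shape: {"question": str, "answer": [str, ...], "output": [str, ...]}
    with open(infile, 'r') as f:
        datas = json.load(f)
    exact_match_count = 0
    answer_lengths = []
    for data in datas:
        prediction = data['output'][0]       # first predicted entity
        if ems(prediction, data['answer']):  # EM against any gold answer,
            exact_match_count += 1           # e.g. ems("the Bahamas", ["Bahamas"]) is True
        answer_lengths.append(len(prediction.split()))
    em = round(exact_match_count / len(datas), 4)
    mean_len = round(float(np.mean(answer_lengths)), 4)
    return em, mean_len
```

Under this reading, the first number printed below (0.3179) would be the exact-match score and the second (2.6216) the mean predicted-answer length in tokens, i.e. two different quantities rather than two accuracy figures.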
```python
emscore, length = eval_question_answering("results_output.json")
print(emscore)
print(length)
```

The final result is only:

0.3179
2.6216
I am not sure whether this is caused by the model itself.

webq_test_gpt_output.json comes from the files you provided at https://drive.google.com/drive/folders/1DNjTTOLKi24wohJKu1Z-v6b4izfymlLu. I hope to hear back from you.