QUE_REPLACE/components/ai_check.py


								import glob

								import json

								from pathlib import Path

								import time


								from components.doubao_process import ask_question

								# from components.jsonl_repair_reindex import process_json


								def get_spilit_file_list(file_path):
								    """Return the paths of the ``.jsonl`` files matched under ``file_path``.

								    The glob pattern used is ``file_path + '/*.jsonl'``.

								    NOTE(review): an earlier comment here described a recursive search for
								    .txt files, but the pattern appended below has no ``**`` segment, so
								    ``recursive=True`` only has an effect if ``file_path`` itself contains
								    ``**`` -- confirm which behaviour was intended.

								    Args:
								        file_path: Directory path (or glob prefix) given as a string.

								    Returns:
								        The list of matching path strings produced by ``glob.glob``.
								    """
								    jsonl_pattern = file_path + '/*.jsonl'
								    return glob.glob(jsonl_pattern, recursive=True)


								def process_jsonl_file(file_path, output_folder, logger):
								    """Send every record of a JSONL file to the model and append the replies.

								    All records are loaded from ``file_path`` up front. For each record the
								    compact JSON produced by ``process_question`` is appended to a fixed
								    prompt and passed to ``ask_question``; the question and the reply are
								    logged, and the reply is appended as one line to ``file_path + '.replace'``.
								    The elapsed wall-clock time of the loop is logged at the end.

								    Args:
								        file_path: Path string of the input JSONL file; also the prefix of the
								            ``.replace`` output file.
								        output_folder: Accepted for the caller's sake; not used in this function.
								        logger: Object exposing ``info(message)``.

								    Returns:
								        None.
								    """
								    # The prompt (in Chinese) asks the model to rewrite answer_detail as a
								    # step-by-step derivation with explicit formulas and value substitution,
								    # and to reply with a single-line JSON holding id and answer_detail.
								    base_prompt = '该json中的answer_detail不够详细,请改写为详细内容,格式要求为分步骤推导，格式为："Step 1: ...\\nStep 2: ...\\nStep 3: ...;要求公式呈现 所有使用的公式需独立写出（如"F=ma"），不能隐含在文字中变量代入过程,展示具体数值代入公式的过程（如"9.8=5×a"）;latex公式使用双斜杠;返回具有id,answer_detail字段的新json,json不要换行,不需要其他回复'

								    # Parse the whole input before the timer starts.
								    records = list(read_jsonl(file_path))

								    started_at = time.time()
								    for record in records:
								        quest_detail = process_question(record)
								        ai_response = ask_question(base_prompt + quest_detail)

								        logger.info(f"问题: {quest_detail}")
								        logger.info(f"回答: {ai_response}")
								        logger.info("===================================")

								        # The output is re-opened in append mode for each record, and the
								        # reply is written verbatim (no JSON repair or re-serialization).
								        # Presumably ai_response is a str, since it is concatenated with
								        # '\n' -- verify against ask_question.
								        with open(file_path + '.replace', 'a', encoding='utf-8') as sink:
								            sink.write(ai_response + '\n')

								    finished_at = time.time()
								    logger.info(f"处理时间: {finished_at - started_at}秒")

								def read_jsonl(file_path):
								    """Lazily yield one parsed JSON value per non-blank line of a JSONL file.

								    Blank or whitespace-only lines (for example a trailing empty line) are
								    skipped instead of being handed to ``json.loads``, which would raise on
								    an empty string. Lines with real but malformed content still raise.

								    Args:
								        file_path: Path of the UTF-8 encoded JSONL file to read.

								    Yields:
								        The value decoded from each non-blank line, in file order.

								    Raises:
								        json.JSONDecodeError: If a non-blank line is not valid JSON.
								        OSError: If the file cannot be opened.
								    """
								    with open(file_path, 'r', encoding='utf-8') as f:
								        for line in f:
								            payload = line.strip()
								            if not payload:
								                # Nothing to decode on this line; it carries no record.
								                continue
								            yield json.loads(payload)


								# Normalization helper for a single record

								def process_question(json_line):
								    """Reduce one record to a minimal JSON string for the prompt.

								    Only three fields are kept: ``id`` (``None`` when absent), ``q_main``
								    (emitted under the key ``question``) and ``answer_detail``; the latter
								    two default to an empty string. Non-ASCII text is written as-is rather
								    than escaped.

								    Args:
								        json_line: Mapping with a ``get`` method, e.g. a dict decoded from
								            one JSONL line.

								    Returns:
								        A JSON string with the keys ``id``, ``question`` and
								        ``answer_detail``, in that order.
								    """
								    # NOTE: whitespace/number compression of the question text was sketched
								    # here previously but is disabled; the text is passed through unchanged.
								    minimal = {'id': json_line.get('id')}
								    minimal['question'] = json_line.get('q_main', '')
								    minimal['answer_detail'] = json_line.get('answer_detail', '')
								    return json.dumps(minimal, ensure_ascii=False)


								# Write a JSONL file

								def write_jsonl(file_path, data_list):
								    """Write each item of ``data_list`` as one JSON line to ``file_path``.

								    The file is opened in ``'w'`` mode, so existing content is replaced.
								    Non-ASCII characters are written as-is rather than escaped.

								    Args:
								        file_path: Destination path; written as UTF-8 text.
								        data_list: Iterable of JSON-serializable items.

								    Returns:
								        None.
								    """
								    with open(file_path, 'w', encoding='utf-8') as sink:
								        # One serialized item plus a newline per entry, streamed in order.
								        sink.writelines(
								            json.dumps(entry, ensure_ascii=False) + '\n'
								            for entry in data_list
								        )