import glob
import json
from pathlib import Path
import time

from components.doubao_process import ask_question
# from components.jsonl_repair_reindex import process_json

def get_spilit_file_list(file_path):
    """Return the paths of the ``.jsonl`` files located directly in a directory.

    Args:
        file_path: Directory path as a string; ``'/*.jsonl'`` is appended to it
            to build the glob pattern.

    Returns:
        A list of matching path strings, in whatever order ``glob`` yields them.
    """
    # NOTE: the pattern has no ``**`` component, so ``recursive=True`` does not
    # make the search descend into subdirectories; only direct children match.
    pattern = file_path + '/*.jsonl'
    return glob.glob(pattern, recursive=True)


def process_jsonl_file(file_path, output_folder, logger):
    """Ask the model to rewrite ``answer_detail`` for every record of a JSONL file.

    Each record is reduced to a compact JSON payload, appended to a fixed
    instruction prompt, and sent through ``ask_question``. Every reply is
    appended as one line to ``file_path + '.replace'``.

    Args:
        file_path: Path of the input JSONL file; also the prefix of the
            output file name.
        output_folder: Accepted but not used by this function.
        logger: Object exposing ``info(message)``; receives the question, the
            reply, a separator line, and the total elapsed time.
    """
    instruction = (
        '该json中的answer_detail不够详细,请改写为详细内容,'
        '格式要求为分步骤推导，格式为："Step 1: ...\\nStep 2: ...\\nStep 3: ...;'
        '要求公式呈现 所有使用的公式需独立写出（如"F=ma"），不能隐含在文字中变量代入过程,'
        '展示具体数值代入公式的过程（如"9.8=5×a"）;'
        'latex公式使用双斜杠;'
        '返回具有id,answer_detail字段的新json,json不要换行,不需要其他回复'
    )

    # The whole input is parsed up front, before the timer starts.
    records = list(read_jsonl(file_path))

    started_at = time.time()
    for record in records:
        payload = process_question(record)
        ai_response = ask_question(instruction + payload)

        logger.info(f"问题: {payload}")
        logger.info(f"回答: {ai_response}")
        logger.info("===================================")

        # The output file is reopened in append mode for every reply, so each
        # reply is closed out to disk before the next request is made.
        # The reply is written verbatim (presumably a str; confirm against
        # ask_question).
        with open(file_path + '.replace', 'a', encoding='utf-8') as sink:
            sink.write(ai_response + '\n')

    elapsed = time.time() - started_at
    logger.info(f"处理时间: {elapsed}秒")
def read_jsonl(file_path):
    """Lazily yield one parsed JSON value per non-blank line of a JSONL file.

    Args:
        file_path: Path of a UTF-8 encoded file with one JSON document per line.

    Yields:
        The object decoded from each line (a dict for object lines).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            text = line.strip()
            if not text:
                # Blank lines (e.g. a trailing empty line at the end of the
                # file) carry no record; json.loads('') would raise on them.
                continue
            yield json.loads(text)


# Normalization helper: reduce a record to a minimal JSON string
def process_question(json_line):
    """Serialize the fields of interest from one record as a compact JSON string.

    Args:
        json_line: Mapping for a single record. ``q_main`` and
            ``answer_detail`` default to ``''`` when missing; ``id`` defaults
            to ``None``.

    Returns:
        A JSON string with the keys ``id``, ``question`` and ``answer_detail``,
        with non-ASCII characters kept unescaped.
    """
    record = json_line
    # Field values are passed through unchanged; no text compression is applied.
    minimal = dict(
        id=record.get('id'),
        question=record.get('q_main', ''),
        answer_detail=record.get('answer_detail', ''),
    )
    return json.dumps(minimal, ensure_ascii=False)
    
# Write records to a JSONL file
def write_jsonl(file_path, data_list):
    """Write each item of ``data_list`` as one JSON line, replacing the file.

    Args:
        file_path: Destination path; opened in ``'w'`` mode with UTF-8
            encoding, so existing content is overwritten.
        data_list: Iterable of JSON-serializable items.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        # One serialized object per line, non-ASCII characters left unescaped.
        out.writelines(
            json.dumps(entry, ensure_ascii=False) + '\n' for entry in data_list
        )