You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
2.2 KiB
64 lines
2.2 KiB
import json
|
|
import sys
|
|
|
|
def load_b_file(filepath):
    """Load file B (one JSON value per line) into an ``id`` -> ``answer_detail`` dict.

    Blank lines are ignored. A line contributes an entry only when it parses
    to a JSON object that has both an ``id`` and an ``answer_detail`` key and
    whose ``id`` can be used as a dict key. Every other line is reported on
    stdout and skipped, so a single bad line never aborts the whole load.
    When the same id appears more than once, the last occurrence wins.

    Args:
        filepath: Path of the UTF-8 encoded file to read.

    Returns:
        dict: ``answer_detail`` values keyed by record ``id``.

    Raises:
        OSError: If the file cannot be opened.
    """
    b_data = {}
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # Keep the try body minimal: only the parse can raise JSONDecodeError.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"解析失败(文件 B):{e} -> {line[:50]}...")
                continue

            # Only keep the fields needed for the update. A line can be valid
            # JSON without being an object (number, string, array); the key
            # tests below are only meaningful for a dict, so check that first.
            if (not isinstance(data, dict)
                    or 'id' not in data
                    or 'answer_detail' not in data):
                print(f"警告:文件 B 中缺少必要字段: {line}")
                continue

            record_id = data['id']
            # JSON arrays/objects decode to list/dict, which are unhashable
            # and therefore cannot be used as dictionary keys.
            if isinstance(record_id, (dict, list)):
                print(f"警告:文件 B 中 id 不能作为键: {line}")
                continue

            b_data[record_id] = data['answer_detail']
    return b_data
|
|
|
|
def update_a_file(a_filepath, b_dict, output_filepath):
    """Copy file A to the output file, replacing ``answer_detail`` from ``b_dict``.

    File A is read line by line, one JSON value per line. For each JSON
    object whose ``id`` is a key of ``b_dict``, ``answer_detail`` is set to
    the mapped value (the key is added if it was absent). Every successfully
    parsed line is re-serialised and written to the output file; blank lines
    are dropped, and lines that fail to parse are reported on stdout and are
    not written.

    Lines holding valid JSON that is not an object, and objects whose ``id``
    is an array/object, are written through unchanged. An object without an
    ``id`` key is looked up under ``None``.

    NOTE: the output file is opened with mode ``'w'`` before anything is read,
    so ``output_filepath`` must not refer to the same file as ``a_filepath``.

    Args:
        a_filepath: Path of the UTF-8 encoded input file (file A).
        b_dict: Mapping of record id -> replacement ``answer_detail``.
        output_filepath: Path of the file to write; overwritten if it exists.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(a_filepath, 'r', encoding='utf-8') as fin, \
            open(output_filepath, 'w', encoding='utf-8') as fout:
        for line_num, line in enumerate(fin, start=1):
            line = line.strip()
            if not line:
                continue

            # Keep the try body minimal: only the parse can raise JSONDecodeError.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"第 {line_num} 行解析失败:{e} -> {line[:50]}...")
                continue

            # Only JSON objects can carry an id; other JSON values have no
            # .get() and are passed through untouched.
            if isinstance(data, dict):
                record_id = data.get('id')
                # list/dict ids are unhashable, so they can never be a key of
                # b_dict and testing membership would raise TypeError.
                hashable_id = not isinstance(record_id, (dict, list))
                # Replace answer_detail when B has an entry for this id.
                if hashable_id and record_id in b_dict:
                    data['answer_detail'] = b_dict[record_id]

            # ensure_ascii=False keeps non-ASCII text readable in the output.
            fout.write(json.dumps(data, ensure_ascii=False) + '\n')
|
|
|
|
def main():
    """Command-line entry point.

    Expects exactly three arguments after the script name: the path of
    file A, the path of file B, and the output path. With any other argument
    count it prints a usage line and exits with status 1. Otherwise it loads
    file B, rewrites file A into the output file, and reports progress on
    stdout.
    """
    argv = sys.argv
    if len(argv) != 4:
        print("用法: python replace_answer_detail.py <文件A路径> <文件B路径> <输出文件路径>")
        sys.exit(1)

    # Positional arguments: source file A, lookup file B, destination file.
    a_file, b_file, output_file = argv[1], argv[2], argv[3]

    print("正在加载文件 B ...")
    b_dict = load_b_file(b_file)

    loaded_count = len(b_dict)
    print(f"共加载 {loaded_count} 条记录。开始处理文件 A ...")
    update_a_file(a_file, b_dict, output_file)

    print("处理完成!结果已保存到:", output_file)
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|