V1.0(批量处理有待完善,目前一次只能处理一个文件)
代码如下:
import re
import openpyxl
# Header line that opens a speaker turn, e.g. "说话人 1 00:03" or
# "说话人 1 01:02:03".  The optional third time field keeps three-field
# stamps intact; a two-field pattern would truncate them to "01:02".
_SPEAKER_HEADER = re.compile(r'说话人 (\d+) (\d+:\d+(?::\d+)?)')


def _iter_dialogue_rows(lines):
    """Yield one ``[speaker, number, time, text]`` row per speaker turn.

    A turn starts at a line matching ``_SPEAKER_HEADER`` and collects every
    following non-header line (whitespace-stripped) until the next header or
    the end of input.  Lines that appear before the first header are ignored.
    """
    number = None
    timestamp = None
    text_lines = []

    for line in lines:
        header = _SPEAKER_HEADER.match(line)
        if header:
            # A new header closes the turn collected so far.
            if number is not None:
                yield [number, number, timestamp, '\n'.join(text_lines).strip()]
            number, timestamp = header.groups()
            text_lines = []
        elif number is not None:
            text_lines.append(line.strip())

    # Flush the final turn, which has no following header to close it.
    if number is not None:
        # Each collected line is already stripped, so the outer strip() only
        # removes leading/trailing blank separator lines, not interior ones.
        yield [number, number, timestamp, '\n'.join(text_lines).strip()]


def extract_and_save_dialogues_with_headers_to_excel(input_file, output_file):
    """Convert a speaker-tagged transcript text file into an Excel workbook.

    The input is read line by line.  Each line starting with
    ``说话人 <digits> <time>`` opens a new turn; the lines after it form the
    turn's text.  The workbook gets a header row followed by one row per
    turn with the columns 说话人, 数字, 时间, 文本.

    NOTE(review): the header pattern captures a single number, and both the
    "说话人" and "数字" columns are filled with it (as before).  Confirm
    whether the speaker column was meant to hold something else.

    Args:
        input_file: Path of the UTF-8 transcript (a leading BOM is tolerated).
        output_file: Path of the ``.xlsx`` file to write.

    Returns:
        None.  The workbook is saved to ``output_file`` and a completion
        message is printed.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.append(["说话人", "数字", "时间", "文本"])

    # 'utf-8-sig' drops a leading BOM; with plain 'utf-8' the BOM stays glued
    # to the first line, the first header fails to match, and that turn is
    # silently lost.
    with open(input_file, 'r', encoding='utf-8-sig') as file:
        for row in _iter_dialogue_rows(file):
            sheet.append(row)

    workbook.save(output_file)
    print(f"提取并保存对话、说话人、数字、时间和文本到 {output_file} 完成")
# Example run: convert the transcript "xu.txt" into the workbook "2.xlsx".
extract_and_save_dialogues_with_headers_to_excel(
    input_file="xu.txt",
    output_file="2.xlsx",
)