1. 适用于需要在数字前换行(空一行)的文本。 2. 读取文件夹中的 txt 文档,将处理后的内容写回原文件(原文件会被覆盖)。
文本预处理3-空一行
import os
import re
def add_empty_line_before_numbers(text):
    """Put a line break in front of every number in ``text`` except a leading one.

    A number is a maximal run of consecutive digits. A number that starts at
    the very beginning of ``text`` is left where it is; every other number is
    prefixed with a single newline character.

    Args:
        text: The string to process.

    Returns:
        A new string with a newline inserted before each qualifying number.
    """
    # (?<!^)  - skip a number sitting at the very start of the text.
    # (?<!\d) - only start a match on the first digit of a run; without this
    #           guard a leading multi-digit number such as "123" was split
    #           into "1\n23" because the match could begin at its second digit.
    pattern = re.compile(r'(?<!^)(?<!\d)(\d+)')
    return pattern.sub(r'\n\1', text)
def process_txt_files(folder_path):
    """Rewrite every ``.txt`` file directly inside ``folder_path`` in place.

    Each file is read as UTF-8, passed through
    ``add_empty_line_before_numbers``, and the result is written back to the
    same path, replacing the previous content.

    Args:
        folder_path: Directory whose immediate entries are scanned; the scan
            does not descend into subdirectories.

    Returns:
        None.
    """
    for filename in os.listdir(folder_path):
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(folder_path, filename)
        # A directory can also be named "*.txt"; opening it would raise and
        # abort the whole run, so only regular files are processed.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
        processed_content = add_empty_line_before_numbers(content)
        # Write the processed content back over the original file.
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(processed_content)
# Replace with the path of the folder that holds the .txt files to process.
# NOTE: the call below runs as soon as this file is executed and overwrites
# every .txt file in that folder in place.
folder_path = "C:\\Users\\lenovo\\Desktop"
process_txt_files(folder_path)
标签:原文件,一行,re,import,文本,预处理 From: https://blog.csdn.net/weixin_53389235/article/details/139832882