1. 功能:去除文本中的阿拉伯数字。
2. 读取指定文件夹下的所有 txt 文档,并将处理后的内容写回原文件。
#文本预处理4-去除阿拉伯数字
import os
import re
# Compiled once at import time so every call reuses the same pattern object.
_DIGIT_RUN = re.compile(r'\d+')


def remove_numbers(text: str) -> str:
    """Return *text* with every run of decimal digits removed.

    For ``str`` input the regex digit class matches any Unicode decimal
    digit (for example full-width digits), not only ASCII ``0``-``9``.

    Args:
        text: The string to strip digits from.

    Returns:
        A new string equal to ``text`` with all digit characters deleted;
        every other character, including whitespace, is left as is.
    """
    return _DIGIT_RUN.sub('', text)
def process_txt_files(folder_path: str) -> None:
    """Strip digits from every ``.txt`` file directly inside *folder_path*.

    Each matching file is read as UTF-8, passed through ``remove_numbers``
    and overwritten in place. Sub-folders are not descended into.

    Args:
        folder_path: Directory whose ``.txt`` files are rewritten.

    Raises:
        OSError: If the folder cannot be listed, or a file cannot be read
            or written.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    for filename in os.listdir(folder_path):
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(folder_path, filename)
        # os.listdir also returns directories; one named "*.txt" cannot be
        # opened as a file, so skip anything that is not a regular file.
        if not os.path.isfile(file_path):
            continue
        # newline='' disables newline translation so the file's own line
        # endings survive the read/write round trip unchanged.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            content = file.read()
        processed_content = remove_numbers(content)
        # Nothing to remove: leave the file untouched instead of rewriting it.
        if processed_content == content:
            continue
        # Write the processed content back to the original file.
        with open(file_path, 'w', encoding='utf-8', newline='') as file:
            file.write(processed_content)
# Replace with the path of your own folder.
# NOTE: every .txt file directly inside this folder is overwritten in place
# as soon as this script runs.
folder_path = "C:\\Users\\lenovo\\Desktop"
process_txt_files(folder_path)
标签:txt,阿拉伯数字,filename,content,file,path,folder,文本,预处理 From: https://blog.csdn.net/weixin_53389235/article/details/139832946