import pandas as pd def extract_characters(file_path, sheet_name, column_name): # 读取Excel文件 df = pd.read_excel(file_path, sheet_name=sheet_name) # 创建两个新的列 df['中文'] = '' df['其他字符'] = '' # 遍历每行数据 for index, row in df.iterrows(): text = str(row[column_name]) # 获取指定列的值 chinese = '' other = '' # 遍历每个字符 for char in text: if '\u4e00' <= char <= '\u9fff': # 判断当前字符是否为中文字符 chinese += char # 如果是中文字符,则添加到中文字符集合中 else: other += char # 如果不是中文字符,则添加到其他字符集合中 df.at[index, '中文'] = chinese # 将中文字符集合添加到新的“中文”列中 df.at[index, '其他字符'] = other # 将其他字符集合添加到新的“其他字符”列中 # 返回处理后的DataFrame对象 return df # 测试示例 file_path = r'测试.xlsx' sheet_name = 'Sheet1' column_name = '店铺销售sku' result_df = extract_characters(file_path, sheet_name, column_name) result_df.to_excel('result.xlsx', index=False)
# Tags: 字符, 中文, sheet, name, 分列, Python, text, df
# From: https://www.cnblogs.com/lcl-cn/p/17783106.html