def 设置表格网格线为黑色实线(table_object: object):
    """Give every cell edge of a table a 0.5 pt solid black border.

    A ``w:tblBorders`` element is built with the four outer edges and the two
    inside grid lines, then appended to the table's ``tblPr`` properties.

    Args:
        table_object: a python-docx table (anything exposing ``_tbl.tblPr``).

    Returns:
        The same ``table_object``; the caller may ignore the return value.
    """
    # "sz" is written as 4, which the original note describes as 0.5 pt.
    edge_attrs = {"sz": 4, "val": "single", "color": "#000000"}

    borders = docx.oxml.OxmlElement('w:tblBorders')
    for tag in ('bottom', 'top', 'left', 'right', 'insideV', 'insideH'):
        edge = docx.oxml.OxmlElement(f'w:{tag}')
        for attr, value in edge_attrs.items():
            edge.set(docx.oxml.ns.qn(f'w:{attr}'), str(value))
        borders.append(edge)
    table_object._tbl.tblPr.append(borders)
    return table_object


def 设置标题样式为黑色宋体(heading_object: object):
    """Left-align a heading paragraph and render its runs in black SimSun (宋体).

    Args:
        heading_object: a python-docx paragraph, e.g. the result of
            ``Document.add_heading``.

    Returns:
        The same ``heading_object``.
    """
    heading_object.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.LEFT
    for run in heading_object.runs:
        run.font.name = u'宋体'
        # font.name alone leaves the East Asian font untouched, so Chinese
        # text would keep the style's default font. Setting font.name first
        # guarantees that rPr/rFonts exist before the attribute is added.
        run._element.rPr.rFonts.set(docx.oxml.ns.qn('w:eastAsia'), u'宋体')
        run.font.color.rgb = docx.shared.RGBColor(0, 0, 0)
    return heading_object


def word(output_path=r"C:\Users\gyj\Downloads\temp.docx"):
    """Write the module-level ``翻译结果`` dictionary into a .docx file.

    Layout: the NAME as a level-2 heading, a "描述" section holding the
    SYNOPSIS and DESCRIPTION lines, a "参数" section holding a two-column
    table (parameter name, parameter text), and an "示例" section with one
    level-4 heading per example followed by its lines.

    Args:
        output_path: where the document is saved. Defaults to the path that
            was previously hard-coded.
    """
    my_word_doc = docx.Document()  # start from an empty document

    设置标题样式为黑色宋体(my_word_doc.add_heading(翻译结果["NAME"], level=2))

    设置标题样式为黑色宋体(my_word_doc.add_heading("描述", level=3))
    for section in ("SYNOPSIS", "DESCRIPTION"):
        for line in 翻译结果[section].split("\n"):
            my_word_doc.add_paragraph(line)

    设置标题样式为黑色宋体(my_word_doc.add_heading("参数", level=3))
    parameters = 翻译结果["PARAMETERS"]
    table = my_word_doc.add_table(rows=len(parameters), cols=2)
    设置表格网格线为黑色实线(table)
    left = docx.enum.text.WD_PARAGRAPH_ALIGNMENT.LEFT
    for index, (key, value) in enumerate(parameters.items()):
        # The lines of a key/value are concatenated without a separator,
        # exactly as the former per-line "cell.text += line" loop did.
        for column, text in enumerate((key, value)):
            cell = table.cell(index, column)
            cell.text = "".join(text.split("\n"))
            for paragraph in cell.paragraphs:
                paragraph.alignment = left

    设置标题样式为黑色宋体(my_word_doc.add_heading("示例", level=3))
    for key, value in 翻译结果["Example"].items():
        # Strip trailing whitespace/newline instead of blindly dropping the
        # last character, which truncated titles that had no newline.
        设置标题样式为黑色宋体(my_word_doc.add_heading(key.rstrip(), level=4))
        for line in value.split("\n"):
            my_word_doc.add_paragraph(line)

    my_word_doc.save(output_path)
实际上我是拿来转换PowerShell cmdlet命令的帮助txt文件为word文档的。其中带了翻译。简单记录下。
import docx  # pip install python-docx
import re
import json
import requests
import time
import hashlib
from urllib.parse import urlencode
import random
import copy

# Parsed help text and its translation share the same dictionary layout.
帮助文件解析结果 = {"NAME": "", "SYNOPSIS": "", "DESCRIPTION": "", "PARAMETERS": {}, "Example": {}}
翻译结果 = {"NAME": "", "SYNOPSIS": "", "DESCRIPTION": "", "PARAMETERS": {}, "Example": {}}

# Section bodies are indented by 4 spaces, parameter descriptions by 8.
# NOTE(review): the published listing had its whitespace collapsed; the 4/8
# widths are reconstructed from the line[0:4] / line[0:8] slices it compared.
_INDENT = " " * 4
_EXAMPLE_HEADER = re.compile(r'^\s*-*\s*Example')
_EXAMPLE_TITLE = re.compile(r"^\s*-*\s*([\w,\.: ]+) *-*")


def 解析PowerShell命令的帮助文本(文件路径: str):
    """Parse a PowerShell cmdlet help text file into ``帮助文件解析结果``.

    Recognised sections are NAME, SYNOPSIS, DESCRIPTION, PARAMETERS and the
    "Example" headers. Results are accumulated into the module-level
    dictionary, so parsing a second file appends to the first one's text.

    Args:
        文件路径: path of the UTF-8 encoded help text file.
    """
    with open(文件路径, mode='r', encoding='utf-8') as fd_help:
        line = fd_help.readline()
        while line:
            section = line.strip()
            if section == "NAME":
                帮助文件解析结果['NAME'] = fd_help.readline().strip()
                # Advance explicitly: reusing the stripped name as the loop
                # variable ended parsing whenever the name line was blank.
                line = fd_help.readline()
            elif section in ("SYNOPSIS", "DESCRIPTION"):
                line = fd_help.readline()
                while line.startswith(_INDENT):
                    帮助文件解析结果[section] += line.lstrip()
                    line = fd_help.readline()
            elif section == "PARAMETERS":
                line = fd_help.readline()
                while line.startswith(_INDENT):
                    参数名 = line.lstrip()
                    帮助文件解析结果['PARAMETERS'][参数名] = ""
                    line = fd_help.readline()
                    while line.startswith(_INDENT * 2):
                        帮助文件解析结果['PARAMETERS'][参数名] += line.lstrip()
                        line = fd_help.readline()
                    # An indented example header must not be taken for a
                    # parameter name; leave it for the Example branch.
                    if _EXAMPLE_HEADER.match(line):
                        break
            elif _EXAMPLE_HEADER.match(line):
                # From the first example header on, the rest of the file is
                # treated as example content.
                title = ""
                while line:
                    if _EXAMPLE_HEADER.match(line):
                        # rstrip()'s result used to be discarded, leaving
                        # trailing whitespace and the newline in the key.
                        title = _EXAMPLE_TITLE.sub(r"\1", line).rstrip()
                        帮助文件解析结果["Example"][title] = ""
                    else:
                        帮助文件解析结果["Example"][title] += line.lstrip()
                    line = fd_help.readline()
            else:
                line = fd_help.readline()

    # Whitespace-only lines produce empty keys; drop them.
    帮助文件解析结果.pop("", None)
    帮助文件解析结果["PARAMETERS"].pop("", None)
    帮助文件解析结果["Example"].pop("", None)


# The translation is obtained by posting to the Youdao web translator.
#
# Captured form fields:
#   i: 你好
#   from: AUTO
#   to: AUTO
#   smartresult: dict
#   client: fanyideskweb
#   salt: 16643765479061   millisecond timestamp followed by a random digit
#                          0-9; JS: r + parseInt(10 * Math.random(), 10),
#                          where r is the timestamp string
#   sign: 1d69ce8f7c6258243e573e31e29e0012   signature, computed as below
#   lts: 1664376547906     millisecond timestamp
#   bv: 42c8b36dd7d61c619e7b1dc11e44d870     constant per device:
#                          md5(User-Agent), e.g. md5("5.0 (Windows NT 10.0;
#                          Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)
#                          Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53")
#   doctype: json
#   version: 2.1
#   keyfrom: fanyi.web
#   action: FY_BY_REALTlME
#
# How the site's JavaScript computes the signature:
#   define("newweb/common/service", ["./utils", "./md5", "./jquery-1.7"], function(e, t) {
#       var n = e("./jquery-1.7");
#       e("./utils");
#       e("./md5");
#       var r = function(e) {
#           var t = n.md5(navigator.appVersion)
#             , r = "" + (new Date).getTime()
#             , i = r + parseInt(10 * Math.random(), 10);
#           return {
#               ts: r,
#               bv: t,
#               salt: i,
#               sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5")
#           }
#       };
# Here e is the text to translate and i is the salt. The trailing secret is
# presumably generated by the server and may change; re-check the site's JS
# whenever requests start failing. Overall the form data comes from the
# site's "generateSaltSign" function.


def youdao_translate(The_translated_string: str):
    """Translate a string through the Youdao web endpoint.

    Args:
        The_translated_string: the text to translate; may contain several
            lines.

    Returns:
        The translated text as one string, or ``"errorCode = <n>"`` when the
        service reports an error. An empty input yields an empty string
        (it used to yield a dict, which broke callers expecting text).
    """
    if The_translated_string == "":
        return ""
    url = r'https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    User_Agent = "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.37"
    header = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": "OUTFOX_SEARCH_USER_ID=1135160796@10.108.162.134; OUTFOX_SEARCH_USER_ID_NCOO=775555146.507473; JSESSIONID=aaaQ2GYK5N-ozb24rKNcy; SESSION_FROM_COOKIE=unknown; DICT_UGC=be3af0da19b5c5e6aa4e17bd8d90b28a|; JSESSIONID=abcPzon0RcZqc7GltuAgy; ___rl__test__cookies=1665366515354",
        "Host": "fanyi.youdao.com",
        "Origin": "https://fanyi.youdao.com",
        "Referer": "https://fanyi.youdao.com/",
        "sec-ch-ua": """\"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105"\"""",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": User_Agent,
        "X-Requested-With": "XMLHttpRequest",
    }

    timestamp = str(round(time.time() * 1000))    # millisecond timestamp
    salt = timestamp + str(random.randint(0, 9))  # timestamp + random digit
    # sign = md5("fanyideskweb" + text + salt + secret), as in the site's JS.
    sign_str = "fanyideskweb" + The_translated_string + salt + "Ygy_4c=r#e#4EX^NUGUc5"
    sign = hashlib.md5(str.encode(sign_str)).hexdigest()
    bv = hashlib.md5(str.encode(User_Agent)).hexdigest()  # md5 of the User-Agent
    cookies = {
        "OUTFOX_SEARCH_USER_ID": "1135160796@10.108.162.134",
        "OUTFOX_SEARCH_USER_ID_NCOO": "775555146.507473",
        "SESSION_FROM_COOKIE": "unknown",
        "DICT_UGC": "be3af0da19b5c5e6aa4e17bd8d90b28a|",
        # The dict used to list JSESSIONID twice; only this second value was
        # ever effective.
        "JSESSIONID": "abcPzon0RcZqc7GltuAgy",
        "___rl__test__cookies": "1665366515354",
    }
    data = {
        "i": The_translated_string,
        # AUTO lets the service detect the direction; use "zh-CHS" / "en"
        # to force Chinese -> English.
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "lts": timestamp,
        "bv": bv,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_CLICKBUTTION",
    }
    # A timeout keeps an unresponsive server from blocking the run forever.
    result = requests.post(url, data=urlencode(data), cookies=cookies,
                           headers=header, timeout=30)
    json_result = json.loads(result.text)
    if json_result["errorCode"]:
        return "errorCode = " + str(json_result["errorCode"])

    # translateResult holds one list per source paragraph; the service may
    # split a paragraph further into segments.
    # NOTE(review): the newline is emitted once per paragraph here; the
    # flattened listing does not show which loop it belonged to - confirm.
    return "".join(
        "".join(segment['tgt'] for segment in paragraph) + "\n"
        for paragraph in json_result["translateResult"]
    )


# Example:
#   str_ = "你好\n世界\n我来了\n哈哈"
#   print(youdao_translate(str_))
# prints the translated lines as a single newline-separated string.


def 执行翻译():
    """Return a translated deep copy of ``帮助文件解析结果``.

    SYNOPSIS, DESCRIPTION, every non-empty parameter description, every
    non-empty example body and every example title are sent through
    ``youdao_translate``. NAME and the parameter names are left as they are.

    Returns:
        A new dictionary; the module-level ``翻译结果`` is not modified.
    """
    translated = copy.deepcopy(帮助文件解析结果)
    translated["SYNOPSIS"] = youdao_translate(translated["SYNOPSIS"])
    translated["DESCRIPTION"] = youdao_translate(translated["DESCRIPTION"])
    for section in ("PARAMETERS", "Example"):
        entries = translated[section]
        for key, value in entries.items():
            if value != "":
                entries[key] = youdao_translate(value)
    # Example titles are translated too, so the dict is rebuilt under new keys.
    translated["Example"] = {
        youdao_translate(key): value
        for key, value in translated["Example"].items()
        if key != ""
    }
    return translated


def 设置表格网格线为黑色实线(table_object: object):
    """Give every cell edge of a table a 0.5 pt solid black border.

    Args:
        table_object: a python-docx table (anything exposing ``_tbl.tblPr``).

    Returns:
        The same ``table_object``; the caller may ignore the return value.
    """
    # "sz" is written as 4, which the original note describes as 0.5 pt.
    edge_attrs = {"sz": 4, "val": "single", "color": "#000000"}

    borders = docx.oxml.OxmlElement('w:tblBorders')
    for tag in ('bottom', 'top', 'left', 'right', 'insideV', 'insideH'):
        edge = docx.oxml.OxmlElement(f'w:{tag}')
        for attr, value in edge_attrs.items():
            edge.set(docx.oxml.ns.qn(f'w:{attr}'), str(value))
        borders.append(edge)
    table_object._tbl.tblPr.append(borders)
    return table_object


def 设置标题样式为黑色宋体(heading_object: object):
    """Left-align a heading paragraph and render its runs in black SimSun (宋体).

    Args:
        heading_object: a python-docx paragraph, e.g. the result of
            ``Document.add_heading``.

    Returns:
        The same ``heading_object``.
    """
    heading_object.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.LEFT
    for run in heading_object.runs:
        run.font.name = u'宋体'
        # font.name alone leaves the East Asian font untouched, so Chinese
        # text would keep the style's default font. Setting font.name first
        # guarantees that rPr/rFonts exist before the attribute is added.
        run._element.rPr.rFonts.set(docx.oxml.ns.qn('w:eastAsia'), u'宋体')
        run.font.color.rgb = docx.shared.RGBColor(0, 0, 0)
    return heading_object


def word(output_path=r"C:\Users\xxx\Downloads\temp.docx"):
    """Write the module-level ``翻译结果`` dictionary into a .docx file.

    Layout: the NAME as a level-2 heading, a "描述" section holding the
    SYNOPSIS and DESCRIPTION lines, a "参数" section holding a two-column
    table (parameter name, parameter text), and an "示例" section with one
    level-4 heading per example followed by its lines.

    Args:
        output_path: where the document is saved. Defaults to the path that
            was previously hard-coded.
    """
    my_word_doc = docx.Document()  # start from an empty document

    设置标题样式为黑色宋体(my_word_doc.add_heading(翻译结果["NAME"], level=2))

    设置标题样式为黑色宋体(my_word_doc.add_heading("描述", level=3))
    for section in ("SYNOPSIS", "DESCRIPTION"):
        for line in 翻译结果[section].split("\n"):
            my_word_doc.add_paragraph(line)

    设置标题样式为黑色宋体(my_word_doc.add_heading("参数", level=3))
    parameters = 翻译结果["PARAMETERS"]
    table = my_word_doc.add_table(rows=len(parameters), cols=2)
    设置表格网格线为黑色实线(table)
    left = docx.enum.text.WD_PARAGRAPH_ALIGNMENT.LEFT
    for index, (key, value) in enumerate(parameters.items()):
        # The lines of a key/value are concatenated without a separator,
        # exactly as the former per-line "cell.text += line" loop did.
        for column, text in enumerate((key, value)):
            cell = table.cell(index, column)
            cell.text = "".join(text.split("\n"))
            for paragraph in cell.paragraphs:
                paragraph.alignment = left

    设置标题样式为黑色宋体(my_word_doc.add_heading("示例", level=3))
    for key, value in 翻译结果["Example"].items():
        # Titles may or may not end with a newline (translated titles do);
        # strip whitespace rather than dropping the last character.
        设置标题样式为黑色宋体(my_word_doc.add_heading(key.rstrip(), level=4))
        for line in value.split("\n"):
            my_word_doc.add_paragraph(line)

    my_word_doc.save(output_path)


if __name__ == "__main__":
    解析PowerShell命令的帮助文本(r"C:\Users\xxx\Downloads\Get-CimClass.txt")
    # Parsing Youdao's newer response format proved too awkward, so the
    # translation step is disabled: the document gets the original text and
    # can then be run through a document translator. To enable it, use
    #     翻译结果 = 执行翻译()
    翻译结果 = 帮助文件解析结果
    word()
标签:docx,word,python,翻译,cell,简单,table,line,heading From: https://www.cnblogs.com/love-DanDan/p/17368654.html