第二次使用

文本操作

文本文件
二进制文件

 '''
 先将文件里的数值读取出来
 然后排序
 排序后再保存到新的文件中
 '''
 #读取文件中每一行的数据
 with open('test.txt','r') as fp:
     data = fp.readlines()
 #将每一行的数据去头去尾，然后转化成整型
 data = [int(line.strip()) for line in data]
 #将data中的数值进行排序
 data.sort()
 #将排好序的，做好格式，存储到指定的文件夹下
 data = [str(i)+'\n' for i in data]
 print(data)
 with open('data_asc.txt','w') as fp:
     fp.writelines(data)

 '''
 读取代码文件
 并为代码的每一行标序号
 '''
 filename = 'demo6.py'
 #读取demo6这个python程序文件
 with open(filename,'r') as fp:
     lines = fp.readlines()  #用读取行的方式，这样能够直接生成一个数组lines，这里面就是读取的值
 #获取，读取代码的最大长度
 maxLength = len(max(lines,key=len))
 #把获取来的行，去头去尾，右侧统一补上最长段落的长度（通过空格），
 lines = [line.rstrip().ljust(maxLength)+'#'+str(index) + '\n' for index,line in enumerate(lines)]
 #然后打开文件，把读取到的文件写入新的文件中去
 with open(filename[:-3]+'_new.py','w')as fp:
     fp.writelines(lines)

 '''
 批量修改文件文本格式为UTF-8
 '''
 
 from os import listdir
 
 from chardet import detect
 
 fns = (fn for fn in listdir() if fn.endswith('.txt'))
 for fn in fns:
     with open(fn,'rb+')as fp:
         content = fp.read()
         #判断编码格式
         encoding = detect(content)['encoding']
         #格式转换
         content = content.decode(encoding).encode('gbk')
         #写回文件
         fp.seek(0)
         fp.write(content)

1、二进制文件操作

 '''
 pickle模块的使用
 '''
 import pickle
 
 i = 13000000
 a = 99.056
 s = '中国人名123abc'
 list = [[1,2,3],[4,5,6],[7,8,9]]
 tu = (-5,10,8)
 coll = {4,5,6}
 dic = {'a':'apple','b':'banana','g':'grape','o':'orange'}
 data = [i,a,s,list,tu,coll,dic]
 #打开保存二进制文件的文本文件，
 with open('sample_pickle.dat','wb') as f:
     try:
         pickle.dump(len(data),f)
         #将data中的数据，逐一进行二进制的写入
         for item in data:
             pickle.dump(item,f)
     except:
         print('文件写入异常')

 '''
 写入的二进制文件的内容还原为Python对象
 '''
 import pickle
 
 with open('sample_pickle.dat','rb') as f:
     #load(f)获取f对象里面的文件的个数,因为我在制作二进制的时候，第一个参数是这里面文件的长度
     n = pickle.load(f)
     for i in range(n):
         x = pickle.load(f)
         print(x)

 '''
 使用struct模块
 encode  英文是编码的意思
 struct.pack('if?',n,b,s)这种方法是实现英文和数字的编码
 s.encode()  这种是实现字符串的编码方式
 '''
 import struct
 
 n = 1300000
 x = 3.14
 b = True
 s = 'abc测试'
 
 #序列化，得到字符串,就是在这里实现乱码的，struct.pack()的第一个参数必须是字符串‘
 sn = struct.pack('if?',n,x,b)
 with open('sample_struct.dat','wb') as fp:
     fp. write(sn)
     fp.write(s.encode())

 '''
 解码：
 整数、实数、逻辑判断用 struct.unpack()来解码
 字符串用 decode()来解码
 '''
 import struct
 
 with open('sample_struct.dat','rb') as fp:
     #整数、实属占用4个字节，逻辑值占用1个字节
     sn = fp.read(9)
     #解码：整数、实数、逻辑值的方法，解码之后(1300000, 3.140000104904175, True)
     tu = struct.unpack('if?', sn)
     n, x, bl = tu
     print('n=', n, 'x=', x, 'bl=', bl)
     #utf8编码的英文占用1个字节，汉字占用3个
     s = fp.read(9).decode()
     print('s=', s)

2、文件级操作

 '''
 os.path.isfile(fname)   判断给定路径是否为普通文件（而不是目录等）
 '''
 import os
 print([fname for fname in os.listdir() if os.path.isfile(fname) and fname.endswith('.py')])

 '''
 将目录下所有的.htm文件改名为.html
 '''
 import os
 
 file_list = [filename for filename in os.listdir() if filename.endswith('htm')]
 
 for filename in file_list:
     #制作新的名字
     newname = filename[:-4]+'.html'
     #重新改名
     os.rename(filename,newname)
     print(filename+"更名为："+newname)

 '''
 shutil模块
 复制文件
 压缩文件
 '''
 import shutil
 
 shutil.copyfile('D:\\a.html','D:\\2021year\copy_a.txt')
 #
 shutil.make_archive('D:\\test','zip','D:\\test','Dlls')

3、目录级操作

 '''
 获取当前目录位置
 创建目录
 改变当前工作目录
 列出当前目录中的信息
 移除目录
 '''
 import os
 print(os.getcwd())  #获取当前目录位置
 # os.mkdir(os.getcwd()+'\\temp')  #在当前文件目录下创建temp
 os.chdir(os.getcwd()+'\\temp')  #改变当前工作目录
 print(os.getcwd())
 # os.mkdir(os.getcwd()+'\\test')  #在当前目录下创建test
 print(os.listdir('..'))     #.表示当前路径    ..表示成列上级路径
 os.rmdir('test')    #移出目录

 '''
 os.path.isdir(path)    判断路径是否为目录
 遍历目录下所有的文件
 采用递归调用
 '''
 import os
 def vistDir(path):
     if not os.path.isdir(path):
         print('错误',path,'不是文件夹或则根本不存在')
         return
     for lists in os.listdir(path):
         #这里是联结合，文件名和文件
         sub_path = os.path.join(path,lists)
         print(sub_path)
         if os.path.isdir(sub_path):
             vistDir(sub_path)
 vistDir(r'D:\\不吃鹅肝酱')

 '''
 使用广度遍历目录下的文件
 '''
 from os import listdir
 from os.path import join, isfile, isdir
 
 
 def listDirWidthFirst(directory):
     '''广度遍历文件夹'''
     dirs = [directory]
     #如果还没有遍历过的文件夹，继续往下遍历
     while dirs:
         #遍历还没有遍历过的第一项
         #移出dirs中的第一个元素，赋予current
         current = dirs.pop(0)
         #遍历该文件夹，如果是文件就直接显示输出
         #如果是文件夹，输出显示后，标记为待遍历项，放入列表dirs尾部
         for subPath in listdir(current):
             path = join(current,subPath)
             if isfile(path):
                 print(path)
             elif isdir(path):
                 print(path)
                 dirs.append(path)
 # Demo call: breadth-first listing that starts at the D:\test directory.
 listDirWidthFirst('D:\\test')

 '''
 采用os.walk遍历文件里面的所有文件
 正如walk的单词意义走一样，他会走遍目录下的所有文件位置
 '''
 import os
 def visitDir2(path):
     if not os.path.isdir(path):
         print('Error:',path,'不是文件夹，或则不存在')
         return
     list_dirs = os.walk(path)
     #通过walk获取的对象，有三个参数：路径名、目录名、文件名,这里的目录名，还包括子目录下的文件
     for root,dirs,files in list_dirs:
         print(root,dirs,files)
         for d in dirs:
             print(os.path.join(root,d))
         for f in files:
             print(os.path.join(root,f))
 visitDir2(r'D:\test')

4、案例精选

 '''
 计算CRC32值
 下面代码分别是使用zlib和binascii模块方法
 计算任意字符串的CRC32值
 '''
 import zlib
 print(zlib.crc32('123'.encode()))
 print(zlib.crc32('111'.encode()))
 print(zlib.crc32('SDIBT'.encode()))
 import binascii
 print(binascii.crc32('SDIBT'.encode()))

 '''
 计算文本文件中最长行的长度
 '''
 with open('sample.txt',encoding='gbk') as fp:
     print(max(len(line.strip()) for line in fp))

 '''
 计算MD5的值，MD5可以用来判断文件发布之后是否被篡改
 对于完整性保护具有重要的意义
 也经常用于数字签名
 '''
 import hashlib
 print(hashlib.md5('123'.encode()).hexdigest())
 print(hashlib.md5('1234'.encode()).hexdigest())

 '''
 实现当前代码的MD5编码
 '''
 import hashlib
 import os.path
 import sys
 #sys.argv[0]表示当前文件的路径
 fileName = sys.argv[0]
 
 print(fileName)
 if os.path.isfile(fileName):
     with open(fileName,'rb') as fp:
         data = fp.read()
         print(type(data))
         print(hashlib.md5(data).hexdigest())

 '''
 判断一个图片是否是gif类型的
 '''
 def is_gif(fname):
     with open(fname,'rb') as fp:
         first4 = fp.read(4)
         print(first4)
     return first4 == b'GIF8'
 # Demo call: is_gif prints the header bytes of 'a.gif', then the boolean result is printed.
 print(is_gif('a.gif'))

 '''
 把指定文件夹中的所有文件的名字批量随机化，保持文件类型不变
 print(ascii_letters)    abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
 '''
 from os import listdir, rename
 from os.path import splitext, join
 from random import randint, choice
 from string import ascii_letters
 
 
 def randomFileName(directory):
     for fn in listdir(directory):
         #切分得到文件名和扩展名
         name,ext = splitext(fn)
         n = randint(5,20)
         #生成随机字符串作为新文件名字
         newName = ''.join((choice(ascii_letters) for i in range(n)))
         #修改文件名字
         rename(join(directory,fn),join(directory,newName+ext))
 randomFileName('D:\不吃鹅肝酱\img')

 '''
 使用xlwt模块写入Excel文件
 看不懂，太多不知道的类的调用了！！！
 '''
 
 from xlwt import *
 
 book = Workbook()
 #获取sheet1，也就是获取一个excel的表单
 sheet1 = book.add_sheet("First")
 
 a1 = Alignment()
 a1.horz = Alignment.HORZ_CENTER
 a1.vert = Alignment.VERT_CENTER
 
 borders = Borders()
 borders.bottom = Borders.THIN
 
 style = XFStyle()
 style.alignment = a1
 style.borders = borders
 
 row0 = sheet1.row(0)
 row0.write(0,'test',style=style)
 book.save(r'D:\test.xls')

 '''
 使用xlrd模块读入excel文件
 '''
 import xlrd
 #找到对应文件
 book = xlrd.open_workbook(r'D:\test.xls')
 #定位到相应的表单
 sheet1 = book.sheet_by_name('First')
 #获取表单的第0行信息
 row0 = sheet1.row(0)
 print(row0[1].value)

 '''
 使用Pywin32操作Excel文件，
 反正要Pywin32这个模块，下载成功了，就是还是报错，
 反正下面的代码也看不懂，算了！！！
 '''
 import Pywin32
 import win32com
 
 xlApp = win32com.client.Dispatch('Excel.Application')
 xlBook = xlApp.Workbooks.Open('D:\\1.xls')
 xlSht = xlBook.worksheets('sheet')
 aaa = xlSht.Cells(1,2).value
 xlSht.Cells(2,3).Value = aaa
 xlBook.Close(SaveChanges=1)

模块无法下载，代码无法运行

 '''
 检查Word文档连续重复的字
 这里解决不了了，脑壳痛！！！
 '''
 import sys
 from win32com import client
 
 filename = r'D:\test.doc'   #找到文件位置
 word = client.Dispatch('Word.Application')  #打开window上的word应用
 doc = word.Document.Open(filename)  #采用D:\test.doc文件
 content = str(doc.Content)  #获取文件中的内容
 print(content)
 doc.Close() #关闭文档
 word.Quit() #退出应用
 
 repeateWords = []
 lens = len(content)
 for i in range(lens-2):
     ch,ch1,ch2 = content[i:i+3]
     if('\u4e00'<=ch<='\u9fa5') or ch in('，','。','、'):
         if ch == ch1 and ch+ch1 not in repeateWords:
             print(ch+ch1)
             repeateWords.append(ch+ch1)
         elif ch==ch2 and ch+ch1+ch2 not in repeateWords:
             print(ch+ch1+ch2)
             repeateWords.append(ch+ch1+ch2)

 '''
 python-docx封装了对docx格式word文档的操作
 使用pip安装之后，可以使用下面代码实现类似功能
 '''
 from docx import Document
 
 #找到对应的文件
 doc = Document('test.doc')
 
 #获取文档中的段落，并且联接好
 contents = ''.join((p.text for p in doc.paragraphs))
 
 words = set()
 
 for index,ch in enumerate(contents[:-2]):
     #如果当前字符等于后面的一个字符，或则，当前字符等于后面的第二个字符，word就捕捉着这相连的几个字符
     if ch==contents[index+1] or ch == contents[index+2]:
         word = contents[index:index+3]
         #将捕捉来的字符，如果不再words中，就添加进元组中
         if word not in words:
             words.add(word)
             print(word)

 '''
 编写程序，进行文件夹的增量备份
 '''
 import os
 import filecmp
 import shutil
 import sys
 
 
 def autoBackup(scrDir,dstDir):
     if(not os.path.isdir(scrDir) or (not os.path.isdir(dstDir)) or (os.path.abspath(scrDir)!=scrDir) or (os.path.abspath(dstDir)!=dstDir)):
         usage()
     for item in os.listdir(scrDir):
         scrItem = os.path.join(scrDir,item)
         dstItem = scrItem.replace(scrDir,dstDir)
         if os.path.isdir(scrItem):
             #创建新增的文件夹，保证目标文件夹的结构和原始文件夹一致
             if not os.path.exists(dstItem):
                 os.makedirs(dstItem)
                 print('创建文件夹'+dstItem)
             autoBackup(scrItem,dstItem)
         elif os.path.isfile(scrItem):
             #只复制新增或修改过的文件
             if(not os.path.cmp(scrItem,dstItem,shallow=False)):
                 shutil.copyfile(scrItem,dstItem)
                 print('file:'+scrItem+'==>'+dstItem)
 def usage():
     print('scrDir和dstDir必须存在绝对路径')
     print('For example:{0} C:\\aaa'.format(sys.argv[0]))
     sys.exit(0)
 if __name__ == '__main__':
     if len(sys.argv) != 3:
         usage()
     scrDir,dstDir = sys.argv[1],sys.argv[2]
     autoBackup(scrDir,dstDir)

标签：fp,文件,使用,import,print,path,第二次,os
From： https://www.cnblogs.com/buchijiuminvhai/p/17008597.html

文本操作

1、二进制文件操作

2、文件级操作

3、目录级操作

4、案例精选

模块无法下载，代码无法运行

相关文章

赞助商

阅读排行