代码如下:
import subprocess
# Records are handled as bytes end to end: the subprocess pipe yields bytes
# on Python 3, and staying in bytes avoids any decode/encode round trip on
# data whose encoding this script does not know.
FIELD_SEPARATOR = b"^"

# A record must have at least this many "^"-separated fields to be considered.
MIN_FIELD_COUNT = 120


def extract_tls_record(line):
    """Return the trimmed output record for a matching input line, else None.

    A line matches when it has at least ``MIN_FIELD_COUNT`` fields, field 6
    equals ``6``, field 25 equals ``SSL`` and field 107 is non-empty.
    NOTE(review): presumably field 6 is the IP protocol number (6 = TCP) and
    field 25 the detected application protocol -- confirm against the schema.

    Args:
        line: One raw input record as ``bytes``, fields separated by ``^``.

    Returns:
        ``bytes`` made of fields 0-31, field 95 and fields 105-118 joined by
        ``^`` and terminated by a newline, or ``None`` if the line does not
        match the filter.
    """
    fields = line.split(FIELD_SEPARATOR)
    if len(fields) < MIN_FIELD_COUNT:
        return None
    if fields[6] != b"6" or fields[25] != b"SSL" or not fields[107]:
        return None
    kept = fields[:32] + [fields[95]] + fields[105:119]
    return FIELD_SEPARATOR.join(kept) + b"\n"


def dump_hour(day, hour):
    """Filter one hour of HDFS data into a local ``tls-metadata-*.txt`` file.

    Runs ``hdfs dfs -text`` on the hour's ``*.snappy`` files, streams its
    output line by line, and writes every record accepted by
    ``extract_tls_record`` to the output file for that day and hour.

    Args:
        day: Day of month used in both the HDFS path and the file name.
        hour: Hour of day used in both the HDFS path and the file name.
    """
    filename = "tls-metadata-2018-10-%02d-%02d.txt" % (day, hour)
    source_glob = "/data/2018/10/%02d/%02d/*.snappy" % (day, hour)
    # An argument list (no shell) hands the glob to hdfs untouched, so the
    # local shell never tries to expand "*.snappy" against the local disk.
    argv = ["hdfs", "dfs", "-text", source_glob]
    print(" ".join(argv))

    proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    try:
        with open(filename, "wb") as out:
            for line in proc.stdout:
                # Best effort per line: report the failure and keep going.
                try:
                    record = extract_tls_record(line)
                    if record is not None:
                        out.write(record)
                except Exception as e:
                    print(e, "fuck error", line)
    finally:
        # Close the pipe and reap the child so no zombie process is left.
        proc.stdout.close()
        returncode = proc.wait()

    if returncode != 0:
        # A failed read would otherwise only show up as a short/empty file.
        print("hdfs exited with status %d for %s" % (returncode, source_glob))


def main():
    """Export every hour of 2018-10-24 through 2018-10-29."""
    for day in range(24, 30):
        for hour in range(0, 24):
            dump_hour(day, hour)


if __name__ == "__main__":
    main()
标签:hdfs,arr,grep,02d,python,cmd,subprocess,print,line From: https://blog.51cto.com/u_11908275/6387654