# Source: https://www.cnblogs.com/lionmxs/p/17428897.html
# Tags: xml, 万年历 (perpetual calendar), text, str1, 爬取 (scraping), headers, r4str, print, find
import datetime
import requests
import xml.etree.ElementTree as ET
# Query parameters for the disabled Baidu search request (r1) further down;
# no live request in this file uses them.
kw = dict(wd='python教程')

# url1 is the page the live requests fetch; url2 is a hard-coded dated page
# that no live code in this file references.
url1 = 'https://rili.ximizi.com/jinrijishi.php'
url2 = 'https://www.xingzuo5.net/calendar/2025/2025-12-22.html'

# Browser-like request headers.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/86.0.4240.198 Safari/537.36'
    ),
}

# Today's date; its str() form is what later gets stored in the XML file.
tdate = datetime.date.today()

# Disabled: build url2 from today's date instead of hard-coding it.
# tyear = str(tdate.year)
# tmon = str(tdate.month)
# tday =str(tdate.day)
# sday =tyear+'-'+tmon+'-'+tday
# url ='https://www.xingzuo5.net/calendar'
# url2 =url+"/"+tyear+"/"+sday+'.html'

# Disabled: the "safe" way to turn a raw cookie string (temp) into a dict.
# cooklist = temp.split('; ')
# cookies = {}
# for cookie in cooklist:
# cookies[cookie.split('=')[0]] = cookie.split('=')[-1]
# print(cookies)

# Disabled: dict-comprehension variant of the same idea (as written it is
# missing the enclosing braces, so it would not parse if re-enabled).
# cookies_dic = cookies[cookie.split('=')[0]]:cookie.split('=')[-1] for cookie in cooklist.split('; ')
# Seconds to wait on the server before giving up. requests applies no timeout
# unless one is passed, so an unresponsive host would hang the script forever.
REQUEST_TIMEOUT = 10

# A single header-less GET of url1. The original issued three identical
# header-less GETs (response, r2, r4); they now share one response object.
response = requests.get(url1, timeout=REQUEST_TIMEOUT)
# Fail fast on an HTTP error status instead of slicing an error page at fixed
# offsets and writing the result into 2.xml.
response.raise_for_status()
r2 = r4 = response

# The same page requested with the browser-like User-Agent. Nothing below
# reads this response; it is kept so the name `res` stays defined.
res = requests.request('get', url1, headers=headers, timeout=REQUEST_TIMEOUT)

# Disabled example: explicit headers, query parameters, cookies and a timeout.
# r1 = requests.get('http://www.baidu.com/s?',headers=headers,params=kw,cookies=cookies, timeout=3)

# Decoded with whatever encoding requests detected; must be read before the
# encoding is overridden below because r2 and r4 are the same object.
restr = r2.text

# Force UTF-8 decoding for the text that the parsing code consumes.
response.encoding = 'utf-8'
r4str = r4.text

# print(response.text)
# print(response.content.decode())
# print(res.content.decode())
# print(r1.url.encode())
# print(restr[110:24542])
def _window(text, anchor, start, stop):
    """Return text[anchor + start:anchor + stop]."""
    return text[anchor + start:anchor + stop]


# Every value below is cut out of the page text at a fixed offset from a
# marker string. str.find returns -1 when the marker is absent and no check
# is made, so a changed page layout yields arbitrary slices, not an error.

# Lunar date: text following the '农历日期' marker.
nlindex = r4str.find('农历日期')
nlrq = _window(r4str, nlindex, 12, 21)

# Stem-branch (干支) date: text following the '干支' marker.
gzrqindex = r4str.find('干支')
gzrq = _window(r4str, gzrqindex, 10, 21)

# First 'sc2title' occurrence, i.e. the first time period on the page.
y1index = r4str.find('sc2title')
yj2y = _window(r4str, y1index, 10, 30)
ystr = yj2y
yendindex = yj2y.find('/p')

# Text between the '时忌' marker and the first '冲' character.
j1index = r4str.find('时忌')
cindex = r4str.find('冲')
j1str = r4str[j1index + 14:cindex - 15]

# Fixed labels; these, not the extracted ystr/j1str, are what is stored in
# the XML 'y' and 'j' fields later on.
y1str = '宜:'
j2str = '忌:'

print(f'农历日期:{nlrq}')
print(f'干支日期:{gzrq}')
# Load the XML document that is updated in place with today's values.
tree = ET.parse('2.xml')
root = tree.getroot()

# (child tag of the root, new text) pairs, applied in this order. A missing
# child makes root.find return None, and the assignment then raises
# AttributeError.
for _tag, _text in (
    ('today', str(tdate)),
    ('nl', nlrq),
    ('gz', gzrq),
    ('y', y1str),
    ('j', j2str),
):
    blstr = root.find(_tag)
    blstr.text = _text

# NOTE(review): ElementTree.write defaults to US-ASCII output, so the Chinese
# text is stored as numeric character references; confirm that whatever
# reads 2.xml is fine with that before changing the encoding.
tree.write('2.xml')
print('-------------------------------------------')
# print(r4str)
print('-------------------------------------------')

# Splitting on the 'sc2title' marker gives the text before the first marker
# followed by one piece per time-period section; only the sections are used.
scstr = r4str.split('sc2title')

for slot, str1 in enumerate(scstr[1:]):
    # The short name always sits at the same offset. The full label is cut
    # one character longer for the sixth section and two longer afterwards;
    # the original comments give the ranges as 0-8, 9 to 10 and 11-24 o'clock.
    name = str1[2:5]
    if slot < 5:
        allname = str1[2:22]
    elif slot == 5:
        allname = str1[2:23]
    else:
        allname = str1[2:24]

    print('-------------------')
    if slot == 5:
        # Only the sixth section also printed the short name originally.
        print(name, allname)
    else:
        print(allname)

    # Store both strings in the slot-th <sharry> of every <sh> element.
    # Raises IndexError if an <sh> has fewer <sharry> children than the page
    # has sections, and AttributeError if a <sharry> lacks 'name'/'allname'.
    for sh in root.findall('sh'):
        shat = sh.findall('sharry')
        shat[slot].find('name').text = name
        shat[slot].find('allname').text = allname

# Written once after all sections are filled in, instead of rewriting the
# whole file on every pass through the loops; the final content is the same.
tree.write('2.xml')
# print(r4str)
# print(response.headers)
# print('-------')
# print(r1.content.decode())
# print(response.cookies.values())
# print(response.headers)
if __name__ == '__main__':
    # Only runs on direct execution. Because all the work above happens at
    # module level, this banner is printed after everything else has finished.
    print('开始:')