在使用 Python + Playwright 从网页下载 Excel 数据时,因为需要经过 SSO、携带 Token 才可以访问数据,所以在无痕模式下无法成功;改用非无痕模式(持久化用户目录)打开浏览器,就可以获取 cookie,成功达到效果。
点击查看代码
from multiprocessing.sharedctypes import Value
from playwright.sync_api import sync_playwright
from datetime import datetime, timedelta
# User-data directory of the persistent (non-incognito) Chrome profile.
# A persistent profile is used because, per the note accompanying this
# script, an incognito context carries no cookies and the pages cannot be
# accessed.  Raw string: the backslashes are literal path separators.
USER_DATA_DIR = r"D:\chrome_user\yyl1"

# Report label -> page URL.  The label is embedded in the saved file name.
REPORT_URLS = {
    "A1": "http://*****/",
    "B1": "http://******/",
}


def download_report(page, key, url, date_prefix):
    """Open *url* in *page* and save its Excel export in the working directory.

    Args:
        page: Playwright ``Page`` used for navigation and clicking.
        key: Report label; used in progress messages and the file name.
        url: Address of the report page.
        date_prefix: Date string placed at the start of the file name.

    The file is saved as ``<date_prefix><key>通報表.xlsx``.
    """
    print(">>>打開網頁<<<")
    page.goto(url)
    print(">>>數據加載中,請耐心等待.(為確保數據完全加載完成,會延遲15秒.)<<<")
    # Fixed delay so the report data can finish loading before exporting.
    page.wait_for_timeout(15000)
    print(f">>>準備開始{key}下載Excel<<<")

    # NOTE(review): the button is clicked once here and once more inside the
    # event waits below, exactly as in the original script — confirm whether
    # the first click is actually required by the page.
    page.locator("#ButtonLink").click()

    # Both waiters must be registered before the clicks that trigger them.
    # Leaving the ``with`` block waits for the popup and the download events.
    with page.expect_download() as download_info, page.expect_popup():
        page.locator("#ButtonLink").click()
        page.get_by_role("link", name="Excel").click()
        page.wait_for_timeout(10 * 1000)

    download = download_info.value
    download.save_as(f"{date_prefix}{key}通報表.xlsx")
    print(f">>>{key}Excel下載完畢<<<")


def main():
    """Download the Excel export of every configured report page.

    Launches Chrome with a persistent profile, visits each URL in
    ``REPORT_URLS`` and saves its Excel export.  The browser context and the
    Playwright driver are shut down even if a download step fails.
    """
    now = datetime.now()
    # Current date; used as the prefix of every saved file name.
    now_date = now.strftime('%Y-%m-%d')
    now_month = now.strftime('%Y-%m')
    print(now_month)

    # The context manager stops the Playwright driver on exit.
    with sync_playwright() as p:
        # Open the browser in non-incognito mode; otherwise there is no cookie.
        context = p.chromium.launch_persistent_context(
            # Local user-data (profile) directory.
            user_data_dir=USER_DATA_DIR,
            # Accept download events.
            accept_downloads=True,
            # Run with a visible GUI.
            headless=False,
            bypass_csp=True,
            slow_mo=1000,
            channel="chrome",
        )
        try:
            page = context.pages[0]
            for key, url in REPORT_URLS.items():
                download_report(page, key, url, now_date)
        finally:
            context.close()


if __name__ == "__main__":
    main()