首页 > 其他分享 >pyquery使用

pyquery使用

时间:2022-10-09 15:00:26 浏览次数:45
标签:pq pyquery doc item html 使用 print

from pyquery import PyQuery as pq
from lxml import etree
import urllib
# Ways to build a PyQuery document (examples kept commented out so that
# importing this file performs no I/O):
#   - from a markup string
#   - from an already-parsed lxml element
#   - from a URL (optionally with a custom opener)
#   - from a local file
#
# NOTE: `urlopen` is not imported by this file (only `import urllib` is);
# the opener example needs the import shown below.
# from urllib.request import urlopen
# d=pq("<html></html>")
# d=pq(etree.fromstring("<html></html>"))
# d=pq(url=your_url)
# d=pq(url=your_url,
#      opener=lambda url,**kw:urlopen(url).read())
# d=pq(filename=path_to_html_file)

# Calling the document with a CSS selector returns the matching elements.
# print(d("#hello"))

# Fixture 1: a bare <div><ul> list with five <li> items.
html1='''
<div>
<ul>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''

# Example: initialise from a string and select every <li>.
# (Uses html1; the name `html` is not defined until much later in this file.)
# from pyquery import PyQuery as pq
# doc=pq(html1)
# print(doc('li'))
# The bare string below records the output printed by the example above.
'''
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>

'''


# Example: initialise from a URL and print the page's <head>.
# NOTE(review): the keyword is `encoding`, not `encode` -- confirm against
# the pyquery version in use.
# from pyquery import PyQuery as pq
# doc=pq(url='http://www.baidu.com',encoding="utf8")
# print(doc('head'))
# Output as recorded by the author; the live page may differ today.
'''
<head><meta http-equiv="content-type" content="text/html;charset=utf-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=Edge"/>
<meta content="always" name="referrer"/>
<link rel="stylesheet" type="text/css" href="http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css"/>
<title>百度一下,你就知道</title>
</head>
'''
# Example: initialise from a local file.
# Presumably D://demo.html holds the same markup as html1 (the recorded
# output below matches it) -- the file itself is not part of this script.
# from pyquery import PyQuery as pq
# doc=pq(filename='D://demo.html')
# print(doc('li'))
'''
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>

'''

# Fixture 2: the same list, now wrapped in <div id="container"> with
# <ul class="list">, so id/class selectors have something to match.
html2='''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
# Example: descendant CSS selector (#id .class tag).
# (Uses html2; the name `html` is not defined at this point in the file.)
# from pyquery import PyQuery as pq
# doc=pq(html2)
# print(doc('#container .list li'))

# Recorded output of the selector example above.
'''
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>

'''
# Example: find() searches the descendants of the current selection.
# from pyquery import PyQuery as pq
# doc=pq(html2)
# items=doc('.list')
# print(items)
# lis=items.find('li')
# print(type(lis))
# print(lis)

# Recorded output: the <ul> itself, the type of the find() result, then
# the matched <li> elements.
'''
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>

<class 'pyquery.pyquery.PyQuery'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>

'''

# Example: children() returns the child elements, optionally filtered by
# a selector. Continues from the previous example (`items` is the
# '.list' selection).
# lis=items.children()
# print(type(lis))
# print(lis)
# lis=items.children('.active')
# print(lis)

# Recorded output: all children first, then only the '.active' ones.
'''
<class 'pyquery.pyquery.PyQuery'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>

<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
'''
# Example: parent() returns the direct parent of the selection.
# from pyquery import PyQuery as pq
# doc=pq(html2)
# items=doc('.list')
# container=items.parent()
# print(type(container))
# print(container)
# Recorded output: the enclosing <div id="container">.
'''
<class 'pyquery.pyquery.PyQuery'>
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''

# Fixture 3: same markup as the container/list fixture (no outer
# class="wrap" element).
html3='''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
# Example: parents() returns the ancestors, optionally filtered by a
# selector.
# NOTE: html3 contains no element with class "wrap", so the filtered
# selection here matches nothing; the same example against a fixture that
# does have a "wrap" div appears further down in this file.
# from pyquery import PyQuery as pq
# doc=pq(html3)
# items=doc('.list')
# parents=items.parents()
# parent=items.parents('.wrap')
# print(parent)

# Example: siblings() returns the sibling elements of the selection
# (here: the siblings of the <li> that has both item-0 and active).
# from pyquery import PyQuery as pq
# doc=pq(html3)
# li=doc('.list .item-0.active')
# print(li.siblings())

# Recorded output of the siblings() example, in the order the author
# observed it (note it is not document order).
'''
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0">first item</li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
'''

# Fixture 4: the container/list markup wrapped in an extra
# <div class="wrap">, so that parents('.wrap') has an ancestor to match.
html='''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
# Example: parents() with a selector keeps only the matching ancestors
# (no output was recorded for this example).
# from pyquery import PyQuery as pq
# doc=pq(html)
# items=doc('.list')
# parents=items.parents()
# parent=items.parents('.wrap')
# print(parent)

# Example: items() yields each matched element as its own PyQuery object.
# Continues from the previous example (`doc` is the parsed fixture).
# lis=doc('li').items()
# print(type(lis))
# for li in lis:
#     print(li)
# Recorded output: the generator type, then one <li> per iteration.
'''
<class 'generator'>
<li class="item-0">first item</li>

<li class="item-1"><a href="link2.html">second item</a></li>

<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>

<li class="item-1 active"><a href="link4.html">fourth item</a></li>

<li class="item-0"><a href="link5.html">fifth item</a></li>

'''
# Example: reading an attribute, either attr('name') or attr.name.
# from pyquery import PyQuery as pq
# doc=pq(html)
# a=doc('.item-0.active a')
# print(a)
# print(a.attr('href'))
# print(a.attr.href)
# Recorded output: the <a> element, then its href twice.
'''
<a href="link3.html"><span class="bold">third item</span></a>
link3.html
link3.html
'''
# Example: text content.
# NOTE: `text` is a method. Printing `a.text` without calling it shows the
# bound method (that is what the recorded output below contains); call
# `a.text()` to get the text itself ('third item').
# a=doc('.item-0.active a')
# print(a)
# print(a.text)
'''
<a href="link3.html"><span class="bold">third item</span></a>
<bound method PyQuery.text of [<a>]>
'''
# Example: html() returns the inner HTML of the selected element.
# from pyquery import PyQuery as pq
# doc=pq(html)
# li=doc('.item-0.active')
# print(li)
# print(li.html())
# Recorded output: the <li> itself, then only its inner markup.
'''
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>

<a href="link3.html"><span class="bold">third item</span></a>

'''

标签:pq,pyquery,doc,item,html,使用,print
From: https://www.cnblogs.com/mengdie1978/p/16772127.html

相关文章

  • linux环境变量配置错误后命令无法使用解决方案
    环境变量配置时多复制了一个空格,导致执行 source /etc/profile 后提示错误,无法编辑和查看文件。解决方案:查看当前系统变量:echo $PATH;临时修改:export PATH=/usr/local/sbi......
  • 开发人员使用Klocwork实现软件安全的5大原因
    Klocwork是为企业DevOps和DevSecOps而生的,因为Klocwork能够在保持高开发速度的同时,确保在安全和质量方面的持续合规,所以是企业首选的静态分析和SAST工具。在这里,我们将分享......
  • 分享一个查看分析Oracle表空间使用情况的脚本
    个人一直使用下面这个脚本查看、分析Oracle数据库表空间的使用情况,这个脚本经过我不断的调整、完善,已经接近完美了。已经很长时间没有改动过了,个人累积的脚本名为get_table......
  • spring boot项目使用mybatis-plus代码生成实例
    前言mybatis-plus官方地址https://baomidou.commybatis-plus是mybatis的增强,不对mybatis做任何改变,涵盖了代码生成,自定义ID生成器,快速实现CRUD,自动分页,逻辑删除等功能......
  • synchronized、ReentrantLock、LockSupport 的使用
    synchronized 线程等待唤醒机制:private static final Object objLock = new Object(); public static void main(String[] args) { new Thread(() -> {......
  • 如何使用 Delphi/Lazarus 代码在 FastReport VCL 中生成二维码?
    FastReportVCL是用于在软件中集成商务智能的现代解决方案。它提供了可视化模板设计器,可以访问最受欢迎的数据源,报告引擎,预览,将过滤器导出为30多种格式,并可以部署到云,Web,电......
  • 使用回收站还原已删除的Active Directory对象
    在ActiveDirectory(AD)环境中,管理员不小心删除AD数据是很常见的。但是,影响取决于删除的对象类型。例如,如果您删除了单个用户的数据,则可能不会对组织产生重大影响。但是,如果您......
  • Linux中安装使用rsync
    获取 rsync-3.1.0:我的网盘里放了一个。地址:http://pan.baidu.com/s/1dDs4lSt 安装 rsync-3.1.0.tar.gz:# tar zxvf rsync-3.1.0.tar.gz # cd rsync-3.1.0 # ./config......
  • ref、reactive、toRef、toRefs使用与区别
    reactive传参:reactive(arg),arg只能是对象arg为普通对象返回响应式对象,不管层级多深,都能响应使用:获取数据值的时候直接获取,不需要加.value特点:解构、扩展运算......
  • java Locale类使用
    1、定义Locale表示地区。每一个Locale对象都代表了一个特定的地理、政治和文化地区。在操作Date,​​Calendar​​等表示日期/时间的对象时,经常会用到;因为不同的区域,时间......