from pyquery import PyQuery as pq
# Tags: pq, pyquery, doc, item, html, usage, print
# From: https://www.cnblogs.com/mengdie1978/p/16772127.html
from lxml import etree
import urllib
# d=pq("<html></html>")
# d=pq(etree.fromstring("<html></html>"))
# d=pq(url=your_url)
# d=pq(url=your_url,
# opener=lambda url,**kw:urlopen(url).read())
# d=pq(filename=path_to_html_file)
# print(d("#hello"))
# Sample markup: a plain <div> wrapping an unordered list of five <li> items.
# The value starts and ends with a newline, hence the empty first/last entries.
html1 = "\n".join([
    "",
    "<div>",
    "<ul>",
    '<li class="item-0">first item</li>',
    '<li class="item-1"><a href="link2.html">second item</a></li>',
    '<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>',
    '<li class="item-1 active"><a href="link4.html">fourth item</a></li>',
    '<li class="item-0"><a href="link5.html">fifth item</a></li>',
    "</ul>",
    "</div>",
    "",
])
# from pyquery import PyQuery as pq
# doc=pq(html1)
# print(doc('li'))
'''
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
'''
# from pyquery import PyQuery as pq
# doc=pq(url='http://www.baidu.com',encoding="utf8")
# print(doc('head'))
'''
<head><meta http-equiv="content-type" content="text/html;charset=utf-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=Edge"/>
<meta content="always" name="referrer"/>
<link rel="stylesheet" type="text/css" href="http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css"/>
<title>百度一下，你就知道</title>
</head>
'''
# from pyquery import PyQuery as pq
# doc=pq(filename='D://demo.html')
# print(doc('li'))
'''
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
'''
# Sample markup: a <div id="container"> wrapping <ul class="list"> with five
# <li> items.  The value starts and ends with a newline.
html2 = "\n".join([
    "",
    '<div id="container">',
    '<ul class="list">',
    '<li class="item-0">first item</li>',
    '<li class="item-1"><a href="link2.html">second item</a></li>',
    '<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>',
    '<li class="item-1 active"><a href="link4.html">fourth item</a></li>',
    '<li class="item-0"><a href="link5.html">fifth item</a></li>',
    "</ul>",
    "</div>",
    "",
])
# from pyquery import PyQuery as pq
# doc=pq(html2)
# print(doc('#container .list li'))
'''
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
'''
# from pyquery import PyQuery as pq
# doc=pq(html2)
# items=doc('.list')
# print(items)
# lis=items.find('li')
# print(type(lis))
# print(lis)
'''
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
<class 'pyquery.pyquery.PyQuery'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
'''
# lis=items.children()
# print(type(lis))
# print(lis)
# lis=items.children('.active')
# print(lis)
'''
<class 'pyquery.pyquery.PyQuery'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
'''
# from pyquery import PyQuery as pq
# doc=pq(html2)
# items=doc('.list')
# container=items.parent()
# print(type(container))
# print(container)
'''
<class 'pyquery.pyquery.PyQuery'>
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
# Sample markup: a <div id="container"> wrapping <ul class="list"> with five
# <li> items (no outer ".wrap" element).  The value starts and ends with a
# newline.
html3 = "\n".join([
    "",
    '<div id="container">',
    '<ul class="list">',
    '<li class="item-0">first item</li>',
    '<li class="item-1"><a href="link2.html">second item</a></li>',
    '<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>',
    '<li class="item-1 active"><a href="link4.html">fourth item</a></li>',
    '<li class="item-0"><a href="link5.html">fifth item</a></li>',
    "</ul>",
    "</div>",
    "",
])
# from pyquery import PyQuery as pq
# doc=pq(html3)
# items=doc('.list')
# parents=items.parents()
# parent=items.parents('.wrap')
# print(parent)
# from pyquery import PyQuery as pq
# doc=pq(html3)
# li=doc('.list .item-0.active')
# print(li.siblings())
'''
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0">first item</li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
'''
# Sample markup: the same <ul class="list"> inside <div id="container">, with
# an additional outer <div class="wrap">.  The value starts and ends with a
# newline.
html = "\n".join([
    "",
    '<div class="wrap">',
    '<div id="container">',
    '<ul class="list">',
    '<li class="item-0">first item</li>',
    '<li class="item-1"><a href="link2.html">second item</a></li>',
    '<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>',
    '<li class="item-1 active"><a href="link4.html">fourth item</a></li>',
    '<li class="item-0"><a href="link5.html">fifth item</a></li>',
    "</ul>",
    "</div>",
    "</div>",
    "",
])
# from pyquery import PyQuery as pq
# doc=pq(html)
# items=doc('.list')
# parents=items.parents()
# parent=items.parents('.wrap')
# print(parent)
# lis=doc('li').items()
# print(type(lis))
# for li in lis:
#     print(li)
'''
<class 'generator'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
'''
# from pyquery import PyQuery as pq
# doc=pq(html)
# a=doc('.item-0.active a')
# print(a)
# print(a.attr('href'))
# print(a.attr.href)
'''
<a href="link3.html"><span class="bold">third item</span></a>
link3.html
link3.html
'''
# a=doc('.item-0.active a')
# print(a)
# print(a.text())
'''
<a href="link3.html"><span class="bold">third item</span></a>
third item
'''
# from pyquery import PyQuery as pq
# doc=pq(html)
# li=doc('.item-0.active')
# print(li)
# print(li.html())
'''
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<a href="link3.html"><span class="bold">third item</span></a>
'''