Fork me on GitHub

Python_Web_Pyquery

PyQuery

class 选择器 – 使用 `.` 前缀(例如 `.item-0`)
id 选择器 – 使用 `#` 前缀(例如 `#haha`)

初始化

1、字符串的初始化
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from pyquery import PyQuery as pq

# Sample HTML fragment used to build the document from a string.
html = '''<div>
<ul>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul></div>'''

# Passing the markup string to the constructor yields a PyQuery document.
doc = pq(html)
print(doc)
print(type(doc))

# Calling the document with a CSS selector returns the matching nodes;
# here that is every <li> element together with its contents.
li_nodes = doc('li')
print(li_nodes)


2、URL初始化
1
2
3
from pyquery import PyQuery as pq

# Placeholder address: replace it with the page you actually want to parse.
# A blank URL cannot be fetched, so a real http(s) address is required here.
page_url = "https://example.com"

# With the `url` keyword, PyQuery fetches the page and parses the response.
doc = pq(url=page_url)
print(doc("li"))


3、通过文件初始化
1
2
3
from pyquery import PyQuery as pq

# With the `filename` keyword, the markup is read from a local file.
doc = pq(filename='demo.html')

# Select every <li> element of the loaded document and show the result.
li_nodes = doc('li')
print(li_nodes)


CSS选择器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
from pyquery import PyQuery as pq

# Sample markup: the <ul> carries the id "haha".
html = '''<div>
<ul id = 'haha'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
</ul></div>'''

doc = pq(html)
print(doc)

# Descendant selector: each nesting level is separated by a space.
# Reads as: under the element with id "haha", inside an element with class
# "item-0", inside an <a>, match the <span>.
nested_span = doc('#haha .item-0 a span')
print(nested_span)


查找子标签
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from pyquery import PyQuery as pq

html = '''<div>
<ul id = 'haha'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
</ul></div>'''
doc = pq(html)

# The markup contains no element with class "list", so the <ul> is selected
# by its id instead; a ".list" selector would match nothing here.
items = doc('#haha')
print(type(items))

# Compound selector: "item-0" and "active" are written without a space
# between them, so one element must carry BOTH classes.
itema = doc('#haha .item-0.active')
print(itema)

# find() searches all descendants of the selection.
# (Named li_nodes rather than `list` to avoid shadowing the builtin.)
li_nodes = items.find('li')
print(li_nodes)

# children() only looks at direct children; the optional selector keeps
# just those children that have the class "active".
lis = items.children('.active')
print(lis)
兄弟元素
1
2
3
4
# Continues from the previous example: `doc` is the document parsed there.
# The <ul> in that markup is identified by id "haha" and has no "list" class,
# so the id selector is used to reach the item with both classes.
li = doc('#haha .item-0.active')
# siblings() returns the other elements on the same level as `li`.
print(li.siblings())
# An optional selector filters the siblings further.
print(li.siblings('.active'))
喜欢的可以对我打赏了哟~