pyquery
初始化 字符串初始化 1 2 3 4 5 6 7 8 9 10 11 12 13 14 html = ''' <div> <ul> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) print(doc('li' ))
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
URL初始化 1 2 3 from pyquery import PyQuery as pqdoc = pq(url='http://www.baidu.com' ) print(doc('head' ))
<head><meta http-equiv="content-type" content="text/html;charset=utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=Edge"/><meta content="always" name="referrer"/><link rel="stylesheet" type="text/css" href="http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css"/><title>ç¾åº¦ä¸ä¸ï¼ä½ å°±ç¥é</title></head>
文件初始化 1 2 3 from pyquery import PyQuery as pqdoc = pq(filename='demo.html' ) print(doc('li' ))
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
基本CSS选择器 1 2 3 4 5 6 7 8 9 10 11 12 13 14 html = ''' <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) print(doc('#container .list li' ))
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
查找元素 子元素 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 html = ''' <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) items = doc('.list' ) print(type (items)) print(items) lis = items.find('li' ) print(type (lis)) print(lis)
<class 'pyquery.pyquery.PyQuery'>
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
<class 'pyquery.pyquery.PyQuery'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
1 2 3 lis = items.children() print(type (lis)) print(lis)
<class 'pyquery.pyquery.PyQuery'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
1 2 lis = items.children('.active' ) print(lis)
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
父元素 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 html = ''' <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) items = doc('.list' ) container = items.parent() print(type (container)) print(container)
<class 'pyquery.pyquery.PyQuery'>
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) items = doc('.list' ) parents = items.parents() print(type (parents)) print(parents)
<class 'pyquery.pyquery.PyQuery'>
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div><div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
1 2 parent = items.parents('.wrap' ) print(parent)
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
兄弟元素 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) li = doc('.list .item-0.active' ) print(li.siblings())
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0">first item</li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) li = doc('.list .item-0.active' ) print(li.siblings('.active' ))
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
遍历 单个元素 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) li = doc('.item-0.active' ) print(li)
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) lis = doc('li' ).items() print(type (lis)) for li in lis: print(li)
<class 'generator'>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
获取信息 获取属性 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) a = doc('.item-0.active a' ) print(a) print(a.attr('href' )) print(a.attr.href)
<a href="link3.html"><span class="bold">third item</span></a>
link3.html
link3.html
获取文本 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) a = doc('.item-0.active a' ) print(a) print(a.text())
<a href="link3.html"><span class="bold">third item</span></a>
third item
获取HTML 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) li = doc('.item-0.active' ) print(li) print(li.html())
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<a href="link3.html"><span class="bold">third item</span></a>
DOM操作 addClass、removeClass 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) li = doc('.item-0.active' ) print(li) li.removeClass('active' ) print(li) li.addClass('active' ) print(li)
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
attr、css 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) li = doc('.item-0.active' ) print(li) li.attr('name' , 'link' ) print(li) li.css('font-size' , '14px' ) print(li)
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0 active" name="link"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0 active" name="link" style="font-size: 14px"><a href="link3.html"><span class="bold">third item</span></a></li>
remove 1 2 3 4 5 6 7 8 9 10 11 12 html = ''' <div class="wrap"> Hello, World <p>This is a paragraph.</p> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) wrap = doc('.wrap' ) print(wrap.text()) wrap.find('p' ).remove() print(wrap.text())
Hello, World This is a paragraph.
Hello, World
其他DOM方法 http://pyquery.readthedocs.io/en/latest/api.html
伪类选择器 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 html = ''' <div class="wrap"> <div id="container"> <ul class="list"> <li class="item-0">first item</li> <li class="item-1"><a href="link2.html">second item</a></li> <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li> <li class="item-1 active"><a href="link4.html">fourth item</a></li> <li class="item-0"><a href="link5.html">fifth item</a></li> </ul> </div> </div> ''' from pyquery import PyQuery as pqdoc = pq(html) li = doc('li:first-child' ) print(li) li = doc('li:last-child' ) print(li) li = doc('li:nth-child(2)' ) print(li) li = doc('li:gt(2)' ) print(li) li = doc('li:nth-child(2n)' ) print(li) li = doc('li:contains(second)' ) print(li)
<li class="item-0">first item</li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>