import re

from bs4 import BeautifulSoup


def main():
    """Parse a sample document and print the results of several lookups.

    Demonstrates attribute-style navigation (``soup.title``, ``soup.body.p``),
    child/descendant iteration, ``find_all`` with regex, attribute and
    callable filters, and CSS selection with ``select``. Everything is
    written to standard output; nothing is returned.
    """
    # NOTE(review): the sample text below contains no tags, although the
    # lookups further down (title, h1, div, img, a[href]) expect markup.
    # Confirm the original HTML was not stripped from this literal.
    html = """ 首页

Hello, world!

这是一个神奇的网站!


这是一个例子程序

静夜思

床前明月光

疑似地上霜

举头望明月

低头思故乡

腾讯网 美女 凯蒂猫 美女
姓名 上场时间 得分 篮板 助攻
"""
    soup = BeautifulSoup(html, 'lxml')
    # JavaScript - document.title
    print(soup.title)
    # JavaScript - document.body.h1
    print(soup.body.h1)
    print(soup.p)

    # Attribute-style navigation yields None when the tag is absent, so
    # guard before dereferencing instead of raising AttributeError.
    first_p = soup.body.p
    if first_p is not None:
        print(first_p.text)
        print(first_p.contents)
        for p_child in first_p.children:
            print(p_child)

    print(len(list(soup.body.children)))
    print(len(list(soup.body.descendants)))

    # A plain string filter is compared against the tag name literally, so
    # the "starts with h" search has to be a compiled pattern.
    starts_with_h = re.compile(r'^h')
    print(soup.find_all(re.compile(r'^h[1-6]')))
    print(soup.body.find_all(starts_with_h))
    first_div = soup.body.div
    print(first_div.find_all(starts_with_h) if first_div is not None else [])

    print(soup.find_all(re.compile(r'r$')))
    # The dot before the extension is escaped so it only matches a literal '.'.
    print(soup.find_all('img', {'src': re.compile(r'\./img/\w+\.png')}))
    # The same two-attribute filter, once inline and once as a named function.
    print(soup.find_all(lambda x: len(x.attrs) == 2))
    print(soup.find_all(foo))
    print(soup.find_all('p', {'class': 'foo'}))
    for elem in soup.select('a[href]'):
        print(elem.attrs['href'])


def foo(elem):
    """Return True when *elem* carries exactly two attributes.

    Used as a ``find_all`` filter; *elem* only needs an ``attrs`` mapping.
    """
    return len(elem.attrs) == 2


if __name__ == '__main__':
    main()