# 注意：由于网站升级等原因，各关卡的访问链接会有所变化，请注意修改代码中的 url 变量。
# coding=utf-8
"""Scrape the table on the spiderbuf s01 page.

Downloads the page at ``url``, saves the raw HTML to ``01.html``, then
writes the text of each table row's ``<td>`` cells to ``data01.txt`` as one
record per line, with every field followed by ``|``.
"""
import requests
from lxml import etree

# Target page; adjust this if the site moves the page to a different address.
url = 'http://www.spiderbuf.cn/s01'

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, timeout=10)
response.raise_for_status()
html = response.text

# Keep a copy of the raw page for offline inspection.
with open('01.html', 'w', encoding='utf-8') as f:
    f.write(html)

root = etree.HTML(html)
trs = root.xpath('//tr')

# The context manager guarantees the data file is flushed and closed even if
# parsing or writing raises part-way through the loop.
with open('data01.txt', 'w', encoding='utf-8') as f:
    for tr in trs:
        tds = tr.xpath('./td')
        # td.text is None when a cell has no leading text; emit an empty
        # field instead of the literal string "None".
        s = ''.join((td.text or '') + '|' for td in tds)
        print(s)
        # Rows without any <td> child (such as rows made only of <th> cells)
        # yield an empty record; those are printed but not written.
        if s != '':
            f.write(s + '\n')