E02 - Scraping behind a CAPTCHA-protected login
# coding=utf-8
import requests
from lxml import etree

url = 'https://spiderbuf.cn/playground/e02/list'
# Note: replace the Cookie value with your own
myheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36',
             'Cookie': 'admin=...'}
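The preview stops after the headers. A minimal sketch of how the request might continue, reusing url and myheaders from above; the table XPath is an assumption about the list page's markup, not taken from the exercise:

# Sketch of a continuation: fetch the list page with the copied Cookie.
# If the cookie is missing or expired the site typically bounces back to the
# login page, so a quick sanity check on the parsed rows is worth doing.
html = requests.get(url, headers=myheaders).text
root = etree.HTML(html)
rows = root.xpath('//table//tr')   # assumed table layout; adjust to the real page
print(len(rows), 'rows found')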
E01 - Logging in with a username and password to scrape backend data
# coding=utf-8
import requests
from lxml import etree

url = 'http://spiderbuf.cn/playground/e01/login'
myheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'}
payload = {'username': 'admin', 'password': '123456...
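The preview truncates inside the payload. A self-contained sketch of the usual flow under stated assumptions: only 'admin' and the login URL come from the snippet above, the full password is a placeholder, and the follow-up data URL is hypothetical. A requests.Session keeps the login cookie between requests:

# Minimal login-then-scrape sketch; data_url and the password are placeholders.
import requests

login_url = 'http://spiderbuf.cn/playground/e01/login'
data_url = 'http://spiderbuf.cn/playground/e01/list'  # hypothetical; check the real post-login page
myheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                           '(KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'}
payload = {'username': 'admin', 'password': 'YOUR_PASSWORD'}  # password is cut off in the preview

with requests.Session() as s:        # the Session carries the login cookie forward
    s.headers.update(myheaders)
    s.post(login_url, data=payload)  # submit the login form
    html = s.get(data_url).text      # request the protected page with the session cookie
    print(html)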
S08 - Scraping data with HTTP POST requests
# coding=utf-8
import requests
from lxml import etree

url = 'http://www.spiderbuf.cn/playground/s08'
myheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'}
payload = {'level': '8'}
# POST the form field level=8; the payload is sent in the request body
html = requests.post(url, headers=myheaders, data=payload).text
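Since the snippet imports lxml's etree, the POST response is presumably parsed with XPath. A short continuation under that assumption (the //table//tr path is a guess at the page layout, not taken from the exercise):

# Continuation of the snippet above: parse the POST response with lxml.
root = etree.HTML(html)
for row in root.xpath('//table//tr'):                     # assumed table layout
    cells = [c.strip() for c in row.xpath('./td/text()')]
    if cells:
        print(cells)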
S07 - Scraping data loaded dynamically via AJAX
# coding=utf-8
import requests
import json

url = 'http://spiderbuf.cn/playground/iplist?order=asc'
myheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'}
# the AJAX endpoint returns the data directly instead of an HTML page
data_json = requests.get(url, headers=myheaders).text
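The import json line suggests the endpoint returns a JSON body. A short continuation under that assumption; the field names are not visible in the preview, so nothing is hard-coded:

# Continuation: decode the JSON body and print each record.
data = json.loads(data_json)
records = data if isinstance(data, list) else data.get('data', [])  # 'data' key is a guess
for item in records:
    print(item)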
S06 - Analyzing and scraping a page that loads its data in an iframe
# coding=utf-8
import requests
from lxml import etree

url = 'http://spiderbuf.cn/playground/inner'
myheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'}
html = requests.get(url, headers=myheaders).text
print(html)
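Because the data lives inside an iframe, the parent page alone is not enough: the iframe's src has to be resolved and fetched separately. A sketch of that step, continuing from the snippet above (the XPath and the relative-URL handling are assumptions):

# Continuation: locate the iframe, resolve its src against the parent URL,
# and fetch the inner document where the actual data is rendered.
from urllib.parse import urljoin

root = etree.HTML(html)
src = root.xpath('//iframe/@src')          # assumed: a single iframe on the page
if src:
    inner_url = urljoin(url, src[0])       # handle relative src values
    inner_html = requests.get(inner_url, headers=myheaders).text
    print(inner_html)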