N03 - 限制访问间隔不低于1秒
# coding=utf-8 import requests from lxml import etree import time base_url = 'https://www.spiderbuf.cn/playground/n03/%d' myheaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'} max_no = 20 # exit() f...
N02 - 使用Base64编码的图片爬取与解码还原
# coding=utf-8 import requests from lxml import etree import base64 url = 'http://spiderbuf.cn/playground/n02' myheaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'} html = requests.get(url, headers...
H01 - CSS样式偏移混淆文本内容的解析与爬取
# coding=utf-8 import requests from lxml import etree url = 'http://spiderbuf.cn/playground/h01' myheaders = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36', 'Referer':'http://www.spiderbuf.cn/list'} ...
N01 - User-Agent与Referer校验反爬
# coding=utf-8 import requests from lxml import etree url = 'http://www.spiderbuf.cn/playground/n01' myheaders = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36', 'Referer':'https://spiderbuf.cn/list'}...
E03 - 无序号翻页
# coding=utf-8 import requests from lxml import etree import re base_url = 'http://spiderbuf.cn/playground/e03' # http://spiderbuf.cn/e03/5f685274073b myheaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537....