S05 - 网页图片的爬取及本地保存

注意：由于网站升级等原因，各关卡的访问链接会有所变化，请注意修改代码中的url变量。

# coding=utf-8

from urllib.parse import urljoin

import requests
from lxml import etree

# Page to scrape for exercise S05; edit this if the site changes its links.
url = 'http://www.spiderbuf.cn/playground/s05'

# Browser-like User-Agent header sent with every request below.
myheaders = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.164 Safari/537.36'
    ),
}


# Download the exercise page. requests applies no timeout by default, so an
# explicit one keeps a stalled server from hanging the script indefinitely.
response = requests.get(url, headers=myheaders, timeout=10)
# Fail fast on HTTP errors instead of saving and parsing an error page.
response.raise_for_status()
html = response.text
print(html)

# Keep a local copy of the raw HTML; the context manager closes the file
# even if the write raises.
with open('05.html', 'w', encoding='utf-8') as f:
    f.write(html)

# Parse the page and collect the src attribute of every <img> element.
root = etree.HTML(html)
imgs = root.xpath('//img/@src')
print(imgs)
for item in imgs:
    # Resolve the src against the page URL so that root-relative,
    # page-relative and absolute sources all work, and so that only `url`
    # has to be edited when the site address changes.
    img_url = urljoin(url, item)
    img_response = requests.get(img_url, headers=myheaders, timeout=10)
    if not img_response.ok:
        # Do not store an HTTP error body under an image file name.
        print('skipped', img_url, img_response.status_code)
        continue
    img_data = img_response.content
    # The local file name is the src value with every '/' removed.
    with open(str(item).replace('/', ''), 'wb') as img:
        img.write(img_data)
#
# f = open('data05.txt', 'w', encoding='utf-8')
# for tr in trs:
#     tds = tr.xpath('./td')
#     s = ''
#     for td in tds:
#         s = s + str(td.xpath('string(.)')) + '|'
#         # s = s + str(td.text) + '|'
#     print(s)
#     if s != '':
#         f.write(s + '\n')
#
# f.close()