N07 - 随机CSS样式类名，无Element ID

发布日期：1731486014 | 阅读数：1222

# coding=utf-8

import os

import requests
from lxml import etree

# Target page of the "random CSS class name, no element ID" scraping exercise.
base_url = 'https://spiderbuf.cn/web-scraping-practice/random-css-classname'

# Request headers sent with the page fetch; the User-Agent mimics a desktop
# Chrome browser. The literal is split across lines only for readability.
my_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.164 Safari/537.36'
    ),
}

# Fetch the practice page. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops us from parsing and
# saving an HTTP error page as if it were real data.
response = requests.get(base_url, headers=my_headers, timeout=30)
response.raise_for_status()
html_bytes = response.content
html = html_bytes.decode()  # bytes.decode() defaults to UTF-8
root = etree.HTML(html)

# Create the output directory up front so the open() calls below do not
# raise FileNotFoundError on a fresh checkout.
os.makedirs('./data/n07', exist_ok=True)

# Keep a copy of the raw page next to the extracted text.
with open('./data/n07/n07.html', 'w', encoding='utf-8') as f:
    f.write(html)

# Per the exercise title, class names are randomized and elements have no
# IDs, so the target divs are selected by their position in the document.
divs = root.xpath('/html/body/main/div[2]/div')
with open('./data/n07/n07.txt', 'w', encoding='utf-8') as f:
    for div in divs:
        # div.text is None/empty when the div has no leading text; skip those
        # so neither the console nor the file gets blank/"None" entries.
        if div.text:
            print(div.text)
            f.write(f'{div.text}\n')

N07 - 随机CSS样式类名，无Element ID

相关文章