# C08 - JS 逆向爬虫实战示例代码
#
# 发布日期：1758966412 | 阅读数：3
# coding=utf-8

import time
import base64
import hmac
import hashlib
import requests
import urllib.parse
import json
from Crypto.Cipher import AES
import numpy as np

base_url = 'https://spiderbuf.cn/web-scraping-practice/scraper-practice-c08'

myheaders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36',
    'Referer': base_url}

# Seconds to wait for the API; without a timeout requests may block forever.
REQUEST_TIMEOUT = 10

# AES block size in bytes; the IV prepended to the payload has this length.
AES_BLOCK_SIZE = 16


def make_salt() -> str:
    """Return a Base64-encoded salt derived from a millisecond counter.

    The counter keeps six fractional digits (similar to what a browser
    produces) and is encoded with standard Base64, like ``btoa``.
    """
    salt_raw = '{:.6f}'.format(time.perf_counter() * 1000)
    return base64.b64encode(salt_raw.encode()).decode()


def sign(salt: str, timestamp_str: str, secret: str = base_url) -> str:
    """Return the Base64 HMAC-SHA256 signature of ``salt + timestamp_str``.

    Args:
        salt: Base64 salt sent as the ``s`` query parameter.
        timestamp_str: Unix timestamp (as text) sent as the ``t`` parameter.
        secret: HMAC key; defaults to the page URL.

    Returns:
        The Base64-encoded 32-byte digest (44 characters).
    """
    message = (salt + timestamp_str).encode()
    digest = hmac.new(secret.encode(), message, hashlib.sha256).digest()
    return base64.b64encode(digest).decode()


def pkcs7_unpad(data: bytes, block_size: int = AES_BLOCK_SIZE) -> bytes:
    """Strip PKCS#7 padding from ``data`` after validating it.

    Args:
        data: Decrypted bytes whose length is a multiple of ``block_size``.
        block_size: Cipher block size in bytes.

    Returns:
        ``data`` without its trailing padding bytes.

    Raises:
        ValueError: If ``data`` is empty, not block-aligned, or the padding
            is malformed (typically a wrong key or a corrupted payload).
    """
    if not data or len(data) % block_size:
        raise ValueError('decrypted data is empty or not block-aligned')
    pad_len = data[-1]
    # A pad length of 0 would make ``data[:-0]`` silently return b''.
    if not 1 <= pad_len <= block_size:
        raise ValueError('invalid PKCS#7 padding length: %d' % pad_len)
    if data[-pad_len:] != bytes([pad_len]) * pad_len:
        raise ValueError('invalid PKCS#7 padding bytes')
    return data[:-pad_len]


def decrypt_payload(payload_b64: str, key: bytes):
    """Decrypt the Base64 ``d`` field of the API response and parse it.

    The decoded payload is laid out as a 16-byte IV followed by the
    AES-CBC ciphertext; the plaintext is UTF-8 encoded JSON.

    Args:
        payload_b64: Base64 text taken from the response's ``d`` field.
        key: AES key bytes.

    Returns:
        The object produced by parsing the decrypted JSON.

    Raises:
        ValueError: If the ciphertext or its padding is malformed.
    """
    cipher_data = base64.b64decode(payload_b64)
    # The first 16 bytes are the IV; everything after it is ciphertext.
    iv = cipher_data[:AES_BLOCK_SIZE]
    ciphertext = cipher_data[AES_BLOCK_SIZE:]
    cipher = AES.new(key, AES.MODE_CBC, iv)
    decrypted = pkcs7_unpad(cipher.decrypt(ciphertext))
    return json.loads(decrypted.decode("utf-8"))


def average_price(items):
    """Return the mean of every item's ``price`` rounded to two decimals.

    Args:
        items: Iterable of mappings that each contain a ``price`` entry.

    Raises:
        ValueError: If ``items`` is empty, instead of silently yielding NaN.
    """
    prices = [item['price'] for item in items]
    if not prices:
        raise ValueError('no prices to average')
    return round(np.average(prices), 2)


def main() -> None:
    """Sign a request, fetch the encrypted data and print the average price."""
    timestamp_str = str(int(time.time()))
    salt = make_salt()
    signature_b64 = sign(salt, timestamp_str)

    # Let requests URL-encode the query parameters (usually sufficient).
    params = {'t': timestamp_str, 's': salt, 'sig': signature_b64}
    resp = requests.get(base_url + '/api', params=params, headers=myheaders,
                        timeout=REQUEST_TIMEOUT)
    print('请求 URL（requests 自动编码）:', resp.url)
    print('状态码:', resp.status_code)
    print('响应 body:', resp.text)
    # Fail here on 4xx/5xx (after the diagnostics above) rather than later
    # with an unrelated JSON or KeyError failure.
    resp.raise_for_status()

    result = json.loads(resp.text)
    # The AES key is the first 16 characters of the Base64 signature.
    key = signature_b64[:16].encode("utf-8")
    items = decrypt_payload(result['d'], key)

    print(average_price(items))


if __name__ == "__main__":
    main()
# C08 - JS 逆向爬虫实战示例代码
#
# 相关文章