zoukankan      html  css  js  c++  java
  • 聚划算爬虫

    import requests
    import re
    import json
    import pandas as pd
    # Browser-like User-Agent header sent with the request to ju.taobao.com.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
    }
    # Ju item IDs. The original author notes that these can be extracted
    # directly from the home page source and omitted that step, leaving the
    # assignment without a value (a syntax error). An empty list keeps the
    # script parseable; the IDs actually requested are hard-coded in `url`.
    juids = []
    # Hard-coded ajaxGetItemsV2 JSONP request URL. The literal used to be broken
    # across two physical lines inside a single-quoted string (a syntax error);
    # it is now written as two adjacent literals that Python concatenates, so
    # the resulting value contains no line break.
    # NOTE(review): the juIds value contains URL-encoded "', '" separators
    # (%27,%20%27) and every ID group appears twice -- it looks like a
    # stringified Python list was pasted in; confirm the endpoint accepts this.
    url = (
        'https://ju.taobao.com/json/tg/ajaxGetItemsV2.json?juIds=10000150258352,10000150257351,10000150257077,10000150257135%27,%20%2710000150258352,10000150257351,10000150257077,10000150257135%27,%20%2710000150258405,10000150257930,10000150305787,10000150305694,10000153135882,10000150305662,10000150305738,10000150258202,10000150257850,10000150257981,10000150258448,10000153135896,10000150258033,10000150258293,10000150258152,10000150258089,10000150257891%27,%20%2710000150258405,10000150257930,10000150305787,10000150305694,10000153135882,10000150305662,10000150305738,10000150258202,10000150257850,10000150257981,10000150258448,10000153135896,10000150258033,10000150258293,10000150258152,10000150258089,10000150257891%27,%20%2710000150258826,10000150258980,10000150305864,10000150305909,10000153135909,10000150258772,10000150258719,10000150258654,10000150258534,10000150258490,10000150258883,10000150258932%27,%20%2710000150258826,10000150258980,10000150305864,10000150305909,10000153135909,10000150258772,10000150258719,10000150258654,10000150258534,10000150258490,10000150258883,10000150258932%27,%20%2710000150257239,10000150257588,10000150257183,10000150305542,10000150305574,10000151345535,10000150257299,10000150257757,10000150257412,10000150257807,10000150257453,10000153135922,10000150257546%27,%20%2710000150257239,10000150257588,10000150257183,10000150305542,10000150305574,10000151345535,10000150257299,10000150257757,10000150257412,10000150257807,10000150257453,10000153135922,10000150257546%27,%20%2710000150255112,10000150255627,10000150255673,10000150255554,10000150255278,10000150255231,10000150255507,10000150255400,10000150255445,10000150255355,10000150255160%27,%20%2710000150255112,10000150255627,10000150255673,10000150255554,10000150255278,10000150255231,10000150255507,10000150255400,10000150255445,10000150255355,10000150255160%27,%20%2710000150257710,10000150257494,10000150305448,10000150256423,10000150305118,10000150255775,10000153135851,10000150255962,10000150255732,'
        '10000150256015,10000150256073,10000150255853,10000153135869,10000150256273,10000150256205,10000150256142,10000150256326,10000150305407%27,%20%2710000150257710,10000150257494,10000150305448,10000150256423,10000150305118,10000150255775,10000153135851,10000150255962,10000150255732,10000150256015,10000150256073,10000150255853,10000153135869,10000150256273,10000150256205,10000150256142,10000150256326,10000150305407%27,%20%2710000150254802,10000150254754,10000150254675,10000153135934,10000150254984,10000150254940,10000150254839,10000150255066,10000150255036,10000150254901,10000150254712%27,%20%2710000150254802,10000150254754,10000150254675,10000153135934,10000150254984,10000150254940,10000150254839,10000150255066,10000150255036,10000150254901,10000150254712%27,%20%2710000150256574,10000150256637,10000150256686,10000150256905,10000150256788,10000150256993,10000150256865,10000150257038,10000153135946,10000150256738,10000150256522,10000150256472,10000150256946%27,%20%2710000150256574,10000150256637,10000150256686,10000150256905,10000150256788,10000150256993,10000150256865,10000150257038,10000153135946,10000150256738,10000150256522,10000150256472,10000150256946%27,%20%2710000150305832,10000150305248,10000150304821,10000150304678,10000150305208,10000150304959,10000150304719,10000150304753,10000150305307,10000150305362,10000150304868,10000150305011,10000150304913,10000150305062%27,%20%2710000150305832,10000150305248,10000150304821,10000150304678,10000150305208,10000150304959,10000150304719,10000150304753,10000150305307,10000150305362,10000150304868,10000150305011,10000150304913,10000150305062%27&salesSite=1&stype=ids&reverse=down&includeForecast=true&callback=jsonp_68507647'
    )
    # Destination workbook. The backslashes of this Windows path were missing
    # in the published snippet ('C:UsersmgxxDesktop...'); restored here.
    # NOTE(review): machine-specific path -- adjust before running elsewhere.
    OUTPUT_PATH = r'C:\Users\mgxx\Desktop\天气2.xlsx'


    def extract_fields(text):
        """Pull the reminder counts and item IDs out of the raw response text.

        The response is scanned with regular expressions rather than parsed
        as JSON. Matches are returned as strings, in order of appearance.

        Returns:
            A ``(remind_nums, item_ids)`` tuple of two lists of strings.
        """
        remind_nums = re.findall(r'"fire":false,"remindNum":(.*?),', text)
        item_ids = re.findall(r'"item_id":"(.*?)"', text)
        return remind_nums, item_ids


    def build_frame(remind_nums, item_ids):
        """Build the ID-indexed table of reminder counts.

        Args:
            remind_nums: reminder-count strings, one per item.
            item_ids: item-ID strings, in the same order.

        Returns:
            A DataFrame indexed by 'ID' with one '人数' column, or ``None``
            when either list is empty (nothing to export).

        Raises:
            ValueError: if the two lists differ in length, since the values
                could then not be paired row by row.
        """
        if not (item_ids and remind_nums):
            return None
        if len(item_ids) != len(remind_nums):
            # The remindNum pattern only matches entries with "fire":false, so
            # the two lists can diverge; fail with an explicit message instead
            # of pandas' generic length error.
            raise ValueError(
                'matched {} item ids but {} remindNum values; cannot pair them'
                .format(len(item_ids), len(remind_nums))
            )
        frame = pd.DataFrame({'人数': remind_nums, 'ID': item_ids})
        return frame.set_index('ID')


    def main():
        """Fetch the item list, print the item IDs and export counts to Excel."""
        # A timeout keeps the script from hanging forever on a stalled
        # connection; raise_for_status surfaces HTTP errors instead of silently
        # scanning an error page.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        remind_nums, item_ids = extract_fields(response.text)
        print(item_ids)

        frame = build_frame(remind_nums, item_ids)
        if frame is not None:
            frame.to_excel(OUTPUT_PATH)
            print('done')


    if __name__ == '__main__':
        main()
  • 相关阅读:
    自学Linux Shell2.1-进入shell命令行
    自学Linux Shell1.3-Linux文件系统
    自学Linux Shell1.2-Linux目录结构
    自学Linux Shell1.1-Linux初识
    03 自学Aruba之2.4GHz及5GHz无线信道
    02 自学Aruba之无线频段---ISM频段及UNII频段
    01 自学Aruba之功率单位和相对单位
    1.Zabbix报错信息:It probably means that the systems requires more physical memory.
    自学Aruba5.3.4-Aruba安全认证-有PEFNG 许可证环境的认证配置802.1x
    自学Aruba5.3.3-Aruba安全认证-有PEFNG 许可证环境的认证配置Captive-Portal
  • 原文地址:https://www.cnblogs.com/snackpython/p/10856391.html
Copyright © 2011-2022 走看看