zoukankan      html  css  js  c++  java
  • 一个抓取智联招聘数据并存入表格的python爬虫

      Talk is cheap... let me show you the code.

     # Scrape Zhaopin job-search result pages and store the listings in an
     # .xls workbook (one row per listing).
     #
     # NOTE: this is a Python 2 script -- reload() as a builtin and
     # sys.setdefaultencoding() do not exist on Python 3.
     import requests
     import lxml,time,os  # lxml is the parser backend named in the BeautifulSoup call below
     from bs4 import BeautifulSoup as sb
     from xlwt import *
     import sys
     reload(sys)
     sys.setdefaultencoding('utf-8')
     print(sys.getdefaultencoding())

     PAGE_COUNT = 90
     SEARCH_URL = 'http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%E4%B8%9C%E8%8E%9E&p='
     OUTPUT_FILE = 'result.xls'
     REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks forever

     # Sheet header, in column order.  Column 0 holds the running row number.
     COLUMN_TITLES = (
         'number',
         'position',
         'feedback',
         'company',
         'salary',
         'address ',
         "updatetime",
         "details",
     )

     # CSS classes of the <td> cells that feed sheet columns 1..6, in column order.
     CELL_CLASSES = ('zwmc', 'fk_lv', 'gsmc', 'zwyx', 'gzdd', 'gxsj')

     headers = {"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36"}

     book = Workbook(encoding = "utf-8")
     table = book.add_sheet("test1")
     for title_col, title in enumerate(COLUMN_TITLES):
         table.write(0, title_col, title)

     # Running index of the next free sheet row.  Deriving the row from
     # page_number * page_length only works if every page has the same length;
     # otherwise rows collide (xlwt refuses to overwrite a written cell) or gaps
     # appear in the sheet.
     next_row = 1

     try:
         for num in range(PAGE_COUNT):
             url = SEARCH_URL + str(num)
             print(url)
             res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
             html = sb(res.text, 'lxml')

             # One list of matching elements per sheet column (columns 1..7).
             columns = [html.find_all('td', class_=css_class) for css_class in CELL_CLASSES]
             columns.append(html.find_all('li', class_="newlist_deatil_last"))
             zwmc = columns[0]

             # Index 0 is skipped on every page, as in the original script --
             # presumably a header / non-listing entry.
             # NOTE(review): confirm against the live page markup.
             for i in range(1, len(zwmc)):
                 # The lists are only assumed to be parallel; a shorter list
                 # yields an empty cell instead of an IndexError mid-run.
                 values = [
                     cells[i].text.strip() if i < len(cells) else ''
                     for cells in columns
                 ]
                 # Progress line: the six <td> fields, without the details text.
                 print("---".join(values[:len(CELL_CLASSES)]))

                 table.write(next_row, 0, next_row)
                 for value_col, value in enumerate(values, 1):
                     table.write(next_row, value_col, value)
                 next_row += 1
     finally:
         # Save whatever has been collected even if a request or a parse step
         # failed part-way through the run.
         book.save(OUTPUT_FILE)
  • 相关阅读:
    golang的reflect
    minium-介绍
    selenium+Node.js在windows下的配置和安装
    Jmeter-逻辑控制器之Switch控制器(Switch Controller)
    Jmeter-逻辑控制器之Foreach
    chrome插件-YSlow 一个使用的web性能测试插件
    jmeter
    Jmeter-从数据库中获取数据并作为变量传输
    Jmeter-无法启动,'findstr'不是内部或外部命令,也不是可运行的程序
    Jmeter-响应结果unicode转成中文显示
  • 原文地址:https://www.cnblogs.com/peter1994/p/7376380.html
Copyright © 2011-2022 走看看