测试链接:
https://nanjing.8684.cn/line4


思路:先抓取某个类型下某一条线路的所有数据,然后实现批量抓取,列举出三个类型作为代表即可
源码:
import re

from lxml import etree
from xlutils.copy import copy
import os
import requests
import xlrd
import xlwt

# Site root: listing pages are <root>/line<N>; route pages are <root><href>.
_BASE_URL = 'https://nanjing.8684.cn'

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 15

# Output workbook, created in the current working directory.
_XLS_PATH = '公交线路数据.xls'

# Column titles written to row 0 of every sheet, in the order parse_one emits values.
_SHEET_HEADER = (
    '线路名称', '线路类型', '运行时间', '参考票价',
    '公交公司', '最后更新', '公交路线-正向', '公交路线-反向',
)

# Browser-like request headers shared by the listing and route requests.
_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    "sec-ch-ua-mobile": "?0",
}

# NOTE(review): both cookie sets look like values captured from a browser
# session; they are kept verbatim and may need refreshing if the site rejects them.
_LIST_COOKIES = {
    "JSESSIONID": "48304F9E8D55A9F2F8ACC14B7EC5A02D",
    "wbf__voiceplg-is": "false",
    "tongue": "1",
    "Hm_lvt_c31f95cd1f1c01c74f44d211939ceb8c": "1712659199",
    "__gads": "ID",
    "__gpi": "UID",
    "__eoi": "ID",
    "SECKEY_ABVK": "2DPSFBW+PxohRgE9br/PasK97Oo+bbbcKQgQu9uxadc%3D",
    "BMAP_SECKEY": "XCSGTS0HVG9MJBd7qjmcuIneKymhvMCOXLg1JoYhcHTYNyZi_ZD1PkQ8wHX0_ycxbyF1QTuQWF68O-J3hMNYeSVrLdplIVuNxTyW1OaKt18bXNTDHrBSmsZ7DEMwNaY3o1qfZ-Gy932UGgUlRkYaQLMujMyT2eGMlHUKElpXgb3WIdgV2i4dGkFfMutvbhUKyxkjaWZMOhimPI5uGe2Zow",
    "Hm_lpvt_c31f95cd1f1c01c74f44d211939ceb8c": "1712671763",
}

_LINE_COOKIES = {
    "JSESSIONID": "48304F9E8D55A9F2F8ACC14B7EC5A02D",
    "wbf__voiceplg-is": "false",
    "tongue": "1",
    "Hm_lvt_c31f95cd1f1c01c74f44d211939ceb8c": "1712659199",
    "__gads": "ID",
    "__gpi": "UID",
    "__eoi": "ID",
    "Hm_lpvt_c31f95cd1f1c01c74f44d211939ceb8c": "1712667896",
    "SECKEY_ABVK": "2DPSFBW+PxohRgE9br/PahPpT7wKZzGpOzUoWKrHE14%3D",
    "BMAP_SECKEY": "XCSGTS0HVG9MJBd7qjmcuNCVpgwunmx3HOykd-nz4D-iFhbshz31f4mcmp3_W2DuaWoxnWstpA8--nKAgM_oHpmeq9I4YTbb3qlNksDhm1p8aAgMLY_JmRsPghK_5Cz-OHHnXHh16-fsX6GY9TW5yRhSOnFDrBnVc4V5LysnCzkEjrJ4OArZaTA6rA9Gid8tLBOeKUHh-nAGPdfN_KgAnw",
}


def _fetch(url, cookies):
    """Download *url* with the shared headers and return the body decoded as UTF-8.

    A timeout is passed so a stalled connection raises instead of blocking
    the whole crawl indefinitely.
    """
    response = requests.get(
        url, headers=_HEADERS, cookies=cookies, timeout=_REQUEST_TIMEOUT
    )
    return response.content.decode()


def _joined_text(tree, xpath, sep=''):
    """Evaluate *xpath* on *tree* and join the resulting text nodes with *sep*."""
    return sep.join(tree.xpath(xpath))


def _after_label(text, label):
    """Return the part of *text* that follows the last ``label`` + colon.

    When the label is absent the text is returned unchanged.
    NOTE(review): the page may render a full-width colon after the label, so
    both the ASCII and the full-width form are accepted - confirm against the site.
    """
    return re.split(re.escape(label) + '[:：]', text)[-1]


def _add_sheet_with_header(workbook, sheet_name):
    """Add a sheet called *sheet_name* to an xlwt workbook and write the title row."""
    sheet = workbook.add_sheet(sheet_name, cell_overwrite_ok=True)
    for col, title in enumerate(_SHEET_HEADER):
        sheet.col(col).width = 2560 * 3
        sheet.write(0, col, title)
    return sheet


def get_all():
    """Crawl every route of the three listed route categories.

    For each (category name, listing id) pair the listing page is downloaded
    and handed to ``parse_all`` together with the category name.
    """
    categories = (
        ('江南线路(1-399)', 2),
        ('江北线路(400-699)', 3),
        ('江宁线路(700-999)', 4),
    )
    for tynm, tyid in categories:
        list_url = '{}/line{}'.format(_BASE_URL, tyid)
        parse_all(_fetch(list_url, _LIST_COOKIES), tynm)


def parse_all(response, tynm):
    """Extract every route link from a listing page and crawl each one.

    Args:
        response: HTML text of a category listing page.
        tynm: Category name, passed through to ``get_one``.
    """
    tree = etree.HTML(response)
    # Selecting @href directly skips anchors without an href instead of
    # failing on an empty result list.
    for xlid in tree.xpath('//div[@class="list clearfix"]/a/@href'):
        get_one(xlid, tynm)


def get_one(xlid, tynm):
    """Download a single route page and pass it to ``parse_one``.

    Args:
        xlid: Site-relative href of the route page, as scraped from the listing.
        tynm: Category name, passed through to ``parse_one``.
    """
    one_url = '{}{}'.format(_BASE_URL, xlid)
    parse_one(_fetch(one_url, _LINE_COOKIES), tynm)


def parse_one(response, tynm):
    """Parse one route page and store the extracted row via ``save_data``.

    Args:
        response: HTML text of a route page.
        tynm: Category name; used as the key of the saved row.
    """
    tree = etree.HTML(response)
    desc = '//ul[@class="bus-desc"]'
    stops = '//div[@class="service-area"]/div[@class="bus-lzlist mb15"]'

    # Route name.
    xlmc = _joined_text(tree, '//h1[@class="title"]/span/text()')
    # Route type; the first and last characters are dropped.
    xllx = _joined_text(tree, '//h1[@class="title"]/a/text()')[1:-1]
    # Operating hours.
    yxsj = _after_label(_joined_text(tree, desc + '/li[1]/text()'), '时间')
    # Reference fare.
    ckpj = _after_label(_joined_text(tree, desc + '/li[2]/text()'), '票价')
    # Operating company.
    gjgs = _joined_text(tree, desc + '/li[3]/a/text()')
    # Last-updated stamp.
    zhgx = _after_label(_joined_text(tree, desc + '/li[4]/span/text()'), '更新')
    # Stop names of the first and second stop list, joined with '/'.
    gjxl_zx = _joined_text(tree, stops + '[1]/ol/li/a/text()', sep='/')
    gjxl_fx = _joined_text(tree, stops + '[2]/ol/li/a/text()', sep='/')

    data = {tynm: [xlmc, xllx, yxsj, ckpj, gjgs, zhgx, gjxl_zx, gjxl_fx]}
    save_data(data, tynm, xlmc)


def save_data(data, tynm, xlmc):
    """Append the rows in *data* to the matching sheets of the output workbook.

    The workbook is created on first use, and a sheet named *tynm* (with a
    title row) is added if it does not exist yet. All rows are written into a
    single copy of the workbook that is saved once.

    Args:
        data: Mapping of sheet name to the list of cell values for one row.
            Keys that match no sheet are skipped.
        tynm: Category name; the sheet guaranteed to exist after this call.
        xlmc: Route name, used only in the progress message.
    """
    if not os.path.exists(_XLS_PATH):
        fresh = xlwt.Workbook(encoding='utf-8')
        _add_sheet_with_header(fresh, tynm)
        fresh.save(_XLS_PATH)

    # formatting_info=True makes xlrd load column widths, so xlutils.copy
    # carries them over instead of resetting them on every rewrite.
    book = xlrd.open_workbook(_XLS_PATH, formatting_info=True)
    sheet_names = list(book.sheet_names())
    next_rows = {name: book.sheet_by_name(name).nrows for name in sheet_names}

    work = copy(book)
    if tynm not in next_rows:
        _add_sheet_with_header(work, tynm)
        sheet_names.append(tynm)
        next_rows[tynm] = 1  # row 0 holds the titles just written

    for name, row in data.items():
        if name not in next_rows:
            continue
        sheet = work.get_sheet(sheet_names.index(name))
        for col, value in enumerate(row):
            sheet.write(next_rows[name], col, value)
    work.save(_XLS_PATH)

    print(r'***ok: 公交线路数据: {} - {}'.format(tynm, xlmc))


if __name__ == '__main__':
    get_all()
 
 
运行效果:
1) running 中:
2) ending:

