首页 > 编程语言 >python新发地每日菜价提取

python新发地每日菜价提取

时间:2023-07-06 23:34:00 浏览次数:40
标签:info 发地 get python self dic 菜价 data append

import requests
import csv
import time


class price_spider(object):
    """Scraper for the daily price list served by the Xinfadi market site.

    Result pages are requested with HTTP POST through one shared
    ``requests`` session.  The fields of every returned record are collected
    in parallel lists (one public list attribute per field), and those lists
    are finally written out as the rows of a CSV file.
    """

    # JSON keys read from every price record.  The order matches the list
    # attributes created in ``__init__`` and the columns of the CSV output.
    FIELDS = (
        "prodCat", "prodPcat", "prodName", "lowPrice", "avgPrice",
        "highPrice", "specInfo", "place", "unitInfo", "pubDate",
    )

    # CSV header row, one label per entry of ``FIELDS``: first-level
    # category, second-level category, product name, lowest price, average
    # price, highest price, specification, origin, unit, publication date.
    CSV_HEADER = [
        "一级分类", "二级分类", "品名", "最低价", "平均价",
        "最高价", "规格", "产地", "单位", "发布日期",
    ]

    def __init__(self):
        """Set up the request headers, the empty field lists and the session."""
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)         AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
            "Referer": "http://www.xinfadi.com.cn/priceDetail.html",
            # NOTE(review): hard-coded cookie value; presumably copied from a
            # browser session and may stop working -- confirm before relying on it.
            "Cookie": "SHOP_MANAGE=c5dbec72-aa12-49b1-8557-ba67bad6bbb6"}
        self.prodCat = []
        self.prodPcat = []
        self.prodName = []
        self.lowPrice = []
        self.avgPrice = []
        self.highPrice = []
        self.specInfo = []
        self.place = []
        self.unitInfo = []
        self.pubDate = []
        self.session = requests.session()

    def get_response(self, url, data, timeout=10):
        """POST ``data`` to ``url`` and return the decoded JSON body.

        Args:
            url: Endpoint to query.
            data: Form fields sent as the POST body.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the crawl forever.

        Returns:
            The object produced by decoding the response body as JSON.

        Raises:
            requests.HTTPError: The server answered with a 4xx/5xx status.
            requests.Timeout: No response arrived within ``timeout`` seconds.
        """
        response = self.session.post(
            url, data=data, headers=self.headers, timeout=timeout)
        # Surface HTTP errors here instead of as a confusing JSON decode
        # failure on an error page.
        response.raise_for_status()
        return response.json()

    def parse_data(self, dic):
        """Append every record found under ``dic['list']`` to the field lists.

        Each record is printed, then the value of each key in ``FIELDS`` is
        appended to the attribute of the same name; keys missing from a
        record are stored as ``None``.

        Args:
            dic: Decoded response; must contain a ``'list'`` entry holding
                the record dicts.

        Raises:
            KeyError: ``dic`` has no ``'list'`` entry.
        """
        # Iterate over ALL records: the index loop used previously started at
        # 1 and therefore silently dropped the first record of every page.
        for info in dic['list']:
            print(info)
            for field in self.FIELDS:
                getattr(self, field).append(info.get(field))

    def save_data(self, path='price.csv'):
        """Write the header row and all collected records to a CSV file.

        Args:
            path: Destination file; it is overwritten if it already exists.
                Defaults to ``price.csv`` in the current directory.
        """
        columns = [getattr(self, field) for field in self.FIELDS]
        with open(path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(self.CSV_HEADER)
            # zip() turns the per-field columns back into per-record rows.
            writer.writerows(zip(*columns))

    def run(self, pub_date="2023/07/03", pages=24, limit=20):
        """Fetch the requested pages, save them as CSV and print the elapsed time.

        Args:
            pub_date: Date sent as both the start and the end of the
                publication-date filter, formatted like ``"2023/07/03"``.
            pages: Number of result pages to request, starting at page 1.
            limit: Value sent as the ``limit`` form field with each request.
        """
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start = time.perf_counter()
        base_url = 'http://www.xinfadi.com.cn/getPriceData.html'
        for page in range(1, pages + 1):
            d = {"limit": limit, "current": f"{page}",
                 "pubDateStartTime": pub_date, "pubDateEndTime": pub_date}
            dic = self.get_response(base_url, data=d)
            self.parse_data(dic)
        self.save_data()
        end = time.perf_counter()
        print('Running time: %s Seconds' % (end - start))

# Script entry point: build the spider, then start it.
spider = price_spider()
spider.run()

 

import requests
import csv
import time

class price_spider(object):
    """Scraper for the daily price list served by the Xinfadi market site.

    Result pages are requested with HTTP POST through one shared
    ``requests`` session.  The fields of every returned record are collected
    in parallel lists (one public list attribute per field), and those lists
    are finally written out as the rows of a CSV file.
    """

    # JSON keys read from every price record.  The order matches the list
    # attributes created in ``__init__`` and the columns of the CSV output.
    FIELDS = (
        "prodCat", "prodPcat", "prodName", "lowPrice", "avgPrice",
        "highPrice", "specInfo", "place", "unitInfo", "pubDate",
    )

    # CSV header row, one label per entry of ``FIELDS``: first-level
    # category, second-level category, product name, lowest price, average
    # price, highest price, specification, origin, unit, publication date.
    CSV_HEADER = [
        "一级分类", "二级分类", "品名", "最低价", "平均价",
        "最高价", "规格", "产地", "单位", "发布日期",
    ]

    def __init__(self):
        """Set up the request headers, the empty field lists and the session."""
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)         AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
            "Referer": "http://www.xinfadi.com.cn/priceDetail.html",
            # NOTE(review): hard-coded cookie value; presumably copied from a
            # browser session and may stop working -- confirm before relying on it.
            "Cookie": "SHOP_MANAGE=c5dbec72-aa12-49b1-8557-ba67bad6bbb6"}
        self.prodCat = []
        self.prodPcat = []
        self.prodName = []
        self.lowPrice = []
        self.avgPrice = []
        self.highPrice = []
        self.specInfo = []
        self.place = []
        self.unitInfo = []
        self.pubDate = []
        self.session = requests.session()

    def get_response(self, url, data, timeout=10):
        """POST ``data`` to ``url`` and return the decoded JSON body.

        Args:
            url: Endpoint to query.
            data: Form fields sent as the POST body.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the crawl forever.

        Returns:
            The object produced by decoding the response body as JSON.

        Raises:
            requests.HTTPError: The server answered with a 4xx/5xx status.
            requests.Timeout: No response arrived within ``timeout`` seconds.
        """
        response = self.session.post(
            url, data=data, headers=self.headers, timeout=timeout)
        # Surface HTTP errors here instead of as a confusing JSON decode
        # failure on an error page.
        response.raise_for_status()
        return response.json()

    def parse_data(self, dic):
        """Append every record found under ``dic['list']`` to the field lists.

        Each record is printed, then the value of each key in ``FIELDS`` is
        appended to the attribute of the same name; keys missing from a
        record are stored as ``None``.

        Args:
            dic: Decoded response; must contain a ``'list'`` entry holding
                the record dicts.

        Raises:
            KeyError: ``dic`` has no ``'list'`` entry.
        """
        # Iterate over ALL records: the index loop used previously started at
        # 1 and therefore silently dropped the first record of every page.
        for info in dic['list']:
            print(info)
            for field in self.FIELDS:
                getattr(self, field).append(info.get(field))

    def save_data(self, path='price.csv'):
        """Write the header row and all collected records to a CSV file.

        Args:
            path: Destination file; it is overwritten if it already exists.
                Defaults to ``price.csv`` in the current directory.
        """
        columns = [getattr(self, field) for field in self.FIELDS]
        with open(path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(self.CSV_HEADER)
            # zip() turns the per-field columns back into per-record rows.
            writer.writerows(zip(*columns))

    def run(self, pub_date="2023/07/03", pages=24, limit=20):
        """Fetch the requested pages, save them as CSV and print the elapsed time.

        Args:
            pub_date: Date sent as both the start and the end of the
                publication-date filter, formatted like ``"2023/07/03"``.
            pages: Number of result pages to request, starting at page 1.
            limit: Value sent as the ``limit`` form field with each request.
        """
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start = time.perf_counter()
        base_url = 'http://www.xinfadi.com.cn/getPriceData.html'
        for page in range(1, pages + 1):
            d = {"limit": limit, "current": f"{page}",
                 "pubDateStartTime": pub_date, "pubDateEndTime": pub_date}
            dic = self.get_response(base_url, data=d)
            self.parse_data(dic)
        self.save_data()
        end = time.perf_counter()
        print('Running time: %s Seconds' % (end - start))
# Script entry point: build the spider, then start it.
spider = price_spider()
spider.run()

标签:info,发地,get,python,self,dic,菜价,data,append
From: https://www.cnblogs.com/wutanghua/p/17533617.html

相关文章

  • R语言和Python用泊松过程扩展:霍克斯过程Hawkes Processes分析比特币交易数据订单到达
    全文下载链接:http://tecdat.cn/?p=25880 最近我们被客户要求撰写关于泊松过程的研究报告,包括一些图形和统计输出。本文描述了一个模型,该模型解释了交易的聚集到达,并展示了如何将其应用于比特币交易数据。这是很有趣的,原因很多。例如,对于交易来说,能够预测在短期内是否有更多的买......
  • python:导入库、模块失败
    一般发生在程序开始部分: from pymodbus.client.sync import ModbusSerialClient; from pymodbus.payload import BinaryPayloadDecoder; from pymodbus.constants import Endian; from pymodbus.compat import iteritems; import time; import threading; import json; from device impor......
  • SRGAN图像超分重建算法Python实现(含数据集代码)
    摘要:本文介绍深度学习的SRGAN图像超分重建算法,使用Python以及Pytorch框架实现,包含完整训练、测试代码,以及训练数据集文件。博文介绍图像超分算法的原理,包括生成对抗网络和SRGAN模型原理和实现的代码,同时结合具体内容进行解释说明,完整代码资源文件请转至文末的下载链接。完整......
  • python: using pdfplumber Lib read pdf file
     fromopenpyxlimportWorkbookfromopenpyxl.stylesimportPatternFill,Side,Borderimportpdfplumberl=[]defvisitDir(path):ifnotos.path.isdir(path):print('Error:"',path,'"isnotadirectoryordoesnotexi......
  • python列表(一)
    列表由一系列按特定顺序排列的元素组成。bicycles=['trek','cannondale','redline','specialized']print(bicycles)1.访问列表元素#索引print(bicycles[0])#最后一个元素print(bicycles[-1])#倒数第二个元素print(bicycles[-2])2.修改、添加和删除元素2.1......
  • python基础day39 生产者消费者模型和线程相关
    如何查看进程的id号进程都有几个属性:进程名、进程id号(pid--->processid)每个进程都有一个唯一的id号,通过这个id号就能找到这个进程importosimporttimedeftask():print("task中的子进程号:",os.getpid())print("主进程中的进程号:",os.getppid())#parent......
  • 多线程python
    如何开启进程使用的是内置的模块:multiprocessfrommultiprocessingimportProcessdeftask():withopen('a.txt','w',encoding="utf8")asf:f.write('helloworld')#开一个进程来执行task这个任务#如何开进程"""在Wind......
  • Logistic回归模型,python
    代码参考https://blog.csdn.net/DL11007/article/details/129204192?ops_request_misc=&request_id=&biz_id=102&utm_term=logistic%E6%A8%A1%E5%9E%8Bpython&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-1-129204192.142^v......
  • Python中标准输入(stdin)、标准输出(stdout)、标准错误(stderr)的用法
    1.标准输入input()、raw_input()Python3.x中input()函数可以实现提示输入,python2.x中要使用raw_input(),例如:foo=input("Enter:")#python2.x要用raw_input()print("Youinput:[%s]"%(foo))#测试执行Enter:abcdeYouinput:[abcde]#读取一行(不......
  • Python中os.system()、subprocess.run()、call()、check_output()的用法
    1.os.system()os.system()是对C语言中system()系统函数的封装,允许执行一条命令,并返回退出码(exitcode),命令输出的内容会直接打印到屏幕上,无法直接获取。示例:#test.pyimportosos.system("ls-l|greptest")#允许管道符#测试执行$ll<=======......