首页 > 其他分享 >实验二

实验二

时间:2023-10-18 23:13:19浏览次数:38  
标签:err self cursor 实验 print data conn

一、实验二

作业①:

要求:在中国气象网(http://www.weather.com.cn)给定城市集的7日天气预报,并保存在数据库。

输出信息

序号 地区 日期 天气信息 温度
1 北京 7日(今天) 多云转晴 31℃/17℃
2 北京 8日(明天) 34℃/20℃
3 北京 9日(后天) 多云转晴 32℃/19℃
4 北京 10日(周六) 阴转阵雨 34℃/18℃
5 北京 11日(周末) 阵雨 29℃/17℃

Gitee链接:作业二链接

from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
import urllib.request
import sqlite3

class WeatherDB:
    def __init__(self):
        self.cursor = None
        self.con = None

    def openDB(self):
        self.con = sqlite3.connect("weathers.db")
        self.cursor = self.con.cursor()
        try:
            self.cursor.execute(
                "create table weathers (wCity varchar(16),"
                "wDate varchar(16),"
                "wWeather varchar(64),"
                "wTemp varchar(32),"
                "constraint pk_weather primary key (wCity,wDate))")
        except Exception as err:
            print(err)
            self.cursor.execute("delete from weathers")

    def closeDB(self):
        self.con.commit()
        self.con.close()

    def insert(self, city, date, weather, temp):
        try:
            self.cursor.execute("insert into weathers (wCity,wDate,wWeather,wTemp) values (?,?,?,?)",
                                (city, date, weather, temp))
        except Exception as err:
            print(err)

    def show(self):
        self.cursor.execute("select * from weathers")
        rows = self.cursor.fetchall()
        print("%-16s%-16s%-32s%-16s" % ("city", "date", "weather", "temp"))
        for row in rows:
            print("%-16s%-16s%-32s%-16s" % (row[0], row[1], row[2], row[3]))


class WeatherForecast:
    def __init__(self):
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) "
                          "Gecko/2008072421 Minefield/3.0.2pre"}
        self.cityCode = {"北京": "101010100", "上海": "101020100", "广州": "101280101", "深圳": "101280601"}

    def forecastCity(self, city):
        if city not in self.cityCode.keys():
            print(city + " 找不到代码")
            return
        url = "http://www.weather.com.cn/weather/" + self.cityCode[city] + ".shtml"
        try:
            req = urllib.request.Request(url, headers=self.headers)
            data = urllib.request.urlopen(req)
            data = data.read()
            dammit = UnicodeDammit(data, ["utf-8", "gbk"])
            data = dammit.unicode_markup
            soup = BeautifulSoup(data, "lxml")
            lis = soup.select("ul[class='t clearfix'] li")
            x = 0
            for li in lis:
                try:
                    date = li.select('h1')[0].text
                    weather = li.select('p[class="wea"]')[0].text
                    if x == 0:  # 为今天只有一个温度做判断 <i>14℃</i>
                        x += 1
                        temp = li.select('p[class="tem"] i')[0].text
                    else:
                        temp = li.select('p[class="tem"] span')[0].text + "/" + li.select('p[class="tem"] i')[0].text
                    print(city, date, weather, temp)
                    self.db.insert(city, date, weather, temp)
                except Exception as err:
                    print(err)
        except Exception as err:
            print(err)

    def process(self, cities):
        self.db = WeatherDB()
        self.db.openDB()

        for city in cities:
            self.forecastCity(city)

        self.db.show()
        self.db.closeDB()


ws = WeatherForecast()
ws.process(["北京", "上海", "广州", "深圳"])
print("completed")

心得体会:

学习了解了sqlite3,加深了对bs4的理解

作业②:

要求:用 requests 和 BeautifulSoup 库方法定向爬取股票相关信息,并存储在数据库中。

候选网站:东方财富网:https://www.eastmoney.com/

新浪股票:http://finance.sina.com.cn/stock/

技巧:在谷歌浏览器中进入 F12 调试模式进行抓包,查找股票列表加载使用的 url,并分析 api 返回的值,并根据所要求的参数可适当更改api 的请求参数。根据 URL 可观察请求的参数 f1、f2 可获取不同的数值,根据情况可删减请求的参数。

参考链接:https://zhuanlan.zhihu.com/p/50099084

Gitee链接:作业二链接

输出:

import requests
import json
import pandas as pd
import pymysql


HOSTNAME = '127.0.0.1'
PORT = '3306'
DATABASE = 'spider_ex' 
USERNAME = 'root'
PASSWORD = '1234'
# 打开数据库连接
conn = pymysql.connect(host=HOSTNAME, user=USERNAME,password=PASSWORD,database=DATABASE,charset='utf8')
# 使用 cursor() 方法创建一个游标对象 cursor
cursor = conn.cursor()

sql_table = """
CREATE TABLE ex2_2 (
    代码 VARCHAR(10),
    名称 VARCHAR(50),
    最新价 FLOAT,
    涨跌幅 FLOAT,
    涨跌额 FLOAT,
    成交量 BIGINT,
    成交额 FLOAT,
    振幅 FLOAT,
    最高 FLOAT,
    最低 FLOAT,
    今开 FLOAT,
    昨收 FLOAT
)
"""

try:
    cursor.execute(sql_table)
    conn.commit()
    print("表创建成功")
except Exception as err:
    conn.rollback()
    print("表创建失败", err)

def InsertData(data):
    global conn
    global cursor
    sql = "INSERT INTO ex2_2(代码, 名称, 最新价, 涨跌幅, 涨跌额, 成交量, 成交额, 振幅, 最高, 最低, 今开, 昨收) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    try:
        values = (
            data['代码'],
            data['名称'],
            data['最新价'],
            data['涨跌幅'],
            data['涨跌额'],
            data['成交量'],
            data['成交额'],
            data['振幅'],
            data['最高'],
            data['最低'],
            data['今开'],
            data['昨收'],
        )
        cursor.execute(sql, values)
        print("插入成功")
    except Exception as err:
        print("插入失败",err)

def getHtml(page):
    url = f"http://49.push2.eastmoney.com/api/qt/clist/get?"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.36"}
    params = (
    ('cb', 'jQuery112405254755655209056_1696661743317'),
    ('pn', '%d' %page),  # 第几页
    ('pz', '20'),
    ('po', '1'),
    ('np', '1'),
    ('ut', 'bd1d9ddb04089700cf9c27f6f7426281'),
    ('fltt', '2'),
    ('invt', '2'),
    ('wbp2u', '|0|0|0|web'),
    ('fid', 'f3'),
    ('fs', 'm:1 t:2,m:1 t:23'),# 板块
    ('fields', 'f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152'),
    ('_', '1696661743318'),
    )
    resp = requests.get(url, headers=headers,params=params, verify=False)
    res = resp.content[len("jQuery112405254755655209056_1696661743317("):len(resp.content) - 2]
    res = str(res, 'utf-8')
    resp_json = json.loads(res)

    data_list = []  # 创建一个列表来存储提取的数据

    for k in resp_json['data']['diff']:
        data = {
            "代码": k['f12'],
            "名称": k['f14'],
            "最新价": k['f2'],
            "涨跌幅": k['f3'],
            "涨跌额": k['f4'],
            "成交量": k['f5'],
            "成交额": k['f6'],
            "振幅": k['f7'],
            "最高": k['f15'],
            "最低": k['f16'],
            "今开": k['f17'],
            "昨收": k['f18'],
        }
        InsertData(data)
        conn.commit()

for page_number in range(1, 10):
   getHtml(page_number)

cursor.close()
conn.close()

心得体会:

学习了对接口数据的提取转化成json格式,对接口请求url参数的理解与分析,pymysql数据库的使用等,收获蛮多。

作业③:

要求:中国大学2021主榜

爬取主榜所有院校信息,并存储在数据库中,同时将浏览器F2调试分析的过程录制Gif加入至博客中。技巧:分析该网站的发包情况,分析获取数据的api

Gitee链接:作业二链接

输出结果:

import requests
import json
import pymysql
import pandas as pd
import re

HOSTNAME = '127.0.0.1'
PORT = '3306'
DATABASE = 'spider_ex' 
USERNAME = 'root'
PASSWORD = '1234'
# 打开数据库连接
conn = pymysql.connect(host=HOSTNAME, user=USERNAME,password=PASSWORD,database=DATABASE,charset='utf8')
# 使用 cursor() 方法创建一个游标对象 cursor
cursor = conn.cursor()

sql_table = """
CREATE TABLE if not exists ex2_3  (
    排名 INT AUTO_INCREMENT PRIMARY KEY,
    名称 VARCHAR(50) ,
    省份 VARCHAR(32),
    类型 VARCHAR(32),
    总分 VARCHAR(32)
) AUTO_INCREMENT=1
"""
try:
    cursor.execute(sql_table)
    conn.commit()
    print("表创建成功")
except Exception as err:
    conn.rollback()
    print("表创建失败", err)

def InsertData(data):
    global conn
    global cursor
    sql = "INSERT INTO ex2_3(名称 ,省份 ,类型 ,总分) VALUES (%s, %s, %s, %s)"
    try:
        values = (
            data["名称"],
            data["省份"],
            data["类型"],
            data["总分"],
        )
        cursor.execute(sql, values)
        print("插入成功")
    except Exception as err:
        print("插入失败",err)

def getHtml():

    url = "https://www.shanghairanking.cn/_nuxt/static/1697106492/rankings/bcur/2021/payload.js"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.36"
    }
    resp = requests.get(url=url, headers=headers)
    res = resp.content.decode('utf-8')
    univNameCn = re.findall('univNameCn:"(.*?)"', res, re.S) 
    province = re.findall('province:(.*?),', res, re.S) 
    univCategory = re.findall('univCategory:(.*?),', res, re.S) 
    score = re.findall('score:(.*?),', res, re.S)
    data = []
    for i in range(len(univNameCn)):
        province1 = province[i]
        univCategory1 = univCategory[i]
        province_name = p.get(province1, province1)
        category_name = c.get(univCategory1, univCategory1)
        data ={
            "名称": univNameCn[i],
            "省份": province_name,
            "类型": category_name,
            "总分": score[i],
        }
        InsertData(data)
        conn.commit()

getHtml()

cursor.close()
conn.close()

心得体会:

更熟悉了如何通过F12获取对应数据,数据的转换提取
加深了对正则表达式的使用理解,挺万金油的。

标签:err,self,cursor,实验,print,data,conn
From: https://www.cnblogs.com/hiuboom/p/17773627.html

相关文章

  • 实验2 C语言分支与循环基础应用编程
    实验任务1 #include<stdio.h>#include<stdlib.h>#include<time.h>#defineN5#defineN1374#defineN2465intmain(){intnumber;inti;srand(time(0));for(i=0;i<N;++i){number=rand()%(N2-N......
  • 钉钉圈子群用于实验室辅助管理
    其实前面一直用企业微信管理实验室的,疫情期间企业微信未认证的人数上限是500,也差不多够用了。上半年开始企业微信开始弹认证提醒了,提示人数不能超100,然后就发现很多新加的学生无法发送信息了,审批功能倒还是能用,就是不能聊天。微信群、freeflarum免费论坛、兔小巢、QQ频道,学生都没......
  • 实验1 类和对象_基础编程1
    实验任务1task1.cpp1//标准库string,vector,array基础用法23#include<iostream>4#include<string>5#include<vector>6#include<array>78//函数模板9//对满足特定条件的序列类型T对象,使用范围for输出10template<typenameT>11v......
  • 实验二 Linux命令使用(二)
    实验内容及步骤:(1)进入家目录,创建自己的子目录,进入该子目录,运行date>file1,然后运行catfile1,看到什么信息?“>”是什么符号?答:输出重定向操作符解释“date>file1”的含义:答:将当前日期和时间写入名为file1的文件中。(2)运行mandate>>file1,再运行catfile1,看到什么?mandat......
  • OSPF不同网络类型建立邻居实验
    个人名片:......
  • 【OSPF宣告——network命令与多区域配置实验案例】
    个人名片:......
  • 实验2
    实验1 源代码1#include<stdio.h>2#include<stdlib.h>3#include<time.h>4#defineN55#defineN13746#defineN24657intmain()8{9intnumber;10inti;1112srand(time(0));1314for(i=0;i<N;++i)15{......
  • 实验1 类和对象
     task1.cpp#include<iostream>#include<string>#include<vector>#include<array>template<typenameT>voidoutput1(constT&obj){for(autoi:obj)std::cout<<i<<",";std::cout<<"......
  • 实验1
    试验任务1task1.cpp源码//标准库string,vector,array基础用法#include<iostream>#include<string>#include<vector>#include<array>//函数模板//对满足特定条件的序列类型T对象,使用范围for输出template<typenameT>voidoutput1(constT&obj){for......
  • 实验2
    实验任务1#include<stdio.h>#include<stdlib.h>#include<time.h>#defineN5#defineN1374#defineN2465intmain(){intnumber;inti;srand(time(0));for(i=0;i<N;++i){number=rand()%(N2-N1+1)+N1;printf("202......