4.30

标签：10 plt df values print ax 4.30

一、实验目的

● 使学生熟练安装扩展库numpy、requests、bs4、pandas、seaborn、matplotlib等；

● 使学生熟悉使用标准库csv操作文件；

● 使学生熟悉使用pandas进行数据分析的基本操作；

● 使学生了解使用seaborn绘制热力图的方法；

● 使学生熟练使用matplotlib进行数据可视化；

● 使学生熟练使用numpy进行科学计算；

● 使学生熟练运用requests库和bs4库进行基本的数据爬取。

二、实验环境及实验准备

● 所需硬件环境为微机；

● 所需软件环境为Python 3.X等；

● 掌握Python下numpy、requests、bs4、pandas、seaborn、matplotlib、csv等的使用。

三、实验内容

（一）、中国大学排名数据分析与可视化；（写到实验报告中）

【源代码程序】

import requests

from bs4 import BeautifulSoup

import pandas as pd

import matplotlib.pyplot as plt



# Render Chinese text with the SimHei font and work around the minus-sign
# display problem that comes with it.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})



def fetch_rankings(year):
    """Download the ranking page for ``year`` and return its first ten rows.

    Args:
        year: Year used to build the ranking URL.

    Returns:
        A DataFrame with the columns ``Year``, ``Rank``, ``Name`` and
        ``Score`` (possibly empty), or ``None`` when the page could not be
        fetched.
    """
    url = f"https://www.shanghairanking.cn/rankings/bcur/{year}"
    try:
        # A timeout keeps the script from hanging forever on a dead connection.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        print(f"无法获取{year}年的数据，请检查URL是否正确或网络连接。")
        return None

    if response.status_code != 200:
        print(f"无法获取{year}年的数据，请检查URL是否正确或网络连接。")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): assumes the ranking lives in a plain <table>; adjust the
    # selector to the real page markup if that is not the case.
    university_rows = soup.select('table tr')

    rankings = []
    for row in university_rows[1:11]:  # skip the first row, keep the next ten
        columns = row.find_all('td')
        if len(columns) < 3:
            # Rows without rank/name/score cells would raise IndexError below.
            continue
        rankings.append({
            'Year': year,
            'Rank': columns[0].text.strip(),
            'Name': columns[1].text.strip(),
            'Score': columns[2].text.strip(),
        })

    # Explicit columns keep the frame usable even when no row was parsed.
    return pd.DataFrame(rankings, columns=['Year', 'Rank', 'Name', 'Score'])





years = list(range(2015, 2020))  # years to scrape: 2015 through 2019
all_rankings = []

for year in years:
    ranking_data = fetch_rankings(year)
    if ranking_data is not None:
        all_rankings.append(ranking_data)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame with the expected columns when every download failed.
if all_rankings:
    df_rankings = pd.concat(all_rankings, ignore_index=True)
else:
    df_rankings = pd.DataFrame(columns=['Year', 'Rank', 'Name', 'Score'])

print(df_rankings.head(10))  # show the first ten records as a sample









def visualize_rankings(df):
    """Plot rank against year, one line per university.

    Args:
        df: DataFrame with ``Year``, ``Rank`` and ``Name`` columns. ``Rank``
            must be convertible to ``int``. The frame is not modified.
    """
    # Work on a copy so the caller's 'Rank' column keeps its original dtype.
    df = df.copy()
    df['Rank'] = df['Rank'].astype(int)

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese text with SimHei
    plt.rcParams['axes.unicode_minus'] = False  # work around the minus-sign display problem

    # One line per university; only the first ten distinct names are drawn.
    for name in df['Name'].unique()[:10]:
        subset = df[df['Name'] == name]
        plt.plot(subset['Year'], subset['Rank'], label=name, marker='o')

    plt.title('Top 10 Universities Ranking Trend (2015-2019)')
    plt.xlabel('Year')
    plt.ylabel('Rank')
    # Years are whole numbers; pin the ticks so no fractional years appear.
    plt.xticks(sorted(df['Year'].unique()))

    # Put the legend outside the axes, to the right of the plot.
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    # Make room for the outside legend so it is not clipped by the figure edge.
    plt.tight_layout()

    plt.show()

# Draw the ranking trend chart for the scraped data.
visualize_rankings(df_rankings)



def query_university_rank(df):
    """Interactively look up rows of ``df`` by university name and year.

    Prompts for a name and a year in a loop and prints the matching rows,
    or a not-found message. Entering ``q`` (any case) as the name ends the
    loop. A non-numeric year prints an error and restarts the prompt.

    Args:
        df: DataFrame with ``Name`` and ``Year`` columns.
    """
    name_prompt = "请输入大学名称（输入'q'退出）："
    year_prompt = "请输入年份："

    while True:
        university_name = input(name_prompt)
        if university_name.lower() == 'q':
            return

        raw_year = input(year_prompt)
        try:
            year = int(raw_year)
            name_matches = df['Name'] == university_name
            year_matches = df['Year'] == year
            result = df[name_matches & year_matches]
            if not result.empty:
                print(result)
            else:
                print(f"{university_name}在{year}年的排名信息未找到。")
        except ValueError:
            print("年份输入无效，请输入数字。")





# Start the interactive rank lookup on the scraped data.
query_university_rank(df_rankings)

【运行测试】

（二）、豆瓣图书评论数据分析与可视化；（写到实验报告中）

【源代码程序】

import re
from collections import Counter

import requests
from lxml import etree
import pandas as pd
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# HTTP headers passed to requests.get when downloading comment pages.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39"
    ),
}

# Module-level accumulators: comment rows and cleaned word tokens.
comments, words = [], []

# Patterns are compiled once at import time instead of on every call.

# Punctuation, whitespace, the literal "[表情]" placeholder and the filler
# characters 的/了/是. The placeholder is matched as an alternative because a
# bare "[表情]" inside a character class would close the class early.
_NOISE_RE = re.compile(r"\[表情\]|[\n”“|,，；;'! 。的了是]")
# Leading "<digits>::" prefix.
_USERNAME_RE = re.compile(r"^\d+::")
# URLs; [a-zA-Z0-9] is used instead of \w so Chinese text is not swallowed.
_URL_RE = re.compile(r"""
    (https?://)?
    ([a-zA-Z0-9]+)
    (\.[a-zA-Z0-9]+)
    (\.[a-zA-Z0-9]+)*
    (/[a-zA-Z0-9]+)*
""", re.VERBOSE | re.IGNORECASE)
# Date words: weekday names first so "周日" is removed as a whole.
_DATE_RE = re.compile(r"周[一二三四五六日]|[年月日]")
# Every run of digits, wherever it occurs.
_DIGITS_RE = re.compile(r"\d+")
_SPACE_RE = re.compile(r"\s+")


def regex_change(line):
    """Strip noise from a piece of comment text.

    Removes, in order: punctuation and filler characters, a leading
    ``<digits>::`` prefix, URLs, date words, digit runs and whitespace.

    Args:
        line: Text to clean.

    Returns:
        The cleaned text, possibly the empty string.
    """
    line = _NOISE_RE.sub("", line)
    line = _USERNAME_RE.sub("", line)
    line = _URL_RE.sub("", line)
    line = _DATE_RE.sub("", line)
    line = _DIGITS_RE.sub("", line)
    line = _SPACE_RE.sub("", line)
    return line

def getComments(url):
    """Download one page of short comments and record its contents.

    For every comment found, appends a
    ``[user name, content, time, score, vote count]`` row to the
    module-level ``comments`` list, and appends each cleaned jieba token of
    length >= 2 to the module-level ``words`` list.

    Args:
        url: Address of the comment page to download.
    """
    # Rating title shown on the page -> numeric score; anything else scores 0.
    rating_scores = {"力荐": 5, "推荐": 4, "还行": 3, "较差": 2, "很差": 1}

    try:
        # A timeout keeps the script from hanging forever on a dead connection.
        resp = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f"请求失败：{url}（{exc}）")
        return
    if resp.status_code != 200:
        print(f"请求失败：{url}（状态码 {resp.status_code}）")
        return

    html = etree.HTML(resp.text)
    for comment in html.xpath(".//div[@class='comment']"):
        # First link text is used as the user name, the second as the time.
        info = comment.xpath(".//span[@class='comment-info']/a/text()")
        shorts = comment.xpath(".//p[@class='comment-content']/span[@class='short']/text()")
        votes = comment.xpath(".//span[@class='comment-vote']/span[@class='vote-count']/text()")
        if len(info) < 2 or not shorts or not votes:
            # Incomplete markup would raise IndexError below; skip the entry.
            continue

        name, posted_at = info[0], info[1]
        content = str(shorts[0]).strip()

        # Reset per comment so an unrated or unrecognised rating never
        # inherits the previous comment's score; the last recognised title wins.
        score = 0
        for rating_title in comment.xpath(".//span[@class='comment-info']/span/@title"):
            score = rating_scores.get(str(rating_title), score)

        comments.append([str(name), content, str(posted_at), score, int(votes[0])])

        for token in jieba.cut(content, cut_all=False, HMM=False):
            cleaned = regex_change(token)  # clean once, reuse the result
            if len(cleaned) >= 2:
                words.append(cleaned)

def getWordCloud(words):
    """Print the most frequent words, then show and save a word cloud.

    Prints up to ten ``(word, count)`` pairs in descending count order,
    displays the word cloud with matplotlib and writes it to
    ``wordcloud.png``.

    Args:
        words: List of word strings.
    """
    if not words:
        # Nothing to count or draw.
        # NOTE(review): WordCloud.generate is believed to reject empty text — confirm.
        print("没有可用于生成词云的词语。")
        return

    print("热词前10位：")
    # most_common(10) returns fewer pairs when there are fewer distinct
    # words, so short inputs no longer raise IndexError.
    for word_count in Counter(words).most_common(10):
        print(word_count)

    text = ' '.join(words)
    w = WordCloud(background_color='white',
                  width=1000,
                  height=700,
                  font_path='simhei.ttf',
                  margin=10).generate(text)
    # Draw the image first; calling show() before imshow() displays nothing.
    plt.imshow(w)
    plt.show()
    w.to_file('wordcloud.png')

print("请选择以下选项:")
print("   1.热门评论")
print("   2.最新评论")

# Menu choice -> value of the `sort` query parameter.
sort_by_choice = {1: "new_score", 2: "time"}

try:
    info = int(input())
except ValueError:
    info = None  # non-numeric input is treated as an invalid choice

title = ['用户名', '短评内容', '评论时间', '评分', '点赞数']
sort_key = sort_by_choice.get(info)

if sort_key is None:
    print("无效选项，请输入1或2。")
else:
    print("前10位短评信息：")
    # Start from empty accumulators; getComments appends to these globals.
    comments = []
    words = []
    for i in range(0, 60, 20):  # first three pages, 20 comments per page
        url = "https://book.douban.com/subject/10517238/comments/?start={}&limit=20&status=P&sort={}".format(
            i, sort_key)
        getComments(url)
    df = pd.DataFrame(comments, columns=title)
    print(df.head(10))
    print("点赞数前10位的短评信息：")
    df = df.sort_values(by='点赞数', ascending=False)
    print(df.head(10))
    getWordCloud(words)

【运行测试】

（三）、函数图形1绘制；（写到实验报告中）

【源代码程序】

import numpy as np

import matplotlib.pyplot as plt



# 定义函数

def y1(x):
    """Return ``x`` squared."""
    return pow(x, 2)



def y2(x):
    """Return ``cos(2x)``, computed with numpy."""
    doubled = 2 * x
    return np.cos(doubled)



def y3(x):
    """Return the product ``y1(x) * y2(x)``."""
    squared = y1(x)
    cosine = y2(x)
    return squared * cosine



# x samples: 1000 evenly spaced points on [0, 10]
x_values = np.linspace(0, 10, 1000)

# (1) Draw y1, y2 and y3 in one coordinate system, each with its own colour
# AND its own line style, as the task asks.
fig, ax = plt.subplots()

ax.plot(x_values, y1(x_values), color='blue', linestyle='-', linewidth=2, label='y1=x^2')
ax.plot(x_values, y2(x_values), color='red', linestyle='--', linewidth=2, label='y2=cos(2x)')
ax.plot(x_values, y3(x_values), color='green', linestyle='-.', linewidth=2, label='y3=y1*y2')

ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_title('Function Graphs')

ax.legend()

plt.show()



# (2) Draw y1, y2 and y3 as three stacked subplots in one figure.
fig, axs = plt.subplots(nrows=3, ncols=1, sharex=True)

# (function, colour, label) for each panel; the label doubles as the title.
panels = [
    (y1, 'blue', 'y1=x^2'),
    (y2, 'red', 'y2=cos(2x)'),
    (y3, 'green', 'y3=y1*y2'),
]

for ax, (func, color, label) in zip(axs.flat, panels):
    ax.plot(x_values, func(x_values), color=color, linewidth=2, label=label)
    ax.set_title(label)
    # No fixed y ticks: the same -1..10 ticks on every panel ignored each
    # curve's own range, so the default per-panel ticks are used instead.
    ax.label_outer()  # keep tick labels only on the outer edges

# Keep the panel titles from overlapping the neighbouring axes.
fig.tight_layout()

plt.show()

【运行测试】

（四）、函数图形2绘制；（写到实验报告中）

【源代码程序】

import numpy as np

import matplotlib.pyplot as plt



# 定义函数

def y(x):
    """Evaluate the piecewise function plotted below.

    Uses ``sqrt(2*x**2 - x**2)`` where ``x >= 0`` and
    ``-2.14 * sqrt(2 - |x|)`` elsewhere. Both branches are evaluated for
    every element before ``np.where`` selects between them.
    """
    # NOTE(review): 2*x**2 - x**2 simplifies to x**2, so this branch is just
    # |x| — possibly not the intended formula; confirm against the task.
    non_negative_branch = np.sqrt(2 * x**2 - x**2)
    negative_branch = -2.14 * np.sqrt(2 - np.abs(x))
    return np.where(x >= 0, non_negative_branch, negative_branch)



# x samples: 1000 evenly spaced points on [-2, 2]
x_values = np.linspace(-2, 2, 1000)

# Fill the area between the curve y(x) and the x axis.
fig, ax = plt.subplots()
curve = y(x_values)
ax.fill_between(x_values, curve, color='blue', alpha=0.5)

ax.set(xlabel='x-axis', ylabel='y-axis', title='Function Graph')

plt.show()

标签：10,plt,df,values,print,ax,4.30
From： https://www.cnblogs.com/szm123/p/18256978

相关文章

赞助商

阅读排行