# 导包 (Imports)
import yfinance as yf
import pandas as pd
import numpy as np
# 1. 描述性统计的汇总与计算 (Descriptive statistics: summarizing and computing)
# 1. Descriptive statistics: summarizing and computing
# Numeric data: a small frame containing NaN so the NA handling of each
# reduction below can be observed.
df = pd.DataFrame(
    {
        "one": [1.4, 7.1, np.nan, 0.75],
        "two": [np.nan, -4.5, np.nan, -1.3],
    },
    index=list("abcd"),
)
# print(df.sum())  # Series of column sums
# print(df.sum(axis=1))  # Series of row sums
# print(df.sum(axis=1, skipna=False))  # NA is not skipped: a row with any NA sums to NA
# print(df.mean(axis=1))  # row means over the remaining non-NA values (needs at least one)
# print(df.idxmax())  # index label of the maximum in each column
# print(df.cumsum())  # running total accumulated row by row within each column
# print(df.describe())  # several summary statistics at once
# print("---------------------")
# Non-numeric data
obj = pd.Series(4 * ["a", "a", "b", "c"])
# print(obj.describe())
# 2. 相关系数与协方差 (Correlation and covariance)
# 2. Correlation and covariance
# Ticker symbols intended for the (currently disabled) download below.
symbols = [
    "AAPL",
    "GOOG",
    "IBM",
    "MSFT",
]
# Download five days of historical data. Left disabled for now; the original
# note says it needs "tz" -- TODO confirm what is required before enabling.
# data = yf.download(symbols, "2016-10-17", "2016-10-21")
# 3. 唯一值、计数和成员属性 (Unique values, value counts, and membership)
# 3. Unique values, value counts, and membership
obj2 = pd.Series(list("cadaabbcc"))
# Distinct values of obj2, returned in array form.
uniques = obj2.unique()
# print(uniques)  # numeric results can additionally be sorted with sort
# print(obj2.value_counts())  # frequencies as a Series, most frequent first
# Same count, but starting from the NumPy array of values (wrapped back into
# a Series first, so the result is again a Series):
# print(pd.Series(obj2.to_numpy()).value_counts())
# print(obj2[obj2.isin(["b", "c"])])  # keep only the entries that are "b" or "c"
vals = pd.Series(list("cbad"))
# Index.get_indexer: for each element of obj2, its position within vals,
# returned as an array.
# print(pd.Index(vals).get_indexer(obj2))
data2 = pd.DataFrame(
    [
        [1, 2, 1],
        [3, 3, 5],
        [4, 1, 2],
        [3, 2, 4],
        [4, 3, 4],
    ],
    columns=["Qu1", "Qu2", "Qu3"],
)
# print(data2["Qu1"].value_counts().sort_index())  # counts for a single column, ordered by value
# print(data2.value_counts())  # counts whole rows, each row treated as a tuple
# 标签 (Tags): obj2, Python, Series, df, 描述性, pd, NA, print, Pandas
# From: https://blog.csdn.net/real_Tartaglia/article/details/140479210