Pandas 基础
Pandas Series
Series 类似于一维数组,但每个元素都带有一个索引(标签),可以通过索引来访问对应的元素。
Series 的创建
import numpy as np
import pandas as pd

# From a list, with explicit string labels; then look one label up.
a = ["Google", "Runoob", "Wiki"]
labels = ["x", "y", "z"]
myvar = pd.Series(data=a, index=labels)
print(myvar["y"])

# From a dict: the keys become the index labels.
sites = dict(enumerate(["Google", "Runoob", "Wiki"], start=1))
myvar = pd.Series(sites)
print(myvar)

# From a dict plus an index: only the listed keys are kept.
myvar = pd.Series(sites, index=[1, 2])
print(myvar)

# From a NumPy array.
numbers = np.array([1, 2, 3, 4])
myvar = pd.Series(numbers)
print(myvar)
Series 的基本操作
import pandas as pd

s = pd.Series([12, 15, 13, 11])

# Iterate over (index label, value) pairs.
for index, value in s.items():
    print(f"Index: {index}, Value: {value}")

# Slice the Series: elements 1..3, then the first three elements.
print(s[1:4])
print(s[:3])

s[4] = 1  # add: assigning to a label that does not exist yet appends an entry
del s[0]  # delete: removes the entry labelled 0 from s in place
s_dropped = s.drop(1)  # returns a new Series without label 1; s itself is unchanged
print(s_dropped)
Series 的基本运算
import numpy as np
import pandas as pd

s = pd.Series([12, 15, 13, 11])

# Element-wise arithmetic.
doubled = s * 2
print(doubled)

# Boolean-mask filtering: keep only the values greater than 12.
above_twelve = s[s > 12]
print(above_twelve)

# NumPy functions can be applied directly to a Series.
roots = np.sqrt(s)
print(roots)

# Aggregate statistics.
total, average = s.sum(), s.mean()
smallest, largest = s.min(), s.max()
spread = s.std()
print(total, average, smallest, largest, spread)
Series 的属性和方法
import pandas as pd

s = pd.Series([12, 15, 13, 11], index=list('abcd'))

# Index labels, raw values, and summary statistics, in that order.
for shown in (s.index, s.values, s.describe()):
    print(shown)

# Labels of the largest and the smallest value.
largest_label, smallest_label = s.idxmax(), s.idxmin()
print(largest_label, smallest_label)

print(s.shape)

# Element-wise comparison yields a boolean Series.
is_above_two = s > 2
print(is_above_two)

# Type conversion returns a new Series.
as_float = s.astype('float64')
print(as_float)
Pandas DataFrame
DataFrame 是 Pandas 中的另一个核心数据结构,用于表示二维表格型数据。
DataFrame 的创建
import pandas as pd
import numpy as np

# Create from a list of rows.
data = [['Google', 10], ['Runoob', 12], ['wiki', 13]]
df = pd.DataFrame(data, columns=['Sites', 'Age'])  # columns labels the columns; index would label the rows
df['Sites'] = df['Sites'].astype(str)
df['Age'] = df['Age'].astype(float)
print(df)

# Create from a dict of columns.
# Named site_data rather than dict so the builtin dict type is not shadowed.
site_data = {'Sites': ['Google', 'Runoob', 'Wiki'], 'Age': [10, 12, 13]}
df = pd.DataFrame(site_data)
print(df)

# Create from a NumPy ndarray.
# An ndarray has a single dtype, so mixing names and numbers stores every
# cell as a string; convert Age back to integers after building the frame.
ndarray_data = np.array([['Google', 10], ['Runoob', 12], ['Wiki', 13]])
df = pd.DataFrame(ndarray_data, columns=['Sites', 'Age'])
df['Age'] = df['Age'].astype(int)
print(df)
DataFrame 的基本操作
import pandas as pd

data = {"calories": [420, 380, 390], "duration": [50, 40, 45]}

# Without an explicit index the rows are labelled 0, 1, 2.
df = pd.DataFrame(data)
for row_label in (0, 1):
    print(df.loc[row_label])
print(df.loc[[0, 1]])  # a list of labels returns the first and second rows

# With a custom index, loc looks rows up by those labels.
day_labels = ["day1", "day2", "day3"]
df = pd.DataFrame(data, index=day_labels)
print(df.loc["day2"])

# Three ways to select the "calories" column.
print(df["calories"])
print(df.loc[:, 'calories'])
print(df.iloc[:, 0])
import pandas as pd

data = {
    "calories": [420, 380, 390],
    "duration": [50, 40, 45],
}
df = pd.DataFrame(data)

# Structure of the frame.
print(df.shape)
print(df.columns)
print(df.index)

# First and last rows.
print(df.head())
print(df.tail())

# info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
df.info()

# Summary statistics and per-column aggregates.
print(df.describe())
print(df.mean())
print(df.sum())
import pandas as pd

data = {"calories": [420, 380, 390], "duration": [50, 40, 45]}
df = pd.DataFrame(data)

# Overwrite an existing column, then create a new one.
df['calories'] = [420, 390, 390]
df['NewColumn'] = [100, 200, 300]
print(df)

# Add a row by assigning to a label that does not exist yet.
df.loc[3] = [1, 2, 3]
print(df)

# Add a row by concatenating a one-row frame; the index is renumbered.
new_row = pd.DataFrame(
    [[440, 40, 400]],
    columns=['calories', 'duration', 'NewColumn'],
)
df = pd.concat([df, new_row], ignore_index=True)
print(df)

# drop() returns a new frame; df itself keeps all rows and columns.
df_dropped = df.drop(columns='NewColumn')
print(df_dropped)
df_dropped = df.drop(index=3)
print(df_dropped)

# Keep only the rows whose calories exceed 400.
high_calorie = df['calories'] > 400
print(df[high_calorie])
import pandas as pd

shared_columns = ['columns1', 'columns2', 'columns3']

# Vertical concatenation: both frames have the same columns, rows are stacked.
df1 = pd.DataFrame([[1, 2, 3]], columns=shared_columns)
df2 = pd.DataFrame([[1, 5, 6]], columns=shared_columns)
stacked = pd.concat([df1, df2], ignore_index=True)
print(stacked)

# Merge: rows are matched on the value of the shared 'columns1' key.
df1 = pd.DataFrame([[1, 2, 3]], columns=shared_columns)
df2 = pd.DataFrame([[1, 5, 6]], columns=['columns1', 'columns5', 'columns6'])
joined = pd.merge(df1, df2, on='columns1')
print(joined)
pandas 与 csv
csv 文件的存取
import pandas as pd
import os

# NOTE(review): abspath('.') resolves to the current working directory, so
# this chdir does not actually move anywhere. If the intent was to run from
# the script's own folder, that needs a different path - confirm.
FilePath = os.path.abspath('.')
os.chdir(FilePath)

df = pd.read_csv('DataForClassify.csv')

# index=False keeps the row index out of the output, so the copy has the
# same columns as the source file instead of gaining an unnamed first column.
df.to_csv('DFC_cp.csv', index=False)
数据处理
# Preview the start and the end of the frame loaded above.
# head() and tail() show 5 rows unless a row count is given.
for preview in (df.head(), df.head(10), df.tail(), df.tail(10)):
    print(preview)
pandas 与 json
读取json文件
import pandas as pd

# Load a JSON file into a DataFrame.
json_source = 'test.json'
df = pd.read_json(json_source)
# A URL can be passed in place of the file name:
# df = pd.read_json(URL)
读取内嵌的json
import pandas as pd
import os
import json

# NOTE(review): abspath('.') resolves to the current working directory, so
# this chdir does not actually move anywhere - confirm the intent.
FilePath = os.path.abspath('.')
os.chdir(FilePath)

# Parse straight from the file handle; the encoding is given explicitly so
# the result does not depend on the platform's default encoding.
with open('nest.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# One output row per entry of the 'students' list, with the top-level
# 'class' value repeated on every row.
df_nest = pd.json_normalize(data, record_path=['students'], meta=['class'])
print(df_nest)
glom
import pandas as pd
from glom import glom


def _math_grade(student):
    """Return the value found at the 'grade.math' path of one students entry."""
    return glom(student, 'grade.math')


df = pd.read_json('nest.json')

# Apply the path lookup to every entry of the 'students' column.
student_records = df['students']
data = student_records.apply(_math_grade)
print(data)
标签:df,DataFrame,pandas,pd,print,import,Pandas
From: https://www.cnblogs.com/mklzc/p/18429988