import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
# Excel workbook that holds the wide yield table.
file_path = '9.4.xlsx'

# Row 0 of the sheet supplies the column labels and column 0 supplies the
# row labels; both are later turned into the two ANOVA factors.
df = pd.read_excel(
    file_path,
    index_col=0,
    header=0,
)
def split_to_float_list(s):
    """Convert one spreadsheet cell into a list of floats.

    Args:
        s: The raw cell value. Normally a string of comma-separated
            numbers such as ``"1.5,2,3"``, but it may also be a bare
            number (a cell holding a single value) or a missing value
            (``None`` / NaN).

    Returns:
        A list of floats. A comma-separated string yields one float per
        item, a bare number yields a one-element list, and anything that
        cannot be parsed (including missing cells) yields ``[]``.
    """
    if not isinstance(s, str):
        # Non-string cells have no ``.split``; calling it would raise an
        # AttributeError that the handler below does not cover.
        try:
            value = float(s)
        except (ValueError, TypeError):
            return []
        # NaN is the only float that differs from itself; treat it as an
        # empty cell rather than as an observation.
        if value != value:
            return []
        return [value]

    try:
        return [float(x) for x in s.split(',')]
    except (ValueError, TypeError):
        return []
# Parse every cell of the wide table into a list of replicate yields.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map, so use the new name when the installed pandas has it.
if hasattr(pd.DataFrame, 'map'):
    df_processed = df.map(split_to_float_list)
else:
    df_processed = df.applymap(split_to_float_list)

# Reshape wide -> long: one record per individual observation, labelled
# with its row label (品种) and column label (化肥).
data = []
for var_name, var_data in df_processed.iterrows():
    # Series.iteritems() was removed in pandas 2.0; items() is the
    # equivalent that works on both old and new versions.
    for fert_name, yields in var_data.items():
        for yield_value in yields:
            data.append({'品种': var_name, '化肥': fert_name, '产量': yield_value})
df_long = pd.DataFrame(data)

# Mark both factors as categorical before fitting.
df_long['品种'] = df_long['品种'].astype('category')
df_long['化肥'] = df_long['化肥'].astype('category')

# Two-way ANOVA with interaction: both main effects plus the 品种 x 化肥 term.
model = ols('产量 ~ C(品种) + C(化肥) + C(品种):C(化肥)', data=df_long).fit()
anova_table = anova_lm(model, typ=2)
print(anova_table)
# Tags: 化肥 (fertilizer), df, 小麦 (wheat), 品种 (variety), long, anova, 产量 (yield), data
# From: https://www.cnblogs.com/howoo0808/p/18593959