"""Main-effects ANOVA of sales on location, ad form, decoration grade and city.

Reads the observations from ``9.5.xlsx``, reshapes them into one row per
observation, fits ``销售量 ~ C(位置) + C(广告形式) + C(装饰档次) + C(城市)``
with statsmodels, and prints the type-II ANOVA table together with the
terms whose p-value is below 0.05.
"""
import pandas as pd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Names given to the data columns of the worksheet (one per city).
column_names = ["城市1", "城市2", "城市3", "城市4"]

# One label per worksheet row, encoding that row's treatment as
# "位置<n>广告形式行<m><高档|低档>"; the factor levels are parsed back out
# of these labels below.
# NOTE(review): the 高档 labels hard-code "行+1" (parsed as ad form 1) and
# the 低档 labels reuse the location number as the ad form.  That looks
# like a placeholder rather than the real experimental design -- confirm
# against the worksheet before trusting the 广告形式 effect.
factor_combinations = [
    f"位置{i+1}广告形式行+1高档" for i in range(3) for _ in range(2)
] + [
    f"位置{i+1}广告形式行{i+1}低档" for i in range(3) for _ in range(2)
]

# Read the worksheet.
# NOTE(review): presumably the sheet has a leading label column followed
# by one column per city, so that index_col=0 consumes the label column --
# verify against the workbook.
df_raw = pd.read_excel('9.5.xlsx', header=None, names=column_names, index_col=0)

# Replace the index with the treatment labels (pandas raises ValueError if
# the sheet does not have exactly len(factor_combinations) rows).
df_raw.index = factor_combinations

# Parse the three row-level factors out of every label.
location_by_row = []
ad_by_row = []
decoration_by_row = []
for label in df_raw.index:
    location_text, ad_marker, ad_text = label.partition('广告形式')
    location_by_row.append(int(location_text.split('位置')[1].strip()))

    if not ad_marker:
        # No ad-form part in the label: default to level 0.
        ad_by_row.append(0)
    else:
        ad_text = ad_text.split('高档')[0].split('低档')[0].strip()
        if '行' not in ad_text:
            raise ValueError(f"cannot parse ad form from label {label!r}")
        # int() accepts a leading sign, so "行+1" and "行1" both yield 1.
        ad_by_row.append(int(ad_text.replace('行', '').strip()))

    decoration_by_row.append('高档' if '高档' in label else '低档')

# flatten() walks the table row by row, so every row-level factor value
# must be repeated once per city column to stay aligned with the sales
# figures, and the city names must cycle once per row.
n_rows, n_cities = df_raw.shape
location_factor = [level for level in location_by_row for _ in range(n_cities)]
ad_factor = [level for level in ad_by_row for _ in range(n_cities)]
decoration_factor = [level for level in decoration_by_row for _ in range(n_cities)]
city_factor = df_raw.columns.tolist() * n_rows

# Long-format data: one row per (treatment, city) observation.  All
# columns have n_rows * n_cities entries by construction.
data = {
    '销售量': df_raw.values.flatten(),
    '位置': location_factor,
    '广告形式': ad_factor,
    '装饰档次': decoration_factor,
    '城市': city_factor,
}
df = pd.DataFrame(data)

# Fit the main-effects model and run a type-II ANOVA.
model = ols('销售量 ~ C(位置) + C(广告形式) + C(装饰档次) + C(城市)', data=df).fit()
anova_table = anova_lm(model, typ=2)
print(anova_table)

# The ANOVA table is indexed by term name (there is no 'source' column),
# so the significant terms are read from the index.  The Residual row has
# a NaN p-value and therefore never passes the comparison.
significant_factors = anova_table.index[anova_table['PR(>F)'] < 0.05].tolist()
print("在显著水平0.05下,以下因素对销售量有显著差异:")
print(significant_factors)
print("学号:05")