# How to perform ANOVA on a grouped variable in Python
# One-way ANOVA across the treatment columns A, B, C and D of the DataFrame `d`,
# first with SciPy (F statistic and p-value only) and then with statsmodels
# (full ANOVA table).
#
# NOTE(review): `d` is not created in this snippet; it is assumed to be a pandas
# DataFrame with numeric columns 'A', 'B', 'C', 'D' and an unnamed default
# index -- confirm against the code that loads the data.

# load packages
import pandas as pd  # needed for pd.melt below; was missing from the snippet
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

# stats f_oneway functions takes the groups as input and returns F and P-value
fvalue, pvalue = stats.f_oneway(d['A'], d['B'], d['C'], d['D'])
print(fvalue, pvalue)
# 17.492810457516338 2.639241146210922e-05

# get ANOVA table as R like output
# reshape the d dataframe suitable for statsmodels package:
# one row per observation (long format) instead of one column per treatment
d_melt = pd.melt(
    d.reset_index(),
    id_vars=['index'],
    value_vars=['A', 'B', 'C', 'D'],
)
# replace column names
d_melt.columns = ['index', 'treatments', 'value']

# Ordinary Least Squares (OLS) model; C(...) marks treatments as categorical
model = ols('value ~ C(treatments)', data=d_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
# print explicitly so the table is shown when run as a script, not only when
# evaluated as the last expression of a notebook cell
print(anova_table)
# |               | df     | sum_sq  | mean_sq  | F        | PR(>F)   |
# |---------------|--------|---------|----------|----------|----------|
# | C(treatments) | 3.0    | 3010.95 | 1003.650 | 17.49281 | 0.000026 |
# | Residual      | 16.0   | 918.00  | 57.375   | NaN      | NaN      |
#
# NOTE(review): the table above includes a mean_sq column, which
# anova_lm(..., typ=2) does not appear to return -- the pasted output may come
# from a different call; verify before relying on the exact layout.

# note: if the data is balanced (equal sample size for each group), Type 1, 2, and 3 sums of squares
# (typ parameter) will produce similar results.