# pandas: calculate the mean by groups
#
# Basic syntax:
#     df.groupby('column_name').mean()
# This returns the mean of every other column, computed separately for each
# group of rows that share the same value in the column "column_name".

# Example usage:
import pandas as pd
import numpy as np

df = pd.DataFrame({'A': [1, 1, 2, 1, 2],
                   'B': [np.nan, 2, 3, 4, 5],
                   'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C'])
print(df)
#    A    B  C
# 0  1  NaN  1
# 1  1  2.0  2
# 2  2  3.0  1
# 3  1  4.0  1
# 4  2  5.0  2

# Calculate the mean of columns B and C grouped by the values in column A.
# NaN values are skipped when averaging, so for A == 1 the mean of B is
# (2 + 4) / 2 = 3.0 rather than NaN.
mean_by_a = df.groupby('A').mean()
print(mean_by_a)
#      B         C
# A
# 1  3.0  1.333333
# 2  4.0  1.500000

# Calculate the mean of column C grouped by the values in columns A and B.
# Rows whose group key is NaN are dropped by default, which is why row 0
# (A=1, B=NaN) does not appear below; pass dropna=False to groupby()
# (pandas >= 1.1) to keep NaN as its own group.
mean_by_a_b = df.groupby(['A', 'B']).mean()
print(mean_by_a_b)
#          C
# A B
# 1 2.0  2.0
#   4.0  1.0
# 2 3.0  1.0
#   5.0  2.0