Groups the DataFrame using the specified columns so that aggregations can be run on the resulting groups.
# Groups the DataFrame using the specified columns
df.groupBy().avg().collect()
# [Row(avg(age)=3.5)]
sorted(df.groupBy('name').agg({'age': 'mean'}).collect())
# [Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]
sorted(df.groupBy(df.name).avg().collect())
# [Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]
sorted(df.groupBy(['name', df.age]).count().collect())
# [Row(name='Alice', age=2, count=1), Row(name='Bob', age=5, count=1)]