Answers for "groupby transform pandas"

2

Groups the DataFrame using the specified columns

# Groups the DataFrame using the specified columns

df.groupBy().avg().collect()
# [Row(avg(age)=3.5)]
sorted(df.groupBy('name').agg({'age': 'mean'}).collect())
# [Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]
sorted(df.groupBy(df.name).avg().collect())
# [Row(name='Alice', avg(age)=2.0), Row(name='Bob', avg(age)=5.0)]
sorted(df.groupBy(['name', df.age]).count().collect())
# [Row(name='Alice', age=2, count=1), Row(name='Bob', age=5, count=1)]
Posted by: Guest on April-08-2020
0

impute data by using groupby and transform

import pandas as pd
from datetime import datetime

def generate_data():
    ...

t = datetime.now()
df = generate_data()
df['value'] = df.groupby(['category', 'name'])['value']
    .transform(lambda x: x.fillna(x.mean()))
print(datetime.now()-t)

# 0:00:00.016012

t = datetime.now()
df = generate_data()
df["value"] = df.groupby(['category', 'name'])
    .transform(lambda x: x.fillna(x.mean()))['value']
print(datetime.now()-t)

# 0:00:00.030022
Posted by: Guest on August-10-2020
0

impute data by using groupby and transform

df['value'] = df.groupby(['category', 'name'])['value']
    .transform(lambda x: x.fillna(x.mean()))
Posted by: Guest on August-10-2020

Python Answers by Framework

Browse Popular Code Answers by Language