python: remove duplicate rows based on specific columns
# Keep the first row seen for each (Column1, Column2) pair; later repeats are dropped.
df = df[~df.duplicated(subset=['Column1', 'Column2'], keep='first')]
python: remove duplicate rows based on specific columns
# Deduplicate on the (Column1, Column2) pair; the earliest row of each pair wins.
df = df.drop_duplicates(
    subset=['Column1', 'Column2'],
    keep='first',
)
remove duplicates based on two columns in dataframe
# Deduplicate on the (A, B) pair, keeping the last occurrence of each pair.
# drop_duplicates returns a new DataFrame rather than modifying df, so the
# result has to be assigned back; a bare call leaves df unchanged.
df = df.drop_duplicates(subset=['A', 'B'], keep='last')
remove duplicate columns python dataframe
# Select every column whose label has not appeared earlier, i.e. keep the
# first occurrence of each column name.
df = df.loc[:, ~df.columns.duplicated(keep='first')]
remove duplicate values in data frame r
# Keep one copy of each row; distinct() with no column arguments compares all columns.
df <- distinct(df)
Return a new DataFrame with duplicate rows removed
# Return a new DataFrame with duplicate rows removed.
# NOTE(review): assumes `sc` is an already-created SparkContext; it is not
# defined anywhere in this snippet.
from pyspark.sql import Row
# Three sample rows: the first two are identical, the third differs only in age.
df = sc.parallelize([
Row(name='Alice', age=5, height=80),
Row(name='Alice', age=5, height=80),
Row(name='Alice', age=10, height=80)]).toDF()
# No arguments: the two identical rows collapse into one, leaving two rows.
df.dropDuplicates().show()
# +---+------+-----+
# |age|height| name|
# +---+------+-----+
# | 5| 80|Alice|
# | 10| 80|Alice|
# +---+------+-----+
# Only name and height are compared: all three rows share ('Alice', 80),
# so a single row remains.
# NOTE(review): the column order shown in these outputs (age, height, name)
# presumably depends on the Spark version - confirm before relying on it.
df.dropDuplicates(['name', 'height']).show()
# +---+------+-----+
# |age|height| name|
# +---+------+-----+
# | 5| 80|Alice|
# +---+------+-----+
Copyright © 2021 Codeinu
Forgot your account's password or having trouble logging into your account? Don't worry, we'll help you get back into your account. Enter your email address and we'll send you a recovery link to reset your password. If you are experiencing problems resetting your password, contact us.