Python: remove duplicate rows based on specific columns
# Keep only the first row for every distinct (Column1, Column2) combination;
# rows that repeat an earlier combination are discarded.
key_columns = ['Column1', 'Column2']
df = df.drop_duplicates(subset=key_columns, keep='first')
Python: remove duplicate rows based on specific columns
# Rows are compared on Column1 and Column2 only; for each repeated pair the
# first occurrence is kept and the later ones are removed.
dedupe_on = ['Column1', 'Column2']
df = df.drop_duplicates(subset=dedupe_on, keep='first')
drop duplicates pandas first column
import pandas as pd

# Load the employee records from the CSV file.
data = pd.read_csv("employees.csv")

# Order the rows by first name, then discard every row whose first name
# occurs more than once (keep=False removes all copies, not just the extras).
data = data.sort_values("First Name").drop_duplicates(subset="First Name", keep=False)

# Show what is left.
print(data)
Drop rows with duplicate values
import pandas as pd

# Small demo frame: rows 0 and 1 share the same ("A", "C") pair ("foo", "A").
df = pd.DataFrame({"A":["foo", "foo", "foo", "bar"], "B":[0,1,1,1], "C":["A","A","B","A"]})

# drop_duplicates returns a new DataFrame and leaves `df` untouched, so the
# result must be captured. keep=False drops every row whose ("A", "C") pair
# appears more than once, rather than keeping one representative.
deduped = df.drop_duplicates(subset=['A', 'C'], keep=False)
print(deduped)
Return a new DataFrame with duplicate rows removed
# Build a small DataFrame and remove duplicate rows from it.
# NOTE(review): `sc` is not defined in this snippet; presumably an existing
# SparkContext (e.g. the one provided by the pyspark shell) - confirm.
from pyspark.sql import Row

people = [
    Row(name='Alice', age=5, height=80),
    Row(name='Alice', age=5, height=80),
    Row(name='Alice', age=10, height=80),
]
df = sc.parallelize(people).toDF()

# With no arguments, only rows that match in every column are collapsed.
# NOTE(review): the column order shown below (age, height, name) looks like
# the alphabetical field ordering of older Spark releases - confirm for the
# version in use.
df.dropDuplicates().show()
# +---+------+-----+
# |age|height| name|
# +---+------+-----+
# | 5| 80|Alice|
# | 10| 80|Alice|
# +---+------+-----+

# Comparing on name and height only leaves a single row.
df.dropDuplicates(['name', 'height']).show()
# +---+------+-----+
# |age|height| name|
# +---+------+-----+
# | 5| 80|Alice|
# +---+------+-----+
Copyright © 2021 Codeinu
Forgot your account's password or having trouble logging into your account? Don't worry, we'll help you get your account back. Enter your email address and we'll send you a recovery link to reset your password. If you are experiencing problems resetting your password, contact us.