Remove duplicate rows from a CSV file in Python
# credit to the Stack Overflow user in the source link
def remove_duplicate_rows(in_path='file_with_duplicates.csv', out_path='output.csv'):
    """Copy *in_path* to *out_path*, keeping only the first occurrence of each line.

    Duplicate detection is exact, whole-line (including the trailing newline),
    so two rows must be byte-identical to be considered duplicates. Original
    line order is preserved; only repeats are dropped.

    Args:
        in_path: path of the CSV file to read (default matches the original script).
        out_path: path of the deduplicated file to write. NOTE: the original
            hard-coded 'ouput.csv' — a typo, fixed here to 'output.csv'.

    Raises:
        OSError: if either file cannot be opened.
    """
    seen = set()  # set gives O(1) amortized membership tests
    with open(in_path, 'r') as in_file, open(out_path, 'w') as out_file:
        for line in in_file:
            if line in seen:
                continue  # skip duplicate row
            seen.add(line)
            out_file.write(line)


if __name__ == '__main__':
    # Preserve the original script behavior when run directly.
    remove_duplicate_rows()