Reading a random sample of rows from a large CSV file with pandas read_csv
import numpy as np
import pandas as pd

# Load a random subset of the rows of a large CSV file by telling
# pandas which line numbers to skip, rather than loading every row
# and sampling afterwards.
filename = 'hugedatafile.csv'
# NOTE(review): presumably the number of data rows in the file (header
# line not counted); it is hard-coded, so confirm it matches the file.
nlinesfile = 10000000
nlinesrandomsample = 10000

# Draw, without replacement, the line numbers to skip. Candidates run
# from 1 to nlinesfile inclusive, so line 0 is never skipped (with
# read_csv's default header handling that line supplies the column
# names). Skipping (nlinesfile - nlinesrandomsample) of the nlinesfile
# candidates leaves nlinesrandomsample of them to be read.
lines2skip = np.random.choice(
    np.arange(1, nlinesfile + 1),
    nlinesfile - nlinesrandomsample,
    replace=False,
)

df = pd.read_csv(filename, skiprows=lines2skip)