clean crsp pandas
# How to clean CRSP the WRDS database, to calculate Mkt Cap on non financial US firms
CRSP = CRSP[CRSP['SHRCLS'].isin(['NaN', 'A'])] # Keep Share class A or missing
CRSP = CRSP[CRSP['SHRCD'].isin(['10', '11'])] # Keep only sharecode of 10 and 11
CRSP['RET'] = CRSP['RET'].replace(['C','B'],np.nan) # Clean return taking out strings
# Keep value above -50 to avoid any errors
CRSP['RET'] = CRSP['RET'].astype('float')
mask = CRSP['RET'] > -50
CRSP = CRSP[mask]
CRSP['PRC'] = CRSP['PRC'].abs() # Keep absolute value of Price
# Calculate Market value with adjustment in Price and Market Value
CRSP['market_value'] = ((CRSP['PRC']/CRSP['CFACPR'])*(CRSP['SHROUT']*CRSP['CFACSHR'])).shift(1)
# Take out SIC code with letter z and keep only rows with a SIC
mask_z = CRSP['SICCD'] == 'Z'
CRSP['SICCD'] = CRSP['SICCD'][-mask_z]
CRSP['SICCD'] = CRSP['SICCD'].dropna().astype(int)
# Keep non financial firms only
CRSP = CRSP[~CRSP['SICCD'].between(6000,6999)]
# Change date type from object to datetime and create new colums for month and year
CRSP['date'] = pd.to_datetime(CRSP['date'])
CRSP['year'] = pd.DatetimeIndex(CRSP['date']).year
CRSP['month'] = pd.DatetimeIndex(CRSP['date']).month
# Clean CRSP
CRSP.drop(['SHRCD','SHRCLS','PRC','SHROUT','CFACPR','CFACSHR'], axis=1, inplace = True)