# Encoding multiple categorical variables in Python
from sklearn.compose import make_column_transformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
# Keep the analysis small: model price from just a handful of columns.
feature_cols = [
    'fuel_type',
    'make',
    'aspiration',
    'highway_mpg',
    'city_mpg',
    'curb_weight',
    'drive_wheels',
]

# Rows whose price is missing cannot be used as a target, so keep only
# the rows where a price is present.
has_price = df['price'].notna()
df_ml = df[has_price]

# Feature matrix and target vector, both taken from the filtered frame.
X, y = df_ml[feature_cols], df_ml['price']