# GridSearchCV with multiple estimators
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
# Hold out 20% of the samples as a test set; the fixed random_state makes
# the shuffle (and therefore the split) reproducible between runs.
# NOTE(review): X and y are not defined in the code shown here -- presumably
# they are loaded earlier; confirm before running.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Display labels used when printing scores. The order matters: entry i
# labels entry i of `classifiers` and `parameters`.
names = [
    "Naive Bayes",
    "Linear SVM",
    "Logistic Regression",
    "Random Forest",
    "Multilayer Perceptron",
]
# Candidate estimators, each constructed with its default hyperparameters;
# the values actually searched come from the matching grid in `parameters`.
# Index-aligned with `names`.
classifiers = [
    MultinomialNB(),
    LinearSVC(),
    LogisticRegression(),
    RandomForestClassifier(),
    MLPClassifier(),
]
# One search grid per estimator, in the same order as `classifiers`.
# Keys use the pipeline's '<step>__<param>' form: 'vect' is the vectorizer
# step and 'clf' the classifier step. Every grid tries unigrams only versus
# unigrams + bigrams, plus one estimator-specific hyperparameter.
parameters = [
    {'vect__ngram_range': [(1, 1), (1, 2)], **estimator_grid}
    for estimator_grid in (
        {'clf__alpha': (1e-2, 1e-3)},          # Naive Bayes
        {'clf__C': (np.logspace(-5, 1, 5))},   # Linear SVM
        {'clf__C': (np.logspace(-5, 1, 5))},   # Logistic Regression
        {'clf__max_depth': (1, 2)},            # Random Forest
        {'clf__alpha': (1e-2, 1e-3)},          # Multilayer Perceptron
    )
]
# The three lists are maintained by hand and consumed in lockstep; zip()
# would silently drop trailing entries if they ever got out of sync, so
# fail loudly instead.
if not len(names) == len(classifiers) == len(parameters):
    raise ValueError(
        "names, classifiers and parameters must have the same length"
    )

# Tune each candidate on the training split and report its test score.
for name, classifier, params in zip(names, classifiers, parameters):
    # Step names 'vect' and 'clf' are what the 'vect__' / 'clf__' prefixes
    # in the parameter grids refer to.
    clf_pipe = Pipeline([
        ('vect', TfidfVectorizer(stop_words='english')),
        ('clf', classifier),
    ])
    # Exhaustive search over this estimator's grid; n_jobs=-1 asks for all
    # available processors.
    gs_clf = GridSearchCV(clf_pipe, param_grid=params, n_jobs=-1)
    # fit() returns the fitted search object itself, so `clf` and `gs_clf`
    # are the same object; both names are kept for any later code using them.
    clf = gs_clf.fit(X_train, y_train)
    # Evaluate on the held-out split that the search never saw.
    score = clf.score(X_test, y_test)
    print("{} score: {}".format(name, score))