how to use tfidfvectorizer

tfidfvectorizer code

# TF-IDF vectorizer >>> Logistic Regression
#
# Template script: turn a preprocessed text column into TF-IDF features,
# train a logistic-regression classifier on them and report F1 / recall /
# precision on a held-out test split.
#
# `df` is expected to be an existing pandas DataFrame; replace the two
# placeholder column names below with the real ones.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

TEXT_COLUMN = 'text_column_name_after_preprocessing'  # placeholder: cleaned text
LABEL_COLUMN = 'Column_name'                          # placeholder: target labels

X = df[TEXT_COLUMN]
y = df[LABEL_COLUMN].values

#train test split:>>>>>>>>>>>
# Split the raw text first so the vectorizer never sees the test documents
# (fitting TF-IDF on the full corpus leaks test-set vocabulary/IDF statistics).
X_train_text, X_test_text, y_train_tfidf, y_test_tfidf = train_test_split(
    X, y, test_size=0.2, random_state=2020
)

# Learn vocabulary and IDF weights on the training split only, then reuse
# the fitted vectorizer to project the test split into the same feature space.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)
# get_feature_names() was removed in scikit-learn 1.2; use the *_out variant.
print(vectorizer.get_feature_names_out())

model_logit_tf = LogisticRegression(class_weight="balanced", solver='saga', max_iter=100)
model_logit_tf.fit(X_train_tfidf, y_train_tfidf)  # fit the model

y_pred_tfidf = model_logit_tf.predict(X_test_tfidf)  # prediction (reused by every metric below)

#F1 score:>>>>>>>>>
f1score_TF = f1_score(y_test_tfidf, y_pred_tfidf, average='micro')
print(f"TF-IDF Model F1 Score for Logistic Regression: {f1score_TF * 100} %")

#Recall score:>>>>>>>>>
recall_score_TF = recall_score(y_test_tfidf, y_pred_tfidf, average='macro')
print(f"TF-IDF Model Recall Score for Logistic Regression: {recall_score_TF * 100} %")

#Precision score:>>>>>>>>>
precision_score_TF = precision_score(y_test_tfidf, y_pred_tfidf, average='macro')
print(f"TF-IDF Model Precision Score for Logistic Regression: {precision_score_TF * 100} %")

Posted by: Guest on March-28-2021

>>> from sklearn.feature_extraction.text import TfidfTransformer >>> from sklearn.feature_extraction.text import CountVectorizer >>> from sklearn.pipeline import Pipeline >>> import numpy as np >>> corpus = ['this is the first document', ... 'this document is the second document', ... 'and this is the third one', ... 'is this the first document'] >>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the', ... 'and', 'one'] >>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)), ... ('tfid', TfidfTransformer())]).fit(corpus) >>> pipe['count'].transform(corpus).toarray() array([[1, 1, 1, 1, 0, 1, 0, 0], [1, 2, 0, 1, 1, 1, 0, 0], [1, 0, 0, 1, 0, 1, 1, 1], [1, 1, 1, 1, 0, 1, 0, 0]]) >>> pipe['tfid'].idf_ array([1. , 1.22314355, 1.51082562, 1. , 1.91629073, 1. , 1.91629073, 1.91629073]) >>> pipe.transform(corpus).shape (4, 8)

Code answers related to "how to use tfidfvectorizer"

Code answers related to "Java"

Browse Popular Code Answers by Language

Answers for "how to use tfidfvectorizer"

Code answers related to "how to use tfidfvectorizer"

Code answers related to "Java"

Java Answers by Framework

Browse Popular Code Answers by Language

Popular Programming Languages

Advertisements

Company

Compilers

Help

Connect with us