Text Features

Importing necessary packages

import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Toy corpus: three short documents that all contain the token "sentence".
corpus = ['This is first sentence', 'Here is the second sentence', 'Third sentence']

# Bag-of-words: learn the vocabulary and build the sparse document-term count matrix.
count_vec = CountVectorizer()
features = count_vec.fit_transform(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is the supported replacement. toarray() yields a plain
# ndarray instead of the np.matrix that todense() returns.
pd.DataFrame(features.toarray(), columns=count_vec.get_feature_names_out())

	first	here	is	second	sentence	the	third	this
0	1	0	1	0	1	0	0	1
1	0	1	1	1	1	1	0	0
2	0	0	0	0	1	0	1	0

# TF-IDF: tokenises like the count vectorizer, then down-weights terms that occur in
# many documents (e.g. "sentence") and, with the default settings, L2-normalises each row.
tfidf = TfidfVectorizer()
features_tfidf = tfidf.fit_transform(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is the supported replacement. toarray() yields a plain
# ndarray instead of the np.matrix that todense() returns.
pd.DataFrame(features_tfidf.toarray(), columns=tfidf.get_feature_names_out())

	first	here	is	second	sentence	the	third	this
0	0.584483	0.000000	0.444514	0.000000	0.345205	0.000000	0.000000	0.584483
1	0.000000	0.504611	0.383770	0.504611	0.298032	0.504611	0.000000	0.000000
2	0.000000	0.000000	0.000000	0.000000	0.508542	0.000000	0.861037	0.000000