# Jun 1, 2024
"""Stress detection from text.

Loads ``stress.csv`` (expected columns: ``text``, ``label`` with 0/1 codes),
maps the numeric labels to readable strings, vectorizes the text with a
bag-of-words CountVectorizer, and fits a Bernoulli Naive Bayes classifier
on a 70/30 train/test split.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from wordcloud import WordCloud, ImageColorGenerator
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB

# Load the dataset (assign the result — the original discarded it).
data = pd.read_csv('stress.csv')

# Text-cleaning resources: an English stemmer and the English stop-word set.
stemmer = SnowballStemmer('english')
stop_words = set(stopwords.words('english'))

# (A WordCloud built from data['text'] can be used here to visualize
# frequently occurring words in the dataset.)

# Map the numeric labels to human-readable classes: 0 = no stress, 1 = stress.
data['label'] = data['label'].map({0: 'no stress', 1: 'stress'})

# Bag-of-words features from the raw text.
cv = CountVectorizer()
X = cv.fit_transform(data['text'])
y = data['label']

# 70/30 train-test split, seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Bernoulli NB suits binary/boolean bag-of-words features.
model = BernoulliNB()
model.fit(X_train, y_train)