-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
32 lines (27 loc) · 941 Bytes
/
model.py
File metadata and controls
32 lines (27 loc) · 941 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import re
import pandas as pd
from create_dataset import df
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
# Train and score text classifiers on the dataframe imported from
# create_dataset: the 'data' column is the model input and the 'labels'
# column is the prediction target.
X = df['data']
y = df['labels']

# random_state is pinned so the train/test split is the same on every run.
# TODO: decide whether the labels should be converted to numbers first, e.g.
#   train_test_split(X, pd.get_dummies(y), random_state=2)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2)

# Display label -> unfitted estimator.
# TODO: add more models here; every entry is trained and scored by the loop
# below, so the printed label always matches the classifier actually used.
models = {
    'LogisticRegression()': LogisticRegression(),
}
# Kept as a list of labels so the original module-level name still exists.
model_names = list(models)

# Not referenced anywhere in this section.
# NOTE(review): presumably meant as a CountVectorizer token_pattern - confirm.
pattern = r'[A-Za-z0-9]+(?=\s+)'

for model in model_names:
    # CountVectorizer turns the text into numeric token-count features, which
    # the classifier in the second step is then trained on.
    pl = Pipeline([
        ('vectorizer', CountVectorizer()),
        ('clf', models[model]),
    ])
    pl.fit(X_train, y_train)
    # Pipeline.score delegates to the final estimator's score on the held-out
    # split.
    accuracy = pl.score(X_test, y_test)
    print("Accuracy for {} is {}".format(model, accuracy))