-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSVM_22_7_2022
63 lines (58 loc) · 2.15 KB
/
SVM_22_7_2022
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#SVC
import fastai
import kornia
from fastai.tabular import *
import pandas as pd
import os
from os.path import join, isfile
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, roc_curve, plot_roc_curve, accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import f1_score, jaccard_score, matthews_corrcoef
from sklearn.model_selection import train_test_split
import keras
from keras import datasets, layers, models
from torch.utils.data import Dataset
from fastai.tabular.all import *
from random import shuffle
from keras.models import Sequential
from keras.layers import LSTM
import tensorflow as tf
import random as rn
from tensorflow.keras.utils import to_categorical
# Load the dataset CSV and spread every sequence string over one column per
# character, keeping the label alongside.
path = '//home/dell/Desktop/Python_codes/ML/thesis//'
csv_file = join(path, 'nostoc_dataset_text.csv')
df = pd.read_csv(csv_file)
df_list = df['DATASET'].tolist()
df_labels = df['LABEL'].tolist()
# Every DATASET entry becomes a list of its individual characters.
seqs = list(map(list, df_list))
# Position columns are named "1" .. "50".
# NOTE(review): assumes no entry is longer than 50 characters - confirm against the data.
df222 = pd.DataFrame(seqs, columns=list(map(str, range(1, 51))))
df222['LABEL'] = df_labels
print(df222)
# Numericalisation: replace every character of the position columns with an
# integer category code. NOTE: this is integer (label) encoding, not one-hot
# encoding - each position stays a single column.
columns = [str(i) for i in range(1, 51)]

# Build ONE category set shared by all position columns, so a given character
# receives the same code in every column. Categorising each column on its own
# would hand out different codes whenever a character is absent from a column.
symbols = sorted(pd.Series(df222[columns].to_numpy().ravel()).dropna().unique())
base_dtype = pd.CategoricalDtype(categories=symbols)
# Missing cells (if any) end up with code -1.
df222[columns] = df222[columns].astype(base_dtype).apply(lambda col: col.cat.codes)

# Turn the labels into integer codes.
# NOTE(review): presumably two classes (positive/negative) mapped to 0 and 1 -
# confirm against the LABEL column of the CSV.
df222["LABEL"] = df222["LABEL"].astype('category').cat.codes

X = df222[columns].values  # feature matrix
y = df222['LABEL'].values  # target array where target is the label
print(X)
print(y)
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
print("X_train.shape, X_test.shape")
print(X_train.shape, X_test.shape)
print("y_train.shape, y_test.shape")
print(y_train.shape, y_test.shape)

# Fit a support vector classifier with its default settings on the training split.
svc = SVC(random_state=0)
clf = svc.fit(X_train, y_train)

# ROC curve on the held-out split.
# NOTE(review): plot_roc_curve was deprecated in scikit-learn 1.0 and removed
# in 1.2; on newer versions use RocCurveDisplay.from_estimator(svc, X_test, y_test)
# and update the import at the top of the file accordingly.
svc_disp = plot_roc_curve(svc, X_test, y_test)
#plt.show()

y_true = y_test
y_pred = svc.predict(X_test)

print('\nSVC')
# Print the confusion matrix so it is actually visible when run as a script
# (a bare expression only displays in an interactive session).
print(confusion_matrix(y_true, y_pred))
#print('AUC=%s' %(roc_auc_score(y, clf.decision_function(X))))
print('accuracy=%s' %(accuracy_score(y_true, y_pred)))