-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCNN_27_7_22
97 lines (89 loc) · 4.09 KB
/
CNN_27_7_22
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# CNN: 1-D convolutional classifier for the labelled sequences in nostoc_dataset_text.csv
import fastai
import kornia
from fastai.tabular import *
import pandas as pd
import os
from os.path import join, isfile
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve, plot_roc_curve, accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import f1_score, jaccard_score, matthews_corrcoef
from sklearn.model_selection import train_test_split
import keras
from keras import datasets, layers, models
from fastai.tabular.all import *
from random import shuffle
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.convolutional import Convolution1D, MaxPooling1D
import tensorflow as tf
import random as rn
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers import Conv2D, MaxPooling2D, Flatten
# Load the labelled sequence dataset and convert it into integer arrays.
# The CSV is read for two columns: 'DATASET' (one sequence string per row)
# and 'LABEL' (the class of that sequence).
SEQ_LEN = 50  # number of per-position feature columns built from each sequence
path = '//home/dell/Desktop/Python_codes/ML/thesis//'
df = pd.read_csv(join(path, 'nostoc_dataset_text.csv'))
df_list = df['DATASET'].tolist()
df_labels = df['LABEL'].tolist()

# Split every sequence into single characters, one column per position.
# Columns are named '1'..'50'. pandas raises if a sequence is longer than
# SEQ_LEN; shorter ones are padded with missing values (encoded as -1 below).
columns = [str(i) for i in range(1, SEQ_LEN + 1)]
seqs = [list(c) for c in df_list]
df222 = pd.DataFrame(seqs, columns=columns)
df222['LABEL'] = df_labels
print(df222)

# Integer-encode the characters (this is label encoding, not one-hot).
# A single shared, sorted alphabet is used for every column so that a given
# character maps to the same code at every position. Categorising each column
# on its own would renumber the codes whenever a column happens to lack one
# of the characters.
alphabet = sorted({symbol for seq in seqs for symbol in seq})
symbol_dtype = pd.CategoricalDtype(categories=alphabet)
for col in columns:
    df222[col] = df222[col].astype(symbol_dtype).cat.codes

# Encode the class labels as integer category codes (0, 1, ... in sorted
# label order). NOTE(review): the original comment describes the labels as
# positive/negative, i.e. two classes -- confirm against the CSV.
df222["LABEL"] = df222["LABEL"].astype('category').cat.codes

X = df222[columns].values  # feature matrix: one row per sequence, one column per position
y = df222['LABEL'].values  # target array of integer label codes
print(X)
print(y)
# Hold out 20% of the rows for testing; random_state=0 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Echo the shapes of both partitions: a caption line followed by the two shapes.
for caption, train_part, test_part in (
    ("X_train.shape, X_test.shape", X_train, X_test),
    ("y_train.shape, y_test.shape", y_train, y_test),
):
    print(caption)
    print(train_part.shape, test_part.shape)
# Build and compile the 1-D CNN classifier.
#
# Every layer comes from tf.keras so the model does not mix standalone-keras
# layers into a tf.keras Sequential container. Hidden layers use LeakyReLU
# with slope 0.01.

# One output unit per distinct label code, instead of a hard-coded 5, so the
# softmax cannot assign probability to classes that do not exist in the data.
n_classes = len(np.unique(y))

cnn_model = tf.keras.models.Sequential()

# Conv block: three 'same'-padded convolutions with window 2 (64, 128, 128 filters).
# Input is (steps, channels): one step per feature column of X_train and a
# single channel, so the arrays fed to fit/predict need a trailing channel axis.
cnn_model.add(tf.keras.layers.Conv1D(
    filters=64,
    kernel_size=2,
    padding='same',
    activation=tf.keras.layers.LeakyReLU(alpha=0.01),
    input_shape=(X_train.shape[1], 1),
))
cnn_model.add(tf.keras.layers.Conv1D(
    filters=128,
    kernel_size=2,
    padding='same',
    activation=tf.keras.layers.LeakyReLU(alpha=0.01),
))
cnn_model.add(tf.keras.layers.Conv1D(
    filters=128,
    kernel_size=2,
    padding='same',
    activation=tf.keras.layers.LeakyReLU(alpha=0.01),
))

# Downsample with max pooling (window 3, stride 2), then apply 50% dropout.
cnn_model.add(tf.keras.layers.MaxPooling1D(pool_size=3, strides=2, padding='same'))
cnn_model.add(tf.keras.layers.Dropout(0.5))

# Flatten the feature maps and classify with two dense layers (256, then 512 units).
cnn_model.add(tf.keras.layers.Flatten())
cnn_model.add(tf.keras.layers.Dense(units=256, activation=tf.keras.layers.LeakyReLU(alpha=0.01)))
cnn_model.add(tf.keras.layers.Dense(units=512, activation=tf.keras.layers.LeakyReLU(alpha=0.01)))

# Softmax output; the sparse loss takes the integer label codes directly.
cnn_model.add(tf.keras.layers.Dense(units=n_classes, activation='softmax'))
cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.summary()
# Train the CNN and report its performance on the held-out split.

# The network takes (samples, steps, 1) input while the split arrays are 2-D
# (samples, steps), so append a channel axis. The same tensors are used for
# fitting, validation and prediction so the shapes always agree.
train_inputs = np.expand_dims(X_train, axis=-1).astype('float32')
test_inputs = np.expand_dims(X_test, axis=-1).astype('float32')

# NOTE(review): the test split doubles as validation data here, so the
# reported test accuracy is not from data unseen during training monitoring.
cnn_model.fit(train_inputs, y_train, epochs=200, validation_data=(test_inputs, y_test))
pred = cnn_model.predict(test_inputs)

# Ground-truth label codes of the test rows.
labels = list(y_test)
y_true = labels

# Predicted class = index of the largest softmax output (first one on ties).
predicted_labels = np.argmax(pred, axis=1).tolist()
y_pred = predicted_labels

print('CNN')
# The confusion matrix was previously computed and thrown away; show it.
print(confusion_matrix(y_true, y_pred))
print('Testing accuracy=%s' % (accuracy_score(y_true, y_pred)))