# regression_model_f.py

# -*- coding: utf-8 -*-
"""regression_model_F.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1AgNce9l9eP77s0WmGz-FUPbrBSpsLBUs
"""

import numpy as np
import os
import json
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from skimage.feature import hog
from sklearn.externals import joblib

# from google.colab import drive
# drive.mount('/content/drive')

PATH_image = '/content/drive/My Drive/Independent-Prj/train/'
PATH_box_embedding = '/content/drive/My Drive/Independent-Prj/box_embeddings_F/'

def get_cordinates(shape, bb):
    """Convert a centre-format box given as image fractions into pixel corners.

    Args:
        shape: Image shape; ``shape[0]`` is used as the height (rows) and
            ``shape[1]`` as the width (columns).
        bb: Sequence ``(cx, cy, w, h)``. The x/width entries are multiplied by
            the image width and the y/height entries by the image height.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` as Python ints. Half-extents are applied
        to the rounded centre and the result is truncated towards zero.
    """
    img_h, img_w = shape[0], shape[1]

    centre_x = int(round(bb[0] * img_w))
    centre_y = int(round(bb[1] * img_h))
    box_w = int(round(bb[2] * img_w))
    box_h = int(round(bb[3] * img_h))

    half_w = box_w / 2
    half_h = box_h / 2
    return [
        int(centre_x - half_w),
        int(centre_y - half_h),
        int(centre_x + half_w),
        int(centre_y + half_h),
    ]

def illum_correct(image):
    """Equalise local contrast by applying CLAHE to the lightness channel.

    The image is converted with ``cv2.COLOR_BGR2LAB``, CLAHE (clip limit 3.0,
    8x8 tile grid) is applied to the L channel only, and the channels are
    merged and converted back with ``cv2.COLOR_LAB2RGB``.

    NOTE(review): the forward conversion is BGR->LAB but the inverse is
    LAB->RGB, so the returned channel order differs from the input's.
    Confirm this is intended before relying on channel positions downstream.

    Args:
        image: Colour image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2LAB``.

    Returns:
        The contrast-equalised image produced by the LAB->RGB conversion.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equaliser = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    merged_lab = cv2.merge((equaliser.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(merged_lab, cv2.COLOR_LAB2RGB)

def preprocess_img(image):
    """Denoise, contrast-correct and gamma-adjust a colour image.

    Steps, in order:
      1. Non-local-means colour denoising (``cv2.fastNlMeansDenoisingColored``).
      2. CLAHE-based illumination correction via ``illum_correct``.
      3. Gamma correction with gamma = 0.8 on values scaled to [0, 1].

    Args:
        image: Colour image passed straight to the OpenCV denoiser.

    Returns:
        A ``uint8`` numpy array holding the gamma-corrected image.
    """
    gamma = 0.8
    denoised = cv2.fastNlMeansDenoisingColored(image, None, 9, 9, 7, 21)
    corrected = illum_correct(denoised)
    # Scale to [0, 1], apply the power law, then rescale to [0, 255].
    powered = (corrected / 255) ** gamma
    return np.array(255 * powered, dtype='uint8')

def color_hog_f(image):
    """Compute a HOG descriptor per colour channel and concatenate them.

    Channel index 2 is treated as red, 1 as green and 0 as blue. Each channel
    is described with 9 orientations, 8x8-pixel cells and 3x3-cell blocks.

    Args:
        image: Array indexable as ``image[:, :, c]`` for ``c`` in 0..2.

    Returns:
        A Python list containing the blue descriptor, then the green one,
        then the red one.
    """
    # Computed in the order red, green, blue (channel indices 2, 1, 0).
    red_hog, green_hog, blue_hog = [
        hog(
            image[:, :, channel],
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(3, 3),
        )
        for channel in (2, 1, 0)
    ]
    return list(blue_hog) + list(green_hog) + list(red_hog)

def feature_extraction(image):
    """Return the raw pixel values of *image* as a flat 1-D feature vector.

    Args:
        image: numpy array holding the (already cropped and resized) image.

    Returns:
        ``image.flatten()``: a 1-D copy of the pixel data in row-major order.
    """
    # A HOG descriptor used to be computed here and then discarded; only the
    # flattened pixels were ever returned, so that wasted work is gone.
    return image.flatten()

# Label names for the six LF and six RF joints (presumably left / right
# foot). Later in this script they select columns of training.csv.
label_LF = [
    'LF_mtp_E__ip',
    'LF_mtp_E__1',
    'LF_mtp_E__2',
    'LF_mtp_E__3',
    'LF_mtp_E__4',
    'LF_mtp_E__5',
]

label_RF = [
    'RF_mtp_E__ip',
    'RF_mtp_E__1',
    'RF_mtp_E__2',
    'RF_mtp_E__3',
    'RF_mtp_E__4',
    'RF_mtp_E__5',
]

# Load the stored image file names and split them by side marker.
img_files = np.load('/content/drive/My Drive/Independent-Prj/image_ids_F.npy')

# The character at index 7 of each name is the side marker ('L' or 'R');
# names with any other marker are ignored. The last four characters
# (presumably the file extension) are stripped to obtain the id.
ids_LF = [name[:-4] for name in img_files if name[7] == 'L']
ids_RF = [name[:-4] for name in img_files if name[7] == 'R']

# Prepare training image file names
train_images_LF = [image_id + ".jpg" for image_id in ids_LF]
train_images_RF = [image_id + ".jpg" for image_id in ids_RF]

df = pd.read_csv('/content/drive/My Drive/Independent-Prj/training.csv')

# For every image id, select the rows whose Patient_ID equals the id without
# its last three characters, restricted to that side's label columns.
y_train_all_LF = [
    df.loc[df['Patient_ID'] == image_id[:-3], label_LF].values.tolist()
    for image_id in ids_LF
]
y_train_all_RF = [
    df.loc[df['Patient_ID'] == image_id[:-3], label_RF].values.tolist()
    for image_id in ids_RF
]

# Keep only the first matching row per image, giving one row of six targets
# per image. An id with no matching row raises IndexError here.
y_train_all_t_LF = np.array([rows[0] for rows in y_train_all_LF])
y_train_all_t_RF = np.array([rows[0] for rows in y_train_all_RF])

# Build the LF feature table: x_train_LF[i][joint] is the feature vector of
# the crop for joint `joint` in image i.
# h, w, count, outliers_train and y_train_LF are not used by this loop; they
# are kept only in case other cells reference them.
h = []
w = []
count = 0
outliers_train = []
x_train_LF = []
y_train_LF = []
crp_size = (32, 32)
for i, image_name in enumerate(train_images_LF):
    # The image and its box file do not depend on the joint, so load each
    # once per image rather than once per joint.
    img = cv2.imread(PATH_image + image_name)
    boxes = np.load(PATH_box_embedding + image_name[:-4] + '.npy')

    x_train_j_LF = []
    for joint in range(6):
        # Box entries are used as [xmin, ymin, xmax, ymax] pixel indices.
        newbb = boxes[joint]
        crop_img = img[newbb[1]:newbb[3], newbb[0]:newbb[2]]
        # NOTE(review): unlike the RF loop there is no size check here, so an
        # empty crop makes cv2.resize raise.
        crop_img = cv2.resize(crop_img, crp_size)
        x_train_j_LF.append(feature_extraction(crop_img))

    if i % 60 == 0:
        print("Id:", i, "...Done.")
    x_train_LF.append(x_train_j_LF)

# Build the RF feature table: x_train_RF[i][joint] is the feature vector of
# the crop for joint `joint` in image i. Crops smaller than 32 pixels in
# either dimension are recorded in outliers_RF as [image_index, joint] and
# get an all-zero placeholder so every image keeps six entries.
# h, w and count are not used by this loop.
h = []
w = []
count = 0
x_train_RF = []
crp_size = (32, 32)
outliers_RF = []
# Length of a flattened 3-channel crop of size crp_size (32 * 32 * 3 = 3072),
# i.e. the length of the vectors feature_extraction returns for valid crops.
placeholder_len = crp_size[0] * crp_size[1] * 3
for i, image_name in enumerate(train_images_RF):
    # The image and its box file do not depend on the joint, so load each
    # once per image rather than once per joint.
    img = cv2.imread(PATH_image + image_name)
    boxes = np.load(PATH_box_embedding + image_name[:-4] + '.npy')

    x_train_j_RF = []
    for joint in range(6):
        # Box entries are used as [xmin, ymin, xmax, ymax] pixel indices.
        newbb = boxes[joint]
        crop_img = img[newbb[1]:newbb[3], newbb[0]:newbb[2]]
        r, c, ch = crop_img.shape
        if r < 32 or c < 32:
            outliers_RF.append([i, joint])
            x_train_j_RF.append([0] * placeholder_len)
        else:
            crop_img = cv2.resize(crop_img, crp_size)
            x_train_j_RF.append(feature_extraction(crop_img))

    if i % 60 == 0:
        print("Id:", i, "...Done.")
    x_train_RF.append(x_train_j_RF)

# Flatten the per-image / per-joint tables into one sample list: all LF
# samples first, then the RF samples that were not flagged as outliers.
x_train = []
y_train = []
for idx, joint_features in enumerate(x_train_LF):
    for joint in range(6):
        x_train.append(joint_features[joint])
        y_train.append(y_train_all_t_LF[idx][joint])

# outliers_RF holds [image_index, joint] pairs; build a set once so each
# membership test is O(1) instead of a scan over the whole list.
skipped_RF = {(img_idx, joint_idx) for img_idx, joint_idx in outliers_RF}
for idx, joint_features in enumerate(x_train_RF):
    for joint in range(6):
        if (idx, joint) in skipped_RF:
            continue
        x_train.append(joint_features[joint])
        y_train.append(y_train_all_t_RF[idx][joint])

# Down-sample target value 0: collect the positions of all samples whose
# target is 0, shuffle them, and delete up to 3000 of them from the set.
idx_0 = []
# Count of samples per target value (11 bins, indices 0..10); it is filled
# here but not read again in this section.
freq = [0] * 11
for position, target in enumerate(y_train):
    freq[target] += 1
    if target == 0:
        idx_0.append(position)

# Unseeded shuffle: the removed subset differs from run to run.
np.random.shuffle(idx_0)
rmv_idx = idx_0[:3000]

x_train = np.delete(np.array(x_train), rmv_idx, 0)
y_train = np.delete(np.array(y_train), rmv_idx)

# Persist the balanced training set so later steps can reload it from Drive.
for out_path, data in (
    ("/content/drive/My Drive/Independent-Prj/x_train_f.npy", x_train),
    ("/content/drive/My Drive/Independent-Prj/y_train_f.npy", y_train),
):
    np.save(out_path, data)

# Fit a Lasso regressor (default hyper-parameters) on the full training set
# and store the fitted model.
x_train = np.array(x_train)
y_train = np.array(y_train)
LR_model = Lasso().fit(x_train, y_train)

joblib.dump(
    LR_model,
    '/content/drive/My Drive/Independent-Prj/models/lasso_reg_model_foot.pkl',
)

# Cross validation sets: reload the saved arrays and keep the first 1200
# samples.
x_train = np.load("/content/drive/My Drive/Independent-Prj/x_train_f.npy")[:1200]
y_train = np.load("/content/drive/My Drive/Independent-Prj/y_train_f.npy")[:1200]

# Four consecutive folds of 240 samples each (0-239, 240-479, 480-719,
# 720-959). Samples 960-1199 are not part of any fold.
fold_starts = range(0, 960, 240)
x_cv_set = [x_train[start:start + 240] for start in fold_starts]
y_cv_set = [y_train[start:start + 240] for start in fold_starts]

# Create cross validation models: one LinearRegression is fitted per fold
# (on that fold's samples only) and saved under an indexed file name.
for i, (x_fold, y_fold) in enumerate(zip(x_cv_set, y_cv_set)):
    LR_model = LinearRegression().fit(x_fold, y_fold)
    f_name = "cv_reg_model_foot_" + str(i) + ".pkl"
    joblib.dump(LR_model, '/content/drive/My Drive/Independent-Prj/models/' + f_name)

# Mount Google Drive at /content/drive (only available inside Colab).
# NOTE(review): every Drive path used earlier in this script lives under
# /content/drive, yet the mount only happens here on the last lines. When run
# top-to-bottom as a plain script the earlier loads would run before the
# mount; this presumably reflects notebook cell order. Confirm, and mount
# first if so.
from google.colab import drive
drive.mount('/content/drive')