An Example of Deep Learning with Caffe on Kaggle's Facial Keypoints Detection Dataset
The tutorial at http://danielnouri.org/notes/2014/12/17/using-convolutional-neural-nets-to-detect-facial-keypoints-tutorial/ explains how to do facial keypoint detection with Keras, and that Keras code has already been ported to Caffe at https://github.com/olddocks/caffe-facialkp, but no one has yet written an IPython notebook that walks through it. In this article we explain the idea of turning the facial keypoints prediction problem into a regression problem: a deep convolutional neural network predicts the keypoint positions in each training iteration, and the Euclidean loss is back-propagated through the network. We describe how to think about this problem, that is, how prediction becomes regression, and we walk through the source code step by step so that the reader can follow along.
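To make the regression formulation concrete, here is a minimal sketch (plain numpy with made-up coordinates, not part of the original notebook) of how the 15 facial keypoints of one 96x96 image become a single 30-dimensional regression target scaled to [-1, 1]; this is the same scaling the load() function below applies.
In [ ]:
import numpy as np

# Hypothetical pixel coordinates of 15 keypoints on a 96x96 face image,
# interleaved as (x1, y1, x2, y2, ..., x15, y15): 30 values in total.
keypoints_px = np.array([66.0, 39.0, 30.0, 36.0] + [48.0] * 26)

# Scale from the pixel range [0, 96] to [-1, 1], exactly as load() does.
target = (keypoints_px - 48) / 48

# The CNN outputs a 30-dimensional vector per image; the EuclideanLoss layer
# back-propagates the squared error between this output and the target.
print target.shape   # (30,)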
code is now available at: https://github.com/peerajak/kaggle_facialpoints
In [1]:
import os
import numpy as np
from pandas.io.parsers import read_csv
from sklearn.utils import shuffle
import h5py
from pylab import *
%matplotlib inline
FTRAIN = 'training.csv'
FTEST = 'test.csv'
In [2]:
def writeHdf5(t, data, label=None):
    """Write *data* (and optional *label*) to h5/<t>_data.h5 and list that
    file in h5/<t>_data_list.txt, the format Caffe's HDF5Data layer expects."""
    with h5py.File(os.getcwd() + '/h5/' + t + '_data.h5', 'w') as f:
        f['data'] = data
        if label is not None:
            f['label'] = label
    with open(os.getcwd() + '/h5/' + t + '_data_list.txt', 'w') as f:
        f.write(os.getcwd() + '/h5/' + t + '_data.h5\n')
In [3]:
def load(test=False, cols=None):
    """Loads data from FTEST if *test* is True, otherwise from FTRAIN.
    Pass a list of *cols* if you're only interested in a subset of the
    target columns.
    """
    fname = FTEST if test else FTRAIN
    df = read_csv(os.path.expanduser(fname))  # load pandas dataframe

    # The Image column has pixel values separated by space; convert
    # the values to numpy arrays:
    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' '))

    if cols:  # get a subset of columns
        df = df[list(cols) + ['Image']]

    print(df.count())  # prints the number of values for each column
    df = df.dropna()  # drop all rows that have missing values in them

    X = np.vstack(df['Image'].values) / 255.  # scale pixel values to [0, 1]
    X = X.astype(np.float32)

    if not test:  # only FTRAIN has any target columns
        y = df[df.columns[:-1]].values
        y = (y - 48) / 48  # scale target coordinates to [-1, 1]
        #X, y = shuffle(X, y, random_state=42)  # shuffle train data
        y = y.astype(np.float32)
    else:
        y = None

    return X, y
In [4]:
X, y = load()
X = X.reshape((X.shape[0],1,96,96))
#sep = 1600
#writeHdf5('train',X[0:sep],y[0:sep])
#writeHdf5('val',X[sep:],y[sep:])
#X,y= load()
#X = X.reshape((X.shape[0],1,96,96))
In [9]:
print X.shape, y.shape
print X[0,:,:,:].squeeze().shape
imshow(X[0,:,:,:].squeeze(), cmap='gray')
print y[0]
In [10]:
def plot_training(x, y, axis):
    img = x.reshape(96, 96)
    axis.imshow(img, cmap='gray')
    axis.scatter(y[0::2] * 48 + 48, y[1::2] * 48 + 48, marker='x', s=15)
In [12]:
fig = figure(figsize=(10, 10))
fig.subplots_adjust(
    left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
for i in range(16):
    ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    plot_training(X[i,:,:,:].squeeze(), y[i], ax)
show()
In [13]:
sep = 1600
writeHdf5('train',X[0:sep],y[0:sep])
writeHdf5('val',X[sep:],y[sep:])
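As a quick sanity check (not part of the original notebook), the split just written can be read back to confirm the blob shapes Caffe's HDF5Data layer will see; the path below is the one writeHdf5 builds for the 'train' split.
In [ ]:
# Re-open the training split written above and print the stored shapes.
with h5py.File(os.getcwd() + '/h5/train_data.h5', 'r') as f:
    print f['data'].shape    # expected: (1600, 1, 96, 96)
    print f['label'].shape   # expected: (1600, 30)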
In [14]:
X,y= load(True)
X = X.reshape((X.shape[0],1,96,96))
writeHdf5('test',X,y)
In [ ]:
#./build/tools/caffe train -solver examples/kaggle_facialpoints/fkp_solver.prototxt -gpu 0
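If you prefer to drive training from Python instead of the caffe binary, a roughly equivalent sketch using pycaffe's solver interface is shown below; it assumes the solver in fkp_solver.prototxt uses the default SGD type and that a GPU is available (otherwise switch to CPU mode).
In [ ]:
import caffe

caffe.set_device(0)    # select GPU 0; skip if running on CPU
caffe.set_mode_gpu()   # or caffe.set_mode_cpu()

# Load the solver configuration and run the full training schedule.
solver = caffe.SGDSolver('fkp_solver.prototxt')
solver.solve()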
In [16]:
!cat fkp_solver.prototxt
In [17]:
!cat fkp_net.prototxt
In [22]:
print '============= Data Layer has images and labels ============'
print 'Top shape: 128 1 96 96 (1179648)'
print 'I0505 12:00:14.063249 13857 net.cpp:151] Top shape: 128 30 (3840)'
print '=========== This is the output prediction before the Euclidean Loss; note that its shape matches the label ==========='
print 'I0505 12:00:14.186517 13857 net.cpp:151] Top shape: 128 30 (3840)'
print '=== This is the EuclideanLoss layer ===\n\
layer {\n\
  name: "loss"\n\
  type: "EuclideanLoss"\n\
  bottom: "fc6"\n\
  bottom: "label"\n\
  top: "loss"\n\
}'
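To spell out what the EuclideanLoss layer computes in the forward pass, here is a small numpy sketch using random stand-in blobs of the same shape as the ones printed above (128 x 30): half the sum of squared differences between prediction and label, divided by the batch size.
In [ ]:
# Stand-in blobs with the same shape as the fc6 and label blobs above: 128 x 30.
pred  = np.random.randn(128, 30).astype(np.float32)
label = np.random.randn(128, 30).astype(np.float32)

# Caffe's EuclideanLoss forward pass: 1/(2N) * sum((pred - label)^2),
# where N is the batch size. This is the scalar that gets back-propagated.
diff = pred - label
loss = np.sum(diff ** 2) / (2.0 * pred.shape[0])
print loss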
In [23]:
def predictImg(data4D, layername):
    # set_input_arrays feeds the net through its memory data input layer,
    # which also expects a label array, so pass dummy zero labels alongside
    # the images.
    data4DL = np.zeros([data4D.shape[0], 1, 1, 1])
    net.set_input_arrays(data4D.astype(np.float32), data4DL.astype(np.float32))
    out = net.forward()
    prediction = net.blobs[layername].data
    return prediction

def plot_sample(x, y, axis):
    img = x.reshape(height, height)
    axis.imshow(img, cmap='gray')
    axis.scatter(y[0::2] * height/2 + height/2, y[1::2] * height/2 + height/2, marker='x', s=10)
In [29]:
import caffe
MODEL_FILE = 'fkp_deploy.prototxt'
PRETRAINED = 'model/fkp_iter_1000000.caffemodel'
height = 96
t = 'h5/test'
f = h5py.File(t + '_data.h5','r')
X = f['data'][:]
print X.shape
net=caffe.Net(MODEL_FILE,PRETRAINED,caffe.TEST)
y_pred = predictImg(X,'fc6')
fig = figure(figsize=(6, 6))
fig.subplots_adjust(
    left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
for i in range(16):
    ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    plot_sample(X[i], y_pred[i], ax)
show()
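Beyond eyeballing the plots, a rough way to quantify the model (not part of the original notebook) is to run the same prediction on the held-out validation split written earlier and report the root-mean-square error in pixels, undoing the [-1, 1] scaling by multiplying by 48. This assumes the deploy net's input layer accepts the validation batch the same way predictImg feeds it the test set above.
In [ ]:
# Hypothetical evaluation on the validation split written by writeHdf5('val', ...).
with h5py.File('h5/val_data.h5', 'r') as fv:
    Xval = fv['data'][:]
    yval = fv['label'][:]

yval_pred = predictImg(Xval, 'fc6')

# Compare only the samples the net actually returned, and convert the
# [-1, 1] coordinates back to pixels on the 96x96 image (multiply by 48).
n = yval_pred.shape[0]
rmse_px = np.sqrt(np.mean((48.0 * (yval_pred[:n] - yval[:n])) ** 2))
print rmse_px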
In [ ]: