这篇是Ren Shaoqing发表在cvpr2014上的paper,论文是在CPR框架下做的,想了解CPR的同学可以参见我之前的博客,网上有同学给出了code,该code部分实现了LBF,链接为https://github.com/jwyang/face-alignment。下面介绍算法的整个流程。
简单的说,该算法利用random forest和shape indexed features得到local binary features,然后利用linear regression 求解regressor,检测时利用训练得到的feature mapping和linear projection得倒update shape ΔS,然后与上一个stage相加得到当前stage的shape,之后不断迭代直到迭代结束。下面我会根据code做详细的描述。
算法分为train和test两个部分,首先介绍train.m,详细的介绍会在code中给出
%TRAIN_MODEL Summary of this function goes here
% Function: train face alignment model
% Detailed explanation goes here
% Input:
% dbnames: the names of database
% Configure the parameters for training model
global params;
global Tr_Data;
config_tr;
%这里COFW是paper rcpr中用到的数据库,该数据库与lfpw,helen等不同的地方在于每个shape的标签中包含是否受遮挡。
if size(dbnames) > & sum(strcmp(dbnames, 'COFW')) >
disp('Sorry, COFW cannnot be combined with others')
return;
end
%读取meanshape
if sum(strcmp(dbnames, 'COFW')) >
load('../initial_shape/InitialShape_29.mat');
params.meanshape = S0;
else
load('../initial_shape/InitialShape_68.mat');
params.meanshape = S0;
end %是否选择并行计算
if params.isparallel
disp('Attention, if error occurs, plese ensure you used the correct version of parallel initialization.'); if isempty(gcp('nocreate'))
parpool();
else
disp('Already initialized');
end %{
if matlabpool('size') <=
matlabpool('open','local',);
else
disp('Already initialized');
end
%}
end % load trainning data from hardware
Tr_Data = [];
% Tr_Bboxes = [];
for i = :length(dbnames)
% load training samples (including training images, and groundtruth shapes)
imgpathlistfile = strcat('..\datasets\', dbnames{i}, '\Path_Images.txt');
tr_data = loadsamples(imgpathlistfile, );%读取数据
Tr_Data = [Tr_Data; tr_data];
end % Augmentate data for traing: assign multiple initial shapes to each image
Data = Tr_Data; % (::end);
Param = params;
%翻转图像,目的是为了enlarge training data
if Param.flipflag % if conduct flipping
Data_flip = cell(size(Data, ), );
for i = :length(Data_flip)
Data_flip{i}.img_gray = fliplr(Data{i}.img_gray);
Data_flip{i}.width_orig = Data{i}.width_orig;
Data_flip{i}.height_orig = Data{i}.height_orig;
Data_flip{i}.width = Data{i}.width;
Data_flip{i}.height = Data{i}.height; Data_flip{i}.shape_gt = flipshape(Data{i}.shape_gt);
Data_flip{i}.shape_gt(:, ) = Data{i}.width - Data_flip{i}.shape_gt(:, ); Data_flip{i}.bbox_gt = Data{i}.bbox_gt;
Data_flip{i}.bbox_gt() = Data_flip{i}.width - Data_flip{i}.bbox_gt() - Data_flip{i}.bbox_gt(); Data_flip{i}.bbox_facedet = Data{i}.bbox_facedet;
Data_flip{i}.bbox_facedet() = Data_flip{i}.width - Data_flip{i}.bbox_facedet() - Data_flip{i}.bbox_facedet();
end
Data = [Data; Data_flip];
end % choose corresponding points for training
for i = :length(Data)
Data{i}.shape_gt = Data{i}.shape_gt(Param.ind_usedpts, :);%选取特定的landmark
Data{i}.bbox_gt = getbbox(Data{i}.shape_gt);%根据ground truth shape 确定bounding bbox % modify detection boxes
shape_facedet = resetshape(Data{i}.bbox_facedet, Param.meanshape);%将mean shape reproject face detection bbox上
shape_facedet = shape_facedet(Param.ind_usedpts, :);
Data{i}.bbox_facedet = getbbox(shape_facedet);
end Param.meanshape = S0(Param.ind_usedpts, :);%选取特定的landmark dbsize = length(Data); % load('Ts_bbox.mat'); augnumber = Param.augnumber; for i = :dbsize
% initializ the shape of current face image by randomly selecting multiple shapes from other face images
% indice = ceil(dbsize*rand(, augnumber)); indice_rotate = ceil(dbsize*rand(, augnumber));
indice_shift = ceil(dbsize*rand(, augnumber));
scales = + 0.2*(rand([ augnumber]) - 0.5); Data{i}.intermediate_shapes = cell(, Param.max_numstage);
Data{i}.intermediate_bboxes = cell(, Param.max_numstage); Data{i}.intermediate_shapes{} = zeros([size(Param.meanshape), augnumber]);
Data{i}.intermediate_bboxes{} = zeros([augnumber, size(Data{i}.bbox_gt, )]); Data{i}.shapes_residual = zeros([size(Param.meanshape), augnumber]);
Data{i}.tf2meanshape = cell(augnumber, );
Data{i}.meanshape2tf = cell(augnumber, ); % if Data{i}.isdet ==
% Data{i}.bbox_facedet = Data{i}.bbox_facedet*ts_bbox;
% end
for sr = :params.augnumber
if sr ==
% estimate the similarity transformation from initial shape to mean shape
% Data{i}.intermediate_shapes{}(:,:, sr) = resetshape(Data{i}.bbox_gt, Param.meanshape);
% Data{i}.intermediate_bboxes{}(sr, :) = Data{i}.bbox_gt;
Data{i}.intermediate_shapes{}(:,:, sr) = resetshape(Data{i}.bbox_facedet, Param.meanshape);%
Data{i}.intermediate_bboxes{}(sr, :) = Data{i}.bbox_facedet; meanshape_resize = resetshape(Data{i}.intermediate_bboxes{}(sr, :), Param.meanshape);将mean shape reproject face detection bbox上
%计算当前的shape与mean shape之间的相似性变换
Data{i}.tf2meanshape{} = fitgeotrans(bsxfun(@minus, Data{i}.intermediate_shapes{}(:end,:, ), mean(Data{i}.intermediate_shapes{}(:end,:, ))), ...
(bsxfun(@minus, meanshape_resize(:end, :), mean(meanshape_resize(:end, :)))), 'NonreflectiveSimilarity');
Data{i}.meanshape2tf{} = fitgeotrans((bsxfun(@minus, meanshape_resize(:end, :), mean(meanshape_resize(:end, :)))), ...
bsxfun(@minus, Data{i}.intermediate_shapes{}(:end,:, ), mean(Data{i}.intermediate_shapes{}(:end,:, ))), 'NonreflectiveSimilarity'); % calculate the residual shape from initial shape to groundtruth shape under normalization scale shape_residual = bsxfun(@rdivide, Data{i}.shape_gt - Data{i}.intermediate_shapes{}(:,:, ), [Data{i}.intermediate_bboxes{}(, ) Data{i}.intermediate_bboxes{}(, )]);
% transform the shape residual in the image coordinate to the mean shape coordinate
[u, v] = transformPointsForward(Data{i}.tf2meanshape{}, shape_residual(:, )', shape_residual(:, 2)');
Data{i}.shapes_residual(:, , ) = u';
Data{i}.shapes_residual(:, , ) = v';
else
% randomly rotate the shape % shape = resetshape(Data{i}.bbox_gt, Param.meanshape); % Data{indice_rotate(sr)}.shape_gt
shape = resetshape(Data{i}.bbox_facedet, Param.meanshape); % Data{indice_rotate(sr)}.shape_gt
%根据随机选取的scale,rotation,translate计算新的初始shape然后投影到bbox上
if params.augnumber_scale ~=
shape = scaleshape(shape, scales(sr));
end if params.augnumber_rotate ~=
shape = rotateshape(shape);
end if params.augnumber_shift ~=
shape = translateshape(shape, Data{indice_shift(sr)}.shape_gt);
end Data{i}.intermediate_shapes{}(:, :, sr) = shape;
Data{i}.intermediate_bboxes{}(sr, :) = getbbox(shape); meanshape_resize = resetshape(Data{i}.intermediate_bboxes{}(sr, :), Param.meanshape); Data{i}.tf2meanshape{sr} = fitgeotrans(bsxfun(@minus, Data{i}.intermediate_shapes{}(:end,:, sr), mean(Data{i}.intermediate_shapes{}(:end,:, sr))), ...
bsxfun(@minus, meanshape_resize(:end, :), mean(meanshape_resize(:end, :))), 'NonreflectiveSimilarity');
Data{i}.meanshape2tf{sr} = fitgeotrans(bsxfun(@minus, meanshape_resize(:end, :), mean(meanshape_resize(:end, :))), ...
bsxfun(@minus, Data{i}.intermediate_shapes{}(:end,:, sr), mean(Data{i}.intermediate_shapes{}(:end,:, sr))), 'NonreflectiveSimilarity'); shape_residual = bsxfun(@rdivide, Data{i}.shape_gt - Data{i}.intermediate_shapes{}(:,:, sr), [Data{i}.intermediate_bboxes{}(sr, ) Data{i}.intermediate_bboxes{}(sr, )]);
[u, v] = transformPointsForward(Data{i}.tf2meanshape{}, shape_residual(:, )', shape_residual(:, 2)');
Data{i}.shapes_residual(:, , sr) = u';
Data{i}.shapes_residual(:, , sr) = v';
% Data{i}.shapes_residual(:, :, sr) = tformfwd(Data{i}.tf2meanshape{sr}, shape_residual(:, ), shape_residual(:, ));
end
end
end % train random forests for each landmark
randf = cell(Param.max_numstage, );
Ws = cell(Param.max_numstage, ); %{
if nargin >
n = size(LBFRegModel_initial.ranf, );
for i = :n
randf(:n, :) = LBFRegModel_initial.ranf;
end
end
%}
%开始训练
for s = :Param.max_numstage
% learn random forest for s-th stage
disp('train random forests for landmarks...'); %{
if isempty(randf{s})
if exist(strcat('randfs\randf', num2str(s), '.mat'))
load(strcat('randfs\randf', num2str(s), '.mat'));
else
tic;
randf{s} = train_randomfs(Data, Param, s);
toc;
save(strcat('randfs\randf', num2str(s), '.mat'), 'randf', '-v7.3');
end
end
%}
%利用随机森林训练得倒feature mapping
if isempty(randf{s})
tic;
randf{s} = train_randomfs(Data, Param, s);
toc;
end % derive binary codes given learned random forest in current stage
disp('extract local binary features...'); tic;
binfeatures = derivebinaryfeat(randf{s}, Data, Param, s);
% save(strcat('LBFeats\LBFeats', num2str(s), '.mat'), 'binfeatures', '-v7.3');
toc; % learn global linear regrassion given binary feature
disp('learn global regressors...');
tic;
[W, Data] = globalregression(binfeatures, Data, Param, s);
Ws{s} = W;
% save(strcat('Ws\W', num2str(s), '.mat'), 'W', '-v7.3');
toc; end
%保存regression model
LBFRegModel.ranf = randf;
LBFRegModel.Ws = Ws;
接下来介绍train_randomfs.m,该文件是利用随机森林训练得倒feature mapping,具体流程为假设shape 有68个landmark,对每个lanmark训练一个随机森林,每个森林中包含多个决策树,具体介绍见code。
function rfs = train_randomfs(Tr_Data, params, stage)
%TRAIN_RANDOMFS Summary of this function goes here
% Function: train random forest for each landmark
% Detailed explanation goes here
% Input:
% lmarkID: ID of landmark
% stage: the stage of training process
% Output:
% randf: learned random forest
dbsize = length(Tr_Data); % rf = cell(, params.max_numtrees);
%这里是将所有training data 按照一定的重合度分开,分开的data分别作为训练一个决策树的data
overlap_ratio = params.bagging_overlap; Q = floor(double(dbsize)/((-params.bagging_overlap)*(params.max_numtrees))); Data = cell(, params.max_numtrees);
for t = :params.max_numtrees
% calculate the number of samples for each random tree
% train t-th random tree
is = max(floor((t-)*Q - (t-)*Q*overlap_ratio + ), );
ie = min(is + Q, dbsize);
Data{t} = Tr_Data(is:ie);
end % divide local region into grid
params.radius = ([:/:]');
params.angles = *pi*[:/:]'; rfs = cell(length(params.meanshape), params.max_numtrees);
%数据的初始化
parfor i = :length(params.meanshape)
rf = cell(, params.max_numtrees);
% disp(strcat(num2str(i), 'th landmark is processing...'));
for t = :params.max_numtrees
% disp(strcat('training', {''}, num2str(t), '-th tree for', {''}, num2str(lmarkID), '-th landmark')); % calculate the number of samples for each random tree
% train t-th random tree
is = max(floor((t-)*Q - (t-)*Q*overlap_ratio + ), );
ie = min(is + Q, dbsize); max_numnodes = ^params.max_depth - ; rf{t}.ind_samples = cell(max_numnodes, );%样本的索引值
rf{t}.issplit = zeros(max_numnodes, );%是否分裂
rf{t}.pnode = zeros(max_numnodes, );%父亲节点
rf{t}.depth = zeros(max_numnodes, );%深度
rf{t}.cnodes = zeros(max_numnodes, );%孩子节点
rf{t}.isleafnode = zeros(max_numnodes, );%是否是叶子节点
rf{t}.feat = zeros(max_numnodes, );%特征
rf{t}.thresh = zeros(max_numnodes, );%阈值 rf{t}.ind_samples{} = :(ie - is + )*(params.augnumber);
rf{t}.issplit() = ;
rf{t}.pnode() = ;
rf{t}.depth() = ;
rf{t}.cnodes(, :) = [ ];
rf{t}.isleafnode() = ;
rf{t}.feat(, :) = zeros(, );
rf{t}.thresh() = ; num_nodes = ;
num_leafnodes = ;
stop = ;
while(~stop)
num_nodes_iter = num_nodes;
num_split = ;
for n = :num_nodes_iter
if ~rf{t}.issplit(n)
if rf{t}.depth(n) == params.max_depth % || length(rf{t}.ind_samples{n}) <
if rf{t}.depth(n) ==
rf{t}.depth(n) = ;
end
rf{t}.issplit(n) = ;
else
% separate the samples into left and right path [thresh, feat, lcind, rcind, isvalid] = splitnode(i, rf{t}.ind_samples{n}, Data{t}, params, stage);%对每个节点分裂为左子树和右子树 %{
if ~isvalid
rf{t}.feat(n, :) = [ ];
rf{t}.thresh(n) = ;
rf{t}.issplit(n) = ;
rf{t}.cnodes(n, :) = [ ];
rf{t}.isleafnode(n) = ;
continue;
end
%} % set the threshold and featture for current node
rf{t}.feat(n, :) = feat;
rf{t}.thresh(n) = thresh;
rf{t}.issplit(n) = ;
rf{t}.cnodes(n, :) = [num_nodes+ num_nodes+];
rf{t}.isleafnode(n) = ; % add left and right child nodes into the random tree rf{t}.ind_samples{num_nodes+} = lcind;
rf{t}.issplit(num_nodes+) = ;
rf{t}.pnode(num_nodes+) = n;
rf{t}.depth(num_nodes+) = rf{t}.depth(n) + ;
rf{t}.cnodes(num_nodes+, :) = [ ];
rf{t}.isleafnode(num_nodes+) = ; rf{t}.ind_samples{num_nodes+} = rcind;
rf{t}.issplit(num_nodes+) = ;
rf{t}.pnode(num_nodes+) = n;
rf{t}.depth(num_nodes+) = rf{t}.depth(n) + ;
rf{t}.cnodes(num_nodes+, :) = [ ];
rf{t}.isleafnode(num_nodes+) = ; num_split = num_split + ;
num_leafnodes = num_leafnodes + ;
num_nodes = num_nodes + ;
end
end
end if num_split ==
stop = ;
else
rf{t}.num_leafnodes = num_leafnodes;
rf{t}.num_nodes = num_nodes;
rf{t}.id_leafnodes = find(rf{t}.isleafnode == );
end
end end
% disp(strcat(num2str(i), 'th landmark is over'));
rfs(i, :) = rf;
end
end function [thresh, feat, lcind, rcind, isvalid] = splitnode(lmarkID, ind_samples, Tr_Data, params, stage) if isempty(ind_samples)
thresh = ;
feat = [ ];
rcind = [];
lcind = [];
isvalid = ;
return;
end % generate params.max_rand cndidate feature
% anglepairs = samplerandfeat(params.max_numfeat);
% radiuspairs = [rand([params.max_numfeat, ]) rand([params.max_numfeat, ])];
[radiuspairs, anglepairs] = getproposals(params.max_numfeats(stage), params.radius, params.angles);%随机选取pixel值,这里是随机得到半径和角度 angles_cos = cos(anglepairs);
angles_sin = sin(anglepairs);
%shape indexed features是指随机选取两个landmark,然后在两个landmark周围随机选取offest,得倒相对应的pixel值,将两个pixel值相减得到features
% extract pixel difference features from pairs pdfeats = zeros(params.max_numfeats(stage), length(ind_samples)); shapes_residual = zeros(length(ind_samples), ); for i = :length(ind_samples)
s = floor((ind_samples(i)-)/(params.augnumber)) + ;
k = mod(ind_samples(i)-, (params.augnumber)) + ; % calculate the relative location under the coordinate of meanshape
pixel_a_x_imgcoord = (angles_cos(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*Tr_Data{s}.intermediate_bboxes{stage}(k, );
pixel_a_y_imgcoord = (angles_sin(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*Tr_Data{s}.intermediate_bboxes{stage}(k, ); pixel_b_x_imgcoord = (angles_cos(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*Tr_Data{s}.intermediate_bboxes{stage}(k, );
pixel_b_y_imgcoord = (angles_sin(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*Tr_Data{s}.intermediate_bboxes{stage}(k, ); % no transformation
%{
pixel_a_x_lmcoord = pixel_a_x_imgcoord;
pixel_a_y_lmcoord = pixel_a_y_imgcoord; pixel_b_x_lmcoord = pixel_b_x_imgcoord;
pixel_b_y_lmcoord = pixel_b_y_imgcoord;
%} % transform the pixels from image coordinate (meanshape) to coordinate of current shape [pixel_a_x_lmcoord, pixel_a_y_lmcoord] = transformPointsForward(Tr_Data{s}.meanshape2tf{k}, pixel_a_x_imgcoord', pixel_a_y_imgcoord');
pixel_a_x_lmcoord = pixel_a_x_lmcoord';
pixel_a_y_lmcoord = pixel_a_y_lmcoord'; [pixel_b_x_lmcoord, pixel_b_y_lmcoord] = transformPointsForward(Tr_Data{s}.meanshape2tf{k}, pixel_b_x_imgcoord', pixel_b_y_imgcoord');
pixel_b_x_lmcoord = pixel_b_x_lmcoord';
pixel_b_y_lmcoord = pixel_b_y_lmcoord'; pixel_a_x = int32(bsxfun(@plus, pixel_a_x_lmcoord, Tr_Data{s}.intermediate_shapes{stage}(lmarkID, , k)));
pixel_a_y = int32(bsxfun(@plus, pixel_a_y_lmcoord, Tr_Data{s}.intermediate_shapes{stage}(lmarkID, , k))); pixel_b_x = int32(bsxfun(@plus, pixel_b_x_lmcoord, Tr_Data{s}.intermediate_shapes{stage}(lmarkID, , k)));
pixel_b_y = int32(bsxfun(@plus, pixel_b_y_lmcoord, Tr_Data{s}.intermediate_shapes{stage}(lmarkID, , k))); width = (Tr_Data{s}.width);
height = (Tr_Data{s}.height); pixel_a_x = max(, min(pixel_a_x, width));
pixel_a_y = max(, min(pixel_a_y, height)); pixel_b_x = max(, min(pixel_b_x, width));
pixel_b_y = max(, min(pixel_b_y, height)); pdfeats(:, i) = double(Tr_Data{s}.img_gray(pixel_a_y + (pixel_a_x-)*height)) - double(Tr_Data{s}.img_gray(pixel_b_y + (pixel_b_x-)*height));%计算shape indexed features
%./ double(Tr_Data{s}.img_gray(pixel_a_y + (pixel_a_x-)*height)) + double(Tr_Data{s}.img_gray(pixel_b_y + (pixel_b_x-)*height)); % drawshapes(Tr_Data{s}.img_gray, [pixel_a_x pixel_a_y pixel_b_x pixel_b_y]);
% hold off; shapes_residual(i, :) = Tr_Data{s}.shapes_residual(lmarkID, :, k);
end E_x_2 = mean(shapes_residual(:, ).^);
E_x = mean(shapes_residual(:, )); E_y_2 = mean(shapes_residual(:, ).^);
E_y = mean(shapes_residual(:, ));
%
var_overall = length(ind_samples)*((E_x_2 - E_x^) + (E_y_2 - E_y^));%计算方差 % var_overall = length(ind_samples)*(var(shapes_residual(:, )) + var(shapes_residual(:, ))); % max_step = min(length(ind_samples), params.max_numthreshs);
% step = floor(length(ind_samples)/max_step);
max_step = ; var_reductions = zeros(params.max_numfeats(stage), max_step);
thresholds = zeros(params.max_numfeats(stage), max_step); [pdfeats_sorted] = sort(pdfeats, );%将特征排序 % shapes_residual = shapes_residual(ind, :); for i = :params.max_numfeats(stage)
% for t = :max_step
t = ;
ind = ceil(length(ind_samples)*(0.5 + 0.9*(rand() - 0.5)));
threshold = pdfeats_sorted(i, ind); % pdfeats_sorted(i, t*step); % 随机选取阈值
thresholds(i, t) = threshold;
ind_lc = (pdfeats(i, :) < threshold);%小于阈值的样本放入左子树
ind_rc = (pdfeats(i, :) >= threshold);%大于阈值的样本放入右子树 % figure, hold on, plot(shapes_residual(ind_lc, ), shapes_residual(ind_lc, ), 'r.')
% plot(shapes_residual(ind_rc, ), shapes_residual(ind_rc, ), 'g.')
% close;
% compute E_x_2_lc = mean(shapes_residual(ind_lc, ).^);
E_x_lc = mean(shapes_residual(ind_lc, )); E_y_2_lc = mean(shapes_residual(ind_lc, ).^);
E_y_lc = mean(shapes_residual(ind_lc, )); var_lc = (E_x_2_lc + E_y_2_lc)- (E_x_lc^ + E_y_lc^);%左子树的方差 E_x_2_rc = (E_x_2*length(ind_samples) - E_x_2_lc*sum(ind_lc))/sum(ind_rc);
E_x_rc = (E_x*length(ind_samples) - E_x_lc*sum(ind_lc))/sum(ind_rc); E_y_2_rc = (E_y_2*length(ind_samples) - E_y_2_lc*sum(ind_lc))/sum(ind_rc);
E_y_rc = (E_y*length(ind_samples) - E_y_lc*sum(ind_lc))/sum(ind_rc); var_rc = (E_x_2_rc + E_y_2_rc)- (E_x_rc^ + E_y_rc^);%右子树的方差 var_reduce = var_overall - sum(ind_lc)*var_lc - sum(ind_rc)*var_rc;%减少的方差 % var_reduce = var_overall - sum(ind_lc)*(var(shapes_residual(ind_lc, )) + var(shapes_residual(ind_lc, ))) - sum(ind_rc)*(var(shapes_residual(ind_rc, )) + var(shapes_residual(ind_rc, )));
var_reductions(i, t) = var_reduce;
% end
% plot(var_reductions(i, :));
end [~, ind_colmax] = max(var_reductions);%选取减少方差的最大值的索引
%这里之所以选择减少的方差的最大值我的理解是减少的方差的越大,则剩下的方差越小,而剩下的方差等于左子树的方差加上右子树的方差,故左子树和右子树的方差越小,方差越小,表示数据越集中,分散的程度越小,这样说分类越正确。
ind_max = ; %{
if var_max <=
isvalid = ;
else
isvalid = ;
end
%}
isvalid = ;
%将对应的特征和阈值保存起来,这些参数即是所求的feature mapping
thresh = thresholds(ind_colmax(ind_max), ind_max); feat = [anglepairs(ind_colmax(ind_max), :) radiuspairs(ind_colmax(ind_max), :)]; lcind = ind_samples(find(pdfeats(ind_colmax(ind_max), :) < thresh));
rcind = ind_samples(find(pdfeats(ind_colmax(ind_max), :) >= thresh)); end
之后将training data通过刚才得到的feature mapping 计算binary feature,即每个training data到达的每个树的叶子节点即表示为1,其他为0,然后将每个树的binary feature连接接起来,具体见derivebinaryfeat.m
function binfeatures = derivebinaryfeat(randf, Tr_Data, params, stage)
%DERIVEBINARYFEAT Summary of this function goes here
% Function: Derive binary features for each sample given learned random forest
% Detailed explanation goes here
% Input:
% lmarkID: the ID of landmark
% randf: learned random forest in current stage
% Tr_Data: training data
% params: parameters for curent stage % calculate the overall dimension of binary feature, concatenate the
% features for all landmarks, and the feature of one landmark is sum
% leafnodes of all random trees; dims_binfeat = ; ind_bincode = zeros(size(randf));
%计算binary features的维度
for l = :size(randf, )
for t = :size(randf, )
ind_bincode(l, t) = randf{l, t}.num_leafnodes;
dims_binfeat = dims_binfeat + randf{l, t}.num_leafnodes;
end
end % initilaize the memory for binfeatures
dbsize = length(Tr_Data); % faster implementation
%{
tic;
binfeatures = zeros(dbsize*(params.augnumber), dims_binfeat);
img_sizes = zeros(, dbsize*(params.augnumber));
shapes = zeros([size(params.meanshape), dbsize*(params.augnumber)]);
tfs2meanshape = cell(, dbsize*params.augnumber); img_areas = zeros(, dbsize);
parfor i = :dbsize
img_areas(i) = Tr_Data{i}.width*Tr_Data{i}.height;
end imgs_gray = zeros(dbsize, max(img_areas)); for i = :dbsize
area = img_areas(i);
imgs_gray(i, :area) = Tr_Data{i}.img_gray(:)';
shapes(:, :, (i-)*params.augnumber+:i*params.augnumber) = Tr_Data{i}.intermediate_shapes{stage}; for k = :params.augnumber
img_sizes(:, (i-)*params.augnumber+k) = [Tr_Data{i}.width; Tr_Data{i}.height];
tfs2meanshape{(i-)*params.augnumber+k} = Tr_Data{i}.tf2meanshape{k};
end
end binfeature_lmarks = cell(, size(params.meanshape, ));
num_leafnodes = zeros(, size(params.meanshape, ));
for l = :size(params.meanshape, )
[binfeature_lmarks{l}, num_leafnodes(l)] = lbf_faster(randf{l}, imgs_gray, img_sizes, shapes(l, :, :), tfs2meanshape, params, stage);
end cumnum_leafnodes = [ cumsum(num_leafnodes)];
binfeatures_faster = binfeatures;
for l = :size(params.meanshape, )
binfeatures_faster(:, cumnum_leafnodes(l)+:cumnum_leafnodes(l+)) = binfeature_lmarks{l};
end toc;
%}
feats = cell(size(params.meanshape, ), );
isleaf = cell(size(params.meanshape, ), );
threshs = cell(size(params.meanshape, ), );
cnodes = cell(size(params.meanshape, ), ); % prepare for the derivation of local binary codes
for l = :size(params.meanshape, )
% concatenate all nodes of all random trees
rf = randf(l, :);
num_rfnodes = ;
for t = :params.max_numtrees
num_rfnodes = num_rfnodes + rf{t}.num_nodes;
end % fast implementation
feats{l} = zeros(num_rfnodes, );
isleaf{l} = zeros(num_rfnodes, );
threshs{l} = zeros(num_rfnodes, );
cnodes{l} = zeros(num_rfnodes, ); id_rfnode = ; for t = :params.max_numtrees
feats{l}(id_rfnode:(id_rfnode + rf{t}.num_nodes - ), :) = rf{t}.feat(:rf{t}.num_nodes, :);
isleaf{l}(id_rfnode:(id_rfnode + rf{t}.num_nodes - ), :) = rf{t}.isleafnode(:rf{t}.num_nodes, :);
threshs{l}(id_rfnode:(id_rfnode + rf{t}.num_nodes - ), :) = rf{t}.thresh(:rf{t}.num_nodes, :);
cnodes{l}(id_rfnode:(id_rfnode + rf{t}.num_nodes - ), :) = rf{t}.cnodes(:rf{t}.num_nodes, :); id_rfnode = id_rfnode + rf{t}.num_nodes;
end
end % extract feature for each samples
parfor i = :dbsize*(params.augnumber)
k = floor((i-)/(params.augnumber)) + ;
s = mod(i-, (params.augnumber)) + ;
img_gray = Tr_Data{k}.img_gray;
bbox = Tr_Data{k}.intermediate_bboxes{stage}(s, :);
shapes = Tr_Data{k}.intermediate_shapes{stage};
shape = shapes(:, :, s); % tf2meanshape = Tr_Data{k}.tf2meanshape{s};
meanshape2tf = Tr_Data{k}.meanshape2tf{s};
%{
feats_cell = cell(size(params.meanshape, ), params.max_numtrees); for l = :size(params.meanshape, )
% extract feature for each landmark from the random forest
for t = :params.max_numtrees
% extract feature for each ladnmark from a random tree
bincode = traversetree(randf{l}{t}, img_gray, bbox, shape(l, :), tf2meanshape, params, stage);
feats_cell{l, t} = bincode;
end
end binfeature = zeros(, dims_binfeat);
for l = :size(params.meanshape, )
% extract feature for each landmark from the random forest
offset = sum(ind_bincodes(:l-, end));
for t = :params.max_numtrees
ind_s = ind_bincodes(l, t) + + offset;
ind_e = ind_bincodes(l, t+) + offset;
% extract feature for each ladnmark from a random tree
binfeature(ind_s:ind_e) = feats_cell{l, t};
end
end
%}
% fast implementation binfeature_lmarks = cell(, size(params.meanshape, ));
num_leafnodes = zeros(, size(params.meanshape, ));
for l = :size(params.meanshape, )
% concatenate all nodes of all random trees
[binfeature_lmarks{l}, num_leafnodes(l)]= lbf_fast(randf(l, :), feats{l}, isleaf{l}, threshs{l}, cnodes{l}, img_gray, bbox, shape(l, :), meanshape2tf, params, stage);
end cumnum_leafnodes = [ cumsum(num_leafnodes)]; binfeature_alllmarks = zeros(, cumnum_leafnodes(end));
for l = :size(params.meanshape, )
binfeature_alllmarks(cumnum_leafnodes(l)+:cumnum_leafnodes(l+)) = binfeature_lmarks{l};
end %{
% faster implementation (thinking...)
binfeature_lmarks = cell(, size(params.meanshape, ));
num_leafnodes = zeros(, size(params.meanshape, ));
for l = :size(params.meanshape, )
% concatenate all nodes of all random trees
[binfeature_lmarks{l}, num_leafnodes(l)]= lbf_faster(randf{l}, img_gray, bbox, shape(l, :), tf2meanshape, params, stage);
end cumnum_leafnodes = [ cumsum(num_leafnodes)]; binfeature_alllmarks = zeros(, cumnum_leafnodes(end));
for l = :size(params.meanshape, )
binfeature_alllmarks(cumnum_leafnodes(l)+:cumnum_leafnodes(l+)) = binfeature_lmarks{l};
end
%} binfeatures(i, :) = binfeature_alllmarks;
end end function bincode = traversetree(tree, img_gray, bbox, shape, tf2meanshape, params, stage) currnode = tree.rtnodes{}; bincode = zeros(, tree.num_leafnodes); width = size(img_gray, );
height = size(img_gray, ); while() anglepairs = currnode.feat(:);
radiuspairs = currnode.feat(:); % calculate the relative location under the coordinate of meanshape
pixel_a_x_imgcoord = cos(anglepairs(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*bbox();
pixel_a_y_imgcoord = sin(anglepairs(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*bbox(); pixel_b_x_imgcoord = cos(anglepairs(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*bbox();
pixel_b_y_imgcoord = sin(anglepairs(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*bbox(); % transform the pixels from image coordinate (meanshape) to coordinate of current shape
[pixel_a_x_lmcoord, pixel_a_y_lmcoord] = tforminv(tf2meanshape, pixel_a_x_imgcoord, pixel_a_y_imgcoord); [pixel_b_x_lmcoord, pixel_b_y_lmcoord] = tforminv(tf2meanshape, pixel_b_x_imgcoord, pixel_b_y_imgcoord); pixel_a_x = ceil(pixel_a_x_lmcoord + shape());
pixel_a_y = ceil(pixel_a_y_lmcoord + shape()); pixel_b_x = ceil(pixel_b_x_lmcoord + shape());
pixel_b_y = ceil(pixel_b_y_lmcoord + shape()); pixel_a_x = max(, min(pixel_a_x, width));
pixel_a_y = max(, min(pixel_a_y, height)); pixel_b_x = max(, min(pixel_b_x, width));
pixel_b_y = max(, min(pixel_b_y, height)); f = int16(img_gray(pixel_a_y + (pixel_a_x-)*height)) - int16(img_gray(pixel_b_y + (pixel_b_x-)*height)); if f < (currnode.thresh)
id_chilenode = currnode.cnodes();
currnode = tree.rtnodes{id_chilenode};
else
id_chilenode = currnode.cnodes();
currnode = tree.rtnodes{id_chilenode};
end if isempty(currnode.cnodes)
% if the current node is a leaf node, then stop traversin
bincode(tree.id_leafnodes == id_chilenode) = ;
break;
end
end end function [binfeature, num_leafnodes] = lbf_fast(rf, feats, isleaf, threshs, cnodes, img_gray, bbox, shape, meanshape2tf, params, stage) %{
num_rfnodes = ;
for t = :params.max_numtrees
num_rfnodes = num_rfnodes + rf{t}.num_nodes;
end % fast implementation
feats = zeros(num_rfnodes, );
isleaf = zeros(num_rfnodes, );
threshs = zeros(num_rfnodes, );
cnodes = zeros(num_rfnodes, ); id_rfnode = ; for t = :params.max_numtrees
feats(id_rfnode:(id_rfnode + rf{t}.num_nodes - ), :) = rf{t}.feat(:rf{t}.num_nodes, :);
isleaf(id_rfnode:(id_rfnode + rf{t}.num_nodes - ), :) = rf{t}.isleafnode(:rf{t}.num_nodes, :);
threshs(id_rfnode:(id_rfnode + rf{t}.num_nodes - ), :) = rf{t}.thresh(:rf{t}.num_nodes, :);
cnodes(id_rfnode:(id_rfnode + rf{t}.num_nodes - ), :) = rf{t}.cnodes(:rf{t}.num_nodes, :); id_rfnode = id_rfnode + rf{t}.num_nodes;
end
%} width = size(img_gray, );
height = size(img_gray, ); anglepairs = feats(:, :);
radiuspairs = feats(:, :); % calculate the relative location under the coordinate of meanshape
pixel_a_x_imgcoord = cos(anglepairs(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*bbox();
pixel_a_y_imgcoord = sin(anglepairs(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*bbox(); pixel_b_x_imgcoord = cos(anglepairs(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*bbox();
pixel_b_y_imgcoord = sin(anglepairs(:, )).*radiuspairs(:, )*params.max_raio_radius(stage)*bbox(); % no transformaiton
%{
pixel_a_x_lmcoord = pixel_a_x_imgcoord;
pixel_a_y_lmcoord = pixel_a_y_imgcoord; pixel_b_x_lmcoord = pixel_b_x_imgcoord;
pixel_b_y_lmcoord = pixel_b_y_imgcoord;
%} % transform the pixels from image coordinate (meanshape) to coordinate of current shape [pixel_a_x_lmcoord, pixel_a_y_lmcoord] = transformPointsForward(meanshape2tf, pixel_a_x_imgcoord', pixel_a_y_imgcoord');
pixel_a_x_lmcoord = pixel_a_x_lmcoord';
pixel_a_y_lmcoord = pixel_a_y_lmcoord'; [pixel_b_x_lmcoord, pixel_b_y_lmcoord] = transformPointsForward(meanshape2tf, pixel_b_x_imgcoord', pixel_b_y_imgcoord');
pixel_b_x_lmcoord = pixel_b_x_lmcoord';
pixel_b_y_lmcoord = pixel_b_y_lmcoord'; pixel_a_x = ceil(pixel_a_x_lmcoord + shape());
pixel_a_y = ceil(pixel_a_y_lmcoord + shape()); pixel_b_x = ceil(pixel_b_x_lmcoord + shape());
pixel_b_y = ceil(pixel_b_y_lmcoord + shape()); pixel_a_x = max(, min(pixel_a_x, width));
pixel_a_y = max(, min(pixel_a_y, height)); pixel_b_x = max(, min(pixel_b_x, width));
pixel_b_y = max(, min(pixel_b_y, height)); pdfeats = double(img_gray(pixel_a_y + (pixel_a_x-)*height)) - double(img_gray(pixel_b_y + (pixel_b_x-)*height));
% ./ double(img_gray(pixel_a_y + (pixel_a_x-)*height)) + double(img_gray(pixel_b_y + (pixel_b_x-)*height)); cind = (pdfeats >= threshs) + ; % obtain the indice of child nodes for all nodes, if the current node is
% leaf node, then the indice of its child node is
ind_cnodes = cnodes; % diag(cnodes(:, cind)); binfeature = zeros(, sum(isleaf)); cumnum_nodes = ;
cumnum_leafnodes = ;
for t = :params.max_numtrees
num_nodes = (rf{t}.num_nodes);
id_cnode = ;
while()
if isleaf(id_cnode + cumnum_nodes)
binfeature(cumnum_leafnodes + find(rf{t}.id_leafnodes == id_cnode)) = ;
cumnum_nodes = cumnum_nodes + num_nodes;
cumnum_leafnodes = cumnum_leafnodes + rf{t}.num_leafnodes;
break;
end
id_cnode = ind_cnodes(cumnum_nodes + id_cnode, cind(cumnum_nodes + id_cnode));
end
end num_leafnodes = sum(isleaf); end
之后利用linear regression求解paper中的公式(4)
function [W, Tr_Data] = globalregression(binaryfeatures, Tr_Data, params, stage)
%GLOBALREGRESSION Summary of this function goes here
% Function: implement global regression given binary features and
% groundtruth shape
% Detailed explanation goes here
% Input:
% binaryfeatures: extracted binary features from all samples (N X d )
% Tr_Data: training data
% params: parameters for model
% Output:
% W: regression matrix % organize the groundtruth shape
dbsize = length(Tr_Data);
deltashapes = zeros(dbsize*(params.augnumber), *size(params.meanshape, )); % concatenate -D coordinates into a vector (N X (*L))
dist_pupils = zeros(dbsize*(params.augnumber), );
gtshapes = zeros([size(params.meanshape) dbsize*(params.augnumber)]); % concatenate -D coordinates into a vector (N X (*L))
%数据初始化
%linear regression简单的说就是计算A*W=Y,Y就是deltashapes,A是binaryfeatures,所求即是W
for i = :dbsize*(params.augnumber)
k = floor((i-)/(params.augnumber)) + ;
s = mod(i-, (params.augnumber)) + ; shape_gt = Tr_Data{k}.shape_gt;
if size(shape_gt, ) ==
dist_pupils(i) = norm((mean(shape_gt(:, :)) - mean(shape_gt(:, :))));
elseif size(shape_gt, ) ==
dist_pupils(i) = norm((mean(shape_gt(, :)) - mean(shape_gt(, :))));
elseif size(shape_gt, ) ==
dist_pupils(i) = norm((mean(shape_gt(::, :)) - mean(shape_gt(::, :))));
else
dist_pupils(i) = norm((mean(shape_gt(:, :)) - mean(shape_gt(end - :end, :))));
end
gtshapes(:, :, i) = shape_gt;
delta_shape = Tr_Data{k}.shapes_residual(:, :, s);
deltashapes(i, :) = delta_shape(:)';
end % conduct regression using libliear
% X : binaryfeatures
% Y: gtshapes
%利用工具包liblinear
param = sprintf('-s 12 -p 0 -c %f -q heart_scale', /(size(binaryfeatures, )));%参数
W_liblinear = zeros(size(binaryfeatures, ), size(deltashapes, ));
tic;
parfor o = :size(deltashapes, )
model = train(deltashapes(:, o), sparse(binaryfeatures), param);
W_liblinear(:, o) = model.w';
end
toc;
W = W_liblinear; % Predict the location of lanmarks using current regression matrix deltashapes_bar = binaryfeatures*W;
predshapes = zeros([size(params.meanshape) size(binaryfeatures, )]); % concatenate -D coordinates into a vector (N X (*L)) for i = :dbsize*(params.augnumber)
k = floor((i-)/(params.augnumber)) + ;
s = mod(i-, (params.augnumber)) + ; shapes_stage = Tr_Data{k}.intermediate_shapes{stage};
shape_stage = shapes_stage(:, :, s); deltashapes_bar_xy = reshape(deltashapes_bar(i, :), [uint8(size(deltashapes_bar, )/) ]); % transform above delta shape into the coordinate of current intermmediate shape
% delta_shape_interm_coord = [deltashapes_bar_x(i, :)', deltashapes_bar_y(i, :)'];
[u, v] = transformPointsForward(Tr_Data{k}.meanshape2tf{s}, deltashapes_bar_xy(:, )', deltashapes_bar_xy(:, 2)');
delta_shape_interm_coord = [u', v']; % derive the delta shape in the coordinate system of meanshape
delta_shape_interm_coord = bsxfun(@times, delta_shape_interm_coord, Tr_Data{k}.intermediate_bboxes{stage}(s, :)); shape_newstage = shape_stage + delta_shape_interm_coord;
predshapes(:, :, i) = shape_newstage;
%得到当前stage 的shape
Tr_Data{k}.intermediate_shapes{stage+}(:, :, s) = shape_newstage; % update transformation of current intermediate shape to meanshape,目的是为了之后stage的训练
Tr_Data{k}.intermediate_bboxes{stage+}(s, :) = getbbox(Tr_Data{k}.intermediate_shapes{stage+}(:, :, s)); meanshape_resize = (resetshape(Tr_Data{k}.intermediate_bboxes{stage+}(s, :), params.meanshape)); shape_residual = bsxfun(@rdivide, Tr_Data{k}.shape_gt - shape_newstage, Tr_Data{k}.intermediate_bboxes{stage+}(s, :)); Tr_Data{k}.tf2meanshape{s} = fitgeotrans(bsxfun(@minus, Tr_Data{k}.intermediate_shapes{stage+}(:end,:, s), mean(Tr_Data{k}.intermediate_shapes{stage+}(:end,:, s))), ...
bsxfun(@minus, meanshape_resize(:end, :), mean(meanshape_resize(:end, :))), 'NonreflectiveSimilarity'); Tr_Data{k}.meanshape2tf{s} = fitgeotrans(bsxfun(@minus, meanshape_resize(:end, :), mean(meanshape_resize(:end, :))), ...
bsxfun(@minus, Tr_Data{k}.intermediate_shapes{stage+}(:end,:, s), mean(Tr_Data{k}.intermediate_shapes{stage+}(:end,:, s))), 'NonreflectiveSimilarity'); [u, v] = transformPointsForward(Tr_Data{k}.tf2meanshape{s}, shape_residual(:, ), shape_residual(:, ));
Tr_Data{k}.shapes_residual(:, , s) = u';
Tr_Data{k}.shapes_residual(:, , s) = v';
%{
drawshapes(Tr_Data{k}.img_gray, [Tr_Data{k}.shape_gt shape_stage shape_newstage]);
hold off;
drawnow;
w = waitforbuttonpress;
%}
end
%计算误差
error_per_image = compute_error(gtshapes, predshapes); MRSE = *mean(error_per_image);
MRSE_display = sprintf('Mean Root Square Error for %d Test Samples: %f', length(error_per_image), MRSE);
disp(MRSE_display); end function [ error_per_image ] = compute_error( ground_truth_all, detected_points_all )
%compute_error
% compute the average point-to-point Euclidean error normalized by the
% inter-ocular distance (measured as the Euclidean distance between the
% outer corners of the eyes)
%
% Inputs:
% grounth_truth_all, size: num_of_points x x num_of_images
% detected_points_all, size: num_of_points x x num_of_images
% Output:
% error_per_image, size: num_of_images x num_of_images = size(ground_truth_all,);
num_of_points = size(ground_truth_all,); error_per_image = zeros(num_of_images,); for i =:num_of_images
detected_points = detected_points_all(:,:,i);
ground_truth_points = ground_truth_all(:,:,i);
if num_of_points ==
interocular_distance = norm(mean(ground_truth_points(:,:))-mean(ground_truth_points(:,:))); % norm((mean(shape_gt(:, :)) - mean(shape_gt(:, :))));
elseif num_of_points ==
interocular_distance = norm(ground_truth_points(,:) - ground_truth_points(,:));
elseif num_of_points ==
interocular_distance = norm(mean(ground_truth_points(::,:))-mean(ground_truth_points(::,:)));
else
interocular_distance = norm(mean(ground_truth_points(:,:))-mean(ground_truth_points(end - :end,:)));
end sum=;
for j=:num_of_points
sum = sum+norm(detected_points(j,:)-ground_truth_points(j,:));
end
error_per_image(i) = sum/(num_of_points*interocular_distance);
end end
test流程与train的流程类似,详细code可见test_model.m