/*
 * Builds a stump classifier: for every feature (component) of trainData it
 * calls the threshold finder selected by the training parameters and records
 * the index of the feature for which the finder reports success last.
 *
 * trainData              - feature values, must be CV_32FC1
 * flags                  - layout selector; tested with CV_IS_ROW_SAMPLE():
 *                          row layout means one sample per row, otherwise
 *                          one sample per column
 * trainClasses           - responses, CV_32FC1, a 1 x m row or an m-row column
 * typeMask (unnamed)     - not used
 * missedMeasurementsMask - must be NULL
 * compIdx                - must be NULL
 * sampleIdx              - optional sample indices stored as CV_32FC1 values;
 *                          when NULL all m samples are used in order
 * weights                - per-sample weights, CV_32FC1, row or column vector
 * trainParams            - training parameters; accessed as CvStumpTrainParams
 *
 * Returns the newly allocated stump, cast to CvClassifier*. Its release
 * member is set to cvReleaseStumpClassifier.
 */
CV_BOOST_IMPL
CvClassifier* cvCreateStumpClassifier( CvMat* trainData,
                                       int flags,
                                       CvMat* trainClasses,
                                       CvMat* /*typeMask*/,
                                       CvMat* missedMeasurementsMask,
                                       CvMat* compIdx,
                                       CvMat* sampleIdx,
                                       CvMat* weights,
                                       CvClassifierTrainParams* trainParams
                                     )
{
    CvStumpClassifier* stump = NULL;
    int m = 0; /* number of samples */
    int n = 0; /* number of components (features) */
    uchar* data = NULL;
    int cstep = 0; /* byte stride between consecutive components of one sample */
    int sstep = 0; /* byte stride between consecutive samples of one component */
    uchar* ydata = NULL;
    int ystep = 0;
    uchar* idxdata = NULL;
    int idxstep = 0;
    int l = 0; /* number of indices */
    uchar* wdata = NULL;
    int wstep = 0;

    int* idx = NULL;
    int i = 0;

    /* Accumulators handed to the threshold finder by pointer on every call.
       NOTE(review): FLT_MAX presumably marks them as "not yet computed" so the
       finder can reuse them across features - confirm against the finder. */
    float sumw = FLT_MAX;
    float sumwy = FLT_MAX;
    float sumwyy = FLT_MAX;

    CV_Assert( trainData != NULL );
    CV_Assert( CV_MAT_TYPE( trainData->type ) == CV_32FC1 );
    CV_Assert( trainClasses != NULL );
    CV_Assert( CV_MAT_TYPE( trainClasses->type ) == CV_32FC1 );
    CV_Assert( missedMeasurementsMask == NULL );
    CV_Assert( compIdx == NULL );
    CV_Assert( weights != NULL );
    CV_Assert( CV_MAT_TYPE( weights->type ) == CV_32FC1 );
    CV_Assert( trainParams != NULL );

    CvStumpTrainParams* stumpParams = (CvStumpTrainParams*) trainParams;

    data = trainData->data.ptr;
    if( CV_IS_ROW_SAMPLE( flags ) )
    {
        /* One sample per row: features of a sample are adjacent elements. */
        cstep = CV_ELEM_SIZE( trainData->type );
        sstep = trainData->step;
        m = trainData->rows;
        n = trainData->cols;
    }
    else
    {
        /* One sample per column: samples of a feature are adjacent elements. */
        sstep = CV_ELEM_SIZE( trainData->type );
        cstep = trainData->step;
        m = trainData->cols;
        n = trainData->rows;
    }

    /* Responses: a single row is walked element by element, anything else
       is walked row by row. */
    ydata = trainClasses->data.ptr;
    if( trainClasses->rows == 1 )
    {
        assert( trainClasses->cols == m );
        ystep = CV_ELEM_SIZE( trainClasses->type );
    }
    else
    {
        assert( trainClasses->rows == m );
        ystep = trainClasses->step;
    }

    /* Weights: same row/column handling as the responses. */
    wdata = weights->data.ptr;
    if( weights->rows == 1 )
    {
        assert( weights->cols == m );
        wstep = CV_ELEM_SIZE( weights->type );
    }
    else
    {
        assert( weights->rows == m );
        wstep = weights->step;
    }

    /* Without sampleIdx every sample takes part. */
    l = m;
    if( sampleIdx != NULL )
    {
        assert( CV_MAT_TYPE( sampleIdx->type ) == CV_32FC1 );

        idxdata = sampleIdx->data.ptr;
        if( sampleIdx->rows == 1 )
        {
            l = sampleIdx->cols;
            idxstep = CV_ELEM_SIZE( sampleIdx->type );
        }
        else
        {
            l = sampleIdx->rows;
            idxstep = sampleIdx->step;
        }
        assert( l <= m );
    }

    idx = (int*) cvAlloc( l * sizeof( int ) );
    stump = (CvStumpClassifier*) cvAlloc( sizeof( CvStumpClassifier) );

    /* START */
    memset( (void*) stump, 0, sizeof( CvStumpClassifier ) );

    /* The eval assignment must be a real statement: previously it sat behind
       a line comment, so eval kept the NULL written by the memset above. */
    stump->eval = cvEvalStumpClassifier;
    stump->tune = NULL;
    stump->save = NULL;
    stump->release = cvReleaseStumpClassifier;

    stump->lerror = FLT_MAX;
    stump->rerror = FLT_MAX;
    stump->left = 0.0F;
    stump->right = 0.0F;

    /* copy indices */
    if( sampleIdx != NULL )
    {
        for( i = 0; i < l; i++ )
        {
            /* indices are stored as floats and truncated to int */
            idx[i] = (int) *((float*) (idxdata + i*idxstep));
        }
    }
    else
    {
        for( i = 0; i < l; i++ )
        {
            idx[i] = i;
        }
    }

    for( i = 0; i < n; i++ )
    {
        CvValArray va;

        va.data = data + i * ((size_t) cstep);
        va.step = sstep;

        /* NOTE(review): judging by its name this orders idx by the values of
           feature i; the helper is defined outside this block. */
        icvSortIndexedValArray_32s( idx, l, &va );

        /* The finder is picked from a table by the error setting of the
           training parameters. The original author's note lists four
           impurity measures (entropy, Gini, misclassification, least
           squares) - verify against the table definition. */
        if( findStumpThreshold_32s[(int) stumpParams->error]
                ( data + i * ((size_t) cstep), sstep,
                  wdata, wstep, ydata, ystep, (uchar*) idx, sizeof( int ), l,
                  &(stump->lerror), &(stump->rerror),
                  &(stump->threshold), &(stump->left), &(stump->right),
                  &sumw, &sumwy, &sumwyy ) )
        {
            /* a non-zero result marks feature i as the one to keep */
            stump->compidx = i;
        }
    } /* for each component */
    /* END */

    cvFree( &idx );

    if( stumpParams->type == CV_CLASSIFICATION_CLASS )
    {
        /* map each branch value to -1 or +1 using 0.5 as the cut-off */
        stump->left = 2.0F * (stump->left >= 0.5F) - 1.0F;
        stump->right = 2.0F * (stump->right >= 0.5F) - 1.0F;
    }

    return (CvClassifier*) stump;
}