作为一名 R 新手,我很好奇当我们调用一个函数时 R 究竟在做什么。例如,我在使用 class 包中的 knn 函数。我只需要调用 knn 并指定训练数据集和测试数据集,就能得到测试数据的预测类别。但是,我想知道是否有办法查看 knn 内部实际使用的方程/公式。我查阅了一些关于 knn 的参考资料,但仍然想知道 R 具体做了什么!有没有可能找到这样的信息?
非常感谢任何帮助!
最佳答案
好吧,您可以做的第一件事就是简单地输入函数的名称,在许多情况下,它会立即为您提供源代码。例如:
> knn
function (train, test, cl, k = 1, l = 0, prob = FALSE, use.all = TRUE)
{
# k-nearest-neighbour classification: validates the inputs, hands the
# numeric work to the compiled routine VR_knn through .C, and returns a
# factor with one predicted class per row of 'test'.
#   train   - training data, coerced to a matrix
#   test    - test data; a dimensionless vector is treated as a single row
#   cl      - class labels, one per row of 'train'
#   k       - number of neighbours (reduced to nrow(train) with a warning if larger)
#   l       - passed through unchanged to VR_knn as its second argument
#   prob    - if TRUE, attach the 'pr' output of VR_knn as attribute "prob"
#   use.all - passed through unchanged to VR_knn as its last argument
train <- as.matrix(train)
# A plain vector has no dim attribute: reshape it into a 1-row matrix.
if (is.null(dim(test)))
dim(test) <- c(1, length(test))
test <- as.matrix(test)
# Missing values are rejected before anything reaches the compiled code.
if (any(is.na(train)) || any(is.na(test)) || any(is.na(cl)))
stop("no missing values are allowed")
p <- ncol(train)
ntr <- nrow(train)
if (length(cl) != ntr)
stop("'train' and 'class' have different lengths")
# k cannot exceed the number of training rows; clamp it and warn.
if (ntr < k) {
warning(gettextf("k = %d exceeds number %d of patterns",
k, ntr), domain = NA)
k <- ntr
}
if (k < 1)
stop(gettextf("k = %d must be at least 1", k), domain = NA)
nte <- nrow(test)
if (ncol(test) != p)
stop("dims of 'test' and 'train' differ")
# Class labels are sent to the compiled code as integer factor codes;
# nc is the largest code in use.
clf <- as.factor(cl)
nc <- max(unclass(clf))
# Arguments are matched to VR_knn by position. 'res' and 'pr' are output
# buffers (one entry per test row), integer(nc + 1) is the vote-count
# buffer, and as.integer(FALSE) fills the 13th argument.
Z <- .C(VR_knn, as.integer(k), as.integer(l), as.integer(ntr),
as.integer(nte), as.integer(p), as.double(train), as.integer(unclass(clf)),
as.double(test), res = integer(nte), pr = double(nte),
integer(nc + 1), as.integer(nc), as.integer(FALSE), as.integer(use.all))
# Map the integer codes written by VR_knn back onto the labels of 'clf'.
res <- factor(Z$res, levels = seq_along(levels(clf)), labels = levels(clf))
if (prob)
attr(res, "prob") <- Z$pr
res
}
<bytecode: 0x393c650>
<environment: namespace:class>
>
在这个例子中,您可以看到真正的工作是通过 .C 对外部 C 例程 VR_knn 的调用来完成的。如果您想深入挖掘,可以访问 http://cran.r-project.org/web/packages/class/index.html 并下载这个包的源代码。下载并解压后,您会看到一个名为“src”的文件夹,其中包含 C 代码;在该文件夹中即可找到这个函数的源代码:
/*
 * VR_knn: k-nearest-neighbour classification of every test row against the
 * training rows. All arguments are pointers; results are written into the
 * caller-supplied buffers 'res' and 'pr'.
 *
 *   kin     - number of neighbours k (read once into kinit)
 *   lin     - vote threshold l: when l > 0 a class needs more than
 *             l - 1 + extras votes to be selected
 *   pntr    - number of training rows (ntr)
 *   pnte    - number of test rows (nte)
 *   p       - number of coordinates per row
 *   train   - training data, read as train[row + col * ntr]
 *   class   - class code of each training row; used as an index into votes[]
 *   test    - test data, read as test[row + col * nte]
 *   res     - output: selected class code per test row (0 if none selected)
 *   pr      - output: winning vote count divided by (k + extras) per test row
 *   votes   - scratch counters; entries 0..*nc are zeroed for every test row
 *   nc      - largest class code; codes 1..*nc are scanned for the winner
 *   cv      - when > 0, training row j is skipped while handling test row j
 *   use_all - nonzero: every neighbour tied with the kth distance votes;
 *             zero: neighbours tied at the kth distance are sampled at random
 *
 * Sint, MAX_TIES, EPS, DOUBLE_XMAX, RANDIN, RANDOUT, UNIF and error() are
 * defined outside this excerpt.
 * NOTE(review): RANDIN/RANDOUT presumably bracket use of R's random number
 * generator and UNIF presumably yields a uniform draw in [0, 1) -- confirm
 * against the package headers.
 */
void
VR_knn(Sint *kin, Sint *lin, Sint *pntr, Sint *pnte, Sint *p,
double *train, Sint *class, double *test, Sint *res, double *pr,
Sint *votes, Sint *nc, Sint *cv, Sint *use_all)
{
int i, index, j, k, k1, kinit = *kin, kn, l = *lin, mm, npat, ntie,
ntr = *pntr, nte = *pnte, extras;
/* pos[]: training-row index of each retained neighbour;
   nclass[]: class codes used while sampling among tied neighbours. */
int pos[MAX_TIES], nclass[MAX_TIES];
int j1, j2, needed, t;
/* nndist[]: squared distances of the retained neighbours, kept ascending. */
double dist, tmp, nndist[MAX_TIES];
RANDIN;
/*
Use a 'fence' in the (k+1)st position to avoid special cases.
Simple insertion sort will suffice since k will be small.
*/
/* One pass per test row. */
for (npat = 0; npat < nte; npat++) {
/* kn = number of retained neighbours; starts at k and grows with ties. */
kn = kinit;
/* Fill the first k slots with the large sentinel distance. */
for (k = 0; k < kn; k++)
nndist[k] = 0.99 * DOUBLE_XMAX;
for (j = 0; j < ntr; j++) {
/* With cv set, the training row sharing the test row's index is left out. */
if ((*cv > 0) && (j == npat))
continue;
/* Squared Euclidean distance (no square root is taken). */
dist = 0.0;
for (k = 0; k < *p; k++) {
tmp = test[npat + k * nte] - train[j + k * ntr];
dist += tmp * tmp;
}
/* Use 'fuzz' since distance computed could depend on order of coordinates */
if (dist <= nndist[kinit - 1] * (1 + EPS))
/* Find the first slot holding a larger distance and insert there. */
for (k = 0; k <= kn; k++)
if (dist < nndist[k]) {
/* Shift slots k..kn-1 up by one to open slot k. */
for (k1 = kn; k1 > k; k1--) {
nndist[k1] = nndist[k1 - 1];
pos[k1] = pos[k1 - 1];
}
nndist[k] = dist;
pos[k] = j;
/* Keep an extra distance if the largest current one ties with current kth */
if (nndist[kn] <= nndist[kinit - 1])
/* Abort before the retained list can outgrow the fixed-size arrays. */
if (++kn == MAX_TIES - 1)
error("too many ties in knn");
break;
}
/* Re-establish the fence just past the retained neighbours. */
nndist[kn] = 0.99 * DOUBLE_XMAX;
}
/* Reset the per-class vote counters for this test row. */
for (j = 0; j <= *nc; j++)
votes[j] = 0;
if (*use_all) {
/* The k nearest always vote... */
for (j = 0; j < kinit; j++)
votes[class[pos[j]]]++;
/* ...and so does every further neighbour within fuzz of the kth distance. */
extras = 0;
for (j = kinit; j < kn; j++) {
if (nndist[j] > nndist[kinit - 1] * (1 + EPS))
break;
extras++;
votes[class[pos[j]]]++;
}
} else { /* break ties at random */
extras = 0;
/* Neighbours clearly closer than the kth distance vote unconditionally. */
for (j = 0; j < kinit; j++) {
if (nndist[j] >= nndist[kinit - 1] * (1 - EPS))
break;
votes[class[pos[j]]]++;
}
/* j1 = first retained neighbour that is within fuzz of the kth distance. */
j1 = j;
if (j1 == kinit - 1) { /* no ties for largest */
votes[class[pos[j1]]]++;
} else {
/* Use reservoir sampling to choose amongst the tied distances */
j1 = j;
/* 'needed' = how many of the k votes must come from the tied group. */
needed = kinit - j1;
for (j = 0; j < needed; j++)
nclass[j] = class[pos[j1 + j]];
t = needed;
for (j = j1 + needed; j < kn; j++) {
if (nndist[j] > nndist[kinit - 1] * (1 + EPS))
break;
/* NOTE(review): nclass[] was filled at indices 0..needed-1 above and only
   those indices are read below, yet j2 is offset by j1; this looks
   inconsistent when j1 > 0 -- confirm against upstream before relying on it. */
if (++t * UNIF < needed) {
j2 = j1 + (int) (UNIF * needed);
nclass[j2] = class[pos[j]];
}
}
for (j = 0; j < needed; j++)
votes[nclass[j]]++;
}
}
/* Use reservoir sampling to choose amongst the tied votes */
ntie = 1;
/* mm is the vote count a class must exceed; index 0 means nothing selected. */
if (l > 0)
mm = l - 1 + extras;
else
mm = 0;
index = 0;
for (i = 1; i <= *nc; i++)
if (votes[i] > mm) {
/* New outright leader: restart the tie count. */
ntie = 1;
index = i;
mm = votes[i];
} else if (votes[i] == mm && votes[i] >= l) {
/* Tie with the current best: switch to this class when the draw succeeds. */
if (++ntie * UNIF < 1.0)
index = i;
}
res[npat] = index;
pr[npat] = (double) mm / (kinit + extras);
}
RANDOUT;
}
关于r - 如何知道 r 在幕后做什么,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/19192407/