我正在尝试使用k-d树编写不使用任何库的K-NN分类代码。到目前为止,我已经能够为k-d树编写代码,但是我似乎无法理解一旦从训练集中形成树后如何找到k个最近的邻居。
k-d树代码:

#include<bits/stdc++.h>
using namespace std;

const int k = 2; // 2-dimensions

struct Node
{
    int point[k];
    Node *left, *right;
};

struct Node* newNode(int arr[])
{
    struct Node* temp = new Node;

    for (int i=0; i<k; i++)
    temp->point[i] = arr[i];

    temp->left = temp->right = NULL;
    return temp;
}
// Inserts a new node and returns root of modified tree
Node *insertRec(Node *root, int point[], unsigned depth)
{
    if (root == NULL)
    return newNode(point);
    unsigned cd = depth % k;
    if (point[cd] < (root->point[cd]))
        root->left = insertRec(root->left, point, depth + 1);
    else
        root->right = insertRec(root->right, point, depth + 1);

    return root;
}
// Function to insert a new point with given point and return new root
Node* insert(Node *root, int point[])
{
    return insertRec(root, point, 0);
}

// driver
int main()
{
    struct Node *root = NULL;
    int points[][k] = {{3, 6}, {17, 15}, {13, 15}, {6, 12},
                    {9, 1}, {2, 7}, {10, 19}};
    int n = sizeof(points)/sizeof(points[0]);
    for (int i=0; i<n; i++)
    root = insert(root, points[i]);
    return 0;
}

最佳答案

首先不要使用<bits/stdc++.h>。错了

要找到k个最接近的元素,您需要以首先遍历最接近的元素的方式遍历树。然后,如果您没有足够的元素,请遍历更远的元素。

我不会在这里写代码,只写伪代码(因为我已经构建了一个a long time ago):

list l; # list of the elements, sorted by distance
heap p; # heap of nodes to traverse, sorted by distance

p.push(root)
while (!p.empty())
{
    node = p.pop(); # Get a new node
    d = distance(point, node); # compute the closest distance from the point to the node
    if(l.empty() or distance(point, l.back()) > d)
    {
        add(node->left); # iteration on subnodes
        add(node->right);
        l.push(points); # Add points from the current node
    }
    l.pop_elements(k); # pop elements to keep only k
}

关于c++ - 如何从k-d树实现K-NN分类?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/54632821/

10-10 15:52