# kNN分类算法C++与Python实现

2015年9月28日 15756点热度 26人点赞 2条评论

[author]博主前一段时间一直在做嵌入式开发学习,苦于一些廉价传感器的差质量，我的项目一直停滞在demo阶段。遇到了较大的瓶颈，我决定暂时放一放，开始直接入手信息处理层。暑期的数学建模中，我也感觉自己对于数据的处理能力还欠缺，遂开始恶补基础。开始《线性代数》《离散数学》《集体编程的智慧》《廖雪峰的python2.7教程》《机器学习实战》《Ensemble Methods: Foundations and Algorithms》并行学习。[/author]

k-Nearest Neighbor

[newtext title='kNN']

from numpy import *
import operator

def createDataSet():
    """Build the toy training set from "Machine Learning in Action".

    Returns:
        group:  numpy array of shape (4, 2) — one training sample per row.
        labels: list of the class label ('A' or 'B') for each row of group.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels

def classify0(inX, dataSet, labels, k):
    """Classify inX by majority vote among its k nearest training samples.

    Args:
        inX:     feature vector to classify (sequence of numbers).
        dataSet: numpy array of training samples, one row per sample.
        labels:  class label of each row of dataSet.
        k:       number of nearest neighbours to consult.

    Returns:
        The most frequent label among the k samples closest to inX
        (Euclidean distance); ties break toward the first-counted label.
    """
    dataSetSize = dataSet.shape[0]
    # Euclidean distance from inX to every training row.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # Indices of the training rows ordered by ascending distance.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    # Tally one vote per neighbour among the k closest samples.
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # .items() instead of the Python-2-only .iteritems(): identical result
    # on Python 2 and keeps the function working on Python 3.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

[/newtext]

[newtext title='输入如下：']

Python 2.7.9 (default, Dec 10 2014, 12:24:55) [MSC v.1500 32 bit (Intel)] on win32
>>> import kNN
>>> group,labels=kNN.createDataSet()
>>> group
array([[ 1. , 1.1],
[ 1. , 1. ],
[ 0. , 0. ],
[ 0. , 0.1]])
>>> labels
['A', 'A', 'B', 'B']
>>> kNN.classify0([0.5,0.1],group,labels,3)
'B'
>>> kNN.classify0([0.5,0.5],group,labels,2)
'A'
>>> kNN.classify0([0.5,0.5],group,labels,3)
'B'
>>> [/newtext]

[text]

#include<iostream>
#include<map>
#include<vector>
#include<stdio.h>
#include<cmath>
#include<cstdlib>
#include<algorithm>
#include<fstream>

using namespace std;

typedef char tLabel;
typedef double tData;
typedef pair<int,double> PAIR;
const int colLen = 2;
const int rowLen = 12;
ifstream fin;
ofstream fout;

/*
 * Simple k-nearest-neighbour classifier over a fixed-size training set.
 * The constructor loads rowLen samples of colLen features plus a char
 * label from "data.txt" and reads the test point from standard input.
 */
class KNN
{
private:
// training feature matrix: rowLen samples, colLen features each
tData dataSet[rowLen][colLen];
// class label of each training sample
tLabel labels[rowLen];
// the point to classify, read from stdin by the constructor
tData testData[colLen];
// number of neighbours consulted in the majority vote
int k;
// sample index -> distance to testData (filled by get_all_distance)
map<int,double> map_index_dis;
// label -> vote count among the k nearest neighbours
map<tLabel,int> map_label_freq;
// Euclidean distance between two colLen-dimensional points
double get_distance(tData *d1,tData *d2);
public:

// loads "data.txt" and reads the test point; exits the program on failure
KNN(int k);

// computes and prints the distance from testData to every training sample
void get_all_distance();

// votes among the k nearest samples and prints the winning label
void get_max_freq_label();

// strict-weak ordering of <index,distance> pairs by ascending distance,
// used with std::sort in get_max_freq_label
struct CmpByValue
{
bool operator() (const PAIR& lhs,const PAIR& rhs)
{
return lhs.second < rhs.second;
}
};

};

/*
 * Construct the classifier: load the rowLen x colLen training set from
 * "data.txt" (colLen numeric features followed by one char label per
 * sample), then read the colLen-dimensional test point from stdin.
 * Exits the program if the file is missing or malformed.
 */
KNN::KNN(int k)
{
	this->k = k;

	fin.open("data.txt");

	if(!fin)
	{
		cout<<"can not open the file data.txt"<<endl;
		exit(1);
	}

	/* input the dataSet */
	for(int i=0;i<rowLen;i++)
	{
		for(int j=0;j<colLen;j++)
		{
			fin>>dataSet[i][j];
		}
		fin>>labels[i];
	}

	// a truncated or malformed file would leave part of dataSet/labels
	// uninitialized -- fail loudly instead of classifying with garbage
	if(!fin)
	{
		cout<<"data.txt is truncated or malformed"<<endl;
		exit(1);
	}
	// release the file handle as soon as the data is in memory
	fin.close();

	cout<<"please input the test data :"<<endl;
	/* input the test data */
	for(int i=0;i<colLen;i++)
		cin>>testData[i];

}

/*
 * Euclidean distance between two colLen-dimensional points
 * (the test point and one training sample).
 */
double KNN:: get_distance(tData *d1,tData *d2)
{
	double sum = 0;
	for(int i=0;i<colLen;i++)
	{
		// diff*diff instead of pow(diff, 2): pow is a general
		// transcendental and is far slower for small integer exponents
		double diff = d1[i] - d2[i];
		sum += diff * diff;
	}

	return sqrt(sum);
}

/*
* calculate all the distance between test data and each training data
*/
void KNN:: get_all_distance()
{
double distance;
int i;
for(i=0;i<rowLen;i++)
{
distance = get_distance(dataSet[i],testData);
//<key,value> => <i,distance>
map_index_dis[i] = distance;
}

//traverse the map to print the index and distance
map<int,double>::const_iterator it = map_index_dis.begin();
while(it!=map_index_dis.end())
{
cout<<"index = "<<it->first<<" distance = "<<it->second<<endl;
it++;
}
}

/*
 * Classify the test point: sort all training samples by distance,
 * let the k nearest ones vote, and print the most frequent label.
 */
void KNN:: get_max_freq_label()
{
	//transform the map_index_dis to vec_index_dis so it can be sorted by distance
	vector<PAIR> vec_index_dis( map_index_dis.begin(),map_index_dis.end() );
	//sort the vec_index_dis by distance from low to high to get the nearest data
	sort(vec_index_dis.begin(),vec_index_dis.end(),CmpByValue());

	// clamp k to the number of samples: the original indexed past the
	// end of vec_index_dis when the user asked for k > rowLen
	int neighbours = k;
	if( neighbours > (int)vec_index_dis.size() )
		neighbours = (int)vec_index_dis.size();

	for(int i=0;i<neighbours;i++)
	{
		cout<<"the index = "<<vec_index_dis[i].first<<" the distance = "<<vec_index_dis[i].second<<" the label = "<<labels[vec_index_dis[i].first]<<" the coordinate ( "<<dataSet[ vec_index_dis[i].first ][0]<<","<<dataSet[ vec_index_dis[i].first ][1]<<" )"<<endl;
		//calculate the count of each label
		map_label_freq[ labels[ vec_index_dis[i].first ] ]++;
	}

	map<tLabel,int>::const_iterator map_it = map_label_freq.begin();
	// '?' marks "no vote cast" (k <= 0); the original left label uninitialized
	tLabel label = '?';
	int max_freq = 0;
	//find the most frequent label
	while( map_it != map_label_freq.end() )
	{
		if( map_it->second > max_freq )
		{
			max_freq = map_it->second;
			label = map_it->first;
		}
		map_it++;
	}
	cout<<"The test data belongs to the "<<label<<" label"<<endl;
}

/*
 * Entry point: read k, build the classifier (which loads the training
 * set and the test point), then run the distance pass and the vote.
 */
int main()
{
	int k ;
	cout<<"please input the k value : "<<endl;
	// reject non-numeric or non-positive k before it reaches the classifier
	if( !(cin>>k) || k <= 0 )
	{
		cout<<"k must be a positive integer"<<endl;
		return 1;
	}
	KNN knn(k);
	knn.get_all_distance();
	knn.get_max_freq_label();
	system("pause");   // keep the console window open on Windows
	return 0;
}[/text]

Dong Wang

I will work as a PhD student of TU Graz in Austria. My research interests include Embedded/Edge AI, federated learning, computer vision, and IoT.

### 文章评论

• AH主机

博客速度很快啊。不过KNN算法局限也蛮大。

2015年12月20日
• 南国羽

@AH主机 哈哈，我还在学习当中。 :grin:

2015年12月20日