1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
import operator
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
class KNN:
    """Brute-force k-nearest-neighbour classifier.

    Holds a labelled reference set and, for each query point, predicts the
    majority label among the ``k`` reference points closest to it.
    """

    def __init__(self, samples, s_labels, k=3):
        """Store the labelled reference set and the neighbour count.

        Args:
            samples: Reference points, one per row (array or nested list).
            s_labels: One label per reference point, in the same order.
            k: Number of nearest neighbours that take part in the vote.
        """
        # Coerce to arrays so that plain lists work with the element-wise
        # arithmetic and index-array lookups performed in TopKPredict.
        self.samples = np.asarray(samples)
        self.s_labels = np.asarray(s_labels)
        self.k = k

    def distance(self, trains, samples, flag):
        """Return the distance between two points.

        Args:
            trains: First point (the query point when called by TopKPredict).
            samples: Second point (a reference point when called by
                TopKPredict).
            flag: ``2`` selects the Manhattan (L1) distance; any other value
                selects the Euclidean (L2) distance.

        Returns:
            The distance, reduced over every element of the difference.
        """
        diff = np.asarray(trains) - np.asarray(samples)
        if flag != 2:
            return np.sqrt(np.sum(np.square(diff)))
        return np.sum(np.fabs(diff))

    def vote(self, s_labels):
        """Return the most frequent label in ``s_labels``.

        Ties go to the label that appears first in ``s_labels``. An empty
        input raises ``IndexError``.
        """
        return Counter(s_labels).most_common(1)[0][0]

    def TopKPredict(self, trains, flag=1):
        """Predict a label for every query point in ``trains``.

        Args:
            trains: Query points, one per row (array or nested list).
            flag: Distance selector forwarded to ``distance``: ``2`` for
                Manhattan, anything else for Euclidean.

        Returns:
            A numpy array holding one predicted label per query point.
        """
        # Re-coerce here so the method also works if the attributes were
        # reassigned to plain lists after construction.
        samples = np.asarray(self.samples)
        labels = np.asarray(self.s_labels)

        predictions = []
        for query in np.asarray(trains):
            dists = [self.distance(query, sample, flag) for sample in samples]
            # A stable sort makes the choice among equidistant neighbours
            # deterministic: the one with the lower index is kept.
            nearest = np.argsort(dists, kind="stable")[:self.k]
            predictions.append(self.vote(labels[nearest]))
        return np.array(predictions)
class Picture:
    """Scatter plot of the reference points and the classified query points.

    Construction draws both point sets onto the current pyplot figure;
    nothing is stored on the instance. Call ``Show`` to display the figure.
    """

    def __init__(self, samples, s_labels, trains, t_labels):
        """Draw both point sets, coloured by label.

        Args:
            samples: Reference points; columns 0 and 1 are plotted as x and y.
            s_labels: Labels of the reference points.
            trains: Query points; columns 0 and 1 are plotted as x and y.
            t_labels: Labels assigned to the query points.
        """
        def colour_of(label):
            # Label 1 is drawn in 'b'; every other label in 'y'.
            return 'b' if label == 1 else 'y'

        s_colors = [colour_of(label) for label in s_labels]
        t_colors = [colour_of(label) for label in t_labels]

        # Reference points use the '.' marker, query points the 'x' marker.
        plt.scatter(samples[:, 0], samples[:, 1], c=s_colors, marker='.')
        plt.scatter(trains[:, 0], trains[:, 1], c=t_colors, marker='x')

    def Show(self):
        """Display the current pyplot figure."""
        plt.show()
def createdata():
    """Generate a random two-class 2-D data set plus a few query points.

    Returns:
        A tuple ``(samples, s_labels, trains)``:
        ``samples`` stacks 50 points drawn from a normal distribution with
        mean 10 and standard deviation 2 on top of 60 points drawn with
        mean 5 and standard deviation 2;
        ``s_labels`` holds ``1`` for the first 50 rows and ``-1`` for the
        remaining 60;
        ``trains`` is a 5x2 array from ``np.random.randint(2, 12)``.
    """
    n_first, n_second, n_queries = 50, 60, 5

    # The three draws stay in this order so that results under a fixed
    # global seed do not change.
    first_cluster = np.random.normal(loc=10, scale=2, size=(n_first, 2))
    second_cluster = np.random.normal(loc=5, scale=2, size=(n_second, 2))
    trains = np.random.randint(2, 12, size=[n_queries, 2])

    first_labels = np.array([1] * n_first)
    second_labels = np.array([-1] * n_second)

    samples = np.concatenate((first_cluster, second_cluster), axis=0)
    s_labels = np.concatenate((first_labels, second_labels), axis=0)
    return samples, s_labels, trains
if __name__ == '__main__':
    # Demo: generate random data, classify the query points with a
    # 1-nearest-neighbour vote (flag=1 selects Euclidean distance),
    # then plot the reference points and the labelled query points.
    samples, s_labels, trains = createdata()
    t_labels = KNN(samples, s_labels, k=1).TopKPredict(trains, flag=1)
    Picture(samples, s_labels, trains, t_labels).Show()
|