算法一
首先,我们利用Python,按照上一节介绍的感知机算法基本思想,实现感知机算法的原始形式和对偶形式。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
# -*- encoding:utf-8 -*-
# Perceptron learning algorithm (primal form) implemented with NumPy.
"""
Created on 2017.6.7

@author: Ada
"""
import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; training works without it
    plt = None


def _report(*parts):
    """Print *parts* separated by single spaces (works on Python 2 and 3)."""
    print(' '.join(str(part) for part in parts))


# 1. Build the data set
def createdata():
    """Return the toy training set as ``(samples, labels)``.

    ``samples`` is a (4, 2) integer array and ``labels`` is a list holding
    the -1/+1 class label of each row of ``samples``.
    """
    samples = np.array([[3, -3], [4, -3], [1, 1], [1, 2]])
    labels = [-1, -1, 1, 1]
    return samples, labels


# 2. Train the perceptron model
class Perceptron:
    """Binary perceptron trained with the primal update rule.

    For every misclassified sample ``(x_i, y_i)`` the parameters are moved
    by ``w += a * y_i * x_i`` and ``b += a * y_i``.

    Parameters
    ----------
    x : numpy.ndarray
        Training samples, one row per sample (``x.shape`` is read as
        ``(numsamples, numfeatures)``).
    y : sequence
        Class labels (-1 or +1), indexable by sample number.
    a : float, optional
        Learning rate. It used to be silently replaced by 1.
    max_epochs : int or None, optional
        Upper bound on the number of passes over the data. ``None`` (the
        default) keeps looping until a pass makes no mistake, which never
        terminates when the data is not linearly separable.
    """

    def __init__(self, x, y, a=1, max_epochs=None):
        self.x = x
        self.y = y
        self.w = np.zeros((x.shape[1], 1))  # one weight per feature, all 0
        self.b = 0
        self.a = a  # learning rate
        self.max_epochs = max_epochs
        self.numsamples = self.x.shape[0]
        self.numfeatures = self.x.shape[1]

    def sign(self, w, b, x):
        """Return the sign (-1, 0 or 1) of ``w . x + b`` as an int.

        The raw score is no longer truncated with ``int()``: truncation
        turned every score in (-1, 1) into 0, so correctly classified
        points were reported as misclassified for non-integer scores.
        """
        score = np.dot(x, w).item() + b
        return int(np.sign(score))

    def update(self, label_i, data_i):
        """Move ``w`` and ``b`` towards classifying ``data_i`` correctly."""
        step = label_i * self.a * data_i
        self.w = self.w + step.reshape(self.w.shape)
        self.b = self.b + label_i * self.a

    def train(self):
        """Sweep over the samples until one full pass makes no mistake.

        Returns
        -------
        (w, b) : the learned weight column vector and bias.

        Raises
        ------
        RuntimeError
            If ``max_epochs`` is set and that many passes were not enough.
        """
        epoch = 0
        while True:
            mistakes = 0
            for i in range(self.numsamples):
                sample = self.x[i, :]
                # A point on the wrong side of (or exactly on) the
                # hyperplane counts as misclassified.
                if self.sign(self.w, self.b, sample) * self.y[i] <= 0:
                    _report('误分类点为:', sample, '此时的w和b为:', self.w, self.b)
                    mistakes += 1
                    self.update(self.y[i], sample)
            if mistakes == 0:
                _report('最终训练得到的w和b为:', self.w, self.b)
                return self.w, self.b
            epoch += 1
            if self.max_epochs is not None and epoch >= self.max_epochs:
                raise RuntimeError(
                    'perceptron did not converge within %d epochs'
                    % self.max_epochs)


# 3. Draw the result
class Picture:
    """Plot the toy samples together with the learned separating line.

    The figure is drawn and written to ``2d.png`` as soon as the object is
    created; call :meth:`Show` to display it on screen.
    """

    def __init__(self, data, w, b):
        if plt is None:
            raise ImportError('matplotlib is required to draw the picture')
        self.b = b
        self.w = w
        plt.figure(1)
        plt.title('Perceptron Learning Algorithm', size=14)
        plt.xlabel('x0-axis', size=14)
        plt.ylabel('x1-axis', size=14)

        xData = np.linspace(0, 5, 100)
        yData = self.expression(xData)
        plt.plot(xData, yData, color='r', label='sample data')

        # The first two samples use the default marker, the next two 'x'.
        for point in data[:2]:
            plt.scatter(point[0], point[1], s=50)
        for point in data[2:4]:
            plt.scatter(point[0], point[1], s=50, marker='x')
        plt.savefig('2d.png', dpi=75)

    def expression(self, x):
        """Solve ``w[0] * x0 + w[1] * x1 + b = 0`` for x1, given x0 = *x*.

        x0 is the horizontal axis (independent variable) and x1 the
        vertical one; ``w[1]`` must be non-zero.
        """
        y = (-self.b - self.w[0] * x) / self.w[1]
        return y

    def Show(self):
        """Display the figure."""
        plt.show()


if __name__ == '__main__':
    samples, labels = createdata()
    myperceptron = Perceptron(x=samples, y=labels)
    weights, bias = myperceptron.train()
    # Use a lower-case name so the Picture class is not overwritten.
    picture = Picture(samples, weights, bias)
    picture.Show()
实验结果:
误分类点为: [ 3 -3] 此时的w和b为: [[ 0.]
[ 0.]] 0
误分类点为: [1 1] 此时的w和b为: [[-3.]
[ 3.]] -1
最终训练得到的w和b为: [[-2.]
[ 4.]] 0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
# -*- encoding:utf-8 -*-
# Perceptron learning algorithm (dual form) implemented with NumPy.
"""
Created on 2017.6.7

@author: Ada
"""
import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; training works without it
    plt = None


def _report(*parts):
    """Print *parts* separated by single spaces (works on Python 2 and 3)."""
    print(' '.join(str(part) for part in parts))


# 1. Build the data set
def createdata():
    """Return the toy training set as ``(samples, labels)`` arrays.

    ``samples`` is a (4, 2) integer array and ``labels`` holds the -1/+1
    class label of each row of ``samples``.
    """
    samples = np.array([[3, -3], [4, -3], [1, 1], [1, 2]])
    labels = np.array([-1, -1, 1, 1])
    return samples, labels


# 2. Train the perceptron model
class Perceptron:
    """Binary perceptron trained with the dual update rule.

    ``self.w`` has shape ``(1, numsamples)`` and stores one coefficient
    (alpha) per training sample. A mistake on sample ``i`` performs
    ``alpha_i += a`` and ``b += a * y_i``; the weight vector is recovered
    by :meth:`cal_w` as ``sum_i alpha_i * y_i * x_i``.

    Only the coefficient of the misclassified sample is updated. The
    previous ``self.w[i,] += a`` indexed a row of the (1, N) array, so it
    bumped every alpha when ``i == 0`` and raised IndexError for any other
    ``i``. With the per-sample update, the toy data set yields
    ``w = [[-2, 4]]`` and ``b = 0``.

    Parameters
    ----------
    x : numpy.ndarray
        Training samples, one row per sample.
    y : sequence
        Class labels (-1 or +1), indexable by sample number.
    a : float, optional
        Learning rate. It used to be silently replaced by 1.
    max_epochs : int or None, optional
        Upper bound on the number of passes over the data. ``None`` (the
        default) keeps looping until a pass makes no mistake, which never
        terminates when the data is not linearly separable.
    """

    def __init__(self, x, y, a=1, max_epochs=None):
        self.x = x
        self.y = y
        self.w = np.zeros((1, x.shape[0]))  # one alpha per sample, all 0
        self.b = 0
        self.a = a  # learning rate
        self.max_epochs = max_epochs
        self.numsamples = self.x.shape[0]
        self.numfeatures = self.x.shape[1]
        self.gMatrix = self.cal_gram(self.x)

    def cal_gram(self, x):
        """Return the Gram matrix ``G[i][j] = x[i] . x[j]`` of *x* as floats."""
        x = np.asarray(x, dtype=float)
        return np.dot(x, x.T)

    def sign(self, w, b, key):
        """Return the sign (-1, 0 or 1) of the score of training sample *key*.

        The score is ``sum_j w_j * y_j * G[j][key] + b``. It is no longer
        truncated with ``int()``, which turned every score in (-1, 1)
        into 0.
        """
        score = np.dot(w * self.y, self.gMatrix[:, key]).item() + b
        return int(np.sign(score))

    def update(self, i):
        """Record one more mistake on training sample *i*."""
        self.w[0, i] += self.a
        self.b = self.b + self.y[i] * self.a

    def cal_w(self):
        """Return the primal weight vector, shape ``(1, numfeatures)``."""
        return np.dot(self.w * self.y, self.x)

    def train(self):
        """Sweep over the samples until one full pass makes no mistake.

        Returns
        -------
        (weights, b) : the primal weights from :meth:`cal_w` and the bias.

        Raises
        ------
        RuntimeError
            If ``max_epochs`` is set and that many passes were not enough.
        """
        epoch = 0
        while True:
            mistakes = 0
            for i in range(self.numsamples):
                # A point on the wrong side of (or exactly on) the
                # hyperplane counts as misclassified.
                if self.sign(self.w, self.b, i) * self.y[i] <= 0:
                    _report('误分类点为:', self.x[i, :], '此时的w和b为:',
                            self.cal_w(), ',', self.b)
                    mistakes += 1
                    self.update(i)
            if mistakes == 0:
                _report('最终训练得到的w和b为:', self.cal_w(), ',', self.b)
                return self.cal_w(), self.b
            epoch += 1
            if self.max_epochs is not None and epoch >= self.max_epochs:
                raise RuntimeError(
                    'perceptron did not converge within %d epochs'
                    % self.max_epochs)


# 3. Draw the result
class Picture:
    """Plot the toy samples together with the learned separating line.

    The figure is drawn and written to ``2d.png`` as soon as the object is
    created; call :meth:`Show` to display it on screen.
    """

    def __init__(self, data, w, b):
        if plt is None:
            raise ImportError('matplotlib is required to draw the picture')
        self.b = b
        self.w = w
        plt.figure(1)
        plt.title('Perceptron Learning Algorithm', size=14)
        plt.xlabel('x0-axis', size=14)
        plt.ylabel('x1-axis', size=14)

        xData = np.linspace(0, 5, 100)
        yData = self.expression(xData)
        plt.plot(xData, yData, color='r', label='sample data')

        # The first two samples use the default marker, the next two 'x'.
        for point in data[:2]:
            plt.scatter(point[0], point[1], s=50)
        for point in data[2:4]:
            plt.scatter(point[0], point[1], s=50, marker='x')
        plt.savefig('2d.png', dpi=75)

    def expression(self, x):
        """Solve ``w0 * x0 + w1 * x1 + b = 0`` for x1, given x0 = *x*.

        ``self.w`` is indexed as a (1, 2) row vector; ``w1`` must be
        non-zero.
        """
        y = (-self.b - self.w[:, 0] * x) / self.w[:, 1]
        return y

    def Show(self):
        """Display the figure."""
        plt.show()


if __name__ == '__main__':
    samples, labels = createdata()
    myperceptron = Perceptron(x=samples, y=labels)
    weights, bias = myperceptron.train()
    # Use a lower-case name so the Picture class is not overwritten.
    picture = Picture(samples, weights, bias)
    picture.Show()
实验结果:
误分类点为: [ 3 -3] 此时的w和b为: [[ 0. 0.]] , 0
最终训练得到的w和b为: [[-5. 9.]] , -1
通过以上实验结果可以看出,两种方法得到的结果是不同的:一方面,是由于两种优化方法不同;另一方面,也与选择实例点的顺序有关。但是无论用哪种方法,都可以找到一条直线,把数据完全分开。实际上,就算使用同一算法,如果改变初始值w0、b0,或者改变选择实例点的顺序,也可以使得结果不同。
算法二
Python的机器学习包sklearn中也包含了感知机学习算法,我们可以直接调用。因为感知机算法属于线性模型,所以从sklearn.linear_model中import。下面给出例子。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
# -*- encoding:utf-8 -*-
"""
Experiment with the perceptron learning algorithm shipped in scikit-learn.

Created on 2017.6.7

@author: Ada
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Perceptron


# Build the data set from literal lists
def creatdata1():
    """Return the toy training set as ``(samples, labels)`` arrays."""
    samples = np.array([[3, -3], [4, -3], [1, 1], [1, 2]])
    labels = np.array([-1, -1, 1, 1])
    return samples, labels


def _make_perceptron(epochs, **options):
    """Build a Perceptron that makes exactly *epochs* passes over the data.

    scikit-learn renamed ``n_iter`` to ``max_iter`` and later removed the
    old name; ``tol=None`` disables early stopping so that ``max_iter``
    behaves like the former ``n_iter``. Releases that predate ``max_iter``
    reject it with TypeError, in which case ``n_iter`` is used.
    """
    try:
        return Perceptron(max_iter=epochs, tol=None, **options)
    except TypeError:
        return Perceptron(n_iter=epochs, **options)


def MyPerceptron(samples, labels):
    """Fit a scikit-learn Perceptron and return ``(weights, bias)``.

    ``weights`` is the estimator's ``coef_`` and ``bias`` its
    ``intercept_``. Samples are visited in their given order
    (``shuffle=False``) for 30 passes.
    """
    # Define the perceptron
    clf = _make_perceptron(30, fit_intercept=True, shuffle=False)
    # Train it
    clf.fit(samples, labels)
    # Weight matrix and intercept of the fitted model
    weights = clf.coef_
    bias = clf.intercept_
    return weights, bias


# Draw the result
class Picture:
    """Plot the toy samples together with the learned separating line.

    The figure is drawn and written to ``3d.png`` as soon as the object is
    created; call :meth:`Show` to display it on screen.
    """

    def __init__(self, data, w, b):
        self.b = b
        self.w = w
        plt.figure(1)
        plt.title('Perceptron Learning Algorithm', size=14)
        plt.xlabel('x0-axis', size=14)
        plt.ylabel('x1-axis', size=14)

        xData = np.linspace(0, 5, 100)
        yData = self.expression(xData)
        plt.plot(xData, yData, color='r', label='sample data')

        # The first two samples use the default marker, the next two 'x'.
        for point in data[:2]:
            plt.scatter(point[0], point[1], s=50)
        for point in data[2:4]:
            plt.scatter(point[0], point[1], s=50, marker='x')
        plt.savefig('3d.png', dpi=75)

    def expression(self, x):
        """Solve ``w0 * x0 + w1 * x1 + b = 0`` for x1, given x0 = *x*.

        ``self.w`` is indexed as a (1, 2) row vector; ``w1`` must be
        non-zero.
        """
        y = (-self.b - self.w[:, 0] * x) / self.w[:, 1]
        return y

    def Show(self):
        """Display the figure."""
        plt.show()


if __name__ == '__main__':
    samples, labels = creatdata1()
    weights, bias = MyPerceptron(samples, labels)
    print('%s %s %s %s' % ('最终训练得到的w和b为:', weights, ',', bias))
    # Use a lower-case name so the Picture class is not overwritten.
    picture = Picture(samples, weights, bias)
    picture.Show()
实验结果:
最终训练得到的w和b为: [[-2. 4.]] , [ 0.]
算法三
利用sklearn包中的感知器算法,并进行测试与评估
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
# -*- encoding:utf-8 -*-
"""
Train scikit-learn's Perceptron on a synthetic data set and evaluate it
on held-out test data.
"""
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases without model_selection
    from sklearn.cross_validation import train_test_split


# Generate the data set algorithmically
def creatdata(random_state=None):
    """Return a random two-class data set ``(x, y)``.

    ``x`` holds the generated samples (1000 rows, 2 features) and ``y``
    the class of each sample, encoded as 1 (positive) and 0 (negative),
    e.g. ``y = [0 0 0 1 0 1 1 1 ... 1 0 0 1 1 0]``.

    Arguments passed to ``make_classification``:

    * ``n_samples``: number of generated samples.
    * ``n_features``: number of features per sample; it equals
      n_informative + n_redundant + n_repeated.
    * ``n_informative``: number of informative features.
    * ``n_redundant``: number of redundant features, i.e. random linear
      combinations of the informative ones.
    * ``n_clusters_per_class``: number of clusters each class is made of.

    Parameters
    ----------
    random_state : int or None, optional
        Seed forwarded to ``make_classification``. ``None`` (the default)
        gives a different data set on every call, as before.
    """
    x, y = make_classification(n_samples=1000, n_features=2, n_redundant=0,
                               n_informative=1, n_clusters_per_class=1,
                               random_state=random_state)
    return x, y


def _make_perceptron(epochs, **options):
    """Build a Perceptron that makes exactly *epochs* passes over the data.

    scikit-learn renamed ``n_iter`` to ``max_iter`` and later removed the
    old name; ``tol=None`` disables early stopping so that ``max_iter``
    behaves like the former ``n_iter``. Releases that predate ``max_iter``
    reject it with TypeError, in which case ``n_iter`` is used.
    """
    try:
        return Perceptron(max_iter=epochs, tol=None, **options)
    except TypeError:
        return Perceptron(n_iter=epochs, **options)


def main():
    """Generate data, train, print the test accuracy and plot the result."""
    x, y = creatdata()

    # Split the generated samples into training and test data
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=0)

    # Separate positive and negative samples (used for plotting only)
    positive = x[y == 1]
    negative = x[y == 0]

    # Define the perceptron and train it on the training data
    clf = _make_perceptron(50, fit_intercept=True, shuffle=False)
    clf.fit(x_train, y_train)

    # Learned weight matrix and intercept
    weights = clf.coef_
    bias = clf.intercept_

    # Validate the trained model on the test data; score() returns the
    # mean accuracy on the given test data and labels.
    acc = clf.score(x_test, y_test)
    print('平均精确度为:%.2f' % (acc * 100.0))

    # Scatter plot of the positive and negative samples
    plt.scatter(positive[:, 0], positive[:, 1], c='red')
    plt.scatter(negative[:, 0], negative[:, 1], c='blue')

    # Separating hyperplane (a straight line here):
    #   w0 * x + w1 * y + b = 0  =>  y = -(w0 * x + b) / w1
    # The intercept has to be divided by w1 as well.
    line_x = np.arange(-4, 4)
    line_y = -(weights[0][0] * line_x + bias[0]) / weights[0][1]
    plt.plot(line_x, line_y)
    plt.show()


if __name__ == '__main__':
    main()
实验结果为:平均精确度为:96.00
通过算法二和算法三可以看出,直接调用开源包里面的算法还是比较简单的,思路是通用的。
算法四
我们利用sklearn包中的感知机算法进行分类算法的实现。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# -*- encoding:utf-8 -*-
'''
Classification with the Perceptron from scikit-learn.

Five steps of applying and studying a classification algorithm:
(1) select the features
(2) choose a performance metric
(3) choose a classifier and an optimization algorithm
(4) evaluate the performance of the model
(5) tune the algorithm

Walk-through with scikit-learn's Perceptron:
1 load the data (iris)
2 split it into training and test sets
3 standardize the feature values
4 train the perceptron model
5 predict with the trained model
6 compute the performance metrics
7 draw the decision regions
'''
import re
import warnings

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases without model_selection
    from sklearn.cross_validation import train_test_split

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

# Split into training and test data, 7:3
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Standardize the features using statistics of the training set only
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

# Train scikit-learn's Perceptron. ``n_iter`` was renamed to ``max_iter``
# and later removed; ``tol=None`` disables early stopping so that all 40
# passes are made. Releases that predate ``max_iter`` reject it with
# TypeError, in which case ``n_iter`` is used.
try:
    ppn = Perceptron(max_iter=40, tol=None, eta0=0.01, random_state=0)
except TypeError:
    ppn = Perceptron(n_iter=40, eta0=0.01, random_state=0)
ppn.fit(x_train_std, y_train)

y_pred = ppn.predict(x_test_std)
print('错误分类数:%d' % (y_test != y_pred).sum())
print('准确率为:%.2f' % accuracy_score(y_test, y_pred))


# Plotting of the decision regions
def versiontuple(v):
    """Turn a version string such as ``'1.9.0'`` into ``(1, 9, 0)``.

    Only the leading digits of each dot-separated component are used and
    parsing stops at the first component without any, so ``'1.26.0rc1'``
    gives ``(1, 26, 0)`` instead of raising ValueError.
    """
    parts = []
    for piece in v.split('.'):
        digits = re.match(r'\d+', piece)
        if digits is None:
            break
        parts.append(int(digits.group()))
    return tuple(parts)


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Draw the decision regions of *classifier* over the samples *X*.

    Parameters
    ----------
    X : array indexed as ``X[:, 0]`` / ``X[:, 1]`` (two feature columns)
    y : class label of each row of *X*
    classifier : fitted estimator providing ``predict``
    test_idx : iterable of row indices to highlight as test samples, or
        None to highlight nothing
    resolution : step of the grid on which the classifier is evaluated
    """
    # Marker and colour of each class
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Decision surface: predict every point of a grid around the data
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # All samples, one scatter call per class. ``color=`` is used because
    # a single RGBA tuple passed as ``c=`` is ambiguous to matplotlib.
    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, color=colors[idx],
                    marker=markers[idx], label=cl)

    # Highlight the test samples with hollow circles
    test_rows = [] if test_idx is None else list(test_idx)
    if test_rows:
        if versiontuple(np.__version__) < versiontuple('1.9.0'):
            warnings.warn('Please update to NumPy 1.9.0 or newer')
        X_test = X[test_rows, :]
        # ``c=''`` is rejected by current matplotlib; hollow markers are
        # requested with facecolors='none' plus an explicit edge colour.
        plt.scatter(X_test[:, 0], X_test[:, 1],
                    facecolors='none', edgecolors='black',
                    alpha=1.0, linewidth=1, marker='o',
                    s=55, label='test set')


def plot_result():
    """Plot the decision regions for the combined training and test data."""
    X_combined_std = np.vstack((x_train_std, x_test_std))
    y_combined = np.hstack((y_train, y_test))
    # The test rows are stacked after the training rows.
    plot_decision_regions(X=X_combined_std, y=y_combined,
                          classifier=ppn,
                          test_idx=range(len(y_train), len(y_combined)))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.show()


plot_result()
实验结果为:错误分类数:4;准确率为:0.91
<完>
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:http://blog.csdn.net/u010626937/article/details/72896144#算法一