Logistic Regression
Applicable problem type: binary classification.
Why logistic regression exists: linear regression predicts continuous values, but it cannot solve classification problems, where we must decide from the prediction whether a sample belongs to the positive or the negative class. Logistic regression therefore takes the output of linear regression and maps it into the interval (0, 1) through the Sigmoid function.
Decision function of linear regression: the product of the data matrix and θ, where the data matrix has shape (number of samples × number of features) and θ has shape (number of features × 1).
Passing that product through the Sigmoid function gives the decision function of logistic regression.
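In symbols, assuming the sample matrix $X$ already carries a leading column of ones for the intercept and $\theta$ is the column vector of coefficients:

$z = X\theta, \qquad h_\theta(x) = \sigma(z) = \dfrac{1}{1 + e^{-X\theta}}$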
Reasons for using the Sigmoid function:
It maps any value in (-∞, +∞) into (0, 1), so the output can be read as a probability.
1/2 can then serve as the decision boundary.
Its mathematical properties are good and its derivative is easy to compute (see the identity below this list).
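The last point can be made concrete: the derivative of the Sigmoid is expressible through the function itself,

$\sigma'(z) = \sigma(z)\,(1 - \sigma(z)),$

which is what keeps the gradient of the log loss below so simple.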
The loss function of logistic regression
The loss function of linear regression is the squared loss. If it were reused directly as the loss of logistic regression, the resulting objective would have poor mathematical properties: it is non-convex with many local minima, so gradient descent has trouble reaching the global optimum.
Instead, the log loss (cross-entropy) is used.
Explanation: if a sample is positive, we want the probability p of predicting it as positive to be as large as possible; that probability is exactly the value of the logistic regression decision function, so we want log p to be as large as possible. If a sample is negative, we want the probability of predicting it as negative, (1 - p), to be as large as possible, i.e. log(1 - p) as large as possible.
Why take the logarithm: the training set contains many samples, and their joint probability is a product. Each probability lies between 0 and 1, so the product keeps shrinking and would fall below the available numerical precision; the log transform turns the product into a sum, which does not underflow.
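Written out, with $p_i = h_\theta(x_i)$ the predicted probability that sample $i$ is positive, the likelihood of the training set and its logarithm are

$L(\theta) = \prod_{i=1}^{m} p_i^{\,y_i} (1 - p_i)^{1 - y_i}, \qquad \log L(\theta) = \sum_{i=1}^{m} \bigl[ y_i \log p_i + (1 - y_i) \log(1 - p_i) \bigr].$

Maximizing $\log L(\theta)$ is equivalent to minimizing its negative, which is the log loss used below.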
Loss function (vectorized form): y (an m × 1 vector, m being the number of samples) is the label vector, and $h_\theta(x)$ (also m × 1) is the vector of decision-function values, i.e. the Sigmoid outputs; the bracketed term $y^{T}\log h_\theta(x) + (1-y)^{T}\log(1 - h_\theta(x))$ is therefore a 1 × 1 scalar.
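Adding an L2 penalty that skips the intercept $\theta_0$, exactly as cost_func_reg does in the listing below, the full cost is

$J(\theta) = -\dfrac{1}{m}\bigl[ y^{T}\log h + (1-y)^{T}\log(1-h) \bigr] + \dfrac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^{2}, \qquad h = \sigma(X\theta).$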
Binary-classification logistic regression with a straight-line decision boundary: code implementation
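A minimal sketch of the straight-line case, assuming a linearly separable data set: no polynomial features, plain batch gradient descent on the log loss, and a linear boundary $\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0$ drawn over the scatter plot. The file name data1.txt, its comma-separated two-features-plus-label layout, the feature standardization, and the learning-rate/iteration settings are illustrative assumptions, not the original listing.

import numpy as np
from matplotlib import pyplot as plt


def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


def gradient_descent(x_mat, y_vec, alpha=0.1, iterations=5000):
    """Plain batch gradient descent on the (unregularized) log loss."""
    m, n = x_mat.shape
    theta = np.zeros((n, 1))
    y = y_vec.reshape(-1, 1)
    for _ in range(iterations):
        h = sigmoid(x_mat.dot(theta))        # predicted probabilities, shape (m, 1)
        grad = (1 / m) * x_mat.T.dot(h - y)  # gradient of the log loss, shape (n, 1)
        theta -= alpha * grad
    return theta


if __name__ == "__main__":
    # data1.txt is an assumption: two feature columns plus a 0/1 label, comma separated
    data = np.loadtxt("./data1.txt", delimiter=",")
    # standardize the features so a fixed learning rate behaves regardless of scale
    features = (data[:, 0:2] - data[:, 0:2].mean(axis=0)) / data[:, 0:2].std(axis=0)
    x_mat = np.c_[np.ones(data.shape[0]), features]  # prepend the intercept column
    y_vec = data[:, 2]

    theta = gradient_descent(x_mat, y_vec)

    # scatter the two classes (in standardized coordinates)
    neg, pos = y_vec == 0, y_vec == 1
    plt.scatter(features[neg, 0], features[neg, 1], marker="o", label="negative")
    plt.scatter(features[pos, 0], features[pos, 1], marker="+", label="positive")

    # the decision boundary theta0 + theta1*x1 + theta2*x2 = 0 is a straight line
    x1 = np.linspace(features[:, 0].min(), features[:, 0].max(), 100)
    x2 = -(theta[0, 0] + theta[1, 0] * x1) / theta[2, 0]
    plt.plot(x1, x2, linewidth=2, label="decision boundary")
    plt.legend()
    plt.show()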
Binary-classification logistic regression with a curved decision boundary: code implementation
The data set here is not linearly separable, so the two raw features are expanded into all polynomial terms up to degree 6 with sklearn's PolynomialFeatures; a model that is linear in the expanded features then corresponds to a curved decision boundary in the original feature plane.
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import minimize
from sklearn.preprocessing import PolynomialFeatures


class MyLogisticRegression:
    def __init__(self):
        # SimHei so that Chinese plot labels render correctly
        plt.rcParams["font.sans-serif"] = ["SimHei"]
        # data set containing both features and labels:
        # data2.txt is a comma-separated file with two feature columns and a 0/1 label column
        self.data = np.loadtxt("./data2.txt", delimiter=",")
        self.data_mat = self.data[:, 0:2]
        self.label_mat = self.data[:, 2]
        self.thetas = np.zeros((self.data_mat.shape[1]))

        # generate polynomial features up to degree 6 (includes the bias column)
        self.poly = PolynomialFeatures(6)
        self.p_data_mat = self.poly.fit_transform(self.data_mat)

    def cost_func_reg(self, theta, reg):
        """
        Regularized log-loss (cost) function.
        :param theta: logistic regression coefficients
        :param reg: regularization strength (lambda)
        :return: scalar cost
        """
        m = self.label_mat.size
        label_mat = self.label_mat.reshape(-1, 1)
        h = self.sigmoid(self.p_data_mat.dot(theta.reshape(-1, 1)))
        J = -1 * (1 / m) * (np.log(h).T.dot(label_mat) + np.log(1 - h).T.dot(1 - label_mat)) \
            + (reg / (2 * m)) * np.sum(np.square(theta[1:]))
        J = J.item()  # J is a 1x1 array; minimize() needs a plain scalar
        if np.isnan(J):
            return np.inf
        return J

    def gradient_reg(self, theta, reg):
        """Gradient of the regularized log loss; the intercept term is not regularized."""
        m = self.label_mat.size
        h = self.sigmoid(self.p_data_mat.dot(theta.reshape(-1, 1)))
        label_mat = self.label_mat.reshape(-1, 1)
        grad = (1 / m) * self.p_data_mat.T.dot(h - label_mat) \
            + (reg / m) * np.r_[[[0]], theta[1:].reshape(-1, 1)]
        # minimize() expects the jacobian as a flat 1-D array
        return grad.ravel()

    def gradient_descent_reg(self, alpha=0.01, reg=0, iterations=200):
        """
        Batch gradient descent for regularized logistic regression.
        :param alpha: learning rate
        :param reg: regularization strength (lambda)
        :param iterations: maximum number of iterations
        :return: final coefficients and the list of coefficients per iteration
        """
        m, n = self.p_data_mat.shape
        theta = np.zeros((n, 1))
        theta_set = []
        for i in range(iterations):
            grad = self.gradient_reg(theta, reg)
            theta = theta - alpha * grad.reshape(-1, 1)
            theta_set.append(theta)
        return theta, theta_set

    def plot_data_reg(self, x_label=None, y_label=None,
                      neg_text="negative", pos_text="positive", thetas=None):
        neg = self.label_mat == 0
        pos = self.label_mat == 1
        fig1 = plt.figure(figsize=(12, 8))
        ax1 = fig1.add_subplot(111)
        ax1.scatter(self.p_data_mat[neg][:, 1], self.p_data_mat[neg][:, 2],
                    marker="o", s=100, label=neg_text)
        ax1.scatter(self.p_data_mat[pos][:, 1], self.p_data_mat[pos][:, 2],
                    marker="+", s=100, label=pos_text)
        ax1.set_xlabel(x_label, fontsize=14)
        # draw the logistic regression decision boundary (a curve here)
        if isinstance(thetas, np.ndarray):
            x1_min, x1_max = self.p_data_mat[:, 1].min(), self.p_data_mat[:, 1].max()
            x2_min, x2_max = self.p_data_mat[:, 2].min(), self.p_data_mat[:, 2].max()
            xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
            h = self.sigmoid(self.poly.fit_transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(thetas))
            h = h.reshape(xx1.shape)
            ax1.contour(xx1, xx2, h, [0.5], linewidths=3)
        ax1.legend(fontsize=14)
        plt.show()

    @staticmethod
    def sigmoid(z):
        return 1.0 / (1 + np.exp(-z))


if __name__ == '__main__':
    my_logistic_regression = MyLogisticRegression()
    # my_logistic_regression.plot_data(x_label="线性不可分数据集")
    thetas, theta_set = my_logistic_regression.gradient_descent_reg(alpha=0.5, reg=0, iterations=500)
    my_logistic_regression.plot_data_reg(thetas=thetas, x_label="$\\lambda$ = {}".format(0))

    # minimize() works with a flat 1-D parameter vector
    thetas = np.zeros(my_logistic_regression.p_data_mat.shape[1])
    result = minimize(my_logistic_regression.cost_func_reg, thetas, args=(0, ),
                      method=None, jac=my_logistic_regression.gradient_reg)
    my_logistic_regression.plot_data_reg(thetas=result.x, x_label="$\\lambda$ = {}".format(0))
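When run, the script first fits the coefficients with 500 gradient-descent iterations at $\lambda = 0$ and plots the resulting boundary, then repeats the fit with scipy.optimize.minimize (which defaults to BFGS for an unconstrained problem) and plots the boundary obtained from result.x; both runs expect data2.txt to sit next to the script.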
That covers the Python implementation of logistic regression in detail; for more material on implementing logistic regression in Python, see the other related articles on 服务器之家.
Original article: https://www.cnblogs.com/aitiknowledge/p/12668794.html