1. Principles
Cost function:

J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \Bigl[ y^{(i)} \log h_\theta(x^{(i)}) + \bigl(1 - y^{(i)}\bigr) \log\bigl(1 - h_\theta(x^{(i)})\bigr) \Bigr]

where the hypothesis is the sigmoid function

h_\theta(x) = \frac{1}{1 + e^{-\theta^{T} x}}

Theta update (batch gradient descent with learning rate \alpha):

\theta := \theta - \frac{\alpha}{m} X^{T} \bigl( h_\theta(X) - y \bigr)
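
The update rule follows from differentiating J(\theta); a one-line sketch of the standard derivation (not spelled out in the original):

\frac{\partial J}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} \bigl( h_\theta(x^{(i)}) - y^{(i)} \bigr)\, x_j^{(i)}
\quad\Longrightarrow\quad
\nabla_\theta J = \frac{1}{m} X^{T} \bigl( h_\theta(X) - y \bigr)

using the sigmoid identity h'(z) = h(z)\bigl(1 - h(z)\bigr). This gradient is exactly the delta variable computed in both implementations below.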
2. Python
# -*- coding:utf8 -*-
import numpy as np
import matplotlib.pyplot as plt


def cost_function(input_X, _y, theta):
    """
    cost function of binary classification using logistic regression
    :param input_X: np.matrix input X, shape (m, n)
    :param _y: np.matrix labels y, shape (m, 1)
    :param theta: np.matrix theta, shape (n, 1)
    :return: float value of the cost J
    """
    m = input_X.shape[0]
    # sigmoid hypothesis h = 1 / (1 + exp(-X * theta)), element-wise
    z = input_X * theta
    h = np.asmatrix(1 / np.asarray(1 + np.exp(-z)))
    # cross-entropy cost; the leading minus sign keeps J >= 0
    J = -1.0 / m * (_y.T * np.log(h) + (1 - _y).T * np.log(1 - h))
    return J[0, 0]


def gradient_descent(input_X, _y, theta, learning_rate=0.1,
                     iterate_times=3000):
    """
    gradient descent of logistic regression
    :param input_X: np.matrix input X, shape (m, n)
    :param _y: np.matrix labels y, shape (m, 1)
    :param theta: np.matrix theta, shape (n, 1)
    :param learning_rate: float learning rate
    :param iterate_times: int max iteration times
    :return: tuple (theta, list of cost values per iteration)
    """
    m = input_X.shape[0]
    Js = []
    for i in range(iterate_times):
        z = input_X * theta
        h = np.asmatrix(1 / np.asarray(1 + np.exp(-z)))
        errors = h - _y
        # gradient of J: (1/m) * X^T * (h - y)
        delta = 1.0 / m * (errors.T * input_X).T
        theta -= learning_rate * delta
        Js.append(cost_function(input_X, _y, theta))
    return theta, Js
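
A minimal usage sketch (an illustrative assumption, not part of the original post): it builds a toy two-blob dataset, prepends a column of ones for the intercept term, runs gradient_descent, and plots the cost history with the matplotlib import above.

# Illustrative usage only -- the toy data below is an assumption,
# not the original post's dataset.
if __name__ == '__main__':
    np.random.seed(0)
    # two Gaussian blobs, 50 points each, labeled 0 and 1
    class0 = np.random.randn(50, 2)
    class1 = np.random.randn(50, 2) + np.array([3.0, 3.0])
    X_raw = np.vstack([class0, class1])
    # prepend a column of ones for the intercept term theta_0
    input_X = np.asmatrix(np.hstack([np.ones((100, 1)), X_raw]))
    _y = np.asmatrix(np.vstack([np.zeros((50, 1)), np.ones((50, 1))]))
    theta = np.asmatrix(np.zeros((3, 1)))

    theta, Js = gradient_descent(input_X, _y, theta)
    print('learned theta:\n', theta)

    # the cost should decrease monotonically for a suitable learning rate
    plt.plot(Js)
    plt.xlabel('iteration')
    plt.ylabel('cost J')
    plt.show()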
3. C++
#include <iostream>
#include <vector>
#include <Eigen/Dense>

using namespace std;
using namespace Eigen;

double cost_function(MatrixXd &input_X, MatrixXd &_y, MatrixXd &theta) {
    double m = input_X.rows();
    // sigmoid hypothesis h = 1 / (1 + exp(-X * theta)), element-wise
    ArrayXd _z = -(input_X * theta).array();
    ArrayXd h = 1.0 / (1.0 + _z.exp());
    // cross-entropy cost J = -(1/m) * (y^T log(h) + (1 - y)^T log(1 - h))
    double J = -1.0 / m * ((_y.transpose() * h.log().matrix())(0, 0)
               + ((1.0 - _y.array()).matrix().transpose()
                  * (1.0 - h).log().matrix())(0, 0));
    return J;
}

class GradientDescent {
public:
    GradientDescent(MatrixXd &x, MatrixXd &y, MatrixXd &t, double r,
                    int i) : theta(t), input_X(x), _y(y), learning_rate(r),
                    iterate_times(i) {}
    MatrixXd theta;
    vector<double> Js;
    void run();
private:
    MatrixXd input_X;
    MatrixXd _y;
    double learning_rate;
    int iterate_times;
};

void GradientDescent::run() {
    double rows = input_X.rows();
    for (int i = 0; i < iterate_times; ++i) {
        ArrayXd _z = -(input_X * theta).array();
        ArrayXd h = 1.0 / (1.0 + _z.exp());
        MatrixXd errors = h.matrix() - _y;
        // gradient of J: (1/rows) * X^T * (h - y)
        MatrixXd delta = 1.0 / rows * (errors.transpose() * input_X).transpose();
        theta -= learning_rate * delta;
        double J = cost_function(input_X, _y, theta);
        Js.push_back(J);
    }
}
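
A minimal driver for the class, again an illustrative sketch rather than the original author's code; the four-point dataset is an assumption, and the scalar-array arithmetic above assumes Eigen 3.3+.

// Illustrative driver, not part of the original post: a tiny linearly
// separable dataset with an intercept column of ones.
int main() {
    MatrixXd input_X(4, 3);
    input_X << 1, 0.5, 0.3,
               1, 0.8, 0.6,
               1, 2.5, 2.8,
               1, 3.0, 2.4;
    MatrixXd _y(4, 1);
    _y << 0, 0, 1, 1;
    MatrixXd theta = MatrixXd::Zero(3, 1);

    GradientDescent gd(input_X, _y, theta, 0.1, 3000);
    gd.run();

    cout << "learned theta:\n" << gd.theta << endl;
    cout << "final cost: " << gd.Js.back() << endl;
    return 0;
}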