线性回归与逻辑回归的实现流程大体一致,因此把二者的公共部分提取为抽象类Regression:它包含整体流程,并声明三个抽象方法(PreVal、CostFun、Update),分别由线性回归和逻辑回归重写。
样本用Sample类表示,其特征以数组形式存储。
1. 样本类Sample
/**
 * A single example with a fixed-size feature vector, a regression target
 * ({@code value}) and a class label ({@code label}).
 */
public class Sample {

    double[] features; // feature values, length feaNum
    int feaNum;        // the number of sample's features
    double value;      // value of sample in regression
    int label;         // class of sample

    /**
     * Creates a sample with room for {@code number} features, all initially 0.
     *
     * @param number the number of features this sample holds
     */
    public Sample(int number) {
        feaNum = number;
        features = new double[feaNum];
    }

    /** Prints the features, the label and the value of this sample to standard output. */
    public void outSample() {
        System.out.println("The sample's features are:");
        for (int i = 0; i < feaNum; i++) {
            System.out.print(features[i] + " ");
        }
        System.out.println();
        System.out.println("The label is: " + label);
        System.out.println("The value is: " + value);
    }
}
2. 抽象类Regression
public abstract class Regression { double[] theta; //parameters
int paraNum; //the number of parameters
double rate; //learning rate
Sample[] sam; // samples
int samNum; // the number of samples
double th; // threshold value /**
* initialize the samples
* @param s : training set
* @param num : the number of training samples
*/
public void Initialize(Sample[] s, int num) {
samNum = num;
sam = new Sample[samNum];
for(int i = 0; i < samNum; i++) {
sam[i] = s[i];
}
} /**
* initialize all parameters
* @param para : theta
* @param learning_rate
* @param threshold
*/
public void setPara(double[] para, double learning_rate, double threshold) {
paraNum = para.length;
theta = para;
rate = learning_rate;
th = threshold;
} /**
* predicte the value of sample s
* @param s : prediction sample
* @return : predicted value
*/
public abstract double PreVal(Sample s); /**
* calculate the cost of all samples
* @return : the cost
*/
public abstract double CostFun(); /**
* update the theta
*/
public abstract void Update(); public void OutputTheta() {
System.out.println("The parameters are:");
for(int i = 0; i < paraNum; i++) {
System.out.print(theta[i] + " ");
}
System.out.println(CostFun());
}
}
3. 线性回归LinearRegression
public class LinearRegression extends Regression{ public double PreVal(Sample s) {
double val = 0;
for(int i = 0; i < paraNum; i++) {
val += theta[i] * s.features[i];
}
return val;
} public double CostFun() {
double sum = 0;
for(int i = 0; i < samNum; i++) {
double d = PreVal(sam[i]) - sam[i].value;
sum += Math.pow(d, 2);
}
return sum / (2*samNum);
} public void Update() {
double former = 0; // the cost before update
double latter = CostFun(); // the cost after updatedouble[] p = new double[paraNum];
do {
former = latter;
//update theta
for(int i = 0; i < paraNum; i++) {
// for theta[i]
double d = 0;
for(int j = 0; j < samNum; j++) {
d += (PreVal(sam[j]) - sam[j].value) * sam[j].features[i];
}
p[i] -= (rate * d) / samNum;
}
theta = p;
latter = CostFun();
if(former - latter < 0){
System.out.println("α is larger!!!");
break;
}
}while(former - latter > th);
} }
4. 逻辑回归LogisticRegression
public class LogisticRegression extends Regression{ public double PreVal(Sample s) {
double val = 0;
for(int i = 0; i < paraNum; i++) {
val += theta[i] * s.features[i];
}
return 1/(1 + Math.pow(Math.E, -val));
} public double CostFun() {
double sum = 0;
for(int i = 0; i < samNum; i++) {
double p = PreVal(sam[i]);
double d = Math.log(p) * sam[i].label + (1 - sam[i].label) * Math.log(1 - p);
sum += d;
}
return -1 * (sum / samNum);
} public void Update() {
double former = 0; // the cost before update
double latter = CostFun(); // the cost after update
double d = 0;
double[] p = new double[paraNum];
do {
former = latter;
//update theta
for(int i = 0; i < paraNum; i++) {
// for theta[i]
double d = 0;
for(int j = 0; j < samNum; j++) {
d += (PreVal(sam[j]) - sam[j].value) * sam[j].features[i];
}
p[i] -= (rate * d) / samNum;
}
latter = CostFun();
if(former - latter < 0){
System.out.println("α is larger!!!");
break;
}
}while(former - latter > th);
theta = p;
}
}
5. 使用的线性回归样本
x0 x1 x2 x3 x4 y
1 2104 5 1 45 460
1 1416 3 2 40 232
1 1534 3 2 30 315
1 852 2 1 36 178
1 1254 3 3 45 321
1 987 2 2 35 241
1 1054 3 2 30 287
1 645 2 3 25 87
1 542 2 1 30 94
1 1065 3 1 25 241
1 2465 7 2 50 687
1 2410 6 1 45 654
1 1987 4 2 45 436
1 457 2 3 35 65
1 587 2 2 25 54
1 468 2 1 40 87
1 1354 3 1 35 215
1 1587 4 1 45 345
1 1789 4 2 35 325
1 2500 8 2 40 720
6. 线性回归测试
import java.io.IOException;
import java.io.RandomAccessFile;

/**
 * Linear regression demo: reads samples from resource//LinearSample.txt,
 * fits a LinearRegression and prints the resulting parameters and cost.
 */
public class Test {

    /**
     * Each non-blank line of the input is whitespace-separated numbers: all
     * columns but the last are features, the last column is the target value.
     *
     * @param args unused
     * @throws IOException if the sample file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // read Sample.txt
        Sample[] sam = new Sample[25];
        int w = 0;

        // try-with-resources closes the file even when parsing fails.
        try (RandomAccessFile file = new RandomAccessFile("resource//LinearSample.txt", "r")) {
            String s;
            while ((s = file.readLine()) != null) {
                s = s.trim();
                if (s.isEmpty()) {
                    continue; // tolerate blank lines
                }
                // NOTE(review): a non-numeric header row (e.g. "x0 x1 ... y")
                // would make parseDouble throw; confirm the file has none.
                if (w == sam.length) {
                    // grow instead of overflowing the initial 25 slots
                    Sample[] bigger = new Sample[sam.length * 2];
                    System.arraycopy(sam, 0, bigger, 0, w);
                    sam = bigger;
                }
                // s --> sample
                String[] sub = s.split("\\s+");
                int last = sub.length - 1;
                sam[w] = new Sample(last);
                for (int i = 0; i < last; i++) {
                    sam[w].features[i] = Double.parseDouble(sub[i]);
                }
                sam[w].value = Double.parseDouble(sub[last]);
                w++;
            }
        }

        LinearRegression lr = new LinearRegression();
        double[] para = {0, 0, 0, 0, 0};
        // NOTE(review): if the features are unscaled and in the thousands, a
        // rate of 0.5 will likely overshoot on the first step; verify.
        double rate = 0.5;
        double th = 0.001;
        lr.Initialize(sam, w);
        lr.setPara(para, rate, th);
        lr.Update();
        lr.OutputTheta();
    }
}
7. 使用的逻辑回归样本
x0 x1 x2 class
1 0.23 0.35 0
1 0.32 0.24 0
1 0.6 0.12 0
1 0.36 0.54 0
1 0.02 0.89 0
1 0.36 -0.12 0
1 -0.45 0.62 0
1 0.56 0.42 0
1 0.4 0.56 0
1 0.46 0.51 0
1 1.2 0.32 1
1 0.6 0.9 1
1 0.32 0.98 1
1 0.2 1.3 1
1 0.15 1.36 1
1 0.54 0.98 1
1 1.36 1.05 1
1 0.22 1.65 1
1 1.65 1.54 1
1 0.25 1.68 1
8. 逻辑回归测试
import java.io.IOException;
import java.io.RandomAccessFile;

/**
 * Logistic regression demo: reads samples from resource//LogisticSample.txt,
 * fits a LogisticRegression and prints the resulting parameters and cost.
 */
public class Test {

    /**
     * Each non-blank line of the input is whitespace-separated tokens: all
     * columns but the last are numeric features, the last column is the
     * integer class label.
     *
     * @param args unused
     * @throws IOException if the sample file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // read Sample.txt
        Sample[] sam = new Sample[25];
        int w = 0;

        // try-with-resources closes the file even when parsing fails.
        try (RandomAccessFile file = new RandomAccessFile("resource//LogisticSample.txt", "r")) {
            String s;
            while ((s = file.readLine()) != null) {
                s = s.trim();
                if (s.isEmpty()) {
                    continue; // tolerate blank lines
                }
                // NOTE(review): a non-numeric header row (e.g. "x0 x1 x2 class")
                // would make parseDouble throw; confirm the file has none.
                if (w == sam.length) {
                    // grow instead of overflowing the initial 25 slots
                    Sample[] bigger = new Sample[sam.length * 2];
                    System.arraycopy(sam, 0, bigger, 0, w);
                    sam = bigger;
                }
                // s --> sample
                String[] sub = s.split("\\s+");
                int last = sub.length - 1;
                sam[w] = new Sample(last);
                for (int i = 0; i < last; i++) {
                    sam[w].features[i] = Double.parseDouble(sub[i]);
                }
                sam[w].label = Integer.parseInt(sub[last]);
                w++;
            }
        }

        LogisticRegression lr = new LogisticRegression();
        double[] para = {0, 0, 0};
        double rate = 0.5;
        double th = 0.001;
        lr.Initialize(sam, w);
        lr.setPara(para, rate, th);
        lr.Update();
        lr.OutputTheta();
    }
}