from numpy import *
import random
import time
st = time.time()  # wall-clock start, reported as "cost time" when run as a script


def loaddata(filename):
    """Load a whitespace-separated numeric data set from ``filename + '.txt'``.

    Every non-blank line holds the feature values followed by the label in
    the last column.

    Args:
        filename: Path of the data file *without* the ``.txt`` extension.

    Returns:
        A ``(trainx, trainy)`` tuple.  ``trainx`` is a list of rows, each
        prefixed with a constant ``1`` (bias term), e.g.
        ``[[1, 12.2, 22.4], [1, 22.3, 31.2], ...]``.  ``trainy`` is a list of
        single-element lists holding the label, e.g. ``[[0.0], [1.0], ...]``.
    """
    trainx = []
    trainy = []
    # ``with`` guarantees the handle is closed even if parsing fails.
    with open(''.join([filename, '.txt'])) as fr:
        for line in fr:
            fields = line.split()
            if not fields:
                # Blank (e.g. trailing) lines carry no sample.
                continue
            trainx.append([1] + [float(value) for value in fields[:-1]])
            trainy.append([float(fields[-1])])
    return trainx, trainy


def sigmod(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    ``z`` may be a scalar or a NumPy array; arrays are handled element-wise.
    """
    return 1.0 / (1 + exp(-z))


def optimizaion(trainx, trainy, max_iter=500):
    """Fit logistic-regression weights with stochastic gradient ascent.

    Each of the ``max_iter`` passes performs ``len(trainx)`` single-sample
    updates.  The sample for every update is drawn uniformly at random (with
    replacement) through the ``random`` module, so seed it for reproducible
    results.  The step size decays as ``0.01 + 4 / (1 + i + j)``.

    Args:
        trainx: Sequence of feature rows (bias term included).
        trainy: Sequence of labels, either flat or as single-element lists.
        max_iter: Number of passes over the data (default 500).

    Returns:
        The weight vector ``beta`` as an ndarray of shape ``(n_features, 1)``.

    Raises:
        ValueError: If ``trainx`` is empty.
    """
    m = len(trainx)
    if m == 0:
        raise ValueError("optimizaion() needs at least one training sample")

    # Plain ndarrays are used instead of the matrix class; ``mat`` is no
    # longer exported by NumPy 2.x.
    features = asarray(trainx, dtype=float)
    labels = asarray(trainy, dtype=float).reshape(-1)
    beta = ones((features.shape[1], 1))
    weights = beta[:, 0]  # 1-D view: in-place updates also change ``beta``

    # Batch gradient-ascent alternative, kept for reference (it needs a fixed
    # step size ``alpha``):
    #     for _ in range(max_iter):
    #         error = labels - sigmod(dot(features, weights))
    #         weights += alpha * dot(features.T, error)

    # Random (stochastic) gradient ascent.
    for j in range(max_iter):
        for i in range(m):
            # Step size shrinks as training progresses but never reaches 0.
            alpha = 0.01 + 4 / (1.0 + i + j)
            randid = random.randint(0, m - 1)
            row = features[randid]
            error = labels[randid] - sigmod(dot(row, weights))
            weights += alpha * row * error
    return beta


def logregress(testx, beta):
    """Classify one sample with the fitted weights.

    Args:
        testx: Feature row (bias term included).
        beta: Weight vector, e.g. the ``(n_features, 1)`` array returned by
            ``optimizaion``.

    Returns:
        ``[1.0]`` if the linear score ``testx . beta`` is positive, otherwise
        ``[0.0]`` (same single-element-list shape as the labels produced by
        ``loaddata``).
    """
    score = dot(asarray(testx, dtype=float).ravel(),
                asarray(beta, dtype=float).ravel())
    return [1.0] if score > 0 else [0.0]


def main():
    """Train on ``horseColicTraining.txt`` and report the error rate on
    ``horseColicTest.txt``.

    Messages are printed as single pre-formatted strings so the output text
    is the same under Python 2 and Python 3.
    """
    # step 1: loading data...
    print("step 1: loading data...")
    trainx, trainy = loaddata('horseColicTraining')
    testx, testy = loaddata('horseColicTest')

    # step 2: training...
    print("step 2: training...")
    beta = optimizaion(trainx, trainy)

    # step 3: testing...
    print("step 3: testing...")
    numTests = 10
    errorSum = 0.0
    total = len(testx)
    # NOTE(review): beta is trained once above, so every pass below reports
    # the same error rate.  If the average is meant to cover independent
    # random training runs, move the optimizaion() call inside this loop.
    for _ in range(numTests):
        errorcount = 0.0
        for sample, label in zip(testx, testy):
            if logregress(sample, beta) != label:
                errorcount += 1
        print("the error rate is:  %s" % (errorcount / total))
        errorSum += errorcount / total
    print("after %d iterations the average error rate is: %f"
          % (numTests, errorSum / numTests))

    # Quick experiment on the small toy data set, kept for reference:
    #     trainx, trainy = loaddata('testSet')
    #     optimizaion(trainx, trainy)


if __name__ == "__main__":
    main()
    print("cost time:  %s" % (time.time() - st))

# Notes on a squared-error ("lineregres") gradient, kept for reference:
#     ssi    = sigmod(zi) - sigmod(zi) ** 2
#     errssi = errori * ssi
#     treri  = errssi * trainxi (vector)
#
# 04-25 03:26