我从本文中得到实现我的深度特征选择版本的想法,http://link.springer.com/chapter/10.1007%2F978-3-319-16706-0_20
根据本文的深度特征选择的基本思想是,在任何完全连接的隐藏层之前添加一个一对一的映射层,然后添加一个正则化项(套索或弹性网)以在输入层权重中产生零。
我的问题是,尽管似乎我已经很好地实现了深度特征选择框架,但对numpy.rand.random(1000,50)生成的随机数据进行测试却无法给我初始权重任何零。套索是否像正则化一样常见?我是否要调整用于该框架的参数(甚至更大的时期)?还是我的代码做错了什么。
class DeepFeatureSelectionMLP:
def __init__(self, X, Y, hidden_dims=[100], epochs=1000,
lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0, learning_rate=0.1):
# Initiate the input layer
# Get the dimension of the input X
n_sample, n_feat = X.shape
n_classes = len(np.unique(Y))
# One hot Y
one_hot_Y = np.zeros((len(Y), n_classes))
for i,j in enumerate(Y):
one_hot_Y[i][j] = 1
self.epochs = epochs
Y = one_hot_Y
# Store up original value
self.X = X
self.Y = Y
# Two variables with undetermined length is created
self.var_X = tf.placeholder(dtype=tf.float32, shape=[None, n_feat], name='x')
self.var_Y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='y')
self.input_layer = One2OneInputLayer(self.var_X)
self.hidden_layers = []
layer_input = self.input_layer.output
# Create hidden layers
for dim in hidden_dims:
self.hidden_layers.append(DenseLayer(layer_input, dim))
layer_input = self.hidden_layers[-1].output
# Final classification layer, variable Y is passed
self.softmax_layer = SoftmaxLayer(self.hidden_layers[-1].output, n_classes, self.var_Y)
n_hidden = len(hidden_dims)
# regularization terms on coefficients of input layer
self.L1_input = tf.reduce_sum(tf.abs(self.input_layer.w))
self.L2_input = tf.nn.l2_loss(self.input_layer.w)
# regularization terms on weights of hidden layers
L1s = []
L2_sqrs = []
for i in xrange(n_hidden):
L1s.append(tf.reduce_sum(tf.abs(self.hidden_layers[i].w)))
L2_sqrs.append(tf.nn.l2_loss(self.hidden_layers[i].w))
L1s.append(tf.reduce_sum(tf.abs(self.softmax_layer.w)))
L2_sqrs.append(tf.nn.l2_loss(self.softmax_layer.w))
self.L1 = tf.add_n(L1s)
self.L2_sqr = tf.add_n(L2_sqrs)
# Cost with two regularization terms
self.cost = self.softmax_layer.cost \
+ lambda1*(1.0-lambda2)*0.5*self.L2_input + lambda1*lambda2*self.L1_input \
+ alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2*self.L1
self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
self.y = self.softmax_layer.y
def train(self, batch_size=100):
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for i in xrange(self.epochs):
x_batch, y_batch = get_batch(self.X, self.Y, batch_size)
sess.run(self.optimizer, feed_dict={self.var_X: x_batch, self.var_Y: y_batch})
if (i + 1) % 50 == 0:
l = sess.run(self.cost, feed_dict={self.var_X: x_batch, self.var_Y: y_batch})
print('epoch {0}: global loss = {1}'.format(i, l))
self.selected_w = sess.run(self.input_layer.w)
print(self.selected_w)
class One2OneInputLayer(object):
# One to One Mapping!
def __init__(self, input):
"""
The second dimension of the input,
for each input, each row is a sample
and each column is a feature, since
this is one to one mapping, n_in equals
the number of features
"""
n_in = input.get_shape()[1].value
self.input = input
# Initiate the weight for the input layer
w = tf.Variable(tf.zeros([n_in,]), name='w')
self.w = w
self.output = self.w * self.input
self.params = [w]
class DenseLayer(object):
# Canonical dense layer
def __init__(self, input, n_out, activation='sigmoid'):
"""
The second dimension of the input,
for each input, each row is a sample
and each column is a feature, since
this is one to one mapping, n_in equals
the number of features
n_out defines how many nodes are there in the
hidden layer
"""
n_in = input.get_shape()[1].value
self.input = input
# Initiate the weight for the input layer
w = tf.Variable(tf.ones([n_in, n_out]), name='w')
b = tf.Variable(tf.ones([n_out]), name='b')
output = tf.add(tf.matmul(input, w), b)
output = activate(output, activation)
self.w = w
self.b = b
self.output = output
self.params = [w]
class SoftmaxLayer(object):
def __init__(self, input, n_out, y):
"""
The second dimension of the input,
for each input, each row is a sample
and each column is a feature, since
this is one to one mapping, n_in equals
the number of features
n_out defines how many nodes are there in the
hidden layer
"""
n_in = input.get_shape()[1].value
self.input = input
# Initiate the weight and biases for this layer
w = tf.Variable(tf.random_normal([n_in, n_out]), name='w')
b = tf.Variable(tf.random_normal([n_out]), name='b')
pred = tf.add(tf.matmul(input, w), b)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
self.y = y
self.w = w
self.b = b
self.cost = cost
self.params= [w]
最佳答案
使用l1正则化时,诸如Adam之类的梯度下降算法不会给出精确的零。相反,类似ftrl或proximal adagrad的东西可以给您精确的零。
关于python - 为什么这里的套索没有为我提供零系数?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/37951090/