Problem description
I want to get reproducible results for a CNN. I use Keras and Google Colab with GPU.
In addition to the recommendations to insert certain code snippets, which should allow reproducibility, I also added seeds to the layers.
###### This is the first code snippet to run #####
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
###### This is the second code snippet to run #####
from __future__ import print_function
import numpy as np
import tensorflow as tf
print(tf.test.gpu_device_name())
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(1)
rn.seed(1)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
###### This is the third code snippet to run #####
from keras import backend as K
tf.set_random_seed(1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
###### This is the fourth code snippet to run #####
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=1), input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25, seed=1))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5, seed=1))
    model.add(Dense(10, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model

def split_data(X, y):
    X_train_val, X_val, y_train_val, y_val = train_test_split(X, y, random_state=42, test_size=1/5, stratify=y)
    return (X_train_val, X_val, y_train_val, y_val)

def train_model_with_EarlyStopping(model, X, y):
    # make train and validation data
    X_tr, X_val, y_tr, y_val = split_data(X, y)
    es = EarlyStopping(monitor='val_loss', patience=20, mode='min', restore_best_weights=True)
    history = model.fit(X_tr, y_tr,
                        batch_size=64,
                        epochs=200,
                        verbose=1,
                        validation_data=(X_val, y_val),
                        callbacks=[es])
    return history
###### This is the fifth code snippet to run #####
train_model_with_EarlyStopping(model_cnn(), X, y)
Every time I run the above code I get different results. Does the reason lie in the code, or is it simply not possible to obtain reproducible results in Google Colab with GPU support?
The complete code (there are unnecessary parts in the code, such as libraries which are not used):
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
from __future__ import print_function # NEU
import numpy as np
import tensorflow as tf
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(1)
rn.seed(1)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
from keras import backend as K
tf.set_random_seed(1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
import os
local_root_path = os.path.expanduser("~/data/data")
print(local_root_path)
try:
    os.makedirs(local_root_path, exist_ok=True)
except: pass

def ListFolder(google_drive_id, destination):
    file_list = drive.ListFile({'q': "'%s' in parents and trashed=false" % google_drive_id}).GetList()
    counter = 0
    for f in file_list:
        # If it is a directory, create the directory and download the files inside it
        if f['mimeType']=='application/vnd.google-apps.folder':
            folder_path = os.path.join(destination, f['title'])
            os.makedirs(folder_path, exist_ok=True)
            print('creating directory {}'.format(folder_path))
            ListFolder(f['id'], folder_path)
        else:
            fname = os.path.join(destination, f['title'])
            f_ = drive.CreateFile({'id': f['id']})
            f_.GetContentFile(fname)
            counter += 1
    print('{} files were uploaded in {}'.format(counter, destination))
ListFolder("1DyM_D2ZJ5UHIXmXq4uHzKqXSkLTH-lSo", local_root_path)
import glob
import h5py
from time import time
from keras import initializers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, merge
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.optimizers import SGD, Adam, RMSprop, Adagrad, Adadelta, Adamax, Nadam
from keras.utils import np_utils
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras.regularizers import l2
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras import backend as K
import numpy as np
import pickle as pkl
from matplotlib import pyplot as plt
%matplotlib inline
import gzip
import numpy as np
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
from keras.datasets import fashion_mnist
from numpy import mean, std
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.optimizers import SGD, Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import auc, average_precision_score, f1_score
import time
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from google.colab import files
from PIL import Image
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=1), input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25, seed=1))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5, seed=1))
    model.add(Dense(10, kernel_initializer=initializers.glorot_uniform(seed=2)))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model
def train_model_with_EarlyStopping(model, X, y):
    X_tr, X_val, y_tr, y_val = split_train_val_data(X, y)
    es = EarlyStopping(monitor='val_loss', patience=20, mode='min', restore_best_weights=True)
    history = model.fit(X_tr, y_tr,
                        batch_size=64,
                        epochs=200,
                        verbose=1,
                        validation_data=(X_val, y_val),
                        callbacks=[es])
    evaluate_model(model, history, X_tr, y_tr)
    return history
Recommended answer
The problem isn't limited to Colab, and is reproducible locally. The behavior, however, may be inevitable.
The code at the bottom is a minimally-reproducible version of your code, with fit parameters tweaked for faster testing. What I observed is that the maximum difference in loss is only 0.0144% across 5 runs, with 468 iterations per run. This is pretty good. With batch_size=64, 60000 samples, and 20 epochs, you'll have 18750 iterations - which will amplify this figure substantially.
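For reference, a rough sketch (my own addition, not part of the original answer) of how such a run-to-run spread can be quantified, assuming you have recorded a per-iteration loss array for each run; max_relative_loss_spread is a made-up helper name:

# Hypothetical helper: worst-case relative loss spread across runs, in percent.
import numpy as np

def max_relative_loss_spread(loss_histories):
    """loss_histories: list of equal-length 1D loss arrays, one per run."""
    losses = np.stack(loss_histories)                 # shape: (runs, iterations)
    spread = losses.max(axis=0) - losses.min(axis=0)  # per-iteration gap between runs
    return 100.0 * (spread / losses.mean(axis=0)).max()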
Regardless, GPU parallelism is the most likely culprit driving the randomness - the small differences do accumulate over time to yield a substantial difference - demo below. If 1e-8 seems small, try adding random noise to half your weights, with magnitude clipped at 1e-8, and witness its 'life philosophy' change.
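If you want to actually run that experiment, something along these lines would do. This is my own sketch using the standard Keras get_weights/set_weights API; perturb_half_weights is a hypothetical name, not from the original answer:

# Add noise with |magnitude| <= 1e-8 to roughly half of each layer's weights.
import numpy as np

def perturb_half_weights(model, magnitude=1e-8, seed=0):
    rng = np.random.RandomState(seed)
    for layer in model.layers:
        new_weights = []
        for w in layer.get_weights():
            mask = rng.rand(*w.shape) < 0.5                      # pick ~half the entries
            noise = rng.uniform(-magnitude, magnitude, w.shape)  # tiny clipped noise
            new_weights.append(w + noise * mask)
        layer.set_weights(new_weights)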
The role of the seeds becomes dramatically pronounced if you don't use them - try it, and all your metrics will fly rampant within the first 10 iterations. Also, loss is better for measuring runtime differences, as accuracy is a lot more sensitive to numeric precision errors: the difference between 60% accuracy and 70% accuracy on a 10-sample batch is a prediction that differs by 0.000001 w.r.t. 0.5 - but loss will barely budge.
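A tiny illustration of that point (my own, not from the answer): a prediction that crosses the 0.5 decision boundary by a hair flips the hard 0/1 decision on that sample, while the per-sample cross-entropy barely moves.

# Accuracy flips, loss barely changes.
import numpy as np

y_true = 1
for p in (0.499999, 0.500001):          # predictions straddling the 0.5 boundary
    correct = int((p > 0.5) == y_true)  # hard decision: 0 or 1
    loss = -np.log(p)                   # per-sample cross-entropy for class 1
    print(f"p={p:.6f}  correct={correct}  loss={loss:.6f}")
# loss differs by ~4e-06, while the 0/1 decision on this sample flips entirely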
Lastly, note that your hyperparameter choice will have a far greater impact upon model performance than randomness; no matter how many seeds you throw, they won't magic a model into SOTA. -- I recommend this fine clip.
Your code is fine. You've taken all practical steps to ensure reproducibility, with one exception: PYTHONHASHSEED must be set before your Python kernel starts.
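For completeness, one way to satisfy that requirement (my own sketch; "train_cnn.py" is a placeholder script name) is to launch training in a child process whose environment already carries the variable:

# PYTHONHASHSEED only takes effect if set before the interpreter starts,
# so run the training script in a fresh process with it already in the environment.
import os
import subprocess

env = os.environ.copy()
env["PYTHONHASHSEED"] = "0"   # fixed hash seed for the child interpreter
subprocess.run(["python", "train_cnn.py"], env=env, check=True)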
How can you reduce randomness?

- Repeat runs, average results. Understandably that's expensive, but note that even a perfectly reproducible run isn't perfectly informative, as model variance w.r.t. the train & validation sets is likely to be much greater than noise-induced randomness (a sketch combining this with K-Fold follows the list).
- K-Fold Cross-Validation: can mitigate both data & noise variance significantly.
- Larger validation set: extracted features can differ only so much due to noise; the larger the validation set, the less small perturbations in weights should reflect in the metrics.
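As referenced in the first bullet, here is a rough sketch (mine, with an assumed helper name) of averaging validation accuracy over stratified folds; it assumes one-hot encoded labels, as in your code:

# Hypothetical helper: average validation accuracy over stratified folds to damp
# both split- and noise-induced variance. Assumes X, y are numpy arrays, y one-hot.
import numpy as np
from sklearn.model_selection import StratifiedKFold

def cross_validated_accuracy(build_model, X, y, n_splits=5, epochs=20):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    scores = []
    for train_idx, val_idx in skf.split(X, y.argmax(axis=1)):
        model = build_model()                          # fresh weights each fold
        model.fit(X[train_idx], y[train_idx],
                  batch_size=64, epochs=epochs, verbose=0,
                  validation_data=(X[val_idx], y[val_idx]))
        _, acc = model.evaluate(X[val_idx], y[val_idx], verbose=0)
        scores.append(acc)
    return np.mean(scores), np.std(scores)             # report mean and spread

# Usage: mean_acc, std_acc = cross_validated_accuracy(model_cnn, X, y)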
GPU parallelism: amplifying float errors
print(2. * 11. / 9.) # 2.4444444444444446
print(2. / 9. * 11.) # 2.444444444444444
Order of operations matters, and by exploiting multithreading, GPU parallelism gives no guarantee whatsoever of operations being executed in the same order. At first glance, the difference may look harmless - but give it enough iterations ...
one = 1
for _ in range(int(1e8)):
one *= (2. / 9. * 11.) / (2. * 11. / 9.)
print(one) # 0.9999999777955395
print(1 - one) # 1.8167285897874308e-08
... and that "one" is now a typical small weight value, about 1e-08, away from its original self. If 100 million iterations seems like a stretch, consider that the operation completed in about half a minute, whereas your model can train for over an hour, and the former runs entirely on CPU.
Minimal reproducible experiment:
import tensorflow as tf
import random as rn
import numpy as np
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import MaxPooling2D, Conv2D
from keras.optimizers import Adam
def model_cnn():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3,3),
                     kernel_initializer='he_uniform', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, kernel_size=(3,3), kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(512, kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, kernel_initializer='he_uniform'))
    model.add(Activation('softmax'))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.001),
                  metrics=['accuracy'])
    return model
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
X_train = np.random.randn(30000, 28, 28, 1)
y_train = np.random.randint(0, 2, (30000, 10))
X_val = np.random.randn(30000, 28, 28, 1)
y_val = np.random.randint(0, 2, (30000, 10))
model = model_cnn()
np.random.seed(1)
rn.seed(2)
tf.set_random_seed(3)
history = model.fit(X_train, y_train, batch_size=64, shuffle=True,
                    epochs=1, verbose=1, validation_data=(X_val, y_val))
Differences across runs:
loss: 12.5044 - acc: 0.0971 - val_loss: 11.5389 - val_acc: 0.1051
loss: 12.5047 - acc: 0.0958 - val_loss: 11.5369 - val_acc: 0.1018
loss: 12.5055 - acc: 0.0955 - val_loss: 11.5382 - val_acc: 0.0980
loss: 12.5042 - acc: 0.0961 - val_loss: 11.5382 - val_acc: 0.1179
loss: 12.5062 - acc: 0.0960 - val_loss: 11.5366 - val_acc: 0.1082