The first step is to start MongoDB. For that, open a new terminal and type:
sudo mongod
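If you prefer not to run MongoDB as root, a possible alternative (assuming you are happy to keep the database files in a local folder) is to start it with an explicit data directory, for example:

mkdir -p mongodb
mongod --fork --dbpath ./mongodb --logpath ./mongodb/mongodb.log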
Next, we have to write a config.json file, which contains the configuration of the BO experiment. In the folder
Practicals/tune_neural_network_under_constraints/bo,
you should be able to find the corresponding config.json file. This file is similar to the one used in the unconstrained case, but it now also specifies the objective and the constraint to use:
{
    "language"          : "PYTHON",
    "main_file"         : "wrapper",
    "experiment-name"   : "Tune-NN-Hypers-Constrained-Time",
    "acquisition"       : "PES",
    "random_seed"       : 1,
    "likelihood"        : "GAUSSIAN",
    "max_finished_jobs" : 50,
    "variables" : {
        "log_learning_rate" : {
            "type" : "FLOAT",
            "size" : 1,
            "min"  : -10,
            "max"  : -1
        },
        "log_weight_decay" : {
            "type" : "FLOAT",
            "size" : 1,
            "min"  : -10,
            "max"  : 10
        },
        "n_hidden_units" : {
            "type" : "INT",
            "size" : 1,
            "min"  : 10,
            "max"  : 50
        }
    },
    "tasks" : {
        "error" : {
            "type" : "OBJECTIVE"
        },
        "time" : {
            "type" : "CONSTRAINT"
        }
    }
}
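Once Spearmint is installed (see below), a quick way to check that the file parses correctly is to use Spearmint's own configuration parser, the same parse_config_file function that get_final_result.py uses later:

from spearmint.utils.parsing import parse_config_file

# Parse config.json from the current directory; Spearmint fills in defaults
# (for example the database address) automatically.
options = parse_config_file('.', 'config.json')
print(options[ 'experiment-name' ])
print(options[ 'database' ][ 'address' ])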
The config.json file contains all the information needed by Spearmint to run the BO experiment: the variables of the objective, their types and ranges, the acquisition function to use, and so on. Below is the code of the wrapper, which evaluates the objective. It does not do much, since all the actual code is found in nn.py.
from nn import *

if __name__ == '__main__':
    main(0, {u'log_learning_rate': np.array([ np.log(1e-3) ]), u'log_weight_decay': np.array([ np.log(1.0) ]),
        u'n_hidden_units': np.array([ 20 ]) })
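Note that Spearmint does not execute the __main__ block above: it simply imports the module given in main_file ("wrapper") and calls its main(job_id, params) function with the parameter values selected by the acquisition function. The block is only there so that you can test the objective by hand with fixed hyperparameters, e.g. by typing
python wrapper.py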
The content of the nn.py file is given below. Spearmint will simply call the main method, which now returns both the objective and the constraint value. The constraint is that the prediction time of the network must not exceed that of the fastest network (the smallest one) by more than 10%.
from __future__ import absolute_import
from __future__ import print_function

import autograd.numpy as np
import autograd.numpy.random as npr
import autograd.scipy.stats.norm as norm
from autograd import grad

import sys

def make_nn_funs(layer_sizes, weight_decay):
    """These functions implement a standard multi-layer perceptron."""

    shapes = list(zip(layer_sizes[:-1], layer_sizes[1:]))
    num_weights = sum((m + 1) * n for m, n in shapes)

    def unpack_layers(weights):
        # Slice the flat weight vector into per-layer weight matrices and biases.
        for m, n in shapes:
            cur_layer_weights = weights[:m*n].reshape((m, n))
            cur_layer_biases = weights[m*n:m*n+n].reshape((1, n))
            yield cur_layer_weights, cur_layer_biases
            weights = weights[(m+1)*n:]

    def predictions(weights, inputs):
        for W, b in unpack_layers(weights):
            outputs = np.dot(inputs, W) + b
            inputs = np.maximum(outputs, 0.0)
        return outputs

    def loss(weights, inputs, targets, N):
        preds = predictions(weights, inputs)
        return np.sum(weights**2) * weight_decay + 1.0 * N / inputs.shape[ 0 ] * np.sum((preds - targets)**2)

    return num_weights, predictions, loss
def load_data():

    # We load the data
    data = np.loadtxt('../../data/data.txt')
    index_train = np.loadtxt('../../data/index_train_0.txt', dtype = int)
    index_validation = np.loadtxt('../../data/index_validation_0.txt', dtype = int)
    index_test = np.loadtxt('../../data/index_test_0.txt', dtype = int)
    index_features = np.loadtxt('../../data/index_features.txt', dtype = int)
    index_target = np.array([ np.loadtxt('../../data/index_target.txt', dtype = int) ])

    X_train = data[ index_train, : ][ :, index_features ]
    X_validation = data[ index_validation, : ][ :, index_features ]
    X_test = data[ index_test, : ][ :, index_features ]
    y_train = np.array(data[ index_train, index_target ], ndmin = 2).T
    y_validation = np.array(data[ index_validation, index_target ], ndmin = 2).T
    y_test = np.array(data[ index_test, index_target ], ndmin = 2).T

    # We normalize the input features
    mean_X_train = np.mean(X_train, axis = 0)
    std_X_train = np.std(X_train, axis = 0)
    std_X_train[ std_X_train == 0 ] = 1
    X_train = (X_train - mean_X_train) / std_X_train
    X_validation = (X_validation - mean_X_train) / std_X_train
    X_test = (X_test - mean_X_train) / std_X_train

    # We normalize the output variables
    mean_y_train = np.mean(y_train)
    std_y_train = np.std(y_train)
    y_train = (y_train - mean_y_train) / std_y_train
    y_validation = (y_validation - mean_y_train) / std_y_train
    y_test = (y_test - mean_y_train) / std_y_train

    return X_train, X_validation, X_test, y_train, y_validation, y_test, mean_y_train, std_y_train
def main(job_id, params, test = False):

    # We load the data
    X_train, X_validation, X_test, y_train, y_validation, y_test, mean_y_train, std_y_train = load_data()

    if test:
        X_train = np.concatenate((X_train, X_validation), 0)
        y_train = np.concatenate((y_train, y_validation), 0)

    # We build a neural network with 3 hidden layers.
    learning_rate = np.exp(params[ 'log_learning_rate' ][ 0 ])
    weight_decay = np.exp(params[ 'log_weight_decay' ][ 0 ])
    n_hidden_units = params[ 'n_hidden_units' ][ 0 ]

    num_weights, predictions, loss = \
        make_nn_funs([ X_train.shape[ 1 ], n_hidden_units, n_hidden_units, n_hidden_units, 1 ], weight_decay)
    gradient_objective = grad(loss)

    np.random.seed(1)
    rs = npr.RandomState(0)
    weights = 0.1 * rs.randn(num_weights)

    def print_perf(epoch, w):
        print("{0:3}|{1:15}".format(epoch, -loss(w, X_train, y_train, len(y_train))))
        sys.stdout.flush()

    # Train with mini-batch Adam
    def make_batches(N_data, batch_size):
        return [ slice(i, min(i + batch_size, N_data)) for i in range(0, N_data, batch_size) ]

    batch_size = 100
    batch_idxs = make_batches(X_train.shape[ 0 ], batch_size)

    m1 = 0
    m2 = 0
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    alpha = learning_rate
    t = 0
    epochs = 50
    N = len(X_train)
    for epoch in range(epochs):
        permutation = np.random.choice(range(N), N, replace = False)
        print_perf(epoch, weights)
        for idxs in batch_idxs:
            t += 1
            grad_w = gradient_objective(weights, X_train[ permutation[ idxs ] ], y_train[ permutation[ idxs ] ], N)
            m1 = beta1 * m1 + (1 - beta1) * grad_w
            m2 = beta2 * m2 + (1 - beta2) * grad_w**2
            m1_hat = m1 / (1 - beta1**t)
            m2_hat = m2 / (1 - beta2**t)
            weights -= alpha * m1_hat / (np.sqrt(m2_hat) + epsilon)

    # We evaluate the validation error or the test error
    if test:
        preds = predictions(weights, X_test)
        error = np.sqrt(np.mean((preds - y_test)**2)) * std_y_train
    else:
        preds = predictions(weights, X_validation)
        error = np.sqrt(np.mean((preds - y_validation)**2)) * std_y_train

    print("RMSE:", error)
    sys.stdout.flush()

    # We evaluate the prediction time of the candidate network
    X_test_time = np.random.randn(int(5e5), X_train.shape[ 1 ])

    import time
    start = time.time()
    preds = predictions(weights, X_test_time)
    end = time.time()
    elapsed_time_candidate = end - start

    # We time the fastest (smallest) network, with 10 hidden units per layer.
    # The candidate's weight vector is reused only as a placeholder here: we only
    # care about the prediction time, not about the predictions themselves.
    _, predictions, _ = \
        make_nn_funs([ X_train.shape[ 1 ], 10, 10, 10, 1 ], weight_decay)
    start = time.time()
    preds = predictions(weights, X_test_time)
    end = time.time()
    elapsed_time_fastest = end - start

    time_ratio = elapsed_time_candidate / elapsed_time_fastest
    print("Time ratio:", time_ratio)
    print("Constraint:", 1.1 - time_ratio)

    # Spearmint-PESC considers the constraint satisfied when the returned value is non-negative.
    return { 'error': error, 'time': 1.1 - time_ratio }
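Before launching Spearmint it can be useful to sanity-check the objective and the constraint by calling main directly. The sketch below (the hyperparameter values are just an example) trains one network and prints the validation RMSE together with whether the time constraint is satisfied; recall that the constraint holds when the returned value is non-negative:

import numpy as np
from nn import main

# Any values inside the ranges specified in config.json will do here.
result = main(0, {u'log_learning_rate': np.array([ np.log(1e-3) ]),
    u'log_weight_decay': np.array([ np.log(1.0) ]),
    u'n_hidden_units': np.array([ 30 ])})

print("Validation RMSE:", result[ 'error' ])
print("Constraint satisfied:", result[ 'time' ] >= 0)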
Now we are ready to run Spearmint. Before that, however, we have to install the right version. Simply go to the folder Spearmint-PESC and type
sudo python setup.py install
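If you would rather not install Spearmint system-wide, python setup.py install --user installs it into your user site-packages instead; inside a virtualenv, the plain python setup.py install (without sudo) is enough.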
Go back to tune_neural_network_under_constraints/bo. The first step is to clean the MongoDB database. For that, type
python ../../Spearmint-PESC/spearmint/cleanup.py .
Now we can start our BO experiment. Simply type
python ../../Spearmint-PESC/spearmint/main.py .
This will run Spearmint for a total of 50 iterations. After that, we can extract the recommendation for the problem's solution. Simply type
python get_final_result.py
This Python script will connect to MongoDB to extract the recommendation. Below you can see the code.
import os
import sys
import importlib
import imp
import pdb

import numpy as np
import numpy.random as npr
import numpy.linalg as npla

import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt

from spearmint.visualizations import plots_2d
from spearmint.utils.parsing import parse_config_file
from spearmint.utils.parsing import parse_tasks_from_jobs
from spearmint.utils.parsing import repeat_experiment_name
from spearmint.utils.parsing import get_objectives_and_constraints
from spearmint.utils.parsing import DEFAULT_TASK_NAME
from spearmint.utils.database.mongodb import MongoDB
from spearmint.tasks.input_space import InputSpace
from spearmint.tasks.input_space import paramify_no_types
from spearmint.main import load_jobs

# main is the objective defined in nn.py (same folder as the wrapper)
from nn import main

if __name__ == '__main__':

    options = parse_config_file('.', 'config.json')
    experiment_name = options["experiment-name"]

    db = MongoDB(database_address=options['database']['address'])
    recommendations = db.load(experiment_name, 'recommendations')

    values = paramify_no_types(recommendations[-1][ 'params_o' ])

    print(values)
    main(0, values)
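The last call to main retrains the network with the recommended hyperparameters and reports its validation RMSE and time constraint. Since main accepts an optional test argument (see nn.py above), you can also retrain on the union of the training and validation data and report the test RMSE instead:

main(0, values, test = True)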
We will also compare results with a random exploration of the parameter space. The code is found in the file random_search.py located in the folder random_search. Below you can see the code.
from nn import *

if __name__ == '__main__':

    np.random.seed(1)

    grid_size = 50

    # We sample the hyperparameters uniformly at random within the ranges given in config.json.
    samples_log_learning_rate = np.array(-10.0 + np.random.rand(grid_size) * (-1.0 - -10.0), ndmin = 2).T
    samples_log_weight_decay = np.array(-10.0 + np.random.rand(grid_size) * (10.0 - -10.0), ndmin = 2).T
    samples_h_hidden_units = np.array(10.0 + np.round(np.random.rand(grid_size) * (50.0 - 10.0) - 0.5), dtype = int, ndmin = 2).T

    # Column 0 stores the validation error and column 1 the constraint value returned by main.
    values = np.zeros((grid_size, 2))
    for i in range(grid_size):
        result = main(0, {u'log_learning_rate': samples_log_learning_rate[ i ],
            u'log_weight_decay': samples_log_weight_decay[ i ],
            u'n_hidden_units': samples_h_hidden_units[ i ]})
        values[ i, 0 ] = result[ 'error' ]
        values[ i, 1 ] = result[ 'time' ]
        print(values[ i ], samples_log_learning_rate[ i ], samples_log_weight_decay[ i ], samples_h_hidden_units[ i ])

    results = np.concatenate((values, samples_log_learning_rate, samples_log_weight_decay, samples_h_hidden_units), 1)
    np.savetxt("results.txt", results)
This will store the results of the random search strategy in the file results.txt. To extract the best configuration found, simply execute the script get_final_results.py, whose code is given below. It is analogous to the previous script, but it reads results.txt and only considers feasible solutions (those with a non-negative constraint value).
from nn import *

if __name__ == '__main__':

    results = np.loadtxt('results.txt')

    # Column 1 contains the constraint value: negative means the time constraint is violated,
    # so we discard those configurations before looking for the smallest validation error.
    results[ results[ :, 1 ] < 0, : ] = float('inf')
    best = results[ np.argmin(results[ :, 0 ]), 2 : ]

    values = {u'log_learning_rate': np.array([ best[ 0 ] ]), u'log_weight_decay': np.array([ best[ 1 ] ]), u'n_hidden_units': np.array([ int(best[ 2 ]) ]) }

    print(values)
    main(0, values)