From 0a636416ed9e597b6169a9e1d1127d035c96df6e Mon Sep 17 00:00:00 2001
From: davidjacnogueira
Date: Wed, 9 Nov 2016 02:31:53 +0000
Subject: [PATCH] Internal architecture changes (to allow different activation
 functions for each layer and to allow hidden layers to have different numbers
 of nodes).

---
 src/Layer.h      |  52 +++++++++++++++++-------
 src/MLP.cpp      |  19 ++++-----
 src/MLP.h        |  60 +++++++++++-----------------
 src/MLPTest.cpp  |  18 ++++-----
 src/Node.h       |   8 ++--
 src/NodeTest.cpp |  35 ++++++++++++----
 src/Utils.h      | 101 +++++++++++++++++++++++++++++++++++++----------
 7 files changed, 193 insertions(+), 100 deletions(-)

diff --git a/src/Layer.h b/src/Layer.h
index cd504d8..d22c5b6 100644
--- a/src/Layer.h
+++ b/src/Layer.h
@@ -5,6 +5,7 @@
 #ifndef LAYER_H
 #define LAYER_H
 
+#include "Utils.h"
 #include "Node.h"
 
 #include
@@ -23,39 +24,59 @@ public:
     m_nodes.clear();
   };
 
-  Layer(int num_nodes,
-        int num_inputs_per_node,
+  Layer(int num_inputs_per_node,
+        int num_nodes,
+        const std::string & activation_function,
         bool use_constant_weight_init = true,
         double constant_weight_init = 0.5) {
-    m_num_nodes = num_nodes;
     m_num_inputs_per_node = num_inputs_per_node;
-    m_nodes.resize(num_nodes);
+    m_num_nodes = num_nodes;
+
+    m_nodes.resize(num_nodes);
     for (int i = 0; i < num_nodes; i++) {
       m_nodes[i].WeightInitialization(num_inputs_per_node,
                                       use_constant_weight_init,
                                       constant_weight_init);
     }
+
+    std::pair<std::function<double(double)>,
+              std::function<double(double)> > *pair;
+    bool ret_val = utils::ActivationFunctionsManager::Singleton().
+      GetActivationFunctionPair(activation_function,
+                                &pair);
+    assert(ret_val);
+    m_activation_function = (*pair).first;
+    m_deriv_activation_function = (*pair).second;
+  };
+
+  ~Layer() {
+    m_num_inputs_per_node = 0;
+    m_num_nodes = 0;
+    m_nodes.clear();
+  };
+
+  int GetInputSize() const {
+    return m_num_inputs_per_node;
   };
 
-  ~Layer() {
-    m_num_nodes = 0;
-    m_num_inputs_per_node = 0;
-    m_nodes.clear();
+  int GetOutputSize() const {
+    return m_num_nodes;
   };
 
   const std::vector<Node> & GetNodes() const {
     return m_nodes;
   }
 
-  void GetOutputAfterSigmoid(const std::vector<double> &input,
-                             std::vector<double> * output) const {
+  void GetOutputAfterActivationFunction(const std::vector<double> &input,
+                                        std::vector<double> * output) const {
     assert(input.size() == m_num_inputs_per_node);
 
     output->resize(m_num_nodes);
 
     for (int i = 0; i < m_num_nodes; ++i) {
-      m_nodes[i].GetOutputAfterSigmoid(input, &((*output)[i]));
+      m_nodes[i].GetOutputAfterActivationFunction(input,
+                                                  m_activation_function,
+                                                  &((*output)[i]));
     }
   }
 
@@ -79,8 +100,8 @@ public:
       double dnetj_dwij = 0.0;
 
       dE_doj = deriv_error[i];
-      doj_dnetj = utils::deriv_sigmoid(net_sum);
-
+      doj_dnetj = m_deriv_activation_function(net_sum);
+
       for (int j = 0; j < m_num_inputs_per_node; j++) {
         (*deltas)[j] += dE_doj * doj_dnetj * m_nodes[i].GetWeights()[j];
 
@@ -94,9 +115,12 @@ public:
   };
 
 protected:
-  int m_num_nodes{ 0 };
   int m_num_inputs_per_node{ 0 };
+  int m_num_nodes{ 0 };
   std::vector<Node> m_nodes;
+
+  std::function<double(double)> m_activation_function;
+  std::function<double(double)> m_deriv_activation_function;
 };
 
 #endif //LAYER_H
\ No newline at end of file
diff --git a/src/MLP.cpp b/src/MLP.cpp
index 428d4b0..ad0c518 100644
--- a/src/MLP.cpp
+++ b/src/MLP.cpp
@@ -27,14 +27,13 @@ void MLP::GetOutput(const std::vector<double> &input,
   if (m_num_hidden_layers == 0)
     temp_size = m_num_outputs;
   else
-    temp_size = m_num_nodes_per_hidden_layer;
+    temp_size = m_layers_nodes[1];
 
   std::vector<double> temp_in(m_num_inputs, 0.0);
   std::vector<double> temp_out(temp_size, 0.0);
   temp_in = input;
 
-  //m_layers.size() equals (m_num_hidden_layers + 1)
-  for (int i = 0; i < (m_num_hidden_layers + 1); ++i) {
+  for (int i = 0; i < m_layers.size(); ++i) {
     if (i > 0) {
       //Store this layer activation
       if (all_layers_activations != nullptr)
@@ -43,11 +42,9 @@ void MLP::GetOutput(const std::vector<double> &input,
       temp_in.clear();
       temp_in = temp_out;
       temp_out.clear();
-      temp_out.resize((i == m_num_hidden_layers) ?
-                      m_num_outputs :
-                      m_num_nodes_per_hidden_layer);
+      temp_out.resize(m_layers[i].GetOutputSize());
     }
-    m_layers[i].GetOutputAfterSigmoid(temp_in, &temp_out);
+    m_layers[i].GetOutputAfterActivationFunction(temp_in, &temp_out);
   }
 
   if (temp_out.size() > 1)
@@ -106,8 +103,9 @@ void MLP::UpdateMiniBatch(const std::vector<TrainingSample> &training_sample_set
   //  }
   //}
   size_t i = 0;
+  double current_iteration_cost_function = 0.0;
   for (i = 0; i < max_iterations; i++) {
-    double current_iteration_cost_function = 0.0;
+    current_iteration_cost_function = 0.0;
     for (auto & training_sample_with_bias : training_sample_set_with_bias) {
       std::vector<double> predicted_output;
       std::vector< std::vector<double> > all_layers_activations;
@@ -153,7 +151,10 @@ void MLP::UpdateMiniBatch(const std::vector<TrainingSample> &training_sample_set
       break;
   }
 
-  LOG(INFO) << "******************************" ;
+  LOG(INFO) << "Iteration " << i << " cost function f(error): "
+    << current_iteration_cost_function;
+
+  LOG(INFO) << "******************************";
   LOG(INFO) << "******* TRAINING ENDED *******";
   LOG(INFO) << "******* " << i << " iters *******";
   LOG(INFO) << "******************************";
diff --git a/src/MLP.h b/src/MLP.h
index a70f219..0b72a95 100644
--- a/src/MLP.h
+++ b/src/MLP.h
@@ -19,27 +19,26 @@ class MLP {
 public:
-  MLP(int num_inputs,
-      int num_outputs,
-      int num_hidden_layers,
-      int num_nodes_per_hidden_layer,
+  //desired call syntax : MLP({64*64,20,4}, {"sigmoid", "linear"},
+  MLP(const std::vector<size_t> & layers_nodes,
+      const std::vector<std::string> & layers_activfuncs,
       bool use_constant_weight_init = true,
      double constant_weight_init = 0.5) {
+    assert(layers_nodes.size() >= 2);
+    assert(layers_activfuncs.size() + 1 == layers_nodes.size());
 
-    m_num_inputs = num_inputs;
-    m_num_outputs = num_outputs;
-    m_num_hidden_layers = num_hidden_layers;
-    m_num_nodes_per_hidden_layer = num_nodes_per_hidden_layer;
-
-    CreateMLP(use_constant_weight_init,
+    CreateMLP(layers_nodes,
+              layers_activfuncs,
+              use_constant_weight_init,
              constant_weight_init);
   }
+
   ~MLP() {
     m_num_inputs = 0;
     m_num_outputs = 0;
     m_num_hidden_layers = 0;
-    m_num_nodes_per_hidden_layer = 0;
+    m_layers_nodes.clear();
     m_layers.clear();
   };
 
@@ -60,40 +59,27 @@ protected:
                      const std::vector<double> &error,
                      double learning_rate);
 private:
-  void CreateMLP(bool use_constant_weight_init,
+  void CreateMLP(const std::vector<size_t> & layers_nodes,
+                 const std::vector<std::string> & layers_activfuncs,
+                 bool use_constant_weight_init,
                  double constant_weight_init = 0.5) {
-    if (m_num_hidden_layers > 0) {
-      //first layer
-      m_layers.emplace_back(Layer(m_num_nodes_per_hidden_layer,
-                                  m_num_inputs,
-                                  use_constant_weight_init,
-                                  constant_weight_init));
-      //subsequent layers
-      for (int i = 0; i < m_num_hidden_layers - 1; i++) {
-        m_layers.emplace_back(Layer(m_num_nodes_per_hidden_layer,
-                                    m_num_nodes_per_hidden_layer,
-                                    use_constant_weight_init,
-                                    constant_weight_init));
-      }
-      //last layer
-      m_layers.emplace_back(Layer(m_num_outputs,
-                                  m_num_nodes_per_hidden_layer,
-                                  use_constant_weight_init,
-                                  constant_weight_init));
-    } else {
-      m_layers.emplace_back(Layer(m_num_outputs,
-                                  m_num_inputs,
+    m_layers_nodes = layers_nodes;
+    m_num_inputs = m_layers_nodes[0];
+    m_num_outputs = m_layers_nodes[m_layers_nodes.size() - 1];
+    m_num_hidden_layers = m_layers_nodes.size() - 2;
+
+    for (int i = 0; i < m_layers_nodes.size() - 1; i++) {
+      m_layers.emplace_back(Layer(m_layers_nodes[i],
+                                  m_layers_nodes[i + 1],
+                                  layers_activfuncs[i],
                                   use_constant_weight_init,
                                   constant_weight_init));
     }
   }
-
-
   int m_num_inputs{ 0 };
   int m_num_outputs{ 0 };
   int m_num_hidden_layers{ 0 };
-  int m_num_nodes_per_hidden_layer{ 0 };
-
+  std::vector<size_t> m_layers_nodes;
   std::vector<Layer> m_layers;
 };
diff --git a/src/MLPTest.cpp b/src/MLPTest.cpp
index 23a3256..f1931f2 100644
--- a/src/MLPTest.cpp
+++ b/src/MLPTest.cpp
@@ -39,7 +39,7 @@ UNIT(LearnAND) {
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
   size_t num_outputs = training_sample_set_with_bias[0].GetOutputVectorSize();
-  MLP my_mlp(num_features, num_outputs, 1, 2, false);
+  MLP my_mlp({ num_features, 2 ,num_outputs }, { "sigmoid", "linear" }, false);
 
   //Train MLP
   my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
@@ -79,7 +79,7 @@ UNIT(LearnNAND) {
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
   size_t num_outputs = training_sample_set_with_bias[0].GetOutputVectorSize();
-  MLP my_mlp(num_features, num_outputs, 1, 2, false);
+  MLP my_mlp({ num_features, 2 ,num_outputs }, { "sigmoid", "linear" }, false);
 
   //Train MLP
   my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
@@ -119,7 +119,7 @@ UNIT(LearnOR) {
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
   size_t num_outputs = training_sample_set_with_bias[0].GetOutputVectorSize();
-  MLP my_mlp(num_features, num_outputs, 1, 2, false);
+  MLP my_mlp({ num_features, 2 ,num_outputs }, { "sigmoid", "linear" }, false);
 
   //Train MLP
   my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
@@ -159,7 +159,7 @@ UNIT(LearnNOR) {
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
   size_t num_outputs = training_sample_set_with_bias[0].GetOutputVectorSize();
-  MLP my_mlp(num_features, num_outputs, 1, 2, false);
+  MLP my_mlp({ num_features, 2 ,num_outputs }, { "sigmoid", "linear" }, false);
 
   //Train MLP
   my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
@@ -197,9 +197,9 @@ UNIT(LearnXOR) {
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
   size_t num_outputs = training_sample_set_with_bias[0].GetOutputVectorSize();
-  MLP my_mlp(num_features, num_outputs, 1, 2, false);
+  MLP my_mlp({ num_features, 2 ,num_outputs }, { "sigmoid", "linear" }, false);
 
   //Train MLP
-  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 50'000, 0.25);
+  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     std::vector<double> output;
@@ -233,7 +233,7 @@ UNIT(LearnNOT) {
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
   size_t num_outputs = training_sample_set_with_bias[0].GetOutputVectorSize();
-  MLP my_mlp(num_features, num_outputs, 1, 2, false);
+  MLP my_mlp({ num_features, 2 ,num_outputs }, { "sigmoid", "linear" }, false);
 
   //Train MLP
   my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
@@ -271,7 +271,7 @@ UNIT(LearnX1) {
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
   size_t num_outputs = training_sample_set_with_bias[0].GetOutputVectorSize();
-  MLP my_mlp(num_features, num_outputs, 1, 2, false);
+  MLP my_mlp({ num_features, 2 ,num_outputs }, { "sigmoid", "linear" }, false);
 
   //Train MLP
   my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
@@ -309,7 +309,7 @@ UNIT(LearnX2) {
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
   size_t num_outputs = training_sample_set_with_bias[0].GetOutputVectorSize();
-  MLP my_mlp(num_features, num_outputs, 1, 2, false);
+  MLP my_mlp({ num_features, 2 ,num_outputs }, { "sigmoid", "linear" }, false);
 
   //Train MLP
   my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
diff --git a/src/Node.h b/src/Node.h
index 5aacbdb..6ef6042 100644
--- a/src/Node.h
+++ b/src/Node.h
@@ -95,18 +95,20 @@ public:
     *output = inner_prod;
   }
 
-  void GetOutputAfterSigmoid(const std::vector<double> &input,
+  void GetOutputAfterActivationFunction(const std::vector<double> &input,
+                                        std::function<double(double)> activation_function,
                              double * output) const {
     double inner_prod = 0.0;
     GetInputInnerProdWithWeights(input, &inner_prod);
-    *output = utils::sigmoid(inner_prod);
+    *output = activation_function(inner_prod);
   }
 
   void GetBooleanOutput(const std::vector<double> &input,
+                        std::function<double(double)> activation_function,
                         bool * bool_output,
                         double threshold = 0.5) const {
     double value;
-    GetOutputAfterSigmoid(input, &value);
+    GetOutputAfterActivationFunction(input, activation_function, &value);
     *bool_output = (value >threshold) ? true : false;
   };
 
diff --git a/src/NodeTest.cpp b/src/NodeTest.cpp
index 2d8e1dd..70e379b 100644
--- a/src/NodeTest.cpp
+++ b/src/NodeTest.cpp
@@ -39,7 +39,10 @@ void Train(Node & node,
     int error_count = 0;
     for (auto & training_sample_with_bias : training_sample_set_with_bias) {
       bool prediction;
-      node.GetBooleanOutput(training_sample_with_bias.input_vector(), &prediction, 0.5);
+      node.GetBooleanOutput(training_sample_with_bias.input_vector(),
+                            utils::linear,
+                            &prediction,
+                            0.5);
       bool correct_output = training_sample_with_bias.output_vector()[0] > 0.5 ? true : false;
       if (prediction != correct_output) {
         error_count++;
@@ -85,7 +88,10 @@ UNIT(LearnAND) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    my_node.GetBooleanOutput(training_sample.input_vector(),
+                             utils::linear,
+                             &class_id,
+                             0.5);
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
@@ -117,7 +123,10 @@ UNIT(LearnNAND) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    my_node.GetBooleanOutput(training_sample.input_vector(),
+                             utils::linear,
+                             &class_id,
+                             0.5);
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
@@ -149,7 +158,10 @@ UNIT(LearnOR) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    my_node.GetBooleanOutput(training_sample.input_vector(),
+                             utils::linear,
+                             &class_id,
+                             0.5);
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
@@ -180,7 +192,10 @@ UNIT(LearnNOR) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    my_node.GetBooleanOutput(training_sample.input_vector(),
+                             utils::linear,
+                             &class_id,
+                             0.5);
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
@@ -210,7 +225,10 @@ UNIT(LearnNOT) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    my_node.GetBooleanOutput(training_sample.input_vector(),
+                             utils::linear,
+                             &class_id,
+                             0.5);
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
@@ -242,7 +260,10 @@ UNIT(LearnXOR) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    my_node.GetBooleanOutput(training_sample.input_vector(),
+                             utils::linear,
+                             &class_id,
+                             0.5);
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     if (class_id != correct_output) {
       LOG(WARNING) << "Failed to train. " <<
diff --git a/src/Utils.h b/src/Utils.h
index bbb2fbe..1edeb38 100644
--- a/src/Utils.h
+++ b/src/Utils.h
@@ -10,20 +10,91 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #ifdef _WIN32
 #include
 #else
 #include
 #endif
-#include
-#include
-#include
-#include
-#include
-#include
-
 namespace utils {
+//Typical sigmoid function created from input x
+//Returns the sigmoided value
+inline double sigmoid(double x) {
+  return 1 / (1 + exp(-x));
+}
+
+// Derivative of sigmoid function
+inline double deriv_sigmoid(double x) {
+  return sigmoid(x)*(1 - sigmoid(x));
+};
+
+//Compute hyperbolic tangent (tanh)
+//Returns the hyperbolic tangent of x.
+inline double hyperbolic_tan(double x) {
+  return (tanh)(x);
+}
+
+// Derivative of hyperbolic tangent function
+inline double deriv_hyperbolic_tan(double x) {
+  return 1 - (std::pow)(hyperbolic_tan(x), 2);
+};
+
+inline double linear(double x) {
+  return x;
+}
+
+// Derivative of linear function
+inline double deriv_linear(double x) {
+  return 1;
+};
+
+struct ActivationFunctionsManager {
+  bool GetActivationFunctionPair(const std::string & activation_name,
+                                 std::pair<std::function<double(double)>,
+                                           std::function<double(double)> > **pair) {
+    auto iter = activation_functions_map.find(activation_name);
+    if (iter != activation_functions_map.end())
+      *pair = &(iter->second);
+    else
+      return false;
+    return true;
+  }
+
+  static ActivationFunctionsManager & Singleton() {
+    static ActivationFunctionsManager instance;
+    return instance;
+  }
+private:
+  void AddNewPair(std::string function_name,
+                  std::function<double(double)> function,
+                  std::function<double(double)> deriv_function) {
+    activation_functions_map.insert(std::make_pair(function_name,
+                                                   std::make_pair(function,
+                                                                  deriv_function)));
+  };
+
+  ActivationFunctionsManager() {
+    AddNewPair("sigmoid", sigmoid, deriv_sigmoid);
+    AddNewPair("tanh", hyperbolic_tan, deriv_hyperbolic_tan);
+    AddNewPair("linear", linear, deriv_linear);
+  };
+
+  std::unordered_map<std::string, std::pair<std::function<double(double)>, std::function<double(double)> > >
+    activation_functions_map;
+};
 
 struct gen_rand {
   double factor;
@@ -35,18 +106,6 @@ public:
   }
 };
 
-inline double sigmoid(double x) {
-  //Typical sigmoid function created from input x
-  //param x: input value
-  //return: sigmoided value
-  return 1 / (1 + exp(-x));
-}
-
-// Derivative of sigmoid function
-inline double deriv_sigmoid(double x) {
-  return sigmoid(x)*(1 - sigmoid(x));
-};
-
 inline void Softmax(std::vector<double> *output) {
   size_t num_elements = output->size();
   std::vector<double> exp_output(num_elements);
@@ -62,8 +121,8 @@ inline void Softmax(std::vector<double> *output) {
 inline void GetIdMaxElement(const std::vector<double> &output, size_t * class_id) {
   *class_id = std::distance(output.begin(),
-                            std::max_element(output.begin(),
-                                             output.end()));
+                            std::max_element(output.begin(),
+                                             output.end()));
 }
 }
 #endif // UTILS_H
\ No newline at end of file
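
Usage note (not part of the patch): a minimal sketch of calling the constructor introduced
above, mirroring the updated MLPTest.cpp calls. The 3-node bias-augmented input layer, the
placeholder input values, and the two-argument GetOutput call are assumptions for
illustration only; only the constructor shape and the activation names come from the patch.

  #include <vector>
  #include "MLP.h"

  int main() {
    // One entry per layer {inputs (incl. bias), hidden nodes, outputs} plus one
    // activation name per non-input layer, resolved via utils::ActivationFunctionsManager.
    MLP my_mlp({ 3, 2, 1 }, { "sigmoid", "linear" }, false);

    // Forward pass on a single bias-augmented input vector (values are placeholders).
    std::vector<double> input{ 0.0, 1.0, 1.0 };
    std::vector<double> output;
    my_mlp.GetOutput(input, &output);
    return 0;
  }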
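
Also for reference, the per-layer activation lookup that Layer.h now performs in its
constructor can be exercised standalone; everything here mirrors the
GetActivationFunctionPair API added to Utils.h ("sigmoid", "tanh" and "linear" are the
registered names), while the probe value 0.5 is arbitrary.

  #include <cassert>
  #include <functional>
  #include <utility>
  #include "Utils.h"

  int main() {
    std::pair<std::function<double(double)>,
              std::function<double(double)> > *pair;
    bool found = utils::ActivationFunctionsManager::Singleton().
      GetActivationFunctionPair("tanh", &pair);
    assert(found);                          // unknown names return false
    double activation = pair->first(0.5);   // forward activation value
    double derivative = pair->second(0.5);  // slope used during backpropagation
    return 0;
  }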