diff --git a/src/Layer.h b/src/Layer.h
index 97fe2a2..297f9eb 100644
--- a/src/Layer.h
+++ b/src/Layer.h
@@ -34,23 +34,19 @@ public:
                              use_constant_weight_init,
                              constant_weight_init)));
   };
-
-
+
   ~Layer() {
     m_num_nodes = 0;
     m_num_inputs_per_node = 0;
     m_nodes.clear();
   };
 
-  //std::vector<Node> & GetNodes() {
-  //  return m_nodes;
-  //}
-
   const std::vector<Node> & GetNodes() const {
     return m_nodes;
   }
 
-  void GetOutputAfterSigmoid(const std::vector<double> &input, std::vector<double> * output) const {
+  void GetOutputAfterSigmoid(const std::vector<double> &input,
+                             std::vector<double> * output) const {
     assert(input.size() == m_num_inputs_per_node);
     output->resize(m_num_nodes);
@@ -71,7 +67,8 @@ public:
 
     for (size_t i = 0; i < m_nodes.size(); i++) {
       double net_sum;
-      m_nodes[i].GetInputInnerProdWithWeights(input_layer_activation, &net_sum);
+      m_nodes[i].GetInputInnerProdWithWeights(input_layer_activation,
+                                              &net_sum);
 
       //dE/dwij = dE/doj . doj/dnetj . dnetj/dwij
       double dE_doj = 0.0;
@@ -80,8 +77,7 @@ public:
 
       dE_doj = deriv_error[i];
       doj_dnetj = utils::deriv_sigmoid(net_sum);
-
-
+
       for (int j = 0; j < m_num_inputs_per_node; j++) {
         (*deltas)[j] += dE_doj * doj_dnetj * m_nodes[i].GetWeights()[j];
diff --git a/src/MLP.cpp b/src/MLP.cpp
index 3f46d6b..e1e91af 100644
--- a/src/MLP.cpp
+++ b/src/MLP.cpp
@@ -20,8 +20,7 @@ bool MLP::ImportNNWeights(const std::vector<double> & weights) {
 
 void MLP::GetOutput(const std::vector<double> &input,
                     std::vector<double> * output,
-                    std::vector<std::vector<double>> * all_layers_activations,
-                    bool apply_softmax) const {
+                    std::vector<std::vector<double>> * all_layers_activations) const {
   assert(input.size() == m_num_inputs);
   int temp_size;
   if (m_num_hidden_layers == 0)
@@ -50,7 +49,7 @@ void MLP::GetOutput(const std::vector<double> &input,
     m_layers[i].GetOutputAfterSigmoid(temp_in, &temp_out);
   }
 
-  if (apply_softmax && temp_out.size() > 1)
+  if (temp_out.size() > 1)
     utils::Softmax(&temp_out);
   *output = temp_out;
@@ -105,11 +104,11 @@ void MLP::UpdateMiniBatch(const std::vector<TrainingSample> &training_sample_set
       }
     }
   }
-
-  for (int i = 0; i < max_iterations; i++) {
-    std::cout << "******************************" << std::endl;
-    std::cout << "******** ITER " << i << std::endl;
-    std::cout << "******************************" << std::endl;
+  size_t i = 0;
+  for (i = 0; i < max_iterations; i++) {
+    //std::cout << "******************************" << std::endl;
+    //std::cout << "******** ITER " << i << std::endl;
+    //std::cout << "******************************" << std::endl;
     double current_iteration_cost_function = 0.0;
     for (auto & training_sample_with_bias : training_sample_set_with_bias) {
       std::vector<double> predicted_output;
@@ -123,16 +122,16 @@ void MLP::UpdateMiniBatch(const std::vector<TrainingSample> &training_sample_set
       assert(correct_output.size() == predicted_output.size());
       std::vector<double> deriv_error_output(predicted_output.size());
 
-      std::cout << training_sample_with_bias << "\t\t";
-      {
-        std::cout << "Predicted output: [";
-        for (int i = 0; i < predicted_output.size(); i++) {
-          if (i != 0)
-            std::cout << ", ";
-          std::cout << predicted_output[i];
-        }
-        std::cout << "]" << std::endl;
-      }
+      //std::cout << training_sample_with_bias << "\t\t";
+      //{
+      //  std::cout << "Predicted output: [";
+      //  for (int i = 0; i < predicted_output.size(); i++) {
+      //    if (i != 0)
+      //      std::cout << ", ";
+      //    std::cout << predicted_output[i];
+      //  }
+      //  std::cout << "]" << std::endl;
+      //}
 
       for (int j = 0; j < predicted_output.size(); j++) {
         current_iteration_cost_function +=
@@ -146,7 +145,8 @@ void MLP::UpdateMiniBatch(const std::vector<TrainingSample> &training_sample_set
                                   learning_rate);
     }
 
-    std::cout << "Iteration cost function f(error): "
+    if ((i % (max_iterations / 100)) == 0)
+      std::cout << "Iteration " << i << " cost function f(error): "
       << current_iteration_cost_function << std::endl;
     if (current_iteration_cost_function < min_error_cost)
       break;
@@ -173,6 +173,7 @@ void MLP::UpdateMiniBatch(const std::vector<TrainingSample> &training_sample_set
 
   std::cout << "******************************" << std::endl;
   std::cout << "******* TRAINING ENDED *******" << std::endl;
+  std::cout << "******* " << i << " iters *******" << std::endl;
   std::cout << "******************************" << std::endl;
   {
     int layer_i = -1;
diff --git a/src/MLP.h b/src/MLP.h
index 849aa97..a70f219 100644
--- a/src/MLP.h
+++ b/src/MLP.h
@@ -48,8 +48,7 @@ public:
 
   void GetOutput(const std::vector<double> &input,
                  std::vector<double> * output,
-                 std::vector<std::vector<double>> * all_layers_activations = nullptr,
-                 bool apply_softmax = false) const;
+                 std::vector<std::vector<double>> * all_layers_activations = nullptr) const;
   void GetOutputClass(const std::vector<double> &output, size_t * class_id) const;
 
   void UpdateMiniBatch(const std::vector<TrainingSample> &training_sample_set_with_bias,
diff --git a/src/MLPTest.cpp b/src/MLPTest.cpp
index bb6ca41..332b83c 100644
--- a/src/MLPTest.cpp
+++ b/src/MLPTest.cpp
@@ -17,10 +17,12 @@ UNIT(LearnAND) {
 
   std::vector<TrainingSample> training_set =
   {
-    {{ 0, 0 },{0.0}},
-    {{ 0, 1 },{0.0}},
-    {{ 1, 0 },{0.0}},
-    {{ 1, 1 },{1.0}}
+    { { 0, 0 },{ 0.0 } },
+    { { 0, 1 },{ 0.0 } },
+    { { 1, 0 },{ 0.0 } },
+    { { 1, 1 },{ 1.0 } },
+    { { 1, 1 },{ 1.0 } },
+    { { 1, 1 },{ 1.0 } }
   };
   bool bias_already_in = false;
   std::vector<TrainingSample> training_sample_set_with_bias(training_set);
@@ -33,14 +35,14 @@ UNIT(LearnAND) {
 
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
-  MLP my_mlp(num_features, 1, 0, 5, true, 0.5);
+  MLP my_mlp(num_features, 1, 1, 2, false);
   //Train MLP
-  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 2, 1000, 0.245);
+  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
 
-  for (const auto & training_sample : training_sample_set_with_bias) { 
+  for (const auto & training_sample : training_sample_set_with_bias) {
     std::vector<double> output;
     my_mlp.GetOutput(training_sample.input_vector(), &output);
-    bool predicted_output = output[0]> 0.5 ? true : false;
+    bool predicted_output = output[0] > 0.5 ? true : false;
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(predicted_output == correct_output);
   }
@@ -53,10 +55,12 @@ UNIT(LearnNAND) {
 
   std::vector<TrainingSample> training_set =
   {
-    {{ 0, 0 },{1.0}},
-    {{ 0, 1 },{1.0}},
-    {{ 1, 0 },{1.0}},
-    {{ 1, 1 },{0.0}}
+    { { 0, 0 },{ 1.0 } },
+    { { 0, 1 },{ 1.0 } },
+    { { 1, 0 },{ 1.0 } },
+    { { 1, 1 },{ 0.0 } },
+    { { 1, 1 },{ 0.0 } },
+    { { 1, 1 },{ 0.0 } }
   };
   bool bias_already_in = false;
   std::vector<TrainingSample> training_sample_set_with_bias(training_set);
@@ -69,14 +73,14 @@ UNIT(LearnNAND) {
 
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
-  MLP my_mlp(num_features, 1, 0, 5, true, 0.5);
+  MLP my_mlp(num_features, 1, 1, 2, false);
   //Train MLP
-  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 2, 1000, 0.245);
+  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     std::vector<double> output;
     my_mlp.GetOutput(training_sample.input_vector(), &output);
-    bool predicted_output = output[0]> 0.5 ? true : false;
+    bool predicted_output = output[0] > 0.5 ? true : false;
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(predicted_output == correct_output);
   }
@@ -89,10 +93,12 @@ UNIT(LearnOR) {
 
   std::vector<TrainingSample> training_set =
   {
-    {{ 0, 0 },{0.0}},
-    {{ 0, 1 },{1.0}},
-    {{ 1, 0 },{1.0}},
-    {{ 1, 1 },{1.0}}
+    { { 0, 0 },{ 0.0 } },
+    { { 0, 0 },{ 0.0 } },
+    { { 0, 0 },{ 0.0 } },
+    { { 0, 1 },{ 1.0 } },
+    { { 1, 0 },{ 1.0 } },
+    { { 1, 1 },{ 1.0 } }
   };
   bool bias_already_in = false;
   std::vector<TrainingSample> training_sample_set_with_bias(training_set);
@@ -105,14 +111,14 @@ UNIT(LearnOR) {
 
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
-  MLP my_mlp(num_features, 1, 0, 5, true, 0.5);
+  MLP my_mlp(num_features, 1, 1, 2, false);
   //Train MLP
-  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 2, 1000, 0.245);
+  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     std::vector<double> output;
     my_mlp.GetOutput(training_sample.input_vector(), &output);
-    bool predicted_output = output[0]> 0.5 ? true : false;
+    bool predicted_output = output[0] > 0.5 ? true : false;
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(predicted_output == correct_output);
   }
@@ -125,10 +131,12 @@ UNIT(LearnNOR) {
 
   std::vector<TrainingSample> training_set =
   {
-    {{ 0, 0 },{1.0}},
-    {{ 0, 1 },{0.0}},
-    {{ 1, 0 },{0.0}},
-    {{ 1, 1 },{0.0}}
+    { { 0, 0 },{ 1.0 } },
+    { { 0, 0 },{ 1.0 } },
+    { { 0, 0 },{ 1.0 } },
+    { { 0, 1 },{ 0.0 } },
+    { { 1, 0 },{ 0.0 } },
+    { { 1, 1 },{ 0.0 } }
   };
   bool bias_already_in = false;
   std::vector<TrainingSample> training_sample_set_with_bias(training_set);
@@ -141,14 +149,14 @@ UNIT(LearnNOR) {
 
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
-  MLP my_mlp(num_features, 1, 0, 5, true, 0.5);
+  MLP my_mlp(num_features, 1, 1, 2, false);
   //Train MLP
-  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 2, 1000, 0.245);
+  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     std::vector<double> output;
     my_mlp.GetOutput(training_sample.input_vector(), &output);
-    bool predicted_output = output[0]> 0.5 ? true : false;
+    bool predicted_output = output[0] > 0.5 ? true : false;
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(predicted_output == correct_output);
   }
@@ -156,49 +164,15 @@ UNIT(LearnNOR) {
   std::cout << std::endl;
 }
 
-//UNIT(LearnXOR) {
-//  std::cout << "Train XOR function with mlp." << std::endl;
-//
-//  std::vector<TrainingSample> training_set =
-//  {
-//    { { 0, 0 },{ 0.0 } },
-//    { { 0, 1 },{ 1.0 } },
-//    { { 1, 0 },{ 1.0 } },
-//    { { 1, 1 },{ 0.0 } }
-//  };
-//  bool bias_already_in = false;
-//  std::vector<TrainingSample> training_sample_set_with_bias(training_set);
-//  //set up bias
-//  if (!bias_already_in) {
-//    for (auto & training_sample_with_bias : training_sample_set_with_bias) {
-//      training_sample_with_bias.AddBiasValue(1);
-//    }
-//  }
-//
-//  size_t num_examples = training_sample_set_with_bias.size();
-//  size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
-//  MLP my_mlp(num_features, 1, 0, 5, true, 0.5);
-//  //Train MLP
-//  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 2, 1000, 0.245);
-//
-//  for (const auto & training_sample : training_sample_set_with_bias) {
-//    std::vector<double> output;
-//    my_mlp.GetOutput(training_sample.input_vector(), &output);
-//    bool predicted_output = output[0]> 0.5 ? true : false;
-//    bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
-//    ASSERT_TRUE(predicted_output == correct_output);
-//  }
-//  std::cout << "Trained with success." << std::endl;
-//  std::cout << std::endl;
-//}
-
-UNIT(LearnNOT) {
-  std::cout << "Train NOT function with mlp." << std::endl;
+UNIT(LearnXOR) {
+  std::cout << "Train XOR function with mlp." << std::endl;
 
   std::vector<TrainingSample> training_set =
   {
-    {{ 0},{1.0 }},
-    {{ 1},{0.0 }}
+    { { 0, 0 },{ 0.0 } },
+    { { 0, 1 },{ 1.0 } },
+    { { 1, 0 },{ 1.0 } },
+    { { 1, 1 },{ 0.0 } }
   };
   bool bias_already_in = false;
   std::vector<TrainingSample> training_sample_set_with_bias(training_set);
@@ -211,14 +185,48 @@ UNIT(LearnNOT) {
 
   size_t num_examples = training_sample_set_with_bias.size();
   size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
-  MLP my_mlp(num_features, 1, 0, 5, true, 0.5);
+  MLP my_mlp(num_features, 1, 1, 2, false);
   //Train MLP
-  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 2, 1000, 0.245);
+  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 50'000, 0.25);
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     std::vector<double> output;
     my_mlp.GetOutput(training_sample.input_vector(), &output);
-    bool predicted_output = output[0]> 0.5 ? true : false;
+    bool predicted_output = output[0] > 0.5 ? true : false;
+    bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
+    ASSERT_TRUE(predicted_output == correct_output);
+  }
+  std::cout << "Trained with success." << std::endl;
+  std::cout << std::endl;
+}
+
+UNIT(LearnNOT) {
+  std::cout << "Train NOT function with mlp." << std::endl;
+
+  std::vector<TrainingSample> training_set =
+  {
+    { { 0 },{ 1.0 } },
+    { { 1 },{ 0.0 } }
+  };
+  bool bias_already_in = false;
+  std::vector<TrainingSample> training_sample_set_with_bias(training_set);
+  //set up bias
+  if (!bias_already_in) {
+    for (auto & training_sample_with_bias : training_sample_set_with_bias) {
+      training_sample_with_bias.AddBiasValue(1);
+    }
+  }
+
+  size_t num_examples = training_sample_set_with_bias.size();
+  size_t num_features = training_sample_set_with_bias[0].GetInputVectorSize();
+  MLP my_mlp(num_features, 1, 1, 2, false);
+  //Train MLP
+  my_mlp.UpdateMiniBatch(training_sample_set_with_bias, 0.5, 500, 0.25);
+
+  for (const auto & training_sample : training_sample_set_with_bias) {
+    std::vector<double> output;
+    my_mlp.GetOutput(training_sample.input_vector(), &output);
+    bool predicted_output = output[0] > 0.5 ? true : false;
     bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(predicted_output == correct_output);
   }
diff --git a/src/Node.h b/src/Node.h
index e98707e..b6eedd8 100644
--- a/src/Node.h
+++ b/src/Node.h
@@ -52,7 +52,7 @@ public:
     } else {
       m_weights.resize(m_num_inputs);
       std::generate_n(m_weights.begin(),
-                      m_num_inputs, 
+                      m_num_inputs,
                       utils::gen_rand());
     }
   }
@@ -102,10 +102,11 @@ public:
   }
 
   void GetBooleanOutput(const std::vector<double> &input,
-                        bool * bool_output) const {
+                        bool * bool_output,
+                        double threshold = 0.5) const {
     double value;
     GetOutputAfterSigmoid(input, &value);
-    *bool_output = (value > 0.5) ? true : false;
+    *bool_output = (value > threshold) ? true : false;
   };
 
   void UpdateWeights(const std::vector<double> &x,
diff --git a/src/NodeTest.cpp b/src/NodeTest.cpp
index da87717..001131c 100644
--- a/src/NodeTest.cpp
+++ b/src/NodeTest.cpp
@@ -36,7 +36,7 @@ void Train(Node & node,
     int error_count = 0;
    for (auto & training_sample_with_bias : training_sample_set_with_bias) {
       bool prediction;
-      node.GetBooleanOutput(training_sample_with_bias.input_vector(), &prediction);
+      node.GetBooleanOutput(training_sample_with_bias.input_vector(), &prediction, 0.5);
       bool correct_output = training_sample_with_bias.output_vector()[0] > 0.5 ? true : false;
       if (prediction != correct_output) {
         error_count++;
@@ -82,8 +82,8 @@ UNIT(LearnAND) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id);
-    bool correct_output = training_sample.output_vector()[0] > 0 ? true : false;
+    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
   std::cout << "Trained with success." << std::endl;
@@ -115,8 +115,8 @@ UNIT(LearnNAND) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id);
-    bool correct_output = training_sample.output_vector()[0] > 0 ? true : false;
+    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
   std::cout << "Trained with success." << std::endl;
@@ -148,8 +148,8 @@ UNIT(LearnOR) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id);
-    bool correct_output = training_sample.output_vector()[0] > 0 ? true : false;
+    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
   std::cout << "Trained with success." << std::endl;
@@ -180,8 +180,8 @@ UNIT(LearnNOR) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id);
-    bool correct_output = training_sample.output_vector()[0] > 0 ? true : false;
+    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
   std::cout << "Trained with success." << std::endl;
@@ -211,8 +211,8 @@ UNIT(LearnNOT) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id);
-    bool correct_output = training_sample.output_vector()[0] > 0 ? true : false;
+    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     ASSERT_TRUE(class_id == correct_output);
   }
   std::cout << "Trained with success." << std::endl;
@@ -244,8 +244,8 @@ UNIT(LearnXOR) {
 
   for (const auto & training_sample : training_sample_set_with_bias) {
     bool class_id;
-    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id);
-    bool correct_output = training_sample.output_vector()[0] > 0 ? true : false;
+    my_node.GetBooleanOutput(training_sample.input_vector(), &class_id, 0.5);
+    bool correct_output = training_sample.output_vector()[0] > 0.5 ? true : false;
     if (class_id != correct_output) {
       std::cout << "Failed to train. "
         << " A simple perceptron cannot learn the XOR function." << std::endl;
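
Reviewer note: below is a minimal sketch of how the revised API reads after this patch, stitched together from the updated LearnXOR unit. It is illustrative, not part of the diff. The constructor argument order (inputs, outputs, hidden layers, nodes per hidden layer, use_constant_weight_init), the UpdateMiniBatch parameters (learning rate, max iterations, min error cost), and the assumption that "MLP.h" also provides TrainingSample are all inferred from the test code above.

#include <iostream>
#include <vector>
#include "MLP.h"  // assumed to also pull in TrainingSample, as the tests do

int main() {
  // XOR truth table; a hidden layer is required, which is why the new
  // MLPTest LearnXOR unit can pass where the single-node NodeTest cannot.
  std::vector<TrainingSample> training_set = {
    { { 0, 0 },{ 0.0 } },
    { { 0, 1 },{ 1.0 } },
    { { 1, 0 },{ 1.0 } },
    { { 1, 1 },{ 0.0 } }
  };
  for (auto & sample : training_set)
    sample.AddBiasValue(1);  // bias input, set up exactly as the tests do

  // 2 inputs + bias, 1 output, 1 hidden layer of 2 nodes, random weight init.
  MLP my_mlp(training_set[0].GetInputVectorSize(), 1, 1, 2, false);
  // Learning rate 0.5, up to 50'000 iterations, stop once cost drops below 0.25.
  my_mlp.UpdateMiniBatch(training_set, 0.5, 50'000, 0.25);

  std::vector<double> output;
  // GetOutput no longer takes an apply_softmax flag; after this patch softmax
  // is applied automatically whenever the output layer has more than one node
  // (here it has one, so the raw sigmoid activation is returned).
  my_mlp.GetOutput(training_set[0].input_vector(), &output);
  std::cout << "XOR(0,0) -> " << output[0] << std::endl;  // expect < 0.5
  return 0;
}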