// Copyright 2019 Markus Fleischhacker
#include <torch/torch.h>
#include <iostream>
#include <iomanip>
#include "bi_rnn.h"
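
// bi_rnn.h is not part of this file. As a rough sketch (an assumption based on
// how the model is used below, not the actual header), it presumably declares a
// module along these lines:
//
//   class BiRNNImpl : public torch::nn::Module {
//    public:
//     BiRNNImpl(int64_t input_size, int64_t hidden_size, int64_t num_layers, int64_t num_classes);
//     torch::Tensor forward(torch::Tensor x);  // assumed to return log-probabilities, see nll_loss below
//   };
//   TORCH_MODULE(BiRNN);  // generates the BiRNN holder used via operator-> in main()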

int main() {
    std::cout << "Bidirectional Recurrent Neural Network\n\n";

    // Device
    torch::Device device(torch::cuda::is_available() ? torch::kCUDA : torch::kCPU);

    // Hyperparameters
    const int64_t sequence_length = 28;
    const int64_t input_size = 28;
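    // Each 28x28 MNIST image is fed to the RNN as a sequence of 28 rows
    // (time steps) with 28 pixels (features) per step; see the view() call below.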
    const int64_t hidden_size = 128;
    const int64_t num_layers = 2;
    const int64_t num_classes = 10;
    const int64_t batch_size = 100;
    const int64_t num_epochs = 2;
    const double learning_rate = 0.003;

    const std::string MNIST_data_path = "../../../../tutorials/intermediate/"
        "bidirectional_recurrent_neural_network/data/";

    // MNIST dataset
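    // (0.1307 and 0.3081 are the mean and standard deviation of the MNIST training set)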
    auto train_dataset = torch::data::datasets::MNIST(MNIST_data_path)
        .map(torch::data::transforms::Normalize<>(0.1307, 0.3081))
        .map(torch::data::transforms::Stack<>());

    // Number of samples in the training set
    auto num_train_samples = train_dataset.size().value();

    auto test_dataset = torch::data::datasets::MNIST(MNIST_data_path, torch::data::datasets::MNIST::Mode::kTest)
        .map(torch::data::transforms::Normalize<>(0.1307, 0.3081))
        .map(torch::data::transforms::Stack<>());

    // Number of samples in the test set
    auto num_test_samples = test_dataset.size().value();

    // Data loader
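    // (the RandomSampler reshuffles the training set each epoch; the test set is read in order)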
    auto train_loader = torch::data::make_data_loader<torch::data::samplers::RandomSampler>(
        std::move(train_dataset), batch_size);
    auto test_loader = torch::data::make_data_loader<torch::data::samplers::SequentialSampler>(
        std::move(test_dataset), batch_size);

    // Model
    BiRNN model(input_size, hidden_size, num_layers, num_classes);
    model->to(device);

    // Optimizer
    auto optimizer = torch::optim::Adam(model->parameters(), torch::optim::AdamOptions(learning_rate));

    // Set floating point output precision
    std::cout << std::fixed << std::setprecision(4);

    std::cout << "Training...\n";

    // Train the model
    for (size_t epoch = 0; epoch != num_epochs; ++epoch) {
        // Initialize running metrics
        float running_loss = 0.0;
        size_t num_correct = 0;

        for (auto& batch : *train_loader) {
            // Reshape images to (batch_size, sequence_length, input_size)
            // and transfer them, with the target labels, to the device
            auto data = batch.data.view({-1, sequence_length, input_size}).to(device);
            auto target = batch.target.to(device);

            // Forward pass
            auto output = model->forward(data);

            // Calculate loss
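            // (torch::nll_loss expects log-probabilities, so BiRNN::forward is
            // assumed to end in log_softmax)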
            auto loss = torch::nll_loss(output, target);
            // Update running loss
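            // (nll_loss returns the batch mean by default, so scale by the
            // batch size to accumulate a sum over samples)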
            running_loss += loss.item().toFloat() * data.size(0);

            // Calculate prediction
            auto prediction = output.argmax(1);

            // Update number of correctly classified samples
            num_correct += prediction.eq(target).sum().item().toLong();

            // Backward pass and optimize
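            // (zero_grad() clears gradients accumulated by the previous
            // iteration before backward() adds the new ones)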
            optimizer.zero_grad();
            loss.backward();
            optimizer.step();
        }

        auto sample_mean_loss = running_loss / num_train_samples;
        auto accuracy = static_cast<float>(num_correct) / num_train_samples;

        std::cout << "Epoch [" << (epoch + 1) << "/" << num_epochs << "], Trainset - Loss: "
            << sample_mean_loss << ", Accuracy: " << accuracy << '\n';
    }

    std::cout << "Training finished!\n\n";
    std::cout << "Testing...\n";

    // Test the model
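    // eval() switches modules such as dropout to inference behavior;
    // NoGradGuard disables gradient tracking for everything below.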
    model->eval();
    torch::NoGradGuard no_grad;

    float running_loss = 0.0;
    size_t num_correct = 0;

    for (const auto& batch : *test_loader) {
        auto data = batch.data.view({-1, sequence_length, input_size}).to(device);
        auto target = batch.target.to(device);

        auto output = model->forward(data);

        auto loss = torch::nll_loss(output, target);
        running_loss += loss.item().toFloat() * data.size(0);

        auto prediction = output.argmax(1);
        num_correct += prediction.eq(target).sum().item().toLong();
    }

    std::cout << "Testing finished!\n";

    auto test_accuracy = static_cast<float>(num_correct) / num_test_samples;
    auto test_sample_mean_loss = running_loss / num_test_samples;

    std::cout << "Testset - Loss: " << test_sample_mean_loss << ", Accuracy: " << test_accuracy << '\n';
}