added global sum and game of life

PhilipNelson5 · Oct 13, 2018 · c371245 · c371245
1 parent a7186a6
commit c371245
Show file tree

Hide file tree

Showing 123 changed files with 1,312 additions and 0 deletions.
diff --git a/CS5500_ParallelProgramming/7-GlobalSum/main.cpp b/CS5500_ParallelProgramming/7-GlobalSum/main.cpp
@@ -0,0 +1,168 @@
+#include "random.hpp"
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <iomanip>
+#include <iostream>
+#include <mpi.h>
+#include <string>
+#include <unistd.h>
+#include <vector>
+
+// Microseconds passed to usleep() after each communication step to simulate
+// work being done (100000 us = 0.1 s).
+#define slep 100000
+
+/**
+ * Gather one int from every rank onto rank 0 and print the values as a
+ * two-column table (rank, value) beneath a title line.
+ *
+ * This wraps MPI_Gather, so every rank in MPI_COMM_WORLD must call it.
+ *
+ * @param data  This rank's value to report
+ * @param title Heading printed by rank 0 above the table
+ */
+void print1per(int data, std::string title = "")
+{
+  int rank;
+  int world_size;
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+  // MPI_Gather only reads the receive buffer on the root, so the other ranks
+  // keep an empty vector. The vector owns its storage, so nothing leaks.
+  std::vector<int> gathered(0 == rank ? world_size : 0);
+  MPI_Gather(&data, 1, MPI_INT, gathered.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  if (0 != rank)
+  {
+    return;
+  }
+
+  std::cout << title << '\n';
+  for (int i = 0; i < world_size; ++i)
+  {
+    std::cout << std::setw(5) << i << std::setw(5) << gathered[i] << "\n";
+  }
+  // Blank separator line; std::endl also flushes the finished table.
+  std::cout << std::endl;
+}
+
+/**
+ * Exchange one int with this rank's partner along one hypercube dimension.
+ *
+ * The partner is the rank whose number differs from ours only in bit c.
+ * Both partners must call this with the same c.
+ *
+ * @param c        Index of the bit (dimension) that selects the partner
+ * @param sendData Value sent to the partner
+ * @param rank     This process's rank in MPI_COMM_WORLD
+ * @return         The value received from the partner
+ */
+int cube(int c, int sendData, int rank)
+{
+  int recvData;
+  const int partner = rank ^ (1 << c);
+
+  // Both partners send and receive at the same time. A combined
+  // send/receive cannot deadlock, whereas two blocking MPI_Send calls facing
+  // each other only complete if the MPI library happens to buffer them.
+  MPI_Sendrecv(&sendData,
+               1,
+               MPI_INT,
+               partner,
+               0,
+               &recvData,
+               1,
+               MPI_INT,
+               partner,
+               0,
+               MPI_COMM_WORLD,
+               MPI_STATUS_IGNORE);
+
+  return recvData;
+}
+
+/**
+ * Pass one int a single step around a ring of all ranks.
+ *
+ * Each rank sends to the neighbour `dir` positions ahead of it and receives
+ * from the neighbour `dir` positions behind it, wrapping at both ends.
+ * Every rank must call this with the same dir.
+ *
+ * @param dir        Direction of travel (+1 forwards, -1 backwards)
+ * @param sendData   Value passed on to the next rank
+ * @param rank       This process's rank in MPI_COMM_WORLD
+ * @param world_size Number of ranks in MPI_COMM_WORLD
+ * @return           The value received from the previous rank
+ */
+int ring(int dir, int sendData, int rank, int world_size)
+{
+  int recvData;
+
+  // C++ % keeps the sign of the dividend (e.g. -1 % 8 == -1), so add
+  // world_size before the final reduction to keep both neighbours inside
+  // [0, world_size).
+  const int dest = ((rank + dir) % world_size + world_size) % world_size;
+  const int src = ((rank - dir) % world_size + world_size) % world_size;
+
+  // Every rank sends and receives in the same step; a combined send/receive
+  // avoids the deadlock that a ring of blocking MPI_Send calls can cause
+  // when the library does not buffer the messages.
+  MPI_Sendrecv(&sendData,
+               1,
+               MPI_INT,
+               dest,
+               0,
+               &recvData,
+               1,
+               MPI_INT,
+               src,
+               0,
+               MPI_COMM_WORLD,
+               MPI_STATUS_IGNORE);
+
+  return recvData;
+}
+
+/**
+ * Global sum using hypercube exchanges, then print every rank's total.
+ *
+ * In each step a rank swaps its running total with its partner along one
+ * dimension and adds what it receives. The caller must ensure world_size is
+ * a power of two so that every partner rank exists.
+ *
+ * @param num        This rank's starting value
+ * @param rank       This process's rank in MPI_COMM_WORLD
+ * @param world_size Number of ranks in MPI_COMM_WORLD
+ */
+void cubeSum(int num, int rank, int world_size)
+{
+  // One exchange per dimension: dimension `dim` exists while
+  // 2^dim < world_size. Integer shifts avoid a floating-point log2 and its
+  // truncation back to int.
+  for (int dim = 0; (1 << dim) < world_size; ++dim)
+  {
+    num += cube(dim, num, rank);
+    usleep(slep);
+  }
+  print1per(num, "cube sum");
+}
+
+/**
+ * Global sum using ring communication, then print every rank's total.
+ *
+ * Each rank forwards the value it received in the previous step (its own
+ * value at first) and adds the newly received value to its total. The loop
+ * runs world_size - 1 times.
+ *
+ * @param num        This rank's starting value
+ * @param rank       This process's rank in MPI_COMM_WORLD
+ * @param world_size Number of ranks in MPI_COMM_WORLD
+ */
+void ringSum(int num, int rank, int world_size)
+{
+  // Value currently travelling around the ring through this rank.
+  int in_flight = num;
+
+  int hops_left = world_size - 1;
+  while (hops_left-- > 0)
+  {
+    in_flight = ring(1, in_flight, rank, world_size);
+    num += in_flight;
+    usleep(slep);
+  }
+
+  print1per(num, "ring sum");
+}
+
+/**
+ * Global sum where every other rank sends its value straight to rank 0.
+ *
+ * Rank 0 receives world_size - 1 messages in whatever order they arrive,
+ * accumulates them, and prints the total. No other rank learns the sum.
+ *
+ * @param num        This rank's starting value
+ * @param rank       This process's rank in MPI_COMM_WORLD
+ * @param world_size Number of ranks in MPI_COMM_WORLD
+ */
+void masterSlaveSum(int num, int rank, int world_size)
+{
+  if (0 != rank)
+  {
+    // Non-root ranks only hand their value to rank 0.
+    MPI_Send(&num, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
+    return;
+  }
+
+  int pending = world_size - 1;
+  while (pending-- > 0)
+  {
+    int incoming;
+    MPI_Recv(&incoming,
+             1,
+             MPI_INT,
+             MPI_ANY_SOURCE,
+             0,
+             MPI_COMM_WORLD,
+             MPI_STATUS_IGNORE);
+    num += incoming;
+    usleep(slep);
+  }
+
+  std::cout << "master slave sum\n    0   " << num << "\n\n";
+}
+
+/**
+ * Global sum using MPI_Allreduce, then print every rank's total.
+ *
+ * @param num This rank's starting value
+ */
+void mpiAllReduce(int num)
+{
+  // MPI does not allow the same buffer to be passed as both send and receive
+  // buffer; MPI_IN_PLACE is the sanctioned way to reduce into `num`. The
+  // datatype must also describe the real buffer type, which is int.
+  MPI_Allreduce(MPI_IN_PLACE, &num, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+  usleep(slep);
+  print1per(num, "all reduce");
+}
+
+/**
+ * Scatter one random value to each rank, then compute and time the global
+ * sum with four strategies: hypercube, ring, master/slave and MPI_Allreduce.
+ *
+ * @return EXIT_SUCCESS normally; EXIT_FAILURE when the number of processes
+ *         is not a power of two
+ */
+int main(int argc, char** argv)
+{
+  MPI_Init(&argc, &argv);
+
+  int rank, world_size;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+  // The hypercube exchange pairs ranks by flipping single bits, which only
+  // works when the rank count is a power of two.
+  if (0 != (world_size & (world_size - 1)))
+  {
+    if (rank == 0)
+    {
+      std::cerr << "There must be a power of 2 number of threads\n";
+    }
+
+    MPI_Finalize();
+    // Report the refusal to run as a failure to the launcher.
+    return EXIT_FAILURE;
+  }
+
+  // Only the root's send buffer is read by MPI_Scatter, so the other ranks
+  // leave the vector empty.
+  std::vector<int> data;
+  if (0 == rank)
+  {
+    data.resize(world_size);
+    // Integer fill of 0..9: the same values the double generator on [0, 10)
+    // produced once truncated to int.
+    random_int_fill(data.begin(), data.end(), 0, 9);
+  }
+
+  int num;
+  MPI_Scatter(data.data(), 1, MPI_INT, &num, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  // The title is only printed by rank 0.
+  print1per(num, "original data");
+
+  auto t1 = MPI_Wtime();
+  cubeSum(num, rank, world_size);
+  auto t2 = MPI_Wtime();
+  ringSum(num, rank, world_size);
+  auto t3 = MPI_Wtime();
+  masterSlaveSum(num, rank, world_size);
+  auto t4 = MPI_Wtime();
+  mpiAllReduce(num);
+  auto t5 = MPI_Wtime();
+
+  if (0 == rank)
+    std::cout << "cube: " << t2 - t1 << "\nring: " << t3 - t2
+              << "\nmaster slave: " << t4 - t3 << "\nall reduce: " << t5 - t4
+              << "\n";
+
+  MPI_Finalize();
+
+  return (EXIT_SUCCESS);
+}
diff --git a/CS5500_ParallelProgramming/7-GlobalSum/makefile b/CS5500_ParallelProgramming/7-GlobalSum/makefile
@@ -0,0 +1,12 @@
+# Build the global-sum demo with the MPI C++ compiler wrapper.
+# Override the compiler with e.g. `make MPICC=mpicxx`.
+OBJS = main.cpp
+EXECS=release
+# Name of the binary the release target produces.
+BIN=release.out
+MPICC?=mpic++
+FLAGS=-O3
+
+# None of these targets names a file that gets created.
+.PHONY: all release clean
+
+all: ${EXECS}
+
+release: ${OBJS}
+	${MPICC} ${FLAGS} ${OBJS} -o ${BIN}
+
+# Remove the binary that release actually writes.
+clean:
+	rm -f ${BIN}
diff --git a/CS5500_ParallelProgramming/7-GlobalSum/random.hpp b/CS5500_ParallelProgramming/7-GlobalSum/random.hpp
@@ -0,0 +1,72 @@
+#ifndef RANDOM_HPP
+#define RANDOM_HPP
+
+#include <algorithm>
+#include <functional>
+#include <random>
+
+/**
+ * Generate a random number from [low, high]
+ *
+ * Declared inline because the definition lives in a header; without it,
+ * including this header from more than one source file fails to link.
+ * The engine is seeded once, on first use, from std::random_device.
+ *
+ * @param low  The lower bound
+ * @param high The upper bound
+ * @return     A random number on the range [low, high]
+ */
+inline int random_int(int low, int high)
+{
+  static std::random_device rd;
+  static std::mt19937 mt(rd());
+  std::uniform_int_distribution<> dist(low, high);
+  return dist(mt);
+}
+
+/**
+ * Generate a random number from [low, high)
+ *
+ * Declared inline because the definition lives in a header; without it,
+ * including this header from more than one source file fails to link.
+ * The engine is seeded once, on first use, from std::random_device.
+ *
+ * @param low  The lower bound
+ * @param high The upper bound
+ * @return     A random number on the range [low, high)
+ */
+inline double random_double(double low, double high)
+{
+  static std::random_device rd;
+  static std::mt19937 mt(rd());
+  std::uniform_real_distribution<> dist(low, high);
+  return dist(mt);
+}
+
+/**
+ * Fill a container from [first, last) with random numbers from [low, high]
+ *
+ * The engine is seeded once, on first use, from std::random_device and is
+ * advanced by every call, so successive fills produce different values.
+ *
+ * @param first Iterator to beginning of range to fill
+ * @param last  Iterator to end of range to fill
+ * @param low   The lower bound
+ * @param high  The upper bound
+ */
+template <typename it>
+void random_int_fill(it first, it last, const int low, const int high)
+{
+  static std::random_device rd;
+  static std::mt19937 mt(rd());
+  std::uniform_int_distribution<> dist(low, high);
+  // Draw from the static engine itself. std::bind(dist, mt) would store a
+  // copy of the engine, leaving the static one untouched so that every call
+  // repeats the same sequence.
+  std::generate(first, last, [&dist] { return dist(mt); });
+}
+
+/**
+ * Fill a container from [first, last) with random numbers from [low, high)
+ *
+ * The engine is seeded once, on first use, from std::random_device and is
+ * advanced by every call, so successive fills produce different values.
+ *
+ * @param first Iterator to beginning of range to fill
+ * @param last  Iterator to end of range to fill
+ * @param low   The lower bound
+ * @param high  The upper bound
+ */
+template <typename it>
+void random_double_fill(it first, it last, const double low, const double high)
+{
+  static std::random_device rd;
+  static std::mt19937 mt(rd());
+  std::uniform_real_distribution<double> dist(low, high);
+  // Draw from the static engine itself. std::bind(dist, mt) would store a
+  // copy of the engine, leaving the static one untouched so that every call
+  // repeats the same sequence.
+  std::generate(first, last, [&dist] { return dist(mt); });
+}
+
+#endif
diff --git a/CS5500_ParallelProgramming/7-GlobalSum/report/philip_nelson_hw7.pdf b/CS5500_ParallelProgramming/7-GlobalSum/report/philip_nelson_hw7.pdf
diff --git a/CS5500_ParallelProgramming/7-GlobalSum/report/philip_nelson_hw7.tex b/CS5500_ParallelProgramming/7-GlobalSum/report/philip_nelson_hw7.tex
@@ -0,0 +1,96 @@
+\documentclass{article}
+
+\usepackage[utf8]{inputenc}
+\usepackage{geometry}
+\usepackage{listings}
+\usepackage{graphicx}
+\usepackage{geometry}
+\usepackage{courier}
+
+\graphicspath{{images/}}
+
+\title{HW 7 \\ Global Sum}
+\author{Philip Nelson}
+\date{2018 October 12}
+
+\lstset{basicstyle=\footnotesize\ttfamily\normalsize,
+        breaklines=true,
+        stepnumber=1,
+       }
+
+\begin{document}
+
+\maketitle
+
+\section*{Introduction}
+
+The purpose of this assignment is to write an MPI program that performs a global sum. I have implemented several different variants. The first takes advantage of cube (hypercube) network communication. The second uses ring communication. The third is a naive master slave method. The fourth uses the built-in MPI\_Allreduce function. I made each iteration sleep for one tenth of a second in order to simulate some kind of work being executed.
+
+\section*{Code}
+
+\lstinputlisting[showstringspaces=false, language=c++, numbers=left]{../main.cpp}
+\newpage
+
+\section*{Output}
+
+\begin{lstlisting}[showstringspaces=false]
+
+# mpic++ -O3 main.cpp -o release.out
+
+# mpiexec -n 8 --oversubscribe release.out
+
+original data                                               
+    0    5
+    1    3
+    2    0
+    3    8
+    4    4
+    5    8
+    6    3
+    7    9
+
+cube sum
+    0   40
+    1   40
+    2   40
+    3   40
+    4   40
+    5   40
+    6   40
+    7   40
+
+ring sum
+    0   40
+    1   40
+    2   40
+    3   40
+    4   40
+    5   40
+    6   40
+    7   40
+
+master slave sum
+    0   40
+
+all reduce
+    0   40
+    1   40
+    2   40
+    3   40
+    4   40
+    5   40
+    6   40
+    7   40
+
+cube: 0.305294
+ring: 0.708875
+master slave: 0.701991
+all reduce: 0.101125
+
+\end{lstlisting}
+
+\section*{Findings}
+
+The cube sum was the best performing of my own summing functions, which is unsurprising since it only needs to perform $\log_2 n$ exchanges, where $n$ is the world size, whereas the ring and master slave sums each need $n - 1$. With eight processes it took about $43\%$ of the ring sum's time ($0.305$ s versus $0.709$ s). The ring sum was as performant as the master slave sum; however, with the master slave sum only the master process is left with the result. If you want all processes to have the global sum, the ring sum would be a better option. Getting a good time estimate for the reduce / all reduce isn't possible using this setup, but I imagine it is the best option for performing global sums.
+
+\end{document}
diff --git a/CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/Makefile b/CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/Makefile
@@ -0,0 +1,11 @@
+# Build the parallel Game of Life with the MPI C++ compiler wrapper.
+# Override the compiler with e.g. `make MPICC=mpicxx`.
+OBJS = main.cpp
+EXECS=release
+MPICC?=mpic++
+
+# all and clean never create a file of that name.
+.PHONY: all clean
+
+all: ${EXECS}
+
+# Libraries go after the sources that use them: the linker resolves symbols
+# left to right, so a library listed first can be dropped before anything
+# has asked for its symbols.
+release: ${OBJS}
+	${MPICC} -O3 ${OBJS} -o release -lpng
+
+clean:
+	rm -f ${EXECS}
diff --git a/CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/bench100.csv b/CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/bench100.csv
@@ -0,0 +1,10 @@
+Processes,Seconds
+2,0.777249
+3,0.643743
+4,0.556935
+5,0.668211
+6,0.781215
+7,0.957652
+8,0.883065
+9,1.25623
+10,1.1307
diff --git a/CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/benchmark.csv b/CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/benchmark.csv
@@ -0,0 +1,10 @@
+Processes,Simulation Time
+2,3.50693
+3,2.88412
+4,2.72392
+5,3.59394
+6,3.82275
+7,3.82185
+8,4.95354
+9,5.03342
+10,5.54502
diff --git a/CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/benchmark.gp b/CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/benchmark.gp
@@ -0,0 +1,11 @@
+# Plot simulation time against process count from benchmark.csv into a PNG.
+
+# The input file is comma-separated.
+set datafile separator ","
+set term png
+
+# The image is written into the report's image directory.
+set output "report/images/benchmark.png"
+set title "Simulation Time vs Number of Processes"
+# The offset moves the x label down, away from the axis.
+set xlabel "Number of Processes" offset 0,-1.1
+set ylabel "Simulation Time (s)"
+# Legend sits outside the plot area; its entry is taken from the CSV header row.
+set key outside autotitle columnhead
+set xtics axis nomirror in
+set ytics axis nomirror in
+# Column 1 (process count) on x, column 2 (time) on y.
+plot "benchmark.csv" using 1:2 with linespoints lw 2