-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a7186a6
commit c371245
Showing
123 changed files
with
1,312 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
#include "random.hpp" | ||
#include <algorithm> | ||
#include <iomanip> | ||
#include <iostream> | ||
#include <mpi.h> | ||
#include <unistd.h> | ||
#include <vector> | ||
|
||
#define slep 100000 | ||
|
||
void print1per(int data, std::string title = "") | ||
{ | ||
int rank; | ||
int word_size; | ||
|
||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | ||
MPI_Comm_size(MPI_COMM_WORLD, &word_size); | ||
|
||
if (0 == rank) | ||
{ | ||
int* dArray = new int[word_size]; | ||
MPI_Gather(&data, 1, MPI_INT, dArray, 1, MPI_INT, 0, MPI_COMM_WORLD); | ||
|
||
std::cout << title << '\n'; | ||
for (int i = 0; i < word_size; ++i) | ||
{ | ||
std::cout << std::setw(5) << i << std::setw(5) << dArray[i] << "\n"; | ||
} | ||
std::cout << std::endl; | ||
} | ||
else | ||
{ | ||
MPI_Gather(&data, 1, MPI_INT, nullptr, 1, MPI_INT, 0, MPI_COMM_WORLD); | ||
} | ||
} | ||
|
||
int cube(int c, int sendData, int rank) | ||
{ | ||
int recvData; | ||
auto dest = rank ^ (1 << c); | ||
|
||
MPI_Send(&sendData, 1, MPI_INT, dest, 0, MPI_COMM_WORLD); | ||
MPI_Recv(&recvData, 1, MPI_INT, dest, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
|
||
return recvData; | ||
} | ||
|
||
int ring(int dir, int sendData, int rank, int world_size) | ||
{ | ||
int recvData; | ||
auto dest = (rank + 1 * dir) % world_size; | ||
auto src = (rank - 1 * dir) % world_size; | ||
|
||
MPI_Send(&sendData, 1, MPI_INT, dest, 0, MPI_COMM_WORLD); | ||
MPI_Recv(&recvData, 1, MPI_INT, src, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
|
||
return recvData; | ||
} | ||
|
||
void cubeSum(int num, int rank, int world_size) | ||
{ | ||
int log2n = log2(world_size); | ||
for (auto i = 0; i < log2n; ++i) | ||
{ | ||
num += cube(i, num, rank); | ||
usleep(slep); | ||
} | ||
print1per(num, "cube sum"); | ||
} | ||
|
||
void ringSum(int num, int rank, int world_size) | ||
{ | ||
int next, prev = num; | ||
|
||
for (auto i = 0; i < world_size - 1; ++i) | ||
{ | ||
next = ring(1, prev, rank, world_size); | ||
num += next; | ||
prev = next; | ||
usleep(slep); | ||
} | ||
print1per(num, "ring sum"); | ||
} | ||
|
||
void masterSlaveSum(int num, int rank, int world_size) | ||
{ | ||
if (0 == rank) | ||
{ | ||
int recvData; | ||
for (auto i = 1; i < world_size; ++i) | ||
{ | ||
MPI_Recv(&recvData, | ||
1, | ||
MPI_INT, | ||
MPI_ANY_SOURCE, | ||
0, | ||
MPI_COMM_WORLD, | ||
MPI_STATUS_IGNORE); | ||
num += recvData; | ||
usleep(slep); | ||
} | ||
std::cout << "master slave sum\n 0 " << num << "\n\n"; | ||
} | ||
else | ||
{ | ||
MPI_Send(&num, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); | ||
} | ||
} | ||
|
||
void mpiAllReduce(int num) | ||
{ | ||
MPI_Allreduce(&num, &num, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); | ||
usleep(slep); | ||
print1per(num, "all reduce"); | ||
} | ||
|
||
int main(int argc, char** argv) | ||
{ | ||
MPI_Init(&argc, &argv); | ||
|
||
int rank, world_size; | ||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | ||
MPI_Comm_size(MPI_COMM_WORLD, &world_size); | ||
|
||
if (0 != (world_size & (world_size - 1))) | ||
{ | ||
if (rank == 0) | ||
{ | ||
std::cerr << "There must be a power of 2 number of threads\n"; | ||
} | ||
|
||
MPI_Finalize(); | ||
exit(EXIT_SUCCESS); | ||
} | ||
|
||
int num; | ||
if (0 == rank) | ||
{ | ||
std::vector<int> data(world_size); | ||
random_double_fill(begin(data), end(data), 0, 10); | ||
MPI_Scatter(data.data(), 1, MPI_INT, &num, 1, MPI_INT, 0, MPI_COMM_WORLD); | ||
print1per(num, "original data"); | ||
} | ||
else | ||
{ | ||
MPI_Scatter(nullptr, 1, MPI_INT, &num, 1, MPI_INT, 0, MPI_COMM_WORLD); | ||
print1per(num); | ||
} | ||
|
||
auto t1 = MPI_Wtime(); | ||
cubeSum(num, rank, world_size); | ||
auto t2 = MPI_Wtime(); | ||
ringSum(num, rank, world_size); | ||
auto t3 = MPI_Wtime(); | ||
masterSlaveSum(num, rank, world_size); | ||
auto t4 = MPI_Wtime(); | ||
mpiAllReduce(num); | ||
auto t5 = MPI_Wtime(); | ||
|
||
if (0 == rank) | ||
std::cout << "cube: " << t2 - t1 << "\nring: " << t3 - t2 | ||
<< "\nmaster slave: " << t4 - t3 << "\nall reduce: " << t5 - t4 | ||
<< "\n"; | ||
|
||
MPI_Finalize(); | ||
|
||
return (EXIT_SUCCESS); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Build the global-sum MPI demo.
#   make / make release  -> compiles ${OBJS} into ${TARGET}
#   make clean           -> removes the built binary
OBJS = main.cpp
EXECS=release
MPICC?=mpic++
FLAGS=-O3
# The binary the recipe actually writes (and the report's run command uses).
TARGET=release.out

# all/release/clean name actions, not files.
.PHONY: all clean ${EXECS}

all: ${EXECS}

release: ${TARGET}

# Rebuild only when a source file is newer than the binary.
${TARGET}: ${OBJS}
	${MPICC} ${FLAGS} ${OBJS} -o ${TARGET}

# Remove the file that is really produced, not the phony target name.
clean:
	rm -f ${TARGET}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#ifndef RANDOM_HPP | ||
#define RANDOM_HPP | ||
|
||
#include <algorithm> | ||
#include <functional> | ||
#include <random> | ||
|
||
/**
 * Generate a random integer from [low, high]
 *
 * The engine is seeded once from std::random_device and reused by every
 * call. The function is `inline` because it is defined in a header: without
 * it, including this header from two translation units is a
 * multiple-definition error at link time.
 *
 * @param low The lower bound (inclusive)
 * @param high The upper bound (inclusive)
 * @return A random number on the range [low, high]
 */
inline int random_int(int low, int high)
{
  static std::random_device rd;
  static std::mt19937 mt(rd());
  std::uniform_int_distribution<> dist(low, high);
  return dist(mt);
}
|
||
/**
 * Generate a random real number from [low, high)
 *
 * The engine is seeded once from std::random_device and reused by every
 * call. The function is `inline` because it is defined in a header: without
 * it, including this header from two translation units is a
 * multiple-definition error at link time.
 *
 * @param low The lower bound (inclusive)
 * @param high The upper bound (exclusive)
 * @return A random number on the range [low, high)
 */
inline double random_double(double low, double high)
{
  static std::random_device rd;
  static std::mt19937 mt(rd());
  std::uniform_real_distribution<> dist(low, high);
  return dist(mt);
}
|
||
/**
 * Fill a container from [first, last) with random integers from [low, high]
 *
 * The engine is seeded once (per iterator type) and shared by all calls, so
 * successive calls continue the same random stream.
 *
 * @param first Iterator to beginning of range to fill
 * @param last Iterator to end of range to fill
 * @param low The lower bound (inclusive)
 * @param high The upper bound (inclusive)
 */
template <typename it>
void random_int_fill(it first, it last, const int low, const int high)
{
  static std::random_device rd;
  static std::mt19937 mt(rd());
  std::uniform_int_distribution<> dist(low, high);
  // Use the static engine itself rather than a copy: std::bind(dist, mt)
  // stores a copy of mt, so the shared engine never advanced and every call
  // produced the same sequence.
  std::generate(first, last, [&dist] { return dist(mt); });
}
|
||
/**
 * Fill a container from [first, last) with random real numbers from
 * [low, high)
 *
 * The engine is seeded once (per iterator type) and shared by all calls, so
 * successive calls continue the same random stream. Each generated double is
 * assigned to the element, so it is converted to the element type.
 *
 * @param first Iterator to beginning of range to fill
 * @param last Iterator to end of range to fill
 * @param low The lower bound (inclusive)
 * @param high The upper bound (exclusive)
 */
template <typename it>
void random_double_fill(it first, it last, const double low, const double high)
{
  static std::random_device rd;
  static std::mt19937 mt(rd());
  std::uniform_real_distribution<double> dist(low, high);
  // Use the static engine itself rather than a copy: std::bind(dist, mt)
  // stores a copy of mt, so the shared engine never advanced and every call
  // produced the same sequence.
  std::generate(first, last, [&dist] { return dist(mt); });
}
|
||
#endif |
Binary file not shown.
96 changes: 96 additions & 0 deletions
96
CS5500_ParallelProgramming/7-GlobalSum/report/philip_nelson_hw7.tex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
\documentclass{article} | ||
|
||
\usepackage[utf8]{inputenc} | ||
\usepackage{geometry} | ||
\usepackage{listings} | ||
\usepackage{graphicx} | ||
\usepackage{geometry} | ||
\usepackage{courier} | ||
|
||
\graphicspath{{images/}} | ||
|
||
\title{HW 7 \\ Global Sum} | ||
\author{Philip Nelson} | ||
\date{2018 October 12} | ||
|
||
\lstset{basicstyle=\footnotesize\ttfamily\normalsize, | ||
breaklines=true, | ||
stepnumber=1, | ||
} | ||
|
||
\begin{document} | ||
|
||
\maketitle | ||
|
||
\section*{Introduction} | ||
|
||
The purpose of this assignment is to write an MPI program that performs a global sum. I have implemented several different variants. The first takes advantage of cube network communication. The second uses ring communication. The third is a naive master-slave method. The fourth uses the built-in MPI\_Allreduce function. I made each iteration sleep for one tenth of a second in order to simulate some kind of work being executed. | ||
|
||
\section*{Code} | ||
|
||
\lstinputlisting[showstringspaces=false, language=c++, numbers=left]{../main.cpp} | ||
\newpage | ||
|
||
\section*{Output} | ||
|
||
\begin{lstlisting}[showstringspaces=false] | ||
|
||
# mpic++ -O3 main.cpp -o release.out | ||
|
||
# mpiexec -n 8 --oversubscribe release.out | ||
|
||
original data | ||
0 5 | ||
1 3 | ||
2 0 | ||
3 8 | ||
4 4 | ||
5 8 | ||
6 3 | ||
7 9 | ||
|
||
cube sum | ||
0 40 | ||
1 40 | ||
2 40 | ||
3 40 | ||
4 40 | ||
5 40 | ||
6 40 | ||
7 40 | ||
|
||
ring sum | ||
0 40 | ||
1 40 | ||
2 40 | ||
3 40 | ||
4 40 | ||
5 40 | ||
6 40 | ||
7 40 | ||
|
||
master slave sum | ||
0 40 | ||
|
||
all reduce | ||
0 40 | ||
1 40 | ||
2 40 | ||
3 40 | ||
4 40 | ||
5 40 | ||
6 40 | ||
7 40 | ||
|
||
cube: 0.305294 | ||
ring: 0.708875 | ||
master slave: 0.701991 | ||
all reduce: 0.101125 | ||
|
||
\end{lstlisting} | ||
|
||
\section*{Findings} | ||
|
||
The cube sum was the best performing of my own summing functions, which is unsurprising since it only needs to perform $\log_2 n$ exchange steps, where $n$ is the world size. As a result, it took only about $43\%$ of the time of the ring sum. The ring sum performed about as well as the master-slave sum; however, with the master-slave sum only the master process is left with the result. If you want all processes to have the global sum, the ring sum would be a better option. Getting a good time estimate for the reduce / all reduce isn't possible using this setup, but I imagine it is the best option for performing global sums. | ||
|
||
\end{document} |
11 changes: 11 additions & 0 deletions
11
CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/Makefile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Build the parallel Game of Life simulation (needs libpng).
#   make / make release  -> compiles ${OBJS} into ./release
#   make clean           -> removes the built binary
OBJS = main.cpp
EXECS=release
MPICC?=mpic++

# all and clean name actions, not files.
.PHONY: all clean

all: ${EXECS}

# Libraries go after the sources that use them: linkers that resolve symbols
# left to right (or drop not-yet-needed libraries) otherwise fail to find
# the libpng symbols.
release: ${OBJS}
	${MPICC} -O3 ${OBJS} -o release -lpng

clean:
	rm -f ${EXECS}
10 changes: 10 additions & 0 deletions
10
CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/bench100.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Processes,Seconds | ||
2,0.777249 | ||
3,0.643743 | ||
4,0.556935 | ||
5,0.668211 | ||
6,0.781215 | ||
7,0.957652 | ||
8,0.883065 | ||
9,1.25623 | ||
10,1.1307 |
10 changes: 10 additions & 0 deletions
10
CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/benchmark.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Processes,Simulation Time | ||
2,3.50693 | ||
3,2.88412 | ||
4,2.72392 | ||
5,3.59394 | ||
6,3.82275 | ||
7,3.82185 | ||
8,4.95354 | ||
9,5.03342 | ||
10,5.54502 |
11 changes: 11 additions & 0 deletions
11
CS5500_ParallelProgramming/8-ConwaysGameOfLifeParallel/benchmark.gp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Plot simulation time against process count from benchmark.csv into a PNG.

# Input format and output device.
set datafile separator ","
set terminal png
set output "report/images/benchmark.png"

# Labels and legend; the legend entry comes from the CSV header row.
set title "Simulation Time vs Number of Processes"
set ylabel "Simulation Time (s)"
set xlabel "Number of Processes" offset 0,-1.1
set key outside autotitle columnhead

# Tics drawn on the axes only, pointing inward.
set ytics axis nomirror in
set xtics axis nomirror in

plot "benchmark.csv" using 1:2 with linespoints linewidth 2
Oops, something went wrong.