diff --git a/triangle_counting_host/cpp/Makefile b/triangle_counting_host/cpp/Makefile
new file mode 100644
index 0000000..aff882c
--- /dev/null
+++ b/triangle_counting_host/cpp/Makefile
@@ -0,0 +1,9 @@
+all:
+	g++ -O3 tc.cpp -std=c++11 -o tc -L. -lsds_lib
+	g++ -O3 tc_1pe.cpp -std=c++11 -o tc_1pe -L. -lsds_lib
+
+cpu:
+	g++ -O3 triangle_counting.cc tc_1pe.cpp -std=c++11 -o tc_cpu -L. -lsds_lib
+
+clean:
+	rm -rf tc tc_1pe tc_cpu
diff --git a/triangle_counting_host/cpp/libsds_lib.so b/triangle_counting_host/cpp/libsds_lib.so
new file mode 100755
index 0000000..7fb4971
Binary files /dev/null and b/triangle_counting_host/cpp/libsds_lib.so differ
diff --git a/triangle_counting_host/cpp/libxlnk_cma.h b/triangle_counting_host/cpp/libxlnk_cma.h
new file mode 100644
index 0000000..1b8998e
--- /dev/null
+++ b/triangle_counting_host/cpp/libxlnk_cma.h
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+// kernel buffer pool
+#define XLNK_BUFPOOL_SIZE 100
+
+#define XLNK_DRIVER_PATH "/dev/xlnk"
+
+// count of buffers currently instantiated
+static uint32_t xlnkBufCnt = 0;
+// virtual addresses of buffers
+static void *xlnkBufPool[2 * XLNK_BUFPOOL_SIZE];
+// lengths in bytes of buffers
+static size_t xlnkBufLens[2 * XLNK_BUFPOOL_SIZE];
+// physical addresses of buffers
+static uint32_t xlnkBufPhyPool[2 * XLNK_BUFPOOL_SIZE];
+
+/*
+ * Get the virtual address referencing the physical address resulting from
+ * mmaping /dev/mem.
+ * Required to use bare-metal drivers on Linux. Returns -1 in case of error.
+ */
+unsigned long cma_mmap(unsigned long phyAddr, uint32_t len);
+/*
+ * Unmap a previously mapped memory space.
+ */
+uint32_t cma_munmap(void *buf, uint32_t len);
+/*
+ * Allocate a physically contiguous chunk of CMA memory and map it into
+ * virtual memory space. Returns this virtual pointer, or -1 on failure.
+ */
+void *cma_alloc(uint32_t len, uint32_t cacheable);
+/*
+ * Return the physical memory address corresponding to a given virtual
+ * address pointer. Returns NULL on failure.
+ */
+unsigned long cma_get_phy_addr(void *buf);
+/*
+ * Free a previously allocated CMA memory chunk.
+ */
+void cma_free(void *buf);
+/*
+ * Returns the number of available CMA memory pages which can be allocated.
+ */
+uint32_t cma_pages_available();
+/*
+ * Extra functions in case the user needs to flush or invalidate the cache.
+ */
+void cma_flush_cache(void *buf, unsigned int phys_addr, int size);
+void cma_invalidate_cache(void *buf, unsigned int phys_addr, int size);
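The header above is the userspace interface to the xlnk CMA driver; the Python scripts later in this diff reach the same allocator through pynq.Xlnk. A minimal sketch of the equivalent buffer lifecycle on the Python side (names as used by those scripts; freebuffer is assumed to be the release call):

    from pynq import Xlnk
    import numpy as np

    xlnk = Xlnk()
    # physically contiguous, so the PL can master it directly
    buf = xlnk.cma_array(shape=(1024,), dtype=np.int32)
    buf[:] = 0
    phys = buf.physical_address  # the value written into the IP's pointer registers
    buf.freebuffer()             # release the CMA block when done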
" << endl; + + mmap_space = mmap_addr + virt_offset; + } + ~accelerator() { close(mmap_file); } + + int get(int offset) { return mmap_space[offset >> 2]; } + + void set(int offset, int value) { mmap_space[offset >> 2] = value; } + + void start() { mmap_space[0x00] |= 1; } + + bool done() { return (mmap_space[0x00] & (1 << 1)); } + + bool idle() { return (mmap_space[0x00] & (1 << 2)); } + + bool ready() { return (mmap_space[0x00] & (1 << 3)); } + + int get_return() { return mmap_space[0x10 >> 2]; } + + int program(string bitfile_name) + { + char buf[4194304]; + const string BS_XDEVCFG = "/dev/xdevcfg"; + const string BS_IS_PARTIAL = "/sys/devices/soc0/amba/f8007000.devcfg/is_partial_bitstream"; + + int partial_bs_dev = open(BS_IS_PARTIAL.c_str(), O_WRONLY | O_NONBLOCK); + if (partial_bs_dev < 0) + { + printf("ERROR opening %s\n", BS_IS_PARTIAL.c_str()); + return -1; + } + int write_size = write(partial_bs_dev, "0", 1); + + int fpga_dev = open(BS_XDEVCFG.c_str(), O_WRONLY | O_NONBLOCK); + // int fpga_dev = open(BS_XDEVCFG.c_str(), O_WRONLY); + if (fpga_dev < 0) + { + printf("ERROR opening %s\n", BS_XDEVCFG.c_str()); + return -1; + } + + int bit_file = open(bitfile_name.c_str(), O_RDONLY); + if (bit_file < 0) + { + printf("ERROR opening %s\n", bitfile_name.c_str()); + return -1; + } + + int bit_file_size = read(bit_file, buf, 4194304); + write_size = write(fpga_dev, buf, bit_file_size); + + close(partial_bs_dev); + close(fpga_dev); + close(bit_file); + return 0; + } + +private: + int base_addr; + int range; + int virt_base; + int virt_offset; + int mmap_file; + int *mmap_addr; + int *mmap_space; +}; + +void read_graph(const char *filename, + std::vector *edge_list, + unsigned int num_pe, + std::vector &neighbor_list, + std::vector &offset_list) +{ + std::ifstream ifs(filename); + + int degree_count = 0; + int prev_node = 0; + int pe_idx = 0; + offset_list.push_back(0); + + if (ifs.is_open() && ifs.good()) + { + std::string str; + while (std::getline(ifs, str)) + { + if (!str.empty() && str[0] != '#') + { + std::istringstream ss(str); + int u, v; + ss >> u >> v; + if (prev_node != v) + { + offset_list.push_back(degree_count); + } + + prev_node = v; + if (u < v) + { + edge_list[pe_idx % num_pe].push_back(v); + edge_list[pe_idx % num_pe].push_back(u); + pe_idx++; + } + else + { + neighbor_list.push_back(u); + degree_count++; + } + } + } + } + ifs.close(); + offset_list.push_back(degree_count); +// num_edge = edge_list.size() / 2; +} + +int main( int argc, char** argv ) +{ + + auto t_start = std::chrono::high_resolution_clock::now(); + +// int num_edge = 0; + std::vector edge_list[7], neighbor_list, offset_list; + read_graph("../graph/soc-Epinions1_adj.tsv", edge_list, 7, neighbor_list, offset_list); + std::cout << "neighbor_list size= " << neighbor_list.size() << std::endl; + std::cout << "offset_list size= " << offset_list.size() << std::endl; + + auto t_file_done = std::chrono::high_resolution_clock::now(); + + int *edges0 = (int *)cma_alloc( edge_list[0].size()*sizeof(int), false); + int *edges1 = (int *)cma_alloc( edge_list[1].size()*sizeof(int), false); + int *edges2 = (int *)cma_alloc( edge_list[2].size()*sizeof(int), false); + int *edges3 = (int *)cma_alloc( edge_list[3].size()*sizeof(int), false); + int *edges4 = (int *)cma_alloc( edge_list[4].size()*sizeof(int), false); + int *edges5 = (int *)cma_alloc( edge_list[5].size()*sizeof(int), false); + int *edges6 = (int *)cma_alloc( edge_list[6].size()*sizeof(int), false); + int *neighbors = (int *)cma_alloc(neighbor_list.size()*sizeof(int), 
+void read_graph(const char *filename,
+                std::vector<int> *edge_list,
+                unsigned int num_pe,
+                std::vector<int> &neighbor_list,
+                std::vector<int> &offset_list)
+{
+    std::ifstream ifs(filename);
+
+    int degree_count = 0;
+    int prev_node = 0;
+    int pe_idx = 0;
+    offset_list.push_back(0);
+
+    if (ifs.is_open() && ifs.good())
+    {
+        std::string str;
+        while (std::getline(ifs, str))
+        {
+            if (!str.empty() && str[0] != '#')
+            {
+                std::istringstream ss(str);
+                int u, v;
+                ss >> u >> v;
+                if (prev_node != v)
+                {
+                    offset_list.push_back(degree_count);
+                }
+
+                prev_node = v;
+                if (u < v)
+                {
+                    edge_list[pe_idx % num_pe].push_back(v);
+                    edge_list[pe_idx % num_pe].push_back(u);
+                    pe_idx++;
+                }
+                else
+                {
+                    neighbor_list.push_back(u);
+                    degree_count++;
+                }
+            }
+        }
+    }
+    ifs.close();
+    offset_list.push_back(degree_count);
+// num_edge = edge_list.size() / 2;
+}
+
+int main( int argc, char** argv )
+{
+
+    auto t_start = std::chrono::high_resolution_clock::now();
+
+// int num_edge = 0;
+    std::vector<int> edge_list[7], neighbor_list, offset_list;
+    read_graph("../graph/soc-Epinions1_adj.tsv", edge_list, 7, neighbor_list, offset_list);
+    std::cout << "neighbor_list size= " << neighbor_list.size() << std::endl;
+    std::cout << "offset_list size= " << offset_list.size() << std::endl;
+
+    auto t_file_done = std::chrono::high_resolution_clock::now();
+
+    int *edges0 = (int *)cma_alloc( edge_list[0].size()*sizeof(int), false);
+    int *edges1 = (int *)cma_alloc( edge_list[1].size()*sizeof(int), false);
+    int *edges2 = (int *)cma_alloc( edge_list[2].size()*sizeof(int), false);
+    int *edges3 = (int *)cma_alloc( edge_list[3].size()*sizeof(int), false);
+    int *edges4 = (int *)cma_alloc( edge_list[4].size()*sizeof(int), false);
+    int *edges5 = (int *)cma_alloc( edge_list[5].size()*sizeof(int), false);
+    int *edges6 = (int *)cma_alloc( edge_list[6].size()*sizeof(int), false);
+    int *neighbors = (int *)cma_alloc(neighbor_list.size()*sizeof(int), false);
+    int *offsets = (int *)cma_alloc( offset_list.size()*sizeof(int), false);
+    int *progress = (int *)cma_alloc( 5*sizeof(int), false);
+
+    auto t_malloc_done = std::chrono::high_resolution_clock::now();
+
+    std::memcpy(edges0   , edge_list[0].data(),  edge_list[0].size()*sizeof(int));
+    std::memcpy(edges1   , edge_list[1].data(),  edge_list[1].size()*sizeof(int));
+    std::memcpy(edges2   , edge_list[2].data(),  edge_list[2].size()*sizeof(int));
+    std::memcpy(edges3   , edge_list[3].data(),  edge_list[3].size()*sizeof(int));
+    std::memcpy(edges4   , edge_list[4].data(),  edge_list[4].size()*sizeof(int));
+    std::memcpy(edges5   , edge_list[5].data(),  edge_list[5].size()*sizeof(int));
+    std::memcpy(edges6   , edge_list[6].data(),  edge_list[6].size()*sizeof(int));
+    std::memcpy(neighbors, neighbor_list.data(), neighbor_list.size()*sizeof(int));
+    std::memcpy(offsets  , offset_list.data(),   offset_list.size()*sizeof(int));
+
+    auto t_memcpy_done = std::chrono::high_resolution_clock::now();
+
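+// Seven identical PEs are mapped at 0x43C00000..0x43C60000 (0x10000 stride).
+// program() pushes the full bitstream through /dev/xdevcfg, so calling it on
+// acc0 alone configures all seven instances.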
" << std::endl; + + while(!acc2.done()) + { + tik++; + if ((tik % 10000) == 0) std::cout << "."; + } + std::cout << "acc2 done! " << std::endl; + + while(!acc3.done()) + { + tik++; + if ((tik % 10000) == 0) std::cout << "."; + } + std::cout << "acc3 done! " << std::endl; + + while(!acc4.done()) + { + tik++; + if ((tik % 10000) == 0) std::cout << "."; + } + std::cout << "acc4 done! " << std::endl; + + while(!acc5.done()) + { + tik++; + if ((tik % 10000) == 0) std::cout << "."; + } + std::cout << "acc5 done! " << std::endl; + + while(!acc6.done()) + { + tik++; + if ((tik % 10000) == 0) std::cout << "."; + } + std::cout << "acc6 done! " << std::endl; + + auto t_acc_finish = std::chrono::high_resolution_clock::now(); + + cout << "\ndone execute.." << endl; + + std::cout << "result = " << acc0.get_return() + acc1.get_return() + acc2.get_return() + acc3.get_return() + + acc4.get_return() + acc5.get_return() + acc6.get_return() << std::endl; + + std::cout << "acc0 result = " << acc0.get_return() << std::endl; + std::cout << "acc1 result = " << acc1.get_return() << std::endl; + std::cout << "acc2 result = " << acc2.get_return() << std::endl; + std::cout << "acc3 result = " << acc3.get_return() << std::endl; + std::cout << "acc4 result = " << acc4.get_return() << std::endl; + std::cout << "acc5 result = " << acc5.get_return() << std::endl; + std::cout << "acc6 result = " << acc6.get_return() << std::endl; + + std::chrono::duration total_io_time = t_file_done - t_start; + std::chrono::duration total_malloc_time = t_malloc_done - t_file_done; + std::chrono::duration total_memcpy_time = t_memcpy_done - t_malloc_done; + std::chrono::duration total_program_time = t_program_done - t_memcpy_done; + std::chrono::duration total_exec_time = t_acc_finish - t_acc_start; + std::cout << "File IO time: " << total_io_time.count() << "s" << std::endl; + std::cout << "CMA alloc time: " << total_malloc_time.count() << "s" << std::endl; + std::cout << "Memcpy time: " << total_memcpy_time.count() << "s" << std::endl; + std::cout << "FPGA program time: " << total_program_time.count() << "s" << std::endl; + std::cout << "Kernel exec time: " << total_exec_time.count() << "s" << std::endl; + + cma_free(edges0); + cma_free(edges1); + cma_free(edges2); + cma_free(edges3); + cma_free(edges4); + cma_free(edges5); + cma_free(edges6); + cma_free(neighbors); + cma_free(offsets); + cma_free(progress); + + return 0; +} diff --git a/triangle_counting_host/cpp/tc_1pe.cpp b/triangle_counting_host/cpp/tc_1pe.cpp new file mode 100644 index 0000000..fbbdedd --- /dev/null +++ b/triangle_counting_host/cpp/tc_1pe.cpp @@ -0,0 +1,214 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // for high_resolution_clock + +extern "C" +{ +#include "libxlnk_cma.h" +} + +using namespace std; + +class accelerator +{ +public: + accelerator(int base_addr=0x43C00000, int range=0x00010000) : base_addr(base_addr), range(range) + { + // virt_base = base_addr & ~(getpagesize() - 1); + virt_base = base_addr & ~(sysconf(_SC_PAGE_SIZE) - 1); + virt_offset = base_addr - virt_base; + mmap_file = open("/dev/mem", O_RDWR | O_SYNC); + if (mmap_file == -1) + cout << "Unable to open /dev/mem" << endl; + mmap_addr = (int*)mmap(NULL, range + virt_offset, PROT_READ | PROT_WRITE, + MAP_SHARED, mmap_file, virt_base); + if (mmap_addr == MAP_FAILED) + cout << "mmap fails. 
" << endl; + + mmap_space = mmap_addr + virt_offset; + } + ~accelerator() { close(mmap_file); } + + int get(int offset) { return mmap_space[offset >> 2]; } + + void set(int offset, int value) { mmap_space[offset >> 2] = value; } + + void start() { mmap_space[0x00] |= 1; } + + bool done() { return (mmap_space[0x00] & (1 << 1)); } + + bool idle() { return (mmap_space[0x00] & (1 << 2)); } + + bool ready() { return (mmap_space[0x00] & (1 << 3)); } + + int get_return() { return mmap_space[0x10 >> 2]; } + + int program(string bitfile_name) + { + char buf[4194304]; + const string BS_XDEVCFG = "/dev/xdevcfg"; + const string BS_IS_PARTIAL = "/sys/devices/soc0/amba/f8007000.devcfg/is_partial_bitstream"; + + int partial_bs_dev = open(BS_IS_PARTIAL.c_str(), O_WRONLY | O_NONBLOCK); + if (partial_bs_dev < 0) + { + printf("ERROR opening %s\n", BS_IS_PARTIAL.c_str()); + return -1; + } + int write_size = write(partial_bs_dev, "0", 1); + + int fpga_dev = open(BS_XDEVCFG.c_str(), O_WRONLY | O_NONBLOCK); + // int fpga_dev = open(BS_XDEVCFG.c_str(), O_WRONLY); + if (fpga_dev < 0) + { + printf("ERROR opening %s\n", BS_XDEVCFG.c_str()); + return -1; + } + + int bit_file = open(bitfile_name.c_str(), O_RDONLY); + if (bit_file < 0) + { + printf("ERROR opening %s\n", bitfile_name.c_str()); + return -1; + } + + int bit_file_size = read(bit_file, buf, 4194304); + write_size = write(fpga_dev, buf, bit_file_size); + + close(partial_bs_dev); + close(fpga_dev); + close(bit_file); + return 0; + } + +private: + int base_addr; + int range; + int virt_base; + int virt_offset; + int mmap_file; + int *mmap_addr; + int *mmap_space; +}; + +void read_graph(const char *filename, + std::vector &edge_list, + std::vector &neighbor_list, + std::vector &offset_list, + int &num_edge) +{ + std::ifstream ifs(filename); + + int degree_count = 0; + int prev_node = 0; + offset_list.push_back(0); + + if (ifs.is_open() && ifs.good()) + { + std::string str; + while (std::getline(ifs, str)) + { + if (!str.empty() && str[0] != '#') + { + std::istringstream ss(str); + int u, v; + ss >> u >> v; + if (prev_node != v) + { + offset_list.push_back(degree_count); + } + + prev_node = v; + if (u < v) + { + edge_list.push_back(v); + edge_list.push_back(u); + } + else + { + neighbor_list.push_back(u); + degree_count++; + } + } + } + } + ifs.close(); + offset_list.push_back(degree_count); + num_edge = edge_list.size() / 2; +} + +int main( int argc, char** argv ) +{ + + auto t_start = std::chrono::high_resolution_clock::now(); + + int num_edge = 0; + std::vector edge_list, neighbor_list, offset_list; + read_graph("../graph/soc-Epinions1_adj.tsv", edge_list, neighbor_list, offset_list, num_edge); + std::cout << "neighbor_list size= " << neighbor_list.size() << std::endl; + std::cout << "offset_list size= " << offset_list.size() << std::endl; + std::cout << "edge_list size= " << edge_list.size() << std::endl; + std::cout << "initialized num_edge = " << num_edge << std::endl; + + int *edges = (int *)cma_alloc( edge_list.size()*sizeof(int), false); + int *neighbors = (int *)cma_alloc(neighbor_list.size()*sizeof(int), false); + int *offsets = (int *)cma_alloc( offset_list.size()*sizeof(int), false); + int *progress = (int *)cma_alloc( 5*sizeof(int), false); + + std::memcpy(edges , edge_list.data(), edge_list.size()*sizeof(int)); + std::memcpy(neighbors, neighbor_list.data(), neighbor_list.size()*sizeof(int)); + std::memcpy(offsets , offset_list.data(), offset_list.size()*sizeof(int)); + + accelerator acc; + acc.program("/home/xilinx/code/tc/triangle_counting.bit"); 
diff --git a/triangle_counting_host/python/graph_parser.py b/triangle_counting_host/python/graph_parser.py
new file mode 100644
index 0000000..5a42a09
--- /dev/null
+++ b/triangle_counting_host/python/graph_parser.py
@@ -0,0 +1,37 @@
+
+neighbor_list = []
+offset_list = [0]
+edge_list = []
+
+graph_file = open("graph/test.tsv")
+lines = graph_file.readlines()
+
+degree_count = 0
+prev_node = 0
+
+for line in lines:
+    node_a, node_b, _ = map(int, line.split())
+    if prev_node != node_b:
+        offset_list.append(degree_count)
+
+    prev_node = node_b
+    if node_a < node_b:
+        edge_list.extend([node_b, node_a])
+    else:
+        neighbor_list.append(node_a)
+        degree_count += 1
+
+offset_list.append(degree_count)
+
+graph_file.close()
+
+print("neighbor_list size = ", len(neighbor_list))
+print("offset_list size = ", len(offset_list))
+print("edge_list size = ", len(edge_list))
+
+f = open("test_parsed.tsv", "w")
+f.write("%d %d %d\n" % (len(neighbor_list), len(offset_list), len(edge_list)))
+f.write(" ".join(str(e) for e in neighbor_list) + "\n")
+f.write(" ".join(str(e) for e in offset_list) + "\n")
+f.write(" ".join(str(e) for e in edge_list) + "\n")
+f.close()
diff --git a/triangle_counting_host/python/intersect_host.py b/triangle_counting_host/python/intersect_host.py
new file mode 100644
index 0000000..b9afa3a
--- /dev/null
+++ b/triangle_counting_host/python/intersect_host.py
@@ -0,0 +1,52 @@
+# coding: utf-8
+
+import sys
+import numpy as np
+import os
+import time
+from datetime import datetime
+from pynq import Xlnk
+from pynq import Overlay
+
+# load our design overlay
+overlay = Overlay('intersect_hw.bit')
+print("intersect_hw.bit loaded")
+
+myIP = overlay.intersect_0
+
+xlnk = Xlnk()
+
+t1 = time.time()
+
+input_a = xlnk.cma_array(shape=(4096,), dtype=np.int32)
+input_b = xlnk.cma_array(shape=(4096,), dtype=np.int32)
+
+for i in range(4096):
+    input_a[i] = i
+    input_b[i] = i + 1
+
+myIP.write(0x18, input_a.physical_address)
+myIP.write(0x20, input_b.physical_address)
+
+myIP.write(0x28, 2)
+myIP.write(0x30, 2)
+
+
+t2 = time.time()
+t = t2 - t1
+print("Preparing input data time: ", str(t))
+
+isready = 0
+myIP.write(0x00, 1)
+
+while( isready != 6 ):
+    isready = myIP.read(0x00)
+
+t3 = time.time()
+t = t3 - t2
+#tbatch = tbatch + t
+#print("Computation finished")
+print("PL Time: ", str(t))
+
+print("Return value: ", myIP.read(0x10))
+
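The `isready != 6` loops in these scripts test the same bits the C++ hosts read through done() and idle(): ap_done is bit 1 (0x2) and ap_idle is bit 2 (0x4) of the control register, so a value of 6 means the run has finished and the core has returned to idle.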
diff --git a/triangle_counting_host/python/tc_host.py b/triangle_counting_host/python/tc_host.py
new file mode 100644
index 0000000..046f90d
--- /dev/null
+++ b/triangle_counting_host/python/tc_host.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+
+import sys
+import numpy as np
+import os
+import time
+from datetime import datetime
+from pynq import Xlnk
+from pynq import Overlay
+
+# load our design overlay
+overlay = Overlay('triangle_counting.bit')
+print("triangle_counting.bit loaded")
+
+myIP = overlay.triangle_counting_0
+
+t0 = time.time()
+
+neighbor_list = []
+offset_list = [0]
+edge_list = []
+
+graph_file = open("graph/soc-Epinions1_adj.tsv")
+# graph_file = open("graph/test.tsv")
+lines = graph_file.readlines()
+
+degree_count = 0
+prev_node = 0
+
+for line in lines:
+    node_a, node_b, _ = map(int, line.split())
+    if prev_node != node_b:
+        offset_list.append(degree_count)
+
+    prev_node = node_b
+    if node_a < node_b:
+        edge_list.extend([node_b, node_a])
+    else:
+        neighbor_list.append(node_a)
+        degree_count += 1
+
+offset_list.append(degree_count)
+
+print("neighbor_list size= ", len(neighbor_list))
+print("offset_list size= ", len(offset_list))
+print("edge_list size= ", len(edge_list))
+
+t1 = time.time()
+
+print("Finished reading graph file. ")
+t = t1 - t0
+print("Reading input file time: ", str(t))
+
+xlnk = Xlnk()
+
+neighbor = xlnk.cma_array(shape=(len(neighbor_list),), dtype=np.int32)
+offset = xlnk.cma_array(shape=(len(offset_list),), dtype=np.int32)
+edge = xlnk.cma_array(shape=(len(edge_list),), dtype=np.int32)
+progress = xlnk.cma_array(shape=(5,), dtype=np.int32)
+
+neighbor[:] = neighbor_list
+offset[:] = offset_list
+edge[:] = edge_list
+
+# neighbor[:] = [2, 4, 5, 3, 4, 5, 4, 5, 5]
+# offset[:] = [0, 0, 3, 6, 8, 9, 9]
+# edge[:] = [5, 4, 5, 3, 5, 2, 5, 1, 4, 3, 4, 2, 4, 1, 3, 2, 2, 1]
+
+myIP.write(0x18, neighbor.physical_address)
+myIP.write(0x20, offset.physical_address)
+myIP.write(0x28, edge.physical_address)
+myIP.write(0x30, len(edge_list))
+myIP.write(0x38, progress.physical_address)
+
+# for i in range(neighbor.size):
+#     print("neighbor[%d] = %d" % (i, neighbor[i]))
+
+# for i in range(offset.size):
+#     print("offset[%d] = %d" % (i, offset[i]))
+
+# for i in range(edge.size):
+#     print("edge[%d] = %d" % (i, edge[i]))
+
+t2 = time.time()
+t = t2 - t1
+print("Preparing input data time: ", str(t))
+
+isready = 0
+myIP.write(0x00, 1)
+
+while( isready != 6 ):
+#     print(progress[0], progress[1], progress[2], progress[3], progress[4])
+    isready = myIP.read(0x00)
+
+t3 = time.time()
+t = t3 - t2
+#tbatch = tbatch + t
+#print("Computation finished")
+print("PL Time: ", str(t))
+
+print("Return value: ", myIP.read(0x10))
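tc_host_opt_4.py below drives four identical PEs with copy-pasted register writes. The same configuration can be expressed as a loop over the instances (a sketch reusing the script's own names), which keeps the host independent of the PE count:

    accs = [overlay.triangle_counting_0, overlay.triangle_counting_1,
            overlay.triangle_counting_2, overlay.triangle_counting_3]
    edges = [edge1, edge2, edge3, edge4]
    for acc, e in zip(accs, edges):
        acc.write(0x18, neighbor.physical_address)
        acc.write(0x20, offset.physical_address)
        acc.write(0x28, e.physical_address)
        acc.write(0x30, len(e))
        acc.write(0x38, progress.physical_address)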
diff --git a/triangle_counting_host/python/tc_host_opt_4.py b/triangle_counting_host/python/tc_host_opt_4.py
new file mode 100644
index 0000000..9e295d2
--- /dev/null
+++ b/triangle_counting_host/python/tc_host_opt_4.py
@@ -0,0 +1,162 @@
+# coding: utf-8
+
+import sys
+import numpy as np
+import os
+import time
+import math
+from datetime import datetime
+from pynq import Xlnk
+from pynq import Overlay
+
+# load our design overlay
+overlay = Overlay('tc_opt_4.bit')
+print("tc_opt_4.bit loaded")
+
+acc0 = overlay.triangle_counting_0
+acc1 = overlay.triangle_counting_1
+acc2 = overlay.triangle_counting_2
+acc3 = overlay.triangle_counting_3
+
+t0 = time.time()
+
+neighbor_list = []
+offset_list = [0]
+edge_list = []
+
+graph_file = open("../../graph/soc-Epinions1_adj.tsv")
+# graph_file = open("graph/test.tsv")
+lines = graph_file.readlines()
+
+degree_count = 0
+prev_node = 0
+
+for line in lines:
+    node_a, node_b, _ = map(int, line.split())
+    if prev_node != node_b:
+        offset_list.append(degree_count)
+
+    prev_node = node_b
+    if node_a < node_b:
+        edge_list.extend([node_b, node_a])
+    else:
+        neighbor_list.append(node_a)
+        degree_count += 1
+
+offset_list.append(degree_count)
+
+print("neighbor_list size= ", len(neighbor_list))
+print("offset_list size= ", len(offset_list))
+print("edge_list size= ", len(edge_list))
+
+t1 = time.time()
+
+print("Finished reading graph file. ")
+t = t1 - t0
+print("Reading input file time: ", str(t))
+
+xlnk = Xlnk()
+
+num_edge = int(len(edge_list) / 2)
+num_batch = 4
+num_edge_batch = int(math.floor(float(num_edge) / num_batch))
+num_edge_last_batch = num_edge - (num_batch-1)*num_edge_batch
+
+print(num_edge)
+print(num_batch)
+print(num_edge_batch)
+print(num_edge_last_batch)
+
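+# Edges are split with floor division; the remainder lands in the last batch,
+# so num_edge_last_batch >= num_edge_batch and every edge is assigned exactly once.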
+neighbor = xlnk.cma_array(shape=(len(neighbor_list),), dtype=np.int32)
+offset = xlnk.cma_array(shape=(len(offset_list),), dtype=np.int32)
+edge1 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge2 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge3 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge4 = xlnk.cma_array(shape=(2*num_edge_last_batch,), dtype=np.int32)
+progress = xlnk.cma_array(shape=(5,), dtype=np.int32)
+
+neighbor[:] = neighbor_list
+offset[:] = offset_list
+edge1[:] = edge_list[0:2*num_edge_batch]
+edge2[:] = edge_list[2*num_edge_batch:4*num_edge_batch]
+edge3[:] = edge_list[4*num_edge_batch:6*num_edge_batch]
+edge4[:] = edge_list[6*num_edge_batch:]
+
+# neighbor[:] = [2, 4, 5, 3, 4, 5, 4, 5, 5]
+# offset[:] = [0, 0, 3, 6, 8, 9, 9]
+# edge[:] = [5, 4, 5, 3, 5, 2, 5, 1, 4, 3, 4, 2, 4, 1, 3, 2, 2, 1]
+
+acc0.write(0x00018, neighbor.physical_address)
+acc0.write(0x00020, offset.physical_address)
+acc0.write(0x00028, edge1.physical_address)
+acc0.write(0x00030, 2*num_edge_batch)
+acc0.write(0x00038, progress.physical_address)
+
+acc1.write(0x00018, neighbor.physical_address)
+acc1.write(0x00020, offset.physical_address)
+acc1.write(0x00028, edge2.physical_address)
+acc1.write(0x00030, 2*num_edge_batch)
+acc1.write(0x00038, progress.physical_address)
+
+acc2.write(0x00018, neighbor.physical_address)
+acc2.write(0x00020, offset.physical_address)
+acc2.write(0x00028, edge3.physical_address)
+acc2.write(0x00030, 2*num_edge_batch)
+acc2.write(0x00038, progress.physical_address)
+
+acc3.write(0x00018, neighbor.physical_address)
+acc3.write(0x00020, offset.physical_address)
+acc3.write(0x00028, edge4.physical_address)
+acc3.write(0x00030, 2*num_edge_last_batch)
+acc3.write(0x00038, progress.physical_address)
+
+# for i in range(neighbor.size):
+#     print("neighbor[%d] = %d" % (i, neighbor[i]))
+
+# for i in range(offset.size):
+#     print("offset[%d] = %d" % (i, offset[i]))
+
+# for i in range(edge.size):
+#     print("edge[%d] = %d" % (i, edge[i]))
+
+t2 = time.time()
+t = t2 - t1
+print("Preparing input data time: ", str(t))
+
+acc0.write(0x00000, 1)
+acc1.write(0x00000, 1)
+acc2.write(0x00000, 1)
+acc3.write(0x00000, 1)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc0.read(0x00000)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc1.read(0x00000)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc2.read(0x00000)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc3.read(0x00000)
+
+t3 = time.time()
+t = t3 - t2
+#tbatch = tbatch + t
+#print("Computation finished")
+print("PL Time: ", str(t))
+
+result1 = acc0.read(0x00010)
+result2 = acc1.read(0x00010)
+result3 = acc2.read(0x00010)
+result4 = acc3.read(0x00010)
+
+print("Return value 1: ", result1)
+print("Return value 2: ", result2)
+print("Return value 3: ", result3)
+print("Return value 4: ", result4)
+print("Number of triangles: ", result1+result2+result3+result4)
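tc_host_opt_7.py repeats the same batching scheme with seven PEs, matching the seven instances the C++ host maps at 0x43C00000 through 0x43C60000. Note that every PE is handed the same progress buffer; it is only read back for debugging here, and with several concurrent writers its contents are indicative at best.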
diff --git a/triangle_counting_host/python/tc_host_opt_7.py b/triangle_counting_host/python/tc_host_opt_7.py
new file mode 100644
index 0000000..22df986
--- /dev/null
+++ b/triangle_counting_host/python/tc_host_opt_7.py
@@ -0,0 +1,210 @@
+# coding: utf-8
+
+import sys
+import numpy as np
+import os
+import time
+import math
+from datetime import datetime
+from pynq import Xlnk
+from pynq import Overlay
+
+# load our design overlay
+overlay = Overlay('tc_opt.bit')
+print("tc_opt.bit loaded")
+
+acc0 = overlay.triangle_counting_0
+acc1 = overlay.triangle_counting_1
+acc2 = overlay.triangle_counting_2
+acc3 = overlay.triangle_counting_3
+acc4 = overlay.triangle_counting_4
+acc5 = overlay.triangle_counting_5
+acc6 = overlay.triangle_counting_6
+
+t0 = time.time()
+
+neighbor_list = []
+offset_list = [0]
+edge_list = []
+
+graph_file = open("graph/soc-Epinions1_adj.tsv")
+# graph_file = open("graph/test.tsv")
+lines = graph_file.readlines()
+
+degree_count = 0
+prev_node = 0
+
+for line in lines:
+    node_a, node_b, _ = map(int, line.split())
+    if prev_node != node_b:
+        offset_list.append(degree_count)
+
+    prev_node = node_b
+    if node_a < node_b:
+        edge_list.extend([node_b, node_a])
+    else:
+        neighbor_list.append(node_a)
+        degree_count += 1
+
+offset_list.append(degree_count)
+
+print("neighbor_list size= ", len(neighbor_list))
+print("offset_list size= ", len(offset_list))
+print("edge_list size= ", len(edge_list))
+
+t1 = time.time()
+
+print("Finished reading graph file. ")
+t = t1 - t0
+print("Reading input file time: ", str(t))
+
+xlnk = Xlnk()
+
+num_edge = int(len(edge_list) / 2)
+num_batch = 7
+num_edge_batch = int(math.floor(float(num_edge) / num_batch))
+num_edge_last_batch = num_edge - (num_batch-1)*num_edge_batch
+
+print(num_edge)
+print(num_batch)
+print(num_edge_batch)
+print(num_edge_last_batch)
+
+neighbor = xlnk.cma_array(shape=(len(neighbor_list),), dtype=np.int32)
+offset = xlnk.cma_array(shape=(len(offset_list),), dtype=np.int32)
+edge1 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge2 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge3 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge4 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge5 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge6 = xlnk.cma_array(shape=(2*num_edge_batch,), dtype=np.int32)
+edge7 = xlnk.cma_array(shape=(2*num_edge_last_batch,), dtype=np.int32)
+progress = xlnk.cma_array(shape=(5,), dtype=np.int32)
+
+neighbor[:] = neighbor_list
+offset[:] = offset_list
+edge1[:] = edge_list[0:2*num_edge_batch]
+edge2[:] = edge_list[2*num_edge_batch:4*num_edge_batch]
+edge3[:] = edge_list[4*num_edge_batch:6*num_edge_batch]
+edge4[:] = edge_list[6*num_edge_batch:8*num_edge_batch]
+edge5[:] = edge_list[8*num_edge_batch:10*num_edge_batch]
+edge6[:] = edge_list[10*num_edge_batch:12*num_edge_batch]
+edge7[:] = edge_list[12*num_edge_batch:]
+
+# neighbor[:] = [2, 4, 5, 3, 4, 5, 4, 5, 5]
+# offset[:] = [0, 0, 3, 6, 8, 9, 9]
+# edge[:] = [5, 4, 5, 3, 5, 2, 5, 1, 4, 3, 4, 2, 4, 1, 3, 2, 2, 1]
+
+acc0.write(0x18, neighbor.physical_address)
+acc0.write(0x20, offset.physical_address)
+acc0.write(0x28, edge1.physical_address)
+acc0.write(0x30, 2*num_edge_batch)
+acc0.write(0x38, progress.physical_address)
+
+acc1.write(0x18, neighbor.physical_address)
+acc1.write(0x20, offset.physical_address)
+acc1.write(0x28, edge2.physical_address)
+acc1.write(0x30, 2*num_edge_batch)
+acc1.write(0x38, progress.physical_address)
+
+acc2.write(0x18, neighbor.physical_address)
+acc2.write(0x20, offset.physical_address)
+acc2.write(0x28, edge3.physical_address)
+acc2.write(0x30, 2*num_edge_batch)
+acc2.write(0x38, progress.physical_address)
+
+acc3.write(0x18, neighbor.physical_address)
+acc3.write(0x20, offset.physical_address)
+acc3.write(0x28, edge4.physical_address)
+acc3.write(0x30, 2*num_edge_batch)
+acc3.write(0x38, progress.physical_address)
+
+acc4.write(0x18, neighbor.physical_address)
+acc4.write(0x20, offset.physical_address)
+acc4.write(0x28, edge5.physical_address)
+acc4.write(0x30, 2*num_edge_batch)
+acc4.write(0x38, progress.physical_address)
+
+acc5.write(0x18, neighbor.physical_address)
+acc5.write(0x20, offset.physical_address)
+acc5.write(0x28, edge6.physical_address)
+acc5.write(0x30, 2*num_edge_batch)
+acc5.write(0x38, progress.physical_address)
+
+acc6.write(0x18, neighbor.physical_address)
+acc6.write(0x20, offset.physical_address)
+acc6.write(0x28, edge7.physical_address)
+acc6.write(0x30, 2*num_edge_last_batch)
+acc6.write(0x38, progress.physical_address)
+
+# for i in range(neighbor.size):
+#     print("neighbor[%d] = %d" % (i, neighbor[i]))
+
+# for i in range(offset.size):
+#     print("offset[%d] = %d" % (i, offset[i]))
+
+# for i in range(edge.size):
+#     print("edge[%d] = %d" % (i, edge[i]))
+
+t2 = time.time()
+t = t2 - t1
+print("Preparing input data time: ", str(t))
+
+acc0.write(0x00, 1)
+acc1.write(0x00, 1)
+acc2.write(0x00, 1)
+acc3.write(0x00, 1)
+acc4.write(0x00, 1)
+acc5.write(0x00, 1)
+acc6.write(0x00, 1)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc0.read(0x00)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc1.read(0x00)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc2.read(0x00)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc3.read(0x00)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc4.read(0x00)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc5.read(0x00)
+
+isready = 0
+while( isready != 6 ):
+    isready = acc6.read(0x00)
+
+t3 = time.time()
+t = t3 - t2
+#tbatch = tbatch + t
+#print("Computation finished")
+print("PL Time: ", str(t))
+
+result1 = acc0.read(0x10)
+result2 = acc1.read(0x10)
+result3 = acc2.read(0x10)
+result4 = acc3.read(0x10)
+result5 = acc4.read(0x10)
+result6 = acc5.read(0x10)
+result7 = acc6.read(0x10)
+
+print("Return value 1: ", result1)
+print("Return value 2: ", result2)
+print("Return value 3: ", result3)
+print("Return value 4: ", result4)
+print("Return value 5: ", result5)
+print("Return value 6: ", result6)
+print("Return value 7: ", result7)
+print("Number of triangles: ", result1+result2+result3+result4+result5+result6+result7)