diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..5c255f6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,18 @@ +# Contributing + +## Commit Messages + +When commiting to this repository, prefix your commit messages with the implementation(s) affected. +When changing top-level files, a prefix is not necessary +The following are examples of acceptable commit messaged: + + openmp: Code change + openmp, opencl: Code change + all: Switch to CMake + data: Data change + Update README + +## Compiler and Platform Support + +Each implementation's README should detail the compilers supported and any platform restrictions. + diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..a937481 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,219 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 University of Bristol High Performance Computing Group + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..5250755 --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# BUDE Benchmark + +This repository contains implementation of the core computation of the Bristol University Docking Engine (BUDE) in different HPC programming models. +The benchmark is a virtual screening run of the NDM-1 protein and runs the energy evaluation for a single generation of poses repeatedly, for a configurable number of iterations. +Increasing the iteration count has similar performance effects to docking multiple ligands back-to-back in a production BUDE docking run. + +## Structure + +The top-level `data` directory contains the input common to implementations. +Each other subdirectory contains a separate implementation: + +- [OpenMP](openmp/) for CPUs +- [OpenCL](opencl/) for GPUs + +## Building + +To build with the default options, type `make` in an implementation directory. +There are options to choose the compiler used and the architecture targetted. + +Refer to each implementation's README for further build instructions. + +## Running + +To run with the default options, run the binary without any flags. +To adjust the run time, use `-i` to set the number of iterations. +For very short runs, e.g. for simulation, use set `-n 1024` to reduce the number of poses. + +Refer to each implementation's README for further run instructions. diff --git a/data/forcefield.dat b/data/forcefield.dat new file mode 100644 index 0000000..be3576e Binary files /dev/null and b/data/forcefield.dat differ diff --git a/data/ligand.dat b/data/ligand.dat new file mode 100644 index 0000000..7344c21 Binary files /dev/null and b/data/ligand.dat differ diff --git a/data/poses.dat b/data/poses.dat new file mode 100644 index 0000000..7a1dff1 Binary files /dev/null and b/data/poses.dat differ diff --git a/data/protein.dat b/data/protein.dat new file mode 100644 index 0000000..1f06923 Binary files /dev/null and b/data/protein.dat differ diff --git a/opencl/.gitignore b/opencl/.gitignore new file mode 100644 index 0000000..c505754 --- /dev/null +++ b/opencl/.gitignore @@ -0,0 +1 @@ +bude-opencl* diff --git a/opencl/Makefile b/opencl/Makefile new file mode 100644 index 0000000..73b6172 --- /dev/null +++ b/opencl/Makefile @@ -0,0 +1,25 @@ +CC = gcc +CFLAGS = -std=c99 -Wall -O3 -ffast-math -march=native +LDFLAGS = + +PLATFORM = $(shell uname -s) +ifeq ($(PLATFORM), Darwin) + LDFLAGS = -framework OpenCL +else + CFLAGS += -fopenmp + LDFLAGS = -lOpenCL -lm +endif + +# ------- + +EXE = bude-opencl + +.PHONY: all $(EXE) clean + +all: $(EXE) + +$(EXE): bude.c + $(CC) $(CFLAGS) bude.c $(LDFLAGS) -o $@ + +clean: + rm -f $(EXE) diff --git a/opencl/README.md b/opencl/README.md new file mode 100644 index 0000000..8419408 --- /dev/null +++ b/opencl/README.md @@ -0,0 +1,40 @@ +# BUDE OpenCL + +This is a GPU implementation of BUDE using OpenCL. + +## Building + +Select the compiler you want to use and pass it to Make: + +``` +make COMPILER=GNU +``` + +The supported compilers names are: `ARM`, `CLANG`, `CRAY`, `GNU`, `INTEL`. + +### Target Architecture + +By default, the native architecture is targetted, but this can be changed by setting the `ARCH` parameter to the name of the target processor. +For example, the following are both valid: + +``` +make COMPILER=GNU ARCH=thunderx2t99 +make COMPILER=GNU ARCH=skylake-avx512 +``` + +### Block Sizes + +This implementation includes a tunable block size similar to OpenCL workgroups. +The default value is `64`, which is suitable for 512-bit vectors, e.g. in Skylake or A64FX, but higher values may sometimes be beneficial. +For 128-bit vectors, `16` is a good choice. + +This parameter can be set using the `WGSIZE` parameter, as follows: + +``` +make COMPILER=GNU ARCH=thunderx2t99 WGSIZE=16 +``` + +## Running + +The `-n` and `-i` parameters are available, along with several OpenCL-specific options for device selection and run-time tuning. +Run `bude-opencl -h` for a list of all available flags. diff --git a/opencl/bude.c b/opencl/bude.c new file mode 100644 index 0000000..bfb0871 --- /dev/null +++ b/opencl/bude.c @@ -0,0 +1,699 @@ +#include +#include +#include +#include +#include + +#if defined(__APPLE__) + #include +#else + #include + #include +#endif + +#define MAX_PLATFORMS 8 +#define MAX_DEVICES 32 +#define MAX_INFO_STRING 256 + +#define DATA_DIR "../data" +#define FILE_LIGAND DATA_DIR "/ligand.dat" +#define FILE_PROTEIN DATA_DIR "/protein.dat" +#define FILE_FORCEFIELD DATA_DIR "/forcefield.dat" +#define FILE_POSES DATA_DIR "/poses.dat" + +#define FILE_KERNEL "budeMultiTD.cl" + +// Energy evaluation parameters +#define CNSTNT 45.0f +#define HBTYPE_F 70 +#define HBTYPE_E 69 +#define HARDNESS 38.0f +#define NPNPDIST 5.5f +#define NPPDIST 1.0f + +typedef struct +{ + cl_float x, y, z; + cl_int type; +} Atom; + +typedef struct +{ + cl_int hbtype; + cl_float radius; + cl_float hphb; + cl_float elsc; +} FFParams; + +struct +{ + cl_int natlig; + cl_int natpro; + cl_int ntypes; + cl_int nposes; + Atom *restrict protein; + Atom *restrict ligand; + FFParams *restrict forcefield; + float *restrict poses[6]; + + int iterations; +} params = {0}; + +struct +{ + cl_device_id device; + cl_context context; + cl_command_queue queue; + cl_program program; + cl_kernel kernel; + + int deviceIndex; + int wgsize; + int posesPerWI; +} cl = {0}; + +double getTimestamp(); +void loadParameters(int argc, char *argv[]); +void freeParameters(); +void printTimings(double start, double end, double poses_per_wi); + +void initCL(); +unsigned getDevices(cl_device_id devices[MAX_DEVICES]); +void getDeviceName(cl_device_id device, char name[MAX_INFO_STRING]); +void releaseCL(); +void checkError(cl_int err, const char *op); + +void runOpenMP(float *energies); +void runOpenCL(float *energies); + +int main(int argc, char *argv[]) +{ + loadParameters(argc, argv); + + float *energiesOCL = malloc(params.nposes*sizeof(float)); + float *energiesOMP = malloc(params.nposes*sizeof(float)); + + runOpenMP(energiesOMP); + runOpenCL(energiesOCL); + + // Verify results + float maxdiff = 0; + printf("\n OpenMP OpenCL (diff)\n"); + for (int i = 0; i < params.nposes; i++) + { + if (fabs(energiesOCL[i]) < 1.f) + { + continue; + } + + float diff = fabs(energiesOMP[i] - energiesOCL[i]) / energiesOCL[i]; + if (diff > maxdiff) + maxdiff = diff; + + if (i < 8) + { + printf("%7.2f vs %7.2f (%5.2f%%)\n", + energiesOMP[i], energiesOCL[i], 100*diff); + } + } + printf("\nLargest difference was %.3f%%\n\n", maxdiff); + + free(energiesOCL); + free(energiesOMP); + + freeParameters(); +} + +void runOpenMP(float *results) +{ + printf("\nRunning C/OpenMP\n"); + + double start = getTimestamp(); + +#pragma omp parallel + for (int itr = 0; itr < params.iterations; itr++) + { +#pragma omp for + for (unsigned i = 0; i < params.nposes; i++) + { + float etot = 0; + + // Compute transformation matrix + const float sx = sin(params.poses[0][i]); + const float cx = cos(params.poses[0][i]); + const float sy = sin(params.poses[1][i]); + const float cy = cos(params.poses[1][i]); + const float sz = sin(params.poses[2][i]); + const float cz = cos(params.poses[2][i]); + + float transform[3][4]; + transform[0][0] = cy*cz; + transform[0][1] = sx*sy*cz - cx*sz; + transform[0][2] = cx*sy*cz + sx*sz; + transform[0][3] = params.poses[3][i]; + transform[1][0] = cy*sz; + transform[1][1] = sx*sy*sz + cx*cz; + transform[1][2] = cx*sy*sz - sx*cz; + transform[1][3] = params.poses[4][i]; + transform[2][0] = -sy; + transform[2][1] = sx*cy; + transform[2][2] = cx*cy; + transform[2][3] = params.poses[5][i]; + + // Loop over ligand atoms + int il = 0; + do + { + // Load ligand atom data + const Atom l_atom = params.ligand[il]; + const FFParams l_params = params.forcefield[l_atom.type]; + const int lhphb_ltz = l_params.hphb<0.f; + const int lhphb_gtz = l_params.hphb>0.f; + + // Transform ligand atom + float lpos_x = transform[0][3] + + l_atom.x * transform[0][0] + + l_atom.y * transform[0][1] + + l_atom.z * transform[0][2]; + float lpos_y = transform[1][3] + + l_atom.x * transform[1][0] + + l_atom.y * transform[1][1] + + l_atom.z * transform[1][2]; + float lpos_z = transform[2][3] + + l_atom.x * transform[2][0] + + l_atom.y * transform[2][1] + + l_atom.z * transform[2][2]; + + // Loop over protein atoms + int ip = 0; + do + { + // Load protein atom data + const Atom p_atom = params.protein[ip]; + const FFParams p_params = params.forcefield[p_atom.type]; + + const float radij = p_params.radius + l_params.radius; + const float r_radij = 1.f / radij; + + const float elcdst = + (p_params.hbtype==HBTYPE_F && l_params.hbtype==HBTYPE_F) + ? 4.f : 2.f; + const float elcdst1 = + (p_params.hbtype==HBTYPE_F && l_params.hbtype==HBTYPE_F) + ? 0.25f : 0.5f; + const int type_E = + ((p_params.hbtype==HBTYPE_E || l_params.hbtype==HBTYPE_E)); + + const int phphb_ltz = p_params.hphb < 0.f; + const int phphb_gtz = p_params.hphb > 0.f; + const int phphb_nz = p_params.hphb != 0.f; + const float p_hphb = + p_params.hphb * (phphb_ltz && lhphb_gtz ? -1.f : 1.f); + const float l_hphb = + l_params.hphb * (phphb_gtz && lhphb_ltz ? -1.f : 1.f); + const float distdslv = + (phphb_ltz ? (lhphb_ltz ? NPNPDIST : NPPDIST) + : (lhphb_ltz ? NPPDIST : -FLT_MAX)); + const float r_distdslv = 1.f / distdslv; + + const float chrg_init = l_params.elsc * p_params.elsc; + const float dslv_init = p_hphb + l_hphb; + + // Calculate distance between atoms + const float x = lpos_x - p_atom.x; + const float y = lpos_y - p_atom.y; + const float z = lpos_z - p_atom.z; + const float distij = sqrt(x*x + y*y + z*z); + + // Calculate the sum of the sphere radii + const float distbb = distij - radij; + const int zone1 = (distbb < 0.f); + + // Calculate steric energy + etot += (1.f - (distij*r_radij)) * (zone1 ? 2*HARDNESS : 0.f); + + // Calculate formal and dipole charge interactions + float chrg_e = chrg_init + * ((zone1 ? 1 : (1.f - distbb*elcdst1)) + * (distbb= argc || (cl.deviceIndex = parseInt(argv[i])) < 0) + { + printf("Invalid device index\n"); + exit(1); + } + } + else if (!strcmp(argv[i], "--iterations") || !strcmp(argv[i], "-i")) + { + if (++i >= argc || (params.iterations = parseInt(argv[i])) < 0) + { + printf("Invalid number of iterations\n"); + exit(1); + } + } + else if (!strcmp(argv[i], "--numposes") || !strcmp(argv[i], "-n")) + { + if (++i >= argc || (nposes = parseInt(argv[i])) < 0) + { + printf("Invalid number of poses\n"); + exit(1); + } + } + else if (!strcmp(argv[i], "--posesperwi") || !strcmp(argv[i], "-p")) + { + if (++i >= argc || (cl.posesPerWI = parseInt(argv[i])) < 0) + { + printf("Invalid poses-per-workitem value\n"); + exit(1); + } + } + else if (!strcmp(argv[i], "--wgsize") || !strcmp(argv[i], "-w")) + { + if (++i >= argc || (cl.wgsize = parseInt(argv[i])) < 0) + { + printf("Invalid work-group size\n"); + exit(1); + } + } + else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) + { + printf("\n"); + printf("Usage: ./bude [OPTIONS]\n\n"); + printf("Options:\n"); + printf(" -h --help Print this message\n"); + printf(" --list List available devices\n"); + printf(" --device INDEX Select device at INDEX\n"); + printf(" -i --iterations I Repeat kernel I times\n"); + printf(" -n --numposes N Compute energies for N poses\n"); + printf(" -p --poserperwi PPWI Compute PPWI poses per work-item\n"); + printf(" -w --wgsize WGSIZE Run with work-group size WGSIZE\n"); + printf("\n"); + exit(0); + } + else + { + printf("Unrecognized argument '%s' (try '--help')\n", argv[i]); + exit(1); + } + } + + FILE *file = NULL; + long length; + + file = openFile(FILE_LIGAND, &length); + params.natlig = length / sizeof(Atom); + params.ligand = malloc(params.natlig*sizeof(Atom)); + fread(params.ligand, sizeof(Atom), params.natlig, file); + fclose(file); + + file = openFile(FILE_PROTEIN, &length); + params.natpro = length / sizeof(Atom); + params.protein = malloc(params.natpro*sizeof(Atom)); + fread(params.protein, sizeof(Atom), params.natpro, file); + fclose(file); + + file = openFile(FILE_FORCEFIELD, &length); + params.ntypes = length / sizeof(FFParams); + params.forcefield = malloc(params.ntypes*sizeof(FFParams)); + fread(params.forcefield, sizeof(FFParams), params.ntypes, file); + fclose(file); + + file = openFile(FILE_POSES, &length); + for (int i = 0; i < 6; i++) + params.poses[i] = malloc(nposes*sizeof(float)); + + long available = length / 6 / sizeof(float); + params.nposes = 0; + while (params.nposes < nposes) + { + long fetch = nposes - params.nposes; + if (fetch > available) + fetch = available; + + for (int i = 0; i < 6; i++) + { + fseek(file, i*available*sizeof(float), SEEK_SET); + fread(params.poses[i] + params.nposes, sizeof(float), fetch, file); + } + rewind(file); + + params.nposes += fetch; + } + fclose(file); +} + +void freeParameters() +{ + free(params.ligand); + free(params.protein); + free(params.forcefield); + for (int i = 0; i < 6; i++) + free(params.poses[i]); +} + +void printTimings(double start, double end, double poses_per_wi) +{ + double ms = ((end-start)/params.iterations)*1e-3; + + // Compute FLOP/s + double runtime = ms*1e-3; + double ops_per_wi = 27*poses_per_wi + + params.natlig*(3 + 18*poses_per_wi + params.natpro*(11 + 30*poses_per_wi)) + + poses_per_wi; + double total_ops = ops_per_wi * (params.nposes/poses_per_wi); + double flops = total_ops / runtime; + double gflops = flops / 1e9; + + double interactions = + (double)params.nposes + * (double)params.natlig + * (double)params.natpro; + double interactions_per_sec = interactions / runtime; + + // Print stats + printf("- Total time: %7.2lf ms\n", (end-start)*1e-3); + printf("- Average time: %7.2lf ms\n", ms); + printf("- Interactions/s: %7.2lf billion\n", (interactions_per_sec / 1e9)); + printf("- GFLOP/s: %7.2lf\n", gflops); +} + +void checkError(cl_int err, const char *op) +{ + if (err != CL_SUCCESS) + { + printf("Error during operation '%s' (%d)\n", op, err); + releaseCL(); + } +} + +unsigned getDevices(cl_device_id devices[MAX_DEVICES]) +{ + cl_int err; + + // Get list of platforms + cl_uint numPlatforms = 0; + cl_platform_id platforms[MAX_PLATFORMS]; + err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &numPlatforms); + checkError(err, "getting platforms"); + + // Enumerate devices + unsigned numDevices = 0; + for (int i = 0; i < numPlatforms; i++) + { + cl_uint num = 0; + err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, + MAX_DEVICES-numDevices, devices+numDevices, &num); + checkError(err, "getting deviceS"); + numDevices += num; + } + + return numDevices; +} + +void getDeviceName(cl_device_id device, char name[MAX_INFO_STRING]) +{ + cl_device_info info = CL_DEVICE_NAME; + + // Special case for AMD +#ifdef CL_DEVICE_BOARD_NAME_AMD + clGetDeviceInfo(device, CL_DEVICE_VENDOR, MAX_INFO_STRING, name, NULL); + if (strstr(name, "Advanced Micro Devices")) + info = CL_DEVICE_BOARD_NAME_AMD; +#endif + + clGetDeviceInfo(device, info, MAX_INFO_STRING, name, NULL); +} + +double getTimestamp() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_usec + tv.tv_sec*1e6; +} + +void initCL() +{ + cl_int err; + + cl_device_id devices[MAX_DEVICES]; + unsigned num = getDevices(devices); + if (cl.deviceIndex >= num) + { + printf("Invalid device index (try '--list')\n"); + exit(1); + } + cl.device = devices[cl.deviceIndex]; + + char name[128]; + getDeviceName(cl.device, name); + printf("Using device: %s\n", name); + + cl.context = clCreateContext(NULL, 1, &cl.device, NULL, NULL, &err); + checkError(err, "creating context"); + + cl.queue = clCreateCommandQueue( + cl.context, cl.device, CL_QUEUE_PROFILING_ENABLE, &err); + checkError(err, "creating queue"); + + long length; + FILE *file = openFile(FILE_KERNEL, &length); + char *source = malloc(length+1); + fread(source, 1, length, file); + source[length] = '\0'; + fclose(file); + + cl.program = clCreateProgramWithSource( + cl.context, 1, (const char**)&source, NULL, &err); + checkError(err, "creating program"); + + char options[256]; + sprintf(options, + "-cl-fast-relaxed-math -cl-mad-enable -DNUM_TD_PER_THREAD=%d", + cl.posesPerWI); + err = clBuildProgram(cl.program, 1, &cl.device, options, NULL, NULL); + if (err != CL_SUCCESS) + { + if (err == CL_BUILD_PROGRAM_FAILURE) + { + char log[16384]; + clGetProgramBuildInfo(cl.program, cl.device, CL_PROGRAM_BUILD_LOG, + 16384, log, NULL); + printf("%s\n", log); + } + } + free(source); + checkError(err, "building program"); + + cl.kernel = clCreateKernel(cl.program, "fasten_main", &err); + checkError(err, "creating kernel"); +} + +#define RELEASE(func, obj) if (obj) {func(obj); obj=NULL;}; +void releaseCL() +{ + RELEASE(clReleaseKernel, cl.kernel); + RELEASE(clReleaseProgram, cl.program); + RELEASE(clReleaseCommandQueue, cl.queue); + RELEASE(clReleaseContext, cl.context); +} diff --git a/opencl/budeMultiTD.cl b/opencl/budeMultiTD.cl new file mode 100644 index 0000000..cd1adf2 --- /dev/null +++ b/opencl/budeMultiTD.cl @@ -0,0 +1,203 @@ +/** + * BUDE OpenCL kernel file + **/ + +// Numeric constants +#define ZERO 0.0f +#define QUARTER 0.25f +#define HALF 0.5f +#define ONE 1.0f +#define TWO 2.0f +#define FOUR 4.0f +#define CNSTNT 45.0f + +#define HBTYPE_F 70 +#define HBTYPE_E 69 + +// The data structure for one atom - 16 bytes +#ifndef ATOM_STRUCT +#define ATOM_STRUCT +typedef struct _atom +{ + float x,y,z; + int index; +} Atom; + +typedef struct +{ + int hbtype; + float radius; + float hphb; + float elsc; +} FFParams; + +#define HARDNESS 38.0f +#define NPNPDIST 5.5f +#define NPPDIST 1.0f + +#endif + +void compute_transformation_matrix(const float transform_0, + const float transform_1, + const float transform_2, + const float transform_3, + const float transform_4, + const float transform_5, + __private float4 *restrict transform) +{ + const float sx = sin(transform_0); + const float cx = cos(transform_0); + const float sy = sin(transform_1); + const float cy = cos(transform_1); + const float sz = sin(transform_2); + const float cz = cos(transform_2); + + transform[0].x = cy*cz; + transform[0].y = sx*sy*cz - cx*sz; + transform[0].z = cx*sy*cz + sx*sz; + transform[0].w = transform_3; + transform[1].x = cy*sz; + transform[1].y = sx*sy*sz + cx*cz; + transform[1].z = cx*sy*sz - sx*cz; + transform[1].w = transform_4; + transform[2].x = -sy; + transform[2].y = sx*cy; + transform[2].z = cx*cy; + transform[2].w = transform_5; +} + +__kernel void fasten_main(const int natlig, + const int natpro, + const __global Atom *restrict protein_molecule, + const __global Atom *restrict ligand_molecule, + const __global float *restrict transforms_0, + const __global float *restrict transforms_1, + const __global float *restrict transforms_2, + const __global float *restrict transforms_3, + const __global float *restrict transforms_4, + const __global float *restrict transforms_5, + __global float *restrict etotals, + const __global FFParams *restrict global_forcefield, + __local FFParams *restrict forcefield, + const int num_atom_types, + const int numTransforms) +{ + // Get index of first TD + int ix = get_group_id(0)*get_local_size(0)*NUM_TD_PER_THREAD + get_local_id(0); + + // Have extra threads do the last member intead of return. + // A return would disable use of barriers, so not using return is better + ix = ix < numTransforms ? ix : numTransforms - NUM_TD_PER_THREAD; + + // Copy forcefield parameter table to local memory + event_t event = async_work_group_copy((__local float*)forcefield, + (__global float*)global_forcefield, + num_atom_types*sizeof(FFParams)/sizeof(float), + 0); + + // Compute transformation matrix to private memory + float etot[NUM_TD_PER_THREAD]; + float4 transform[NUM_TD_PER_THREAD][3]; + const int lsz = get_local_size(0); + for (int i = 0; i < NUM_TD_PER_THREAD; i++) + { + int index = ix + i*lsz; + compute_transformation_matrix(transforms_0[index], + transforms_1[index], + transforms_2[index], + transforms_3[index], + transforms_4[index], + transforms_5[index], + transform[i]); + etot[i] = ZERO; + } + + // Wait for forcefield copy to finish + wait_group_events(1, &event); + + // Loop over ligand atoms + int il = 0; + do + { + // Load ligand atom data + const Atom l_atom = ligand_molecule[il]; + const FFParams l_params = forcefield[l_atom.index]; + const bool lhphb_ltz = l_params.hphbZERO; + + float3 lpos[NUM_TD_PER_THREAD]; + const float4 linitpos = (float4)(l_atom.x,l_atom.y,l_atom.z,ONE); + for (int i = 0; i < NUM_TD_PER_THREAD; i++) + { + // Transform ligand atom + lpos[i].x = transform[i][0].w + linitpos.x*transform[i][0].x + linitpos.y*transform[i][0].y + linitpos.z*transform[i][0].z; + lpos[i].y = transform[i][1].w + linitpos.x*transform[i][1].x + linitpos.y*transform[i][1].y + linitpos.z*transform[i][1].z; + lpos[i].z = transform[i][2].w + linitpos.x*transform[i][2].x + linitpos.y*transform[i][2].y + linitpos.z*transform[i][2].z; + } + + // Loop over protein atoms + int ip = 0; + do + { + // Load protein atom data + const Atom p_atom = protein_molecule[ip]; + const FFParams p_params = forcefield[p_atom.index]; + + const float radij = p_params.radius + l_params.radius; + const float r_radij = native_recip(radij); + + const float elcdst = (p_params.hbtype==HBTYPE_F && l_params.hbtype==HBTYPE_F) ? FOUR : TWO; + const float elcdst1 = (p_params.hbtype==HBTYPE_F && l_params.hbtype==HBTYPE_F) ? QUARTER : HALF; + const bool type_E = ((p_params.hbtype==HBTYPE_E || l_params.hbtype==HBTYPE_E)); + + const bool phphb_ltz = p_params.hphbZERO; + const bool phphb_nz = p_params.hphb!=ZERO; + const float p_hphb = p_params.hphb * (phphb_ltz && lhphb_gtz ? -ONE : ONE); + const float l_hphb = l_params.hphb * (phphb_gtz && lhphb_ltz ? -ONE : ONE); + const float distdslv = (phphb_ltz ? (lhphb_ltz ? NPNPDIST : NPPDIST) : (lhphb_ltz ? NPPDIST : -FLT_MAX)); + const float r_distdslv = native_recip(distdslv); + + const float chrg_init = l_params.elsc * p_params.elsc; + const float dslv_init = p_hphb + l_hphb; + + for (int i = 0; i < NUM_TD_PER_THREAD; i++) + { + // Calculate distance between atoms + const float x = lpos[i].x - p_atom.x; + const float y = lpos[i].y - p_atom.y; + const float z = lpos[i].z - p_atom.z; + const float distij = native_sqrt(x*x + y*y + z*z); + + // Calculate the sum of the sphere radii + const float distbb = distij - radij; + const bool zone1 = (distbb < ZERO); + + // Calculate steric energy + etot[i] += (ONE - (distij*r_radij)) * (zone1 ? 2*HARDNESS : ZERO); + + // Calculate formal and dipole charge interactions + float chrg_e = chrg_init * ((zone1 ? 1 : (ONE - distbb*elcdst1)) * (distbb +#include +#include +#include +#include +#include + +#include "bude.h" + +struct +{ + int natlig; + int natpro; + int ntypes; + int nposes; + Atom *restrict protein; + Atom *restrict ligand; + FFParams *restrict forcefield; + float *restrict poses[6]; + + int iterations; +} params = {0}; + +double getTimestamp(); +void loadParameters(int argc, char *argv[]); +void freeParameters(); +void printTimings(double start, double end, double poses_per_wi); + +void runOpenMP(float *energies); + +void fasten_main(const int natlig, + const int natpro, + const Atom *restrict protein, + const Atom *restrict ligand, + const float *restrict transforms_0, + const float *restrict transforms_1, + const float *restrict transforms_2, + const float *restrict transforms_3, + const float *restrict transforms_4, + const float *restrict transforms_5, + float *restrict results, + const FFParams *restrict forcefield, + const int group); + +int main(int argc, char *argv[]) +{ + loadParameters(argc, argv); + + float *energiesOMP = calloc(params.nposes, sizeof(float)); + + runOpenMP(energiesOMP); + + + FILE *output = fopen("energies.dat", "w"); + + // Print some energies + printf("\nEnergies\n"); + for (int i = 0; i < params.nposes; i++) + { + fprintf(output, "%7.2f\n", energiesOMP[i]); + if (i < 16) + printf("%7.2f\n", energiesOMP[i]); + } + + fclose(output); + + free(energiesOMP); + + freeParameters(); +} + +void runOpenMP(float *restrict results) +{ + printf("\nRunning C/OpenMP\n"); + + double start = getTimestamp(); + +#pragma omp parallel + for (int itr = 0; itr < params.iterations; itr++) + { +#pragma omp for + for (unsigned group = 0; group < (params.nposes/WGSIZE/PPWI); group++) + { + fasten_main(params.natlig, params.natpro, params.protein, params.ligand, + params.poses[0], params.poses[1], params.poses[2], + params.poses[3], params.poses[4], params.poses[5], + results, params.forcefield, group); + } + } + + double end = getTimestamp(); + + printTimings(start, end, PPWI); +} + +FILE* openFile(const char *name, long *length) +{ + FILE *file = NULL; + if (!(file = fopen(name, "rb"))) + { + fprintf(stderr, "Failed to open '%s'\n", name); + exit(1); + } + + fseek(file, 0, SEEK_END); + *length = ftell(file); + rewind(file); + + return file; +} + +int parseInt(const char *str) +{ + char *next; + int value = strtoul(str, &next, 10); + return strlen(next) ? -1 : value; +} + +void loadParameters(int argc, char *argv[]) +{ + // Defaults + params.iterations = DEFAULT_ITERS; + int nposes = DEFAULT_NPOSES; + + for (int i = 1; i < argc; i++) + { + if (!strcmp(argv[i], "--iterations") || !strcmp(argv[i], "-i")) + { + if (++i >= argc || (params.iterations = parseInt(argv[i])) < 0) + { + printf("Invalid number of iterations\n"); + exit(1); + } + } + else if (!strcmp(argv[i], "--numposes") || !strcmp(argv[i], "-n")) + { + if (++i >= argc || (nposes = parseInt(argv[i])) < 0) + { + printf("Invalid number of poses\n"); + exit(1); + } + } + else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) + { + printf("\n"); + printf("Usage: ./bude [OPTIONS]\n\n"); + printf("Options:\n"); + printf(" -h --help Print this message\n"); + printf(" -i --iterations I Repeat kernel I times (default: %d)\n", DEFAULT_ITERS); + printf(" -n --numposes N Compute energies for N poses (default: %d)\n", DEFAULT_NPOSES); + printf("\n"); + exit(0); + } + else + { + printf("Unrecognized argument '%s' (try '--help')\n", argv[i]); + exit(1); + } + } + + FILE *file = NULL; + long length; + + file = openFile(FILE_LIGAND, &length); + params.natlig = length / sizeof(Atom); + params.ligand = malloc(params.natlig*sizeof(Atom)); + fread(params.ligand, sizeof(Atom), params.natlig, file); + fclose(file); + + file = openFile(FILE_PROTEIN, &length); + params.natpro = length / sizeof(Atom); + params.protein = malloc(params.natpro*sizeof(Atom)); + fread(params.protein, sizeof(Atom), params.natpro, file); + fclose(file); + + file = openFile(FILE_FORCEFIELD, &length); + params.ntypes = length / sizeof(FFParams); + params.forcefield = malloc(params.ntypes*sizeof(FFParams)); + fread(params.forcefield, sizeof(FFParams), params.ntypes, file); + fclose(file); + + file = openFile(FILE_POSES, &length); + for (int i = 0; i < 6; i++) + params.poses[i] = malloc(nposes*sizeof(float)); + + long available = length / 6 / sizeof(float); + params.nposes = 0; + while (params.nposes < nposes) + { + long fetch = nposes - params.nposes; + if (fetch > available) + fetch = available; + + for (int i = 0; i < 6; i++) + { + fseek(file, i*available*sizeof(float), SEEK_SET); + fread(params.poses[i] + params.nposes, sizeof(float), fetch, file); + } + rewind(file); + + params.nposes += fetch; + } + fclose(file); +} + +void freeParameters() +{ + free(params.ligand); + free(params.protein); + free(params.forcefield); + for (int i = 0; i < 6; i++) + free(params.poses[i]); +} + +void printTimings(double start, double end, double poses_per_wi) +{ + double ms = ((end-start)/params.iterations)*1e-3; + + // Compute FLOP/s + double runtime = ms*1e-3; + double ops_per_wi = 27*poses_per_wi + + params.natlig*(3 + 18*poses_per_wi + params.natpro*(11 + 30*poses_per_wi)) + + poses_per_wi; + double total_ops = ops_per_wi * (params.nposes/poses_per_wi); + double flops = total_ops / runtime; + double gflops = flops / 1e9; + + double total_finsts = 25.0 * params.natpro * params.natlig * params.nposes; + double finsts = total_finsts / runtime; + double gfinsts = finsts / 1e9; + + double interactions = + (double)params.nposes + * (double)params.natlig + * (double)params.natpro; + double interactions_per_sec = interactions / runtime; + + // Print stats + printf("- Total time: %7.3lf ms\n", (end-start)*1e-3); + printf("- Average time: %7.3lf ms\n", ms); + printf("- Interactions/s: %7.3lf billion\n", (interactions_per_sec / 1e9)); + printf("- GFLOP/s: %7.3lf\n", gflops); + printf("- GFInst/s: %7.3lf\n", gfinsts); +} + +double getTimestamp() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_usec + tv.tv_sec*1e6; +} diff --git a/openmp/bude.h b/openmp/bude.h new file mode 100644 index 0000000..ee5d620 --- /dev/null +++ b/openmp/bude.h @@ -0,0 +1,29 @@ +#ifndef PPWI +#define PPWI 1 +#endif +#ifndef WGSIZE +#define WGSIZE 4 +#endif + +#define DEFAULT_ITERS 8 +#define DEFAULT_NPOSES 65536 + +#define DATA_DIR "../data" +#define FILE_LIGAND DATA_DIR "/ligand.dat" +#define FILE_PROTEIN DATA_DIR "/protein.dat" +#define FILE_FORCEFIELD DATA_DIR "/forcefield.dat" +#define FILE_POSES DATA_DIR "/poses.dat" + +typedef struct +{ + float x, y, z; + int type; +} Atom; + +typedef struct +{ + int hbtype; + float radius; + float hphb; + float elsc; +} FFParams; diff --git a/openmp/vec-pose-inner.c b/openmp/vec-pose-inner.c new file mode 100644 index 0000000..24c6d84 --- /dev/null +++ b/openmp/vec-pose-inner.c @@ -0,0 +1,168 @@ +#include +#include +#include + +#include "bude.h" + +// Energy evaluation parameters +#define CNSTNT 45.0f +#define HBTYPE_F 70 +#define HBTYPE_E 69 +#define HARDNESS 38.0f +#define NPNPDIST 5.5f +#define NPPDIST 1.0f + +void fasten_main(const int natlig, + const int natpro, + const Atom *restrict protein, + const Atom *restrict ligand, + const float *restrict transforms_0, + const float *restrict transforms_1, + const float *restrict transforms_2, + const float *restrict transforms_3, + const float *restrict transforms_4, + const float *restrict transforms_5, + float *restrict results, + const FFParams *restrict forcefield, + const int group) +{ + float transform[3][4][WGSIZE]; + float etot[WGSIZE]; + +#pragma omp simd + for (int l = 0; l < WGSIZE; l++) + { + int ix = group*WGSIZE + l; + + // Compute transformation matrix + const float sx = sinf(transforms_0[ix]); + const float cx = cosf(transforms_0[ix]); + const float sy = sinf(transforms_1[ix]); + const float cy = cosf(transforms_1[ix]); + const float sz = sinf(transforms_2[ix]); + const float cz = cosf(transforms_2[ix]); + + transform[0][0][l] = cy*cz; + transform[0][1][l] = sx*sy*cz - cx*sz; + transform[0][2][l] = cx*sy*cz + sx*sz; + transform[0][3][l] = transforms_3[ix]; + transform[1][0][l] = cy*sz; + transform[1][1][l] = sx*sy*sz + cx*cz; + transform[1][2][l] = cx*sy*sz - sx*cz; + transform[1][3][l] = transforms_4[ix]; + transform[2][0][l] = -sy; + transform[2][1][l] = sx*cy; + transform[2][2][l] = cx*cy; + transform[2][3][l] = transforms_5[ix]; + + etot[l] = 0.f; + } + + { + // Loop over ligand atoms + int il = 0; + do + { + // Load ligand atom data + const Atom l_atom = ligand[il]; + const FFParams l_params = forcefield[l_atom.type]; + const int lhphb_ltz = l_params.hphb<0.f; + const int lhphb_gtz = l_params.hphb>0.f; + + // Transform ligand atom + float lpos_x[WGSIZE], lpos_y[WGSIZE], lpos_z[WGSIZE]; + +#pragma omp simd + for (int l = 0; l < WGSIZE; l++) + { + lpos_x[l] = transform[0][3][l] + + l_atom.x * transform[0][0][l] + + l_atom.y * transform[0][1][l] + + l_atom.z * transform[0][2][l]; + lpos_y[l] = transform[1][3][l] + + l_atom.x * transform[1][0][l] + + l_atom.y * transform[1][1][l] + + l_atom.z * transform[1][2][l]; + lpos_z[l] = transform[2][3][l] + + l_atom.x * transform[2][0][l] + + l_atom.y * transform[2][1][l] + + l_atom.z * transform[2][2][l]; + } + + // Loop over protein atoms + int ip = 0; + do + { + // Load protein atom data + const Atom p_atom = protein[ip]; + const FFParams p_params = forcefield[p_atom.type]; + + const float radij = p_params.radius + l_params.radius; + const float r_radij = 1.f / radij; + + const float elcdst = + (p_params.hbtype==HBTYPE_F && l_params.hbtype==HBTYPE_F) + ? 4.f : 2.f; + const float elcdst1 = + (p_params.hbtype==HBTYPE_F && l_params.hbtype==HBTYPE_F) + ? 0.25f : 0.5f; + const int type_E = + ((p_params.hbtype==HBTYPE_E || l_params.hbtype==HBTYPE_E)); + + const int phphb_ltz = p_params.hphb < 0.f; + const int phphb_gtz = p_params.hphb > 0.f; + const int phphb_nz = p_params.hphb != 0.f; + const float p_hphb = + p_params.hphb * (phphb_ltz && lhphb_gtz ? -1.f : 1.f); + const float l_hphb = + l_params.hphb * (phphb_gtz && lhphb_ltz ? -1.f : 1.f); + const float distdslv = + (phphb_ltz ? (lhphb_ltz ? NPNPDIST : NPPDIST) + : (lhphb_ltz ? NPPDIST : -FLT_MAX)); + const float r_distdslv = 1.f / distdslv; + + const float chrg_init = l_params.elsc * p_params.elsc; + const float dslv_init = p_hphb + l_hphb; + +#pragma omp simd + for (int l = 0; l < WGSIZE; l++) + { + // Calculate distance between atoms + const float x = lpos_x[l] - p_atom.x; + const float y = lpos_y[l] - p_atom.y; + const float z = lpos_z[l] - p_atom.z; + const float distij = sqrtf(x*x + y*y + z*z); + + // Calculate the sum of the sphere radii + const float distbb = distij - radij; + + const int zone1 = (distbb < 0.f); + + // Calculate steric energy + etot[l] += (1.f - (distij*r_radij)) * (zone1 ? 2.f*HARDNESS : 0.f); + + // Calculate formal and dipole charge interactions + float chrg_e = chrg_init + * ((zone1 ? 1.f : (1.f - distbb*elcdst1)) + * (distbb