Add yaml/perf scripts for new perf test pipeline (microsoft#3909)

* yaml/perf scripts for new pipeline * yaml/perf scripts for new pipeline * remove unused imports * testing some comments change * testing some comments change * testing jdbc * testing jdbc * testing jdbc * exclude pwd from jdbc properties * exclude pwd from jdbc properties * namedtuple * on comments Co-authored-by: Ethan Tao <[email protected]>
shaoboyan091 · May 13, 2020 · 93eb9bc · 93eb9bc
1 parent e86214e
commit 93eb9bc
Show file tree

Hide file tree

Showing 5 changed files with 138 additions and 3 deletions.
diff --git a/orttraining/orttraining/models/runner/training_runner.cc b/orttraining/orttraining/models/runner/training_runner.cc
@@ -820,6 +820,8 @@ Status TrainingRunner::SavePerfMetrics(const size_t number_of_batches, const siz
   perf_metrics_stream.open(perf_metrics_path, std::ios::out | std::ios::trunc);
   ORT_RETURN_IF_NOT(perf_metrics_stream << json_string << "\n", "Failed to write to output file.");
 
+  std::cout << "\n\nSaved perf metrics file: " << ToMBString(perf_metrics_path) << "\n\n";
+
   return Status::OK();
 }
 

diff --git a/orttraining/tools/ci_test/run_bert_perf_test.py b/orttraining/tools/ci_test/run_bert_perf_test.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import argparse
+import subprocess
+import sys
+import os
+from collections import namedtuple
+
+# Directory containing this script, with symlinks resolved; main() uses it to
+# build the path passed as --perf_output_dir.
+SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
+
+def parse_args():
+    """Parse the command-line options of the BERT perf test driver.
+
+    Returns:
+        argparse.Namespace with ``binary_dir``, ``training_data_root`` and
+        ``model_root``. All three options are required.
+    """
+    required_dirs = (
+        ("--binary_dir", "Path to the ORT binary directory."),
+        ("--training_data_root", "Path to the training data root directory."),
+        ("--model_root", "Path to the model root directory."),
+    )
+
+    arg_parser = argparse.ArgumentParser(description="Runs BERT performance tests.")
+    for flag, help_text in required_dirs:
+        arg_parser.add_argument(flag, required=True, help=help_text)
+    return arg_parser.parse_args()
+
+def main():
+    """Run the BERT training perf benchmark once per configuration.
+
+    The configurations use the same parameters as the "GitHub Master Merge
+    Schedule" recorded in OneNote: fp16 and fp32, each at sequence lengths
+    128 and 512. Every run invokes the ``onnxruntime_training_bert`` binary
+    from ``--binary_dir`` and passes ``<SCRIPT_DIR>/results`` as
+    ``--perf_output_dir``.
+
+    Returns:
+        0 once every configuration has run successfully.
+
+    Raises:
+        subprocess.CalledProcessError: if a training run exits with a
+            non-zero return code; remaining configurations are not run.
+    """
+    args = parse_args()
+
+    Config = namedtuple('Config', ['use_mixed_precision', 'max_seq_length', 'batch_size', 'max_predictions_per_seq'])
+    configs = [
+        Config(True, 128, 66, 20),
+        Config(True, 512, 10, 80),
+        Config(False, 128, 33, 20),
+        Config(False, 512, 5, 80)
+    ]
+
+    # These paths do not depend on the configuration, so build them once.
+    trainer_binary = os.path.join(args.binary_dir, "onnxruntime_training_bert")
+    model_path = os.path.join(
+        args.model_root, "nv/bert-large/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm")
+    perf_output_dir = os.path.join(SCRIPT_DIR, "results")
+
+    # run BERT training
+    for c in configs:
+        print("######## testing name - " + ('fp16-' if c.use_mixed_precision else 'fp32-') + str(c.max_seq_length) + " ##############")
+
+        # The data directories are keyed by sequence length.
+        data_root = os.path.join(args.training_data_root, str(c.max_seq_length))
+        cmds = [
+            trainer_binary,
+            "--model_name", model_path,
+            "--train_data_dir", os.path.join(data_root, "books_wiki_en_corpus/train"),
+            "--test_data_dir", os.path.join(data_root, "books_wiki_en_corpus/test"),
+            "--train_batch_size", str(c.batch_size),
+            "--mode", "train",
+            "--max_seq_length", str(c.max_seq_length),
+            "--num_train_steps", "100",
+            "--display_loss_steps", "5",
+            "--optimizer", "Lamb",
+            "--learning_rate", "3e-3",
+            "--warmup_ratio", "0.2843",
+            "--warmup_mode", "Poly",
+            "--gradient_accumulation_steps", "1",
+            "--max_predictions_per_seq", str(c.max_predictions_per_seq),
+            "--lambda", "0",
+            "--use_nccl",
+            "--perf_output_dir", perf_output_dir,
+        ]
+
+        if c.use_mixed_precision:
+            cmds.append("--use_mixed_precision")
+            cmds.append("--allreduce_in_fp16")
+
+        # check=True raises CalledProcessError on a non-zero exit code.
+        subprocess.run(cmds, check=True)
+
+    return 0
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml
@@ -0,0 +1,59 @@
+# No CI trigger is configured for this pipeline.
+trigger: none
+
+jobs:
+- job: Onnxruntime_Linux_GPU_Training_Perf_Test
+
+  timeoutInMinutes: 120
+
+  variables: 
+  - group: 'ortperf' # variable group
+
+  steps:
+  - checkout: self
+    clean: true
+    submodules: recursive
+
+  # Build ONNX Runtime with training enabled (RelWithDebInfo) through the
+  # docker build script.
+  - script: >
+      tools/ci_build/github/linux/run_dockerbuild.sh
+      -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory)
+      -x "
+      --config RelWithDebInfo
+      --enable_training
+      --update --build
+      "
+    displayName: 'Build performance tests'
+
+  # Run run_bert_perf_test.py inside the GPU container. Sources are mounted at
+  # /onnxruntime_src and build output at /build; the BERT models and data are
+  # mounted read-only under /build.
+  - script: >
+      docker run --gpus all --rm --name onnxruntime-gpu-perf 
+      --volume $(Build.SourcesDirectory):/onnxruntime_src 
+      --volume $(Build.BinariesDirectory):/build 
+      --volume /bert_ort/bert_models:/build/bert_models:ro 
+      --volume /bert_data:/build/bert_data:ro 
+      -e NIGHTLY_BUILD onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6
+      /usr/bin/python3.6 /onnxruntime_src/orttraining/tools/ci_test/run_bert_perf_test.py
+      --binary_dir /build/RelWithDebInfo 
+      --training_data_root /build/bert_data 
+      --model_root /build/bert_models
+    displayName: 'Run bert performance tests'
+
+  # generate jdbc.properties for tools/perf_util: the JDBC url, the user, and
+  # password_env, which holds the NAME of the environment variable carrying the
+  # password (the password itself is not written to the file)
+  - script: >
+      mkdir -p $(Build.SourcesDirectory)/tools/perf_util/src/main/resources &&
+      printf "url=jdbc:mysql://onnxruntimedashboard.mysql.database.azure.com/onnxruntime?serverTimezone=UTC&useUnicode=true&characterEncoding=UTF-8\nuser=powerbi@onnxruntimedashboard\npassword_env=ORT_PERF_PASSWORD" 
+      > $(Build.SourcesDirectory)/tools/perf_util/src/main/resources/jdbc.properties     
+    displayName: 'Create resource file'
+
+  # Package the tools/perf_util Maven project; the next step runs the jar it
+  # produces under target/.
+  - script: >
+      mvn package
+    displayName: 'Maven build'
+    workingDirectory: $(Build.SourcesDirectory)/tools/perf_util
+
+  # process json files: run the packaged App on the results directory that the
+  # perf test was told to write to; ORT_PERF_PASSWORD is supplied to the process
+  # from the $(ortperf) pipeline variable
+  - script: >
+      java -cp target/send_perf_metrics-0.0.1-SNAPSHOT-jar-with-dependencies.jar com.msft.send_perf_metrics.App "$(Build.SourcesDirectory)/orttraining/tools/ci_test/results"
+    env:
+      ORT_PERF_PASSWORD: $(ortperf) 
+    displayName: 'Populate perf metrics'
+    workingDirectory: $(Build.SourcesDirectory)/tools/perf_util
+
+  - template: templates/clean-agent-build-directory-step.yml
diff --git a/tools/perf_util/src/main/java/com/msft/send_perf_metrics/App.java b/tools/perf_util/src/main/java/com/msft/send_perf_metrics/App.java
@@ -51,12 +51,12 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
 
 				if (!filename.startsWith(".") && filename.endsWith(".json")) {
 					perf_metrics.add(file);
+					System.out.println(filename);
 				}
 				return FileVisitResult.CONTINUE;
 			}
 
 		});
-		System.out.println(perf_metrics);
 
 		final Path cwd_dir = Paths.get(System.getProperty("user.dir"));
 		// git rev-parse HEAD
@@ -78,6 +78,7 @@ private static void processPerfMetrics(final List<Path> perf_metrics, String com
 										   String batch_id) throws Exception {
 		try {
 			Connection conn = JdbcUtil.GetConn();
+			System.out.println("MySQL DB connection established.\n");
 			// go thru each json file
 			JSONParser jsonParser = new JSONParser();
 			for (Path metrics_json : perf_metrics) {

diff --git a/tools/perf_util/src/main/java/com/msft/send_perf_metrics/JdbcUtil.java b/tools/perf_util/src/main/java/com/msft/send_perf_metrics/JdbcUtil.java
@@ -1,17 +1,19 @@
 package com.msft.send_perf_metrics;
 
 import java.sql.DriverManager;
+import java.util.Map;
 import java.util.Properties;
 
 public class JdbcUtil {
+	/**
+	 * Opens a JDBC connection described by the {@code /jdbc.properties} classpath resource.
+	 *
+	 * The resource supplies {@code url}, {@code user} and {@code password_env}. The last
+	 * one is the name of the environment variable that holds the password, so the
+	 * password itself is never stored in the properties file.
+	 *
+	 * @return the connection returned by {@link DriverManager#getConnection(String, String, String)}
+	 * @throws RuntimeException if the resource is not found or has no {@code password_env} entry
+	 * @throws Exception        if loading the properties or opening the connection fails
+	 */
 	static java.sql.Connection GetConn() throws Exception {
 		try (java.io.InputStream in = App.class.getResourceAsStream("/jdbc.properties")) {
 			if (in == null)
-				throw new RuntimeException("err");
+				throw new RuntimeException("Error reading jdbc properties");
 			Properties props = new Properties();
 			props.load(in);
+			// loading password via env variable; System.getenv(null) would throw a
+			// message-less NullPointerException, so report a missing entry explicitly
+			final String passwordEnv = props.getProperty("password_env");
+			if (passwordEnv == null)
+				throw new RuntimeException("jdbc properties is missing the 'password_env' entry");
 			return DriverManager.getConnection(props.getProperty("url"), props.getProperty("user"),
-					props.getProperty("password"));
+					System.getenv(passwordEnv));
 		}
 	}
 }