diff --git a/official/nlp/albert/run_squad.py b/official/nlp/albert/run_squad.py
index 242e349b128..ed3c2da5372 100644
--- a/official/nlp/albert/run_squad.py
+++ b/official/nlp/albert/run_squad.py
@@ -19,9 +19,12 @@ from __future__ import print_function
 
 import json
+import os
+import time
 
 from absl import app
 from absl import flags
+from absl import logging
 import tensorflow as tf
 
 from official.nlp.albert import configs as albert_configs
@@ -53,7 +56,7 @@ def train_squad(strategy,
 
 
 def predict_squad(strategy, input_meta_data):
-  """Makes predictions for a squad dataset."""
+  """Makes predictions for the squad dataset."""
   bert_config = albert_configs.AlbertConfig.from_json_file(
       FLAGS.bert_config_file)
   tokenizer = tokenization.FullSentencePieceTokenizer(
@@ -63,6 +66,18 @@ def predict_squad(strategy, input_meta_data):
       bert_config, squad_lib_sp)
 
 
+def eval_squad(strategy, input_meta_data):
+  """Evaluate on the squad dataset."""
+  bert_config = albert_configs.AlbertConfig.from_json_file(
+      FLAGS.bert_config_file)
+  tokenizer = tokenization.FullSentencePieceTokenizer(
+      sp_model_file=FLAGS.sp_model_file)
+
+  eval_metrics = run_squad_helper.eval_squad(
+      strategy, input_meta_data, tokenizer, bert_config, squad_lib_sp)
+  return eval_metrics
+
+
 def export_squad(model_export_path, input_meta_data):
   """Exports a trained model as a `SavedModel` for inference.
 
@@ -97,10 +112,25 @@ def main(_):
       num_gpus=FLAGS.num_gpus,
       all_reduce_alg=FLAGS.all_reduce_alg,
       tpu_address=FLAGS.tpu)
-  if FLAGS.mode in ('train', 'train_and_predict'):
+
+  if 'train' in FLAGS.mode:
     train_squad(strategy, input_meta_data, run_eagerly=FLAGS.run_eagerly)
-  if FLAGS.mode in ('predict', 'train_and_predict'):
+  if 'predict' in FLAGS.mode:
     predict_squad(strategy, input_meta_data)
+  if 'eval' in FLAGS.mode:
+    eval_metrics = eval_squad(strategy, input_meta_data)
+    f1_score = eval_metrics['final_f1']
+    logging.info('SQuAD eval F1-score: %f', f1_score)
+    summary_dir = os.path.join(FLAGS.model_dir, 'summaries', 'eval')
+    summary_writer = tf.summary.create_file_writer(summary_dir)
+    with summary_writer.as_default():
+      # TODO(lehou): write to the correct step number.
+      tf.summary.scalar('F1-score', f1_score, step=0)
+      summary_writer.flush()
+    # Also write eval_metrics to json file.
+    squad_lib_sp.write_to_json_files(
+        eval_metrics, os.path.join(summary_dir, 'eval_metrics.json'))
+    time.sleep(60)
 
 
 if __name__ == '__main__':
diff --git a/official/nlp/bert/run_squad.py b/official/nlp/bert/run_squad.py
index 7ca62b13f98..2c84c7bcdee 100644
--- a/official/nlp/bert/run_squad.py
+++ b/official/nlp/bert/run_squad.py
@@ -20,7 +20,6 @@
 
 import json
 import os
-import tempfile
 import time
 
 from absl import app
@@ -130,18 +129,15 @@ def main(_):
     eval_metrics = eval_squad(strategy, input_meta_data)
     f1_score = eval_metrics['final_f1']
     logging.info('SQuAD eval F1-score: %f', f1_score)
-    if (not strategy) or strategy.extended.should_save_summary:
-      summary_dir = os.path.join(FLAGS.model_dir, 'summaries')
-    else:
-      summary_dir = tempfile.mkdtemp()
-    summary_writer = tf.summary.create_file_writer(
-        os.path.join(summary_dir, 'eval'))
+    summary_dir = os.path.join(FLAGS.model_dir, 'summaries', 'eval')
+    summary_writer = tf.summary.create_file_writer(summary_dir)
     with summary_writer.as_default():
       # TODO(lehou): write to the correct step number.
       tf.summary.scalar('F1-score', f1_score, step=0)
       summary_writer.flush()
-    # Wait for some time, for the depending mldash/tensorboard jobs to finish
-    # exporting the final F1-score.
+    # Also write eval_metrics to json file.
+    squad_lib_wp.write_to_json_files(
+        eval_metrics, os.path.join(summary_dir, 'eval_metrics.json'))
     time.sleep(60)
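
Note: because the new dispatch checks substring membership ('train' in FLAGS.mode) rather than exact mode names, a combined mode string such as 'train_and_eval' runs both the training and eval branches. Both files now write eval summaries the same way; a minimal self-contained sketch of that pattern follows (the model_dir value and metrics dict are illustrative, and json.dump stands in for the squad_lib write_to_json_files helper used in the diff):

    import json
    import os

    import tensorflow as tf

    model_dir = '/tmp/squad'          # illustrative; the scripts use FLAGS.model_dir
    eval_metrics = {'final_f1': 0.9}  # illustrative; returned by eval_squad

    # Write the F1 score as a TF2 summary under <model_dir>/summaries/eval.
    summary_dir = os.path.join(model_dir, 'summaries', 'eval')
    summary_writer = tf.summary.create_file_writer(summary_dir)
    with summary_writer.as_default():
      # Step 0 mirrors the placeholder noted in the TODO above.
      tf.summary.scalar('F1-score', eval_metrics['final_f1'], step=0)
      summary_writer.flush()

    # Mirror the metrics to a JSON file next to the summary event files.
    with tf.io.gfile.GFile(
        os.path.join(summary_dir, 'eval_metrics.json'), 'w') as f:
      json.dump(eval_metrics, f)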