From 0391d5bc6bd5e34de3d39abbc3e4d5e83b038519 Mon Sep 17 00:00:00 2001 From: Yeonwoo Sung Date: Sat, 2 Dec 2023 18:42:16 +0900 Subject: [PATCH] Create by Google Colab --- ...n_of_hallucination_with_SelfCheckGPT.ipynb | 7410 +++++++++++++++++ 1 file changed, 7410 insertions(+) create mode 100644 LLMs/src/Automatic_detection_of_hallucination_with_SelfCheckGPT.ipynb diff --git a/LLMs/src/Automatic_detection_of_hallucination_with_SelfCheckGPT.ipynb b/LLMs/src/Automatic_detection_of_hallucination_with_SelfCheckGPT.ipynb new file mode 100644 index 0000000..fa41ef1 --- /dev/null +++ b/LLMs/src/Automatic_detection_of_hallucination_with_SelfCheckGPT.ipynb @@ -0,0 +1,7410 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "d9bde1fc58e04ab7bfeb94f92a683335": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ee9565244a0d42b8b62d318c960ca1b1", + "IPY_MODEL_94326129d3fa4a98b4c61b11cccd7e4b", + "IPY_MODEL_98653d678d924112a43afd91f1b371d3" + ], + "layout": "IPY_MODEL_a6369928f21449d6a905b8284f490372" + } + }, + "ee9565244a0d42b8b62d318c960ca1b1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8181be7d095745809fd6804da72ac955", + "placeholder": "​", + "style": "IPY_MODEL_0896460c7d2e46628f45331cc3a18125", + "value": "Downloading readme: 100%" + } + }, + "94326129d3fa4a98b4c61b11cccd7e4b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c16d998a6a83462fab2481e4e99cea5f", + "max": 2450, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_48a038cb7f96429c811bf6cefa21afdd", + "value": 2450 + } + }, + "98653d678d924112a43afd91f1b371d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b61da09ee33548ea8cde1e5664706201", + "placeholder": "​", + "style": "IPY_MODEL_59f79ea59f934188a39d1064ab26a77e", + "value": " 2.45k/2.45k [00:00<00:00, 141kB/s]" + } + }, + "a6369928f21449d6a905b8284f490372": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", 
+ "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8181be7d095745809fd6804da72ac955": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + 
"max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0896460c7d2e46628f45331cc3a18125": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c16d998a6a83462fab2481e4e99cea5f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + 
}, + "48a038cb7f96429c811bf6cefa21afdd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b61da09ee33548ea8cde1e5664706201": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "59f79ea59f934188a39d1064ab26a77e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b0acae0b82ad4c23a5b947d046959ff6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_da632866e97848f5be8f37b0def5e9ef", + "IPY_MODEL_d92d83c5a312450fa9099a18352286d8", + "IPY_MODEL_42c9fffa0ca941f69bee3dedbb45c1d0" + ], + "layout": "IPY_MODEL_b85ddf53d7454614bcf5d7e3f8833b99" + } + }, + "da632866e97848f5be8f37b0def5e9ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c40b1efe533e4ac692eb1340ee66038c", + "placeholder": "​", + "style": "IPY_MODEL_b75b9e1274ae423d957e3799ad6ba53f", + "value": "Downloading data files: 100%" + } + }, + "d92d83c5a312450fa9099a18352286d8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ad84e62374f04b02bdb3a93cfa78aaee", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9ac30c1c6cfe46d085a7b108e349e489", + "value": 1 + } + }, + "42c9fffa0ca941f69bee3dedbb45c1d0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f01b745be7124b7fb382727aaf99925b", + "placeholder": "​", + "style": "IPY_MODEL_bd76f84525c04bdd9604bd919a01fd2d", + "value": " 1/1 [00:00<00:00, 1.92it/s]" + } + }, + "b85ddf53d7454614bcf5d7e3f8833b99": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c40b1efe533e4ac692eb1340ee66038c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b75b9e1274ae423d957e3799ad6ba53f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ad84e62374f04b02bdb3a93cfa78aaee": { + "model_module": "@jupyter-widgets/base", + 
"model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9ac30c1c6cfe46d085a7b108e349e489": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f01b745be7124b7fb382727aaf99925b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bd76f84525c04bdd9604bd919a01fd2d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ab995cba21af49919efc0e248cc8b481": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8425833a60ec43b5a694a796a0268774", + "IPY_MODEL_f0d0171ed71e4ac791a7639b53af4cd9", + "IPY_MODEL_e18ed091e6c4417d8a98aaa0b25720c9" + ], + "layout": 
"IPY_MODEL_1956f23b62f44397b8bd344063d04d7b" + } + }, + "8425833a60ec43b5a694a796a0268774": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a5f1bb6c09bc4e7b82682b39bb09ddcf", + "placeholder": "​", + "style": "IPY_MODEL_42923e07ee2e4a8ea3584a740b35f1c2", + "value": "Downloading data: 100%" + } + }, + "f0d0171ed71e4ac791a7639b53af4cd9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9c4308112b5e46e18af2ad71235fc6e7", + "max": 2561507, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cd0670827319470c83f164542c88b6b4", + "value": 2561507 + } + }, + "e18ed091e6c4417d8a98aaa0b25720c9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_d9a44782358746f3bc92c4c5e9b8e7ff", + "placeholder": "​", + "style": "IPY_MODEL_cc48a98421e0432492c5622e6284ce51", + "value": " 2.56M/2.56M [00:00<00:00, 5.15MB/s]" + } + }, + "1956f23b62f44397b8bd344063d04d7b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a5f1bb6c09bc4e7b82682b39bb09ddcf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "42923e07ee2e4a8ea3584a740b35f1c2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9c4308112b5e46e18af2ad71235fc6e7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cd0670827319470c83f164542c88b6b4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d9a44782358746f3bc92c4c5e9b8e7ff": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cc48a98421e0432492c5622e6284ce51": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5982f8275ac34c17a181b14e6345650c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e950601a746f4cf89431ad7400cdc1ab", + "IPY_MODEL_ee3cf65a76004312823b36fac5ad7d25", + "IPY_MODEL_9a803cf5f6c14106b298a47e8abbac60" + ], + "layout": "IPY_MODEL_6066ddef956841409de53d8d724a27fc" + } + }, + "e950601a746f4cf89431ad7400cdc1ab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a2e41b48f9cd4370a60286d659dff719", + "placeholder": "​", + "style": "IPY_MODEL_2afafb253c4e4d29a69781906c21feea", + "value": "Extracting data files: 100%" + } + }, + "ee3cf65a76004312823b36fac5ad7d25": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0b78d6fb36f14f6c969b11ff22750320", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_449c6ccb64ee413fb58213a4a205b850", + "value": 1 + } + }, + "9a803cf5f6c14106b298a47e8abbac60": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f9caef79002c45f796768ee94dc8adc3", + "placeholder": "​", + "style": "IPY_MODEL_dacbe3e3cf8e4278a26ab93fa92e9177", + "value": " 1/1 [00:00<00:00, 39.91it/s]" + } + }, + "6066ddef956841409de53d8d724a27fc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, 
+ "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a2e41b48f9cd4370a60286d659dff719": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2afafb253c4e4d29a69781906c21feea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": 
"1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0b78d6fb36f14f6c969b11ff22750320": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "449c6ccb64ee413fb58213a4a205b850": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": 
null, + "description_width": "" + } + }, + "f9caef79002c45f796768ee94dc8adc3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dacbe3e3cf8e4278a26ab93fa92e9177": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c510a8c155ea4725ae83da4d4be55401": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_90fb7d7a6c77446b9a3666a439aa35b1", + "IPY_MODEL_fc82b37b5e2447eaa1ca2b1f741d1a6c", + "IPY_MODEL_6807b8a6e7da400f980e8c0599e752bd" + ], + "layout": "IPY_MODEL_2becd97b91dd4b489de2d586c3160f02" + } + }, + "90fb7d7a6c77446b9a3666a439aa35b1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_11eafa8b329e4e4687a522ee57c8715d", + "placeholder": "​", + "style": "IPY_MODEL_bf578d430dff4f578ae5b5a43067ddcd", + "value": "Generating evaluation split: 100%" + } + }, + "fc82b37b5e2447eaa1ca2b1f741d1a6c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_41ccdd351ca04e2e8d2d86e8afb789f9", + "max": 238, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6bb3c1577ea74a12b1eac78e06fd3e33", + "value": 238 + } + }, + "6807b8a6e7da400f980e8c0599e752bd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": 
"1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1cdac33b6a9642cb9051888d82b8ea01", + "placeholder": "​", + "style": "IPY_MODEL_a8b24a3070144a1d873f86d89b5aa64e", + "value": " 238/238 [00:00<00:00, 2249.88 examples/s]" + } + }, + "2becd97b91dd4b489de2d586c3160f02": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "11eafa8b329e4e4687a522ee57c8715d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bf578d430dff4f578ae5b5a43067ddcd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "41ccdd351ca04e2e8d2d86e8afb789f9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6bb3c1577ea74a12b1eac78e06fd3e33": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1cdac33b6a9642cb9051888d82b8ea01": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a8b24a3070144a1d873f86d89b5aa64e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a54c92d5587542e2968bf15c8f5bc366": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_708d4f18c65d4f70bddb6f76d96359c9", + "IPY_MODEL_65de6148380a42d584d446f3694cc66e", + "IPY_MODEL_87a3765acf7443ecb47f823cdef319c5" + ], + "layout": "IPY_MODEL_0d7704d9dabf474db287a10547717d9f" + } + }, + "708d4f18c65d4f70bddb6f76d96359c9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d9c55a34c29d477a94d7cb9d8c954bda", + "placeholder": "​", + "style": "IPY_MODEL_f4537afacf844d158ad644dcd4166198", + "value": "tokenizer_config.json: 100%" + } + }, + "65de6148380a42d584d446f3694cc66e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_db4955a6aee24af6a0cf79ebd6bd69cf", + "max": 400, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7dc0b81690dd43339691baf1d2538bbf", + "value": 400 + } + }, + "87a3765acf7443ecb47f823cdef319c5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8136010cfba64dff87c09eb5c2f51910", + "placeholder": "​", + "style": "IPY_MODEL_ecd7fc85a08d403ab8c05d580f744e38", + "value": " 400/400 [00:00<00:00, 6.67kB/s]" + } + }, + "0d7704d9dabf474db287a10547717d9f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d9c55a34c29d477a94d7cb9d8c954bda": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f4537afacf844d158ad644dcd4166198": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "db4955a6aee24af6a0cf79ebd6bd69cf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7dc0b81690dd43339691baf1d2538bbf": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8136010cfba64dff87c09eb5c2f51910": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ecd7fc85a08d403ab8c05d580f744e38": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4eed8bb3293c444f8b901c7c2cfa43a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_184afe5ad421461497155f532b6289fe", + "IPY_MODEL_f5b7222d3cae4137b57c6c88c2424ed4", + "IPY_MODEL_db3bc9aaf24344bcae8034a129652f89" + ], + "layout": "IPY_MODEL_7e7d6c2cf6e14c04a36589129ee049a2" + } + }, + "184afe5ad421461497155f532b6289fe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d1f0ed2e19c54effa7cc04ade6d610de", + "placeholder": "​", + "style": "IPY_MODEL_85148179679f4dbda22ce6b0d910bb04", + "value": "spm.model: 100%" + } + }, + "f5b7222d3cae4137b57c6c88c2424ed4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_8e975437a7484193acac10f205bc3212", + "max": 2464616, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9fc15125ab9e4e7fb2f5f1502ebecbd8", + "value": 2464616 + } + }, + "db3bc9aaf24344bcae8034a129652f89": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_df41b063bc85475d999a0b0c981672e1", + "placeholder": "​", + "style": "IPY_MODEL_d478bba6452b4399b538d9c2f9c6827b", + "value": " 2.46M/2.46M [00:00<00:00, 9.25MB/s]" + } + }, + "7e7d6c2cf6e14c04a36589129ee049a2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + 
"overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d1f0ed2e19c54effa7cc04ade6d610de": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "85148179679f4dbda22ce6b0d910bb04": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8e975437a7484193acac10f205bc3212": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9fc15125ab9e4e7fb2f5f1502ebecbd8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "df41b063bc85475d999a0b0c981672e1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d478bba6452b4399b538d9c2f9c6827b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0eab6cfa210a49f0a725fafbe0acd125": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aab69434fb9f4cee84533df70f1e06df", + "IPY_MODEL_c62d40c750ae45829cf0fe9e0e0cfb72", + "IPY_MODEL_cdcbefe5fc55421fbda161db3ae21441" + ], + "layout": 
"IPY_MODEL_5781c13d66b040f48c780b20f0e150d8" + } + }, + "aab69434fb9f4cee84533df70f1e06df": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_67ba0b82f66245e982a24d21dcc9d427", + "placeholder": "​", + "style": "IPY_MODEL_5965aa9cd4764be1a90cf8e70b1bc6c8", + "value": "added_tokens.json: 100%" + } + }, + "c62d40c750ae45829cf0fe9e0e0cfb72": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1e177fb0417b4397b478915667edd123", + "max": 23, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5751dcc2e2f04e968b6cea7739b908a8", + "value": 23 + } + }, + "cdcbefe5fc55421fbda161db3ae21441": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f04b4e6ab2e24bbd861646c0e42ba535", + 
"placeholder": "​", + "style": "IPY_MODEL_94c5382d4ffe4a71ab4d5bd9dd9f9625", + "value": " 23.0/23.0 [00:00<00:00, 435B/s]" + } + }, + "5781c13d66b040f48c780b20f0e150d8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "67ba0b82f66245e982a24d21dcc9d427": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5965aa9cd4764be1a90cf8e70b1bc6c8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1e177fb0417b4397b478915667edd123": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5751dcc2e2f04e968b6cea7739b908a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f04b4e6ab2e24bbd861646c0e42ba535": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": 
null, + "top": null, + "visibility": null, + "width": null + } + }, + "94c5382d4ffe4a71ab4d5bd9dd9f9625": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9e1998ff08ef4e3db92f0d143ba3312c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_41d6269a541e4d978cdfef5559c4b4c2", + "IPY_MODEL_7c30a786258b4583b52dadcd88b62b97", + "IPY_MODEL_afb65a84d45042ef9a2d3e6ab1d33e09" + ], + "layout": "IPY_MODEL_5ca612d0a4104cf18f190bbb2622d1c7" + } + }, + "41d6269a541e4d978cdfef5559c4b4c2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_127b01c984854d0abe278cc49f025fb8", + "placeholder": "​", + "style": "IPY_MODEL_61b0ac1c9bf744bfa9be245627ceb95d", + "value": "special_tokens_map.json: 100%" + } + }, + "7c30a786258b4583b52dadcd88b62b97": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d6893e19ff3f46a89dc3fd7753697782", + "max": 173, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6c2d320a6fe14e9b91864864381980ee", + "value": 173 + } + }, + "afb65a84d45042ef9a2d3e6ab1d33e09": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7689b0e41f4c4e8782b777e54e54f23a", + "placeholder": "​", + "style": "IPY_MODEL_41ed51b66c69417aaa2e63db0125fc1e", + "value": " 173/173 [00:00<00:00, 3.94kB/s]" + } + }, + "5ca612d0a4104cf18f190bbb2622d1c7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "127b01c984854d0abe278cc49f025fb8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "61b0ac1c9bf744bfa9be245627ceb95d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d6893e19ff3f46a89dc3fd7753697782": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6c2d320a6fe14e9b91864864381980ee": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"7689b0e41f4c4e8782b777e54e54f23a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "41ed51b66c69417aaa2e63db0125fc1e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c7f9a7f9951c4ceca087314bb37daa48": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_fe3b43c492554320861bd94992f998e7", + "IPY_MODEL_4176f7cc43424514af21a17e0d5c651f", + "IPY_MODEL_e31dd903bdcd4d308485e53b29efd3fc" + ], + "layout": "IPY_MODEL_8690171f376b4b81a1b55235cc9b5b14" + } + }, + "fe3b43c492554320861bd94992f998e7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cba91e119daa416baf5cd000ec68a3c5", + "placeholder": "​", + "style": "IPY_MODEL_2081af44876a4da2bbfbed670e7ef83e", + "value": "config.json: 100%" + } + }, + "4176f7cc43424514af21a17e0d5c651f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7f1a5012a8aa4dc59a99bc30193cd45f", + "max": 883, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fedd63519da5400da5fad04da0c037e7", + "value": 883 + } + }, + "e31dd903bdcd4d308485e53b29efd3fc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c186a155c8ba451089a290ece12b68e6", + "placeholder": "​", + "style": "IPY_MODEL_6d820256f83d4c0eb0762f7a0373095e", + "value": " 883/883 [00:00<00:00, 19.8kB/s]" + } + }, + "8690171f376b4b81a1b55235cc9b5b14": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cba91e119daa416baf5cd000ec68a3c5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2081af44876a4da2bbfbed670e7ef83e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7f1a5012a8aa4dc59a99bc30193cd45f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fedd63519da5400da5fad04da0c037e7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c186a155c8ba451089a290ece12b68e6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6d820256f83d4c0eb0762f7a0373095e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e90cf8bf022447908a38b214824b204f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c777482a53364af48a8d3ba52a5b703c", + "IPY_MODEL_677bc1a9cece43778370f69315c8515d", + "IPY_MODEL_b6c3f3d7c9a14322bbc5dd415e61732d" + ], + "layout": "IPY_MODEL_268f65028b794f4ea1ebb1c062b421b0" + } + }, + "c777482a53364af48a8d3ba52a5b703c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a1529d621ebc4943aa8579abb9e19cc2", + "placeholder": "​", + "style": "IPY_MODEL_cf4ec242aa4b4d75b559e19f0074fada", + "value": "pytorch_model.bin: 100%" + } + }, + "677bc1a9cece43778370f69315c8515d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f1d689bdb5ff4d72acd373717677394e", + "max": 1740393387, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b3d9f823468d4fa8b108cecf42f301a7", + "value": 1740393387 + } + }, + "b6c3f3d7c9a14322bbc5dd415e61732d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_da2f49210445411787efb2ed61df2eb8", + "placeholder": "​", + "style": "IPY_MODEL_ac257c28849e4544ab348d7a9b0541f7", + "value": " 1.74G/1.74G [00:14<00:00, 170MB/s]" + } + }, + "268f65028b794f4ea1ebb1c062b421b0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a1529d621ebc4943aa8579abb9e19cc2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cf4ec242aa4b4d75b559e19f0074fada": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f1d689bdb5ff4d72acd373717677394e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b3d9f823468d4fa8b108cecf42f301a7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "da2f49210445411787efb2ed61df2eb8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ac257c28849e4544ab348d7a9b0541f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Automatic detection of hallucination with SelfCheckGPT\n", + "\n", + "This notebook helps understand how hallucination metrics, such as SelfCheckGPT NLI score, can be used to automatically detect hallucinations.\n", + "\n", + "We will explore\n", + "- Heuristics on why LLMs hallucinate and how it could be automatically detected with metrics that measure sentences' inconsistency\n", + "- How to actually verify that hypothesis with the SelfCheckGPT NLI score on a real dataset derived from the WikiBio, to benchmark how accurate this metric is to detect hallucination automatically and reliably\n", + "\n", + "Our initial results show that this hallucination score has a rather calibrated recall, and high precision. This means that the higher the score, the more likely the model will be able to flag hallucinations (calibrated recall), and any flagged hallucination is almost certainly one (high precision, aka low false positive).\n", + "\n", + "As we work at [Mithril Security](https://www.mithrilsecurity.io/) on Confidential and Trustworthy Conversational AI, being able to know when an LLM is not to be trusted is paramount.\n", + "You can try BlindChat, our open-source and Confidential Conversational AI (aka any data sent to our AI remains private and not even our admins can see your prompts) at [chat.mithrilsecurity.io](https://chat.mithrilsecurity.io/).\n", + "\n", + "While the hallucination detection feature is not yet available in BlindChat, if you are interested in it, you can register [here](https://www.mithrilsecurity.io/registration-for-automated-hallucination-detection-in-blindchat) to show your interest in it so we know how to prioritize it and notify you when it is available." 
+ ], + "metadata": { + "id": "rEa8AIYvuDRX" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Context" + ], + "metadata": { + "id": "Kp2xirrmpyXk" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Heuristic on hallucination origins\n", + "\n", + "LLMs have shown tremendous potential recently, but their tendency to hallucinate wrong facts when prompted on specific tasks has made them unreliable in many real-world settings.\n", + "\n", + "For instance, if one were to deploy an LLM to help triage patients and answer simple medical questions, having an LLM hallucinate answers that are not medically grounded could have terrible consequences.\n", + "\n", + "Hallucinations often arise from the fact that LLMs are asked to answer prompts whose task / input / output was not present in the training set, and therefore will produce an answer not based on any ground truth.\n", + "\n", + "This makes sense when one knows that those models are taught to produce the most probable next token according to the statistics of their training set.\n", + "\n", + "Work from [McCoy, R. T., Yao, S., Friedman, D., Hardy, M., & Griffiths, T. L. (2023). Embers of Autoregression: Understanding Large Language Models Through the Problem They are Trained to Solve.](https://arxiv.org/abs/2309.13638)\n", + "shows that unseen tasks / outputs / inputs in the training set are the reason why LLMs hallucinate.\n", + "\n", + "![](https://github.com/dhuynh95/hallucination_article/blob/main/embers_graph.png?raw=true)\n", + "\n", + "For instance, they show that on the simple task of doing a Caesar cipher with a shift of 13 (aka shifting every letter by 13 to hide information), GPT4 is rather accurate. However, when asked to do it with a shift of 2, its accuracy decreases from 0.5 to almost 0. 
This is most likely due to the fact that the Internet is full of examples of a shift of 13 (as applying it twice returns the original message), while examples of a Caesar cipher with a shift of 2 are much less common.\n", + "\n", + "We can see the same patterns when the prompt and answer are not seen in the training set.\n", + "\n", + "This means that unlikely outputs, aka ones where the next token has a low score, will most likely be non-factual, and several samples from the same prompt will generate inconsistent results." + ], + "metadata": { + "id": "r_20uEgdp4W_" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Automatic hallucination detection\n", + "\n", + "This insight is leveraged by SelfCheckGPT ([Manakul, P., Liusie, A., & Gales, M. J. F. (2023). SelfCheckGPT: Zero-Resource Black-Box Hallucination Detection for Generative Large Language Models.](https://arxiv.org/abs/2303.08896)), as several samples of the same prompt are drawn, and used to detect inconsistencies among them. The higher the inconsistencies, the more likely the LLM is hallucinating.\n", + "\n", + "The way SelfCheckGPT NLI provides a hallucination score for a given prompt to a given LLM (e.g. GPT4 or any open-source LLM):\n", + "- Greedily sample the answer $r$ to the prompt\n", + "- Sample $N$ more answers $S^1, \\dots, S^N$ from the LLM\n", + "- For each sentence $r_i$ of $r$ and each sampled answer $S^n$, compute the likelihood there is a contradiction between $r_i$ and $S^n$ using a model for Natural Language Inference (NLI) like [DeBERTa-v3-large](https://huggingface.co/microsoft/deberta-v3-large). 
The more likely a contradiction is, the closer the score is to 1; the more likely an entailment is, the closer it is to 0. Here $z_e$ and $z_c$ are the NLI model's logits for the entailment and contradiction classes:\n", + "$$P(\\text{contradict} | r_i, S^n) = \\frac{\\exp(z_c)}{\\exp(z_e) + \\exp(z_c)}$$\n", + "- Compute the hallucination score of the sentence $r_i$ by averaging this probability over the $N$ samples:\n", + "$$S_{\\text{NLI}}(i) = \\frac{1}{N} \\sum_{n=1}^{N} P(\\text{contradict} | r_i, S^n)$$\n", + "\n", + "Note that the SelfCheckGPT NLI score has several advantages:\n", + "- It works in a blackbox setting, aka there is no need to have access to the weights or the log probabilities, which means it works with both closed-source models served behind APIs and fully transparent open-source models\n", + "- It works for free text generation, aka it covers almost any task, be it summarization, question answering in free form, or classification\n", + "\n", + "The reasoning why such an inconsistency score can be used to automatically detect hallucinations is the following:\n", + "- The less seen in the training set a specific task is, the more the LLM will be hallucinating (cf. 
the Embers of Autoregression paper mentioned earlier)\n", + "- The less a specific task is seen in the training set, the less confident the LLM will be in the next token to choose (aka higher entropy and the most likely token will have a low score, let's say 0.3, versus a very certain output of 0.9)\n", + "- The higher the entropy, the more diverse and inconsistent different samples from the same prompt will be\n", + "- The more inconsistent the samples, the higher a metric which looks at inconsistency between sentences, like SelfCheckGPT NLI score, is" + ], + "metadata": { + "id": "wj3r53uEFh4Q" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Exploration of hallucination score on WikiBio\n", + "\n", + "Now that we have understood how an inconsistency score like SelfCheckGPT NLI can be used to detect hallucinations, let's see its performance in practice.\n", + "\n", + "To do so, we will use the Wiki Bio hallucination dataset curated by the authors of SelfCheckGPT. It can be found on Hugging Face [here](https://huggingface.co/datasets/potsawee/wiki_bio_gpt3_hallucination).\n", + "\n", + "To test whether or not a model is hallucinating, they constructed a dataset where they asked GPT-3 to generate descriptions of topics with the prompt format **\"This is a Wikipedia passage about {concept}:\"**, recorded the output, and then had humans manually label each sentence of the generated text to obtain a gold standard for factuality. The labels were \"Accurate\" (0), \"Minor Inaccurate\" (0.5) and \"Major Inaccurate\" (1). \n", + "\n", + "Then they generated $N=20$ additional samples, that will be used to detect hallucination through inconsistency scoring." + ], + "metadata": { + "id": "_WB3IjhToaA4" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Setup\n", + "\n", + "First we install the needed libraries." 
+ ], + "metadata": { + "id": "CkpuJZOYxQqL" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2QB02xTi6XLE", + "outputId": "c93690e4-76fb-4850-b3b6-0efcc7885319" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.2)\n", + "Collecting datasets\n", + " Downloading datasets-2.15.0-py3-none-any.whl (521 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m521.2/521.2 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.0)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.1)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", + "Requirement already 
satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", + "Collecting pyarrow-hotfix (from datasets)\n", + " Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n", + "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n", + " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n", + "Collecting multiprocess (from datasets)\n", + " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.3)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.5.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.11.17)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3.post1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", + "Installing collected packages: pyarrow-hotfix, dill, multiprocess, datasets\n", + "Successfully installed datasets-2.15.0 dill-0.3.7 multiprocess-0.70.15 pyarrow-hotfix-0.6\n", + "Collecting sentencepiece\n", + " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: sentencepiece\n", + "Successfully installed sentencepiece-0.1.99\n", + "Collecting selfcheckgpt\n", + " Downloading selfcheckgpt-0.1.4.tar.gz (14 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: transformers>=4.11.3 in /usr/local/lib/python3.10/dist-packages (from selfcheckgpt) (4.35.2)\n", + "Requirement already satisfied: torch>=1.10 in /usr/local/lib/python3.10/dist-packages (from selfcheckgpt) (2.1.0+cu118)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from selfcheckgpt) (1.23.5)\n", + "Collecting bert_score (from selfcheckgpt)\n", + " Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.1/61.1 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: spacy in /usr/local/lib/python3.10/dist-packages (from selfcheckgpt) (3.6.1)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from selfcheckgpt) (3.8.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->selfcheckgpt) (3.13.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->selfcheckgpt) (4.5.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->selfcheckgpt) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->selfcheckgpt) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->selfcheckgpt) (3.1.2)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->selfcheckgpt) (2023.6.0)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->selfcheckgpt) (2.1.0)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.11.3->selfcheckgpt) 
(0.19.4)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.11.3->selfcheckgpt) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.11.3->selfcheckgpt) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.11.3->selfcheckgpt) (2023.6.3)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers>=4.11.3->selfcheckgpt) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.11.3->selfcheckgpt) (0.15.0)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.11.3->selfcheckgpt) (0.4.1)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.11.3->selfcheckgpt) (4.66.1)\n", + "Requirement already satisfied: pandas>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from bert_score->selfcheckgpt) (1.5.3)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from bert_score->selfcheckgpt) (3.7.1)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->selfcheckgpt) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->selfcheckgpt) (1.3.2)\n", + "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (3.0.12)\n", + "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (1.0.5)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (1.0.10)\n", + "Requirement 
already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (2.0.8)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (3.0.9)\n", + "Requirement already satisfied: thinc<8.2.0,>=8.1.8 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (8.1.12)\n", + "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (1.1.2)\n", + "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (2.4.8)\n", + "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (2.0.10)\n", + "Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (0.9.0)\n", + "Requirement already satisfied: pathy>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (0.10.3)\n", + "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (6.4.0)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (1.10.13)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (67.7.2)\n", + "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy->selfcheckgpt) (3.3.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->bert_score->selfcheckgpt) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->bert_score->selfcheckgpt) (2023.3.post1)\n", + "Requirement already satisfied: 
charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.11.3->selfcheckgpt) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.11.3->selfcheckgpt) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.11.3->selfcheckgpt) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers>=4.11.3->selfcheckgpt) (2023.11.17)\n", + "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.2.0,>=8.1.8->spacy->selfcheckgpt) (0.7.11)\n", + "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.2.0,>=8.1.8->spacy->selfcheckgpt) (0.1.4)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10->selfcheckgpt) (2.1.3)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert_score->selfcheckgpt) (1.2.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert_score->selfcheckgpt) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert_score->selfcheckgpt) (4.45.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert_score->selfcheckgpt) (1.4.5)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert_score->selfcheckgpt) (9.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->bert_score->selfcheckgpt) (3.1.1)\n", + "Requirement already 
satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10->selfcheckgpt) (1.3.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas>=1.0.1->bert_score->selfcheckgpt) (1.16.0)\n", + "Building wheels for collected packages: selfcheckgpt\n", + " Building wheel for selfcheckgpt (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for selfcheckgpt: filename=selfcheckgpt-0.1.4-py3-none-any.whl size=12240 sha256=b84177b1233dcd1d27681e31c4adedd25d5ec594ef3b08e0a47f73c8f4477c45\n", + " Stored in directory: /root/.cache/pip/wheels/dc/a6/76/107247d4924cb23f6e1b1987413b2a191979767e1e2c1d5751\n", + "Successfully built selfcheckgpt\n", + "Installing collected packages: bert_score, selfcheckgpt\n", + "Successfully installed bert_score-0.3.13 selfcheckgpt-0.1.4\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.2.2)\n", + "Collecting scikit-learn\n", + " Downloading scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.8/10.8 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy<2.0,>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.23.5)\n", + "Requirement already satisfied: scipy>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.11.4)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.2.0)\n", + "Installing collected packages: scikit-learn\n", + " Attempting uninstall: scikit-learn\n", + " Found existing installation: scikit-learn 1.2.2\n", + " Uninstalling scikit-learn-1.2.2:\n", + 
" Successfully uninstalled scikit-learn-1.2.2\n", + "Successfully installed scikit-learn-1.3.2\n" + ] + } + ], + "source": [ + "!pip install transformers datasets\n", + "!pip install sentencepiece\n", + "!pip install selfcheckgpt\n", + "!pip install -U scikit-learn" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Now we will download the Wiki bio dataset." + ], + "metadata": { + "id": "dLK_le9gQQgR" + } + }, + { + "cell_type": "code", + "source": [ + "from datasets import load_dataset\n", + "\n", + "dataset = load_dataset(\"potsawee/wiki_bio_gpt3_hallucination\")" + ], + "metadata": { + "id": "UM4mhgxdYeVJ", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 177, + "referenced_widgets": [ + "d9bde1fc58e04ab7bfeb94f92a683335", + "ee9565244a0d42b8b62d318c960ca1b1", + "94326129d3fa4a98b4c61b11cccd7e4b", + "98653d678d924112a43afd91f1b371d3", + "a6369928f21449d6a905b8284f490372", + "8181be7d095745809fd6804da72ac955", + "0896460c7d2e46628f45331cc3a18125", + "c16d998a6a83462fab2481e4e99cea5f", + "48a038cb7f96429c811bf6cefa21afdd", + "b61da09ee33548ea8cde1e5664706201", + "59f79ea59f934188a39d1064ab26a77e", + "b0acae0b82ad4c23a5b947d046959ff6", + "da632866e97848f5be8f37b0def5e9ef", + "d92d83c5a312450fa9099a18352286d8", + "42c9fffa0ca941f69bee3dedbb45c1d0", + "b85ddf53d7454614bcf5d7e3f8833b99", + "c40b1efe533e4ac692eb1340ee66038c", + "b75b9e1274ae423d957e3799ad6ba53f", + "ad84e62374f04b02bdb3a93cfa78aaee", + "9ac30c1c6cfe46d085a7b108e349e489", + "f01b745be7124b7fb382727aaf99925b", + "bd76f84525c04bdd9604bd919a01fd2d", + "ab995cba21af49919efc0e248cc8b481", + "8425833a60ec43b5a694a796a0268774", + "f0d0171ed71e4ac791a7639b53af4cd9", + "e18ed091e6c4417d8a98aaa0b25720c9", + "1956f23b62f44397b8bd344063d04d7b", + "a5f1bb6c09bc4e7b82682b39bb09ddcf", + "42923e07ee2e4a8ea3584a740b35f1c2", + "9c4308112b5e46e18af2ad71235fc6e7", + "cd0670827319470c83f164542c88b6b4", + "d9a44782358746f3bc92c4c5e9b8e7ff", + "cc48a98421e0432492c5622e6284ce51", + 
"5982f8275ac34c17a181b14e6345650c", + "e950601a746f4cf89431ad7400cdc1ab", + "ee3cf65a76004312823b36fac5ad7d25", + "9a803cf5f6c14106b298a47e8abbac60", + "6066ddef956841409de53d8d724a27fc", + "a2e41b48f9cd4370a60286d659dff719", + "2afafb253c4e4d29a69781906c21feea", + "0b78d6fb36f14f6c969b11ff22750320", + "449c6ccb64ee413fb58213a4a205b850", + "f9caef79002c45f796768ee94dc8adc3", + "dacbe3e3cf8e4278a26ab93fa92e9177", + "c510a8c155ea4725ae83da4d4be55401", + "90fb7d7a6c77446b9a3666a439aa35b1", + "fc82b37b5e2447eaa1ca2b1f741d1a6c", + "6807b8a6e7da400f980e8c0599e752bd", + "2becd97b91dd4b489de2d586c3160f02", + "11eafa8b329e4e4687a522ee57c8715d", + "bf578d430dff4f578ae5b5a43067ddcd", + "41ccdd351ca04e2e8d2d86e8afb789f9", + "6bb3c1577ea74a12b1eac78e06fd3e33", + "1cdac33b6a9642cb9051888d82b8ea01", + "a8b24a3070144a1d873f86d89b5aa64e" + ] + }, + "outputId": "5411f306-8f77-41a0-eef4-6261604e1daf" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading readme: 0%| | 0.00/2.45k [00:00\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gpt3_textwiki_bio_textgpt3_sentencesannotationwiki_bio_test_idxgpt3_text_samplessent_scores_nli
0John Russell Reynolds (1820–1876) was an Engli...Sir John Russell Reynolds, 1st Baronet (22 May...[John Russell Reynolds (1820–1876) was an Engl...[major_inaccurate, major_inaccurate, major_ina...62464[John Russell Reynolds (1 November 1829 – 11 ...None
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "- gpt3_text is the output of the prompt \"This is a Wikipedia passage about {concept}:\"\n", + "- wiki_bio_text is the ground truth\n", + "- gpt3_sentences is gpt3_text split into sentences\n", + "- annotation is the label\n", + "- gpt3_text_samples are the $N$ samples generated to detect inconsistency." + ], + "metadata": { + "id": "9CJjzOVVTQuI" + } + }, + { + "cell_type": "markdown", + "source": [ + "We can have a look here at samples:" + ], + "metadata": { + "id": "77LAY9ucQXDM" + } + }, + { + "cell_type": "code", + "source": [ + "example = dataset[\"evaluation\"][0]\n", + "\n", + "sentences = example[\"gpt3_sentences\"]\n", + "samples = example[\"gpt3_text_samples\"]\n", + "annotation = example[\"annotation\"]\n", + "\n", + "sentences, samples[0], annotation" + ], + "metadata": { + "id": "84XcvK6VlSHK", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "7268ac6c-7441-49e3-8d06-1f2b81855c51" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(['John Russell Reynolds (1820–1876) was an English lawyer, judge, and author.',\n", + " 'He was born in London, the son of a barrister, and was educated at Eton College and Trinity College, Cambridge.',\n", + " \"He was called to the bar in 1845, and became a Queen's Counsel in 1859.\",\n", + " 'He was appointed a judge of the Court of Common Pleas in 1867, and was knighted in 1871.',\n", + " 'Reynolds was a prolific author, writing on a wide range of topics.',\n", + " 'He wrote several books on legal topics, including The Law of Libel and Slander (1863), The Law of Copyright (1865), and The Law of Patents for Inventions (1868).',\n", + " 'He also wrote on a variety of other topics, including history, biography, and literature.',\n", + " 'He was a frequent contributor to the Saturday Review, and wrote several books 
on Shakespeare, including The Mystery of William Shakespeare (1848) and The Authorship of Shakespeare (1875).',\n", + " 'He also wrote a biography of the poet John Keats (1848).'],\n", + " 'John Russell Reynolds (1 November 1829 – 11 March 1907) was an English lexicographer, editor and author. Born in London, he was the eldest son of the first Lord Ogmore, and was educated at Trinity College, Oxford, where he graduated B.A. in 1852 and became a Fellow in 1854. He was president of Magdalen Hall from 1864 to 1884, and from 1864 to 1883 was assistant-editor to the Oxford English Dictionary under James Murray. \\n\\nHe was a permanent contributor to The Saturday Review, and wrote several books about the House of Commons. He also compiled dictionaries of quotations and biographies and edited collections of newspaper articles. He had a particular interest in the works of Christian mystics, writing studies of the lives and works of Saints Augustine and Thomas à Kempis. For his edition of Thomas à Kempis\\' \"The Imitation of Christ\", first published in 1875, he wrote a biographical introduction.',\n", + " ['major_inaccurate',\n", + " 'major_inaccurate',\n", + " 'major_inaccurate',\n", + " 'major_inaccurate',\n", + " 'major_inaccurate',\n", + " 'major_inaccurate',\n", + " 'major_inaccurate',\n", + " 'major_inaccurate',\n", + " 'major_inaccurate'])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Computing the NLI Scores" + ], + "metadata": { + "id": "jsJmitbtxZ-g" + } + }, + { + "cell_type": "markdown", + "source": [ + "Recalculate the NLI scores of the original wiki bio dataset." 
+ ], + "metadata": { + "id": "YIhhXXcw5_cl" + } + }, + { + "cell_type": "code", + "source": [ + "from tqdm import tqdm\n", + "from selfcheckgpt.modeling_selfcheck import SelfCheckNLI\n", + "import torch\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "selfcheck_nli = SelfCheckNLI(device=device) # set device to 'cuda' if GPU is available\n", + "\n", + "for index, example in tqdm(df.iterrows()):\n", + " sentences = example[\"gpt3_sentences\"]\n", + " samples = example[\"gpt3_text_samples\"]\n", + " sent_scores_nli = selfcheck_nli.predict(\n", + " sentences = sentences, # list of sentences\n", + " sampled_passages = samples, # list of sampled passages\n", + " )\n", + " df.loc[index, \"sent_scores_nli\"] = str(list(sent_scores_nli)) # Store the scores in the sent_scores_nli column\n", + " df.to_csv(\"./wiki_bio_gpt3_hallucination.csv\", index=False)" + ], + "metadata": { + "id": "OZJAv27Lp6io", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 244, + "referenced_widgets": [ + "a54c92d5587542e2968bf15c8f5bc366", + "708d4f18c65d4f70bddb6f76d96359c9", + "65de6148380a42d584d446f3694cc66e", + "87a3765acf7443ecb47f823cdef319c5", + "0d7704d9dabf474db287a10547717d9f", + "d9c55a34c29d477a94d7cb9d8c954bda", + "f4537afacf844d158ad644dcd4166198", + "db4955a6aee24af6a0cf79ebd6bd69cf", + "7dc0b81690dd43339691baf1d2538bbf", + "8136010cfba64dff87c09eb5c2f51910", + "ecd7fc85a08d403ab8c05d580f744e38", + "4eed8bb3293c444f8b901c7c2cfa43a8", + "184afe5ad421461497155f532b6289fe", + "f5b7222d3cae4137b57c6c88c2424ed4", + "db3bc9aaf24344bcae8034a129652f89", + "7e7d6c2cf6e14c04a36589129ee049a2", + "d1f0ed2e19c54effa7cc04ade6d610de", + "85148179679f4dbda22ce6b0d910bb04", + "8e975437a7484193acac10f205bc3212", + "9fc15125ab9e4e7fb2f5f1502ebecbd8", + "df41b063bc85475d999a0b0c981672e1", + "d478bba6452b4399b538d9c2f9c6827b", + "0eab6cfa210a49f0a725fafbe0acd125", + "aab69434fb9f4cee84533df70f1e06df", + "c62d40c750ae45829cf0fe9e0e0cfb72", + 
"cdcbefe5fc55421fbda161db3ae21441", + "5781c13d66b040f48c780b20f0e150d8", + "67ba0b82f66245e982a24d21dcc9d427", + "5965aa9cd4764be1a90cf8e70b1bc6c8", + "1e177fb0417b4397b478915667edd123", + "5751dcc2e2f04e968b6cea7739b908a8", + "f04b4e6ab2e24bbd861646c0e42ba535", + "94c5382d4ffe4a71ab4d5bd9dd9f9625", + "9e1998ff08ef4e3db92f0d143ba3312c", + "41d6269a541e4d978cdfef5559c4b4c2", + "7c30a786258b4583b52dadcd88b62b97", + "afb65a84d45042ef9a2d3e6ab1d33e09", + "5ca612d0a4104cf18f190bbb2622d1c7", + "127b01c984854d0abe278cc49f025fb8", + "61b0ac1c9bf744bfa9be245627ceb95d", + "d6893e19ff3f46a89dc3fd7753697782", + "6c2d320a6fe14e9b91864864381980ee", + "7689b0e41f4c4e8782b777e54e54f23a", + "41ed51b66c69417aaa2e63db0125fc1e", + "c7f9a7f9951c4ceca087314bb37daa48", + "fe3b43c492554320861bd94992f998e7", + "4176f7cc43424514af21a17e0d5c651f", + "e31dd903bdcd4d308485e53b29efd3fc", + "8690171f376b4b81a1b55235cc9b5b14", + "cba91e119daa416baf5cd000ec68a3c5", + "2081af44876a4da2bbfbed670e7ef83e", + "7f1a5012a8aa4dc59a99bc30193cd45f", + "fedd63519da5400da5fad04da0c037e7", + "c186a155c8ba451089a290ece12b68e6", + "6d820256f83d4c0eb0762f7a0373095e", + "e90cf8bf022447908a38b214824b204f", + "c777482a53364af48a8d3ba52a5b703c", + "677bc1a9cece43778370f69315c8515d", + "b6c3f3d7c9a14322bbc5dd415e61732d", + "268f65028b794f4ea1ebb1c062b421b0", + "a1529d621ebc4943aa8579abb9e19cc2", + "cf4ec242aa4b4d75b559e19f0074fada", + "f1d689bdb5ff4d72acd373717677394e", + "b3d9f823468d4fa8b108cecf42f301a7", + "da2f49210445411787efb2ed61df2eb8", + "ac257c28849e4544ab348d7a9b0541f7" + ] + }, + "outputId": "ef8f0283-1694-4a02-c621-93bad9cc8bf1" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/400 [00:00] 4.86M --.-KB/s in 0.05s \n", + "\n", + "2023-12-02 07:07:30 (93.9 MB/s) - ‘wiki_bio_gpt3_hallucination.csv’ saved [5100772/5100772]\n", + "\n", + "FINISHED --2023-12-02 07:07:30--\n", + "Total wall clock time: 2.2s\n", + 
"Downloaded: 1 files, 4.9M in 0.05s (93.9 MB/s)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"./wiki_bio_gpt3_hallucination.csv\")\n", + "df" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 597 + }, + "id": "QBJ_IaYJx_4s", + "outputId": "56911bf4-3cd2-4fe6-d2ba-853018cfe5e7" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " gpt3_text \\\n", + "0 John Russell Reynolds (1820–1876) was an Engli... \n", + "1 Matthew Aylmer, 1st Baron Aylmer (1708–1794) w... \n", + "2 Rick Mahler (born Richard Alan Mahler on April... \n", + "3 James Blair (1732–1782) was an American lawyer... \n", + "4 Tim Finchem (born August 24, 1947) is an Ameri... \n", + ".. ... \n", + "233 Gündüz Kılıç (born 28 April 1988) is a Turkish... \n", + "234 Michael Replogle (born 1951) is an American en... \n", + "235 Billy Burke (born October 28, 1894 – died Apri... \n", + "236 Ted Childs (born October 15, 1956) is an Ameri... \n", + "237 Edward Synge (1714–1798) was an Irish Anglican... \n", + "\n", + " wiki_bio_text \\\n", + "0 Sir John Russell Reynolds, 1st Baronet (22 May... \n", + "1 Admiral of the Fleet Matthew Aylmer, 1st Baron... \n", + "2 Richard Keith Mahler (August 5, 1953 in Austin... \n", + "3 James Blair (September 26, 1786 - April 1, 183... \n", + "4 Timothy W. Finchem (born April 19, 1947) is th... \n", + ".. ... \n", + "233 Baba Gündüz Kılıç (1918-1980) was a Turkish fo... \n", + "234 Michael Replogle is an internationally recogni... \n", + "235 William John Burke (Polonized as Burkeauskas; ... \n", + "236 Ted Childs commenced training as a programme d... \n", + "237 Edward Synge (1659–1741) was an Anglican clerg... \n", + "\n", + " gpt3_sentences \\\n", + "0 ['John Russell Reynolds (1820–1876) was an Eng... \n", + "1 ['Matthew Aylmer, 1st Baron Aylmer (1708–1794)... 
\n", + "2 ['Rick Mahler (born Richard Alan Mahler on Apr... \n", + "3 ['James Blair (1732–1782) was an American lawy... \n", + "4 ['Tim Finchem (born August 24, 1947) is an Ame... \n", + ".. ... \n", + "233 ['Gündüz Kılıç (born 28 April 1988) is a Turki... \n", + "234 ['Michael Replogle (born 1951) is an American ... \n", + "235 ['Billy Burke (born October 28, 1894 – died Ap... \n", + "236 ['Ted Childs (born October 15, 1956) is an Ame... \n", + "237 ['Edward Synge (1714–1798) was an Irish Anglic... \n", + "\n", + " annotation wiki_bio_test_idx \\\n", + "0 ['major_inaccurate', 'major_inaccurate', 'majo... 62464 \n", + "1 ['minor_inaccurate', 'minor_inaccurate', 'mino... 49661 \n", + "2 ['minor_inaccurate', 'minor_inaccurate', 'accu... 20483 \n", + "3 ['minor_inaccurate', 'major_inaccurate', 'majo... 71174 \n", + "4 ['minor_inaccurate', 'accurate', 'major_inaccu... 39945 \n", + ".. ... ... \n", + "233 ['minor_inaccurate', 'major_inaccurate', 'majo... 25585 \n", + "234 ['accurate', 'accurate', 'accurate', 'accurate... 10740 \n", + "235 ['minor_inaccurate', 'major_inaccurate', 'majo... 41463 \n", + "236 ['major_inaccurate', 'major_inaccurate', 'majo... 57341 \n", + "237 ['minor_inaccurate', 'minor_inaccurate', 'accu... 66046 \n", + "\n", + " gpt3_text_samples \\\n", + "0 ['John Russell Reynolds (1 November 1829 – 11... \n", + "1 ['\"Matthew Aylmer, 1st Baron Aylmer (c. 1650–1... \n", + "2 ['Rick Mahler (January 8, 1956 – May 25, 2005)... \n", + "3 ['James Blair (April 2, 1755 – March 8, 1842) ... \n", + "4 ['\"Tim Finchem (born May 27, 1953) is an Ameri... \n", + ".. ... \n", + "233 [\"Gündüz Kılıç (1518 – 1567) was an Ottoman na... \n", + "234 [\"Michael Replogle (born 1946) is an American ... \n", + "235 ['Billy Burke (21 August 1882 – 22 December 19... \n", + "236 ['\"Ted Childs was an American actor and busine... \n", + "237 [\"Edward Synge (1562-1641) was an English-born... 
\n", + "\n", + " sent_scores_nli \n", + "0 [0.8696355807129293, 0.9287475407123565, 0.931... \n", + "1 [0.9112446781247854, 0.9620911836624145, 0.997... \n", + "2 [0.9891034990549088, 0.4388777802581899, 0.955... \n", + "3 [0.9353850647807121, 0.8861920005059801, 0.993... \n", + "4 [0.9961978942155838, 0.2596603611658793, 0.992... \n", + ".. ... \n", + "233 [0.9997160047292709, 0.998373419046402, 0.9956... \n", + "234 [0.35181010272353885, 0.37309717537864345, 0.0... \n", + "235 [0.9992900729179383, 0.9886163860559464, 0.996... \n", + "236 [0.9853663831949234, 0.7629887842107564, 0.920... \n", + "237 [0.9601712554693222, 0.9915205985307693, 0.923... \n", + "\n", + "[238 rows x 7 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gpt3_textwiki_bio_textgpt3_sentencesannotationwiki_bio_test_idxgpt3_text_samplessent_scores_nli
0John Russell Reynolds (1820–1876) was an Engli...Sir John Russell Reynolds, 1st Baronet (22 May...['John Russell Reynolds (1820–1876) was an Eng...['major_inaccurate', 'major_inaccurate', 'majo...62464['John Russell Reynolds (1 November 1829 – 11...[0.8696355807129293, 0.9287475407123565, 0.931...
1Matthew Aylmer, 1st Baron Aylmer (1708–1794) w...Admiral of the Fleet Matthew Aylmer, 1st Baron...['Matthew Aylmer, 1st Baron Aylmer (1708–1794)...['minor_inaccurate', 'minor_inaccurate', 'mino...49661['\"Matthew Aylmer, 1st Baron Aylmer (c. 1650–1...[0.9112446781247854, 0.9620911836624145, 0.997...
2Rick Mahler (born Richard Alan Mahler on April...Richard Keith Mahler (August 5, 1953 in Austin...['Rick Mahler (born Richard Alan Mahler on Apr...['minor_inaccurate', 'minor_inaccurate', 'accu...20483['Rick Mahler (January 8, 1956 – May 25, 2005)...[0.9891034990549088, 0.4388777802581899, 0.955...
3James Blair (1732–1782) was an American lawyer...James Blair (September 26, 1786 - April 1, 183...['James Blair (1732–1782) was an American lawy...['minor_inaccurate', 'major_inaccurate', 'majo...71174['James Blair (April 2, 1755 – March 8, 1842) ...[0.9353850647807121, 0.8861920005059801, 0.993...
4Tim Finchem (born August 24, 1947) is an Ameri...Timothy W. Finchem (born April 19, 1947) is th...['Tim Finchem (born August 24, 1947) is an Ame...['minor_inaccurate', 'accurate', 'major_inaccu...39945['\"Tim Finchem (born May 27, 1953) is an Ameri...[0.9961978942155838, 0.2596603611658793, 0.992...
........................
233Gündüz Kılıç (born 28 April 1988) is a Turkish...Baba Gündüz Kılıç (1918-1980) was a Turkish fo...['Gündüz Kılıç (born 28 April 1988) is a Turki...['minor_inaccurate', 'major_inaccurate', 'majo...25585[\"Gündüz Kılıç (1518 – 1567) was an Ottoman na...[0.9997160047292709, 0.998373419046402, 0.9956...
234Michael Replogle (born 1951) is an American en...Michael Replogle is an internationally recogni...['Michael Replogle (born 1951) is an American ...['accurate', 'accurate', 'accurate', 'accurate...10740[\"Michael Replogle (born 1946) is an American ...[0.35181010272353885, 0.37309717537864345, 0.0...
235Billy Burke (born October 28, 1894 – died Apri...William John Burke (Polonized as Burkeauskas; ...['Billy Burke (born October 28, 1894 – died Ap...['minor_inaccurate', 'major_inaccurate', 'majo...41463['Billy Burke (21 August 1882 – 22 December 19...[0.9992900729179383, 0.9886163860559464, 0.996...
236Ted Childs (born October 15, 1956) is an Ameri...Ted Childs commenced training as a programme d...['Ted Childs (born October 15, 1956) is an Ame...['major_inaccurate', 'major_inaccurate', 'majo...57341['\"Ted Childs was an American actor and busine...[0.9853663831949234, 0.7629887842107564, 0.920...
237Edward Synge (1714–1798) was an Irish Anglican...Edward Synge (1659–1741) was an Anglican clerg...['Edward Synge (1714–1798) was an Irish Anglic...['minor_inaccurate', 'minor_inaccurate', 'accu...66046[\"Edward Synge (1562-1641) was an English-born...[0.9601712554693222, 0.9915205985307693, 0.923...
\n", + "

238 rows × 7 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Now that we have an NLI-based hallucination score for each sentence, we will create a DataFrame with one row per sentence to make it easier to compute precision and recall for hallucination detection." + ], + "metadata": { + "id": "GKIcqFS6Q7hI" + } + }, + { + "cell_type": "code", + "source": [ + "import ast\n", + "\n", + "output_df = []\n", + "\n", + "for _, row in df.iterrows():\n", + " scores = row[\"sent_scores_nli\"]\n", + " scores = ast.literal_eval(scores) # We recreate the list of scores per sentence\n", + " sentences = ast.literal_eval(row[\"gpt3_sentences\"])\n", + " annotations = ast.literal_eval(row[\"annotation\"])\n", + " for i, annotation in enumerate(annotations):\n", + " idx = len(output_df)\n", + "\n", + " output_df.append({\n", + " \"index\": idx,\n", + " \"sentence\": sentences[i],\n", + " \"wiki_bio_text\": row[\"wiki_bio_text\"],\n", + " \"annotation\": annotation,\n", + " \"probability\": scores[i]\n", + " })\n", + "\n", + "output_df = pd.DataFrame(output_df)" + ], + "metadata": { + "id": "9k3HSHKiBOAB" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "We will use the following convention:\n", + "- Label: The gold-standard human annotation: 1 means the sentence was annotated as a hallucination (minor or major inaccuracy), 0 means it was annotated as accurate.\n", + "- Probability: Probability of hallucination, which is just the previous NLI score.\n", + "- Prediction: Predicted label, 1 if the score is above 0.35, else 0." 
+ ], + "metadata": { + "id": "8cIeJ26kRLOp" + } + }, + { + "cell_type": "code", + "source": [ + "output_df[\"label\"] = output_df.annotation.apply(lambda x: 0 if x == \"accurate\" else 1) # We add the ground truth label" + ], + "metadata": { + "id": "7Acb7LFRAt-u" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "threshold = 0.35\n", + "output_df[\"prediction\"] = output_df[\"probability\"].apply(lambda x: 1 if x > threshold else 0) # We add the predicted label" + ], + "metadata": { + "id": "k6QBhIkgE3pa" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "output_df" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "pJcHiGtcFvHZ", + "outputId": "02e99db4-dc60-4550-d597-3ab0bd0ef8f6" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " index sentence \\\n", + "0 0 John Russell Reynolds (1820–1876) was an Engli... \n", + "1 1 He was born in London, the son of a barrister,... \n", + "2 2 He was called to the bar in 1845, and became a... \n", + "3 3 He was appointed a judge of the Court of Commo... \n", + "4 4 Reynolds was a prolific author, writing on a w... \n", + "... ... ... \n", + "1903 1903 He was appointed Dean of Clonfert in 1760 and ... \n", + "1904 1904 In 1781 he was appointed Archbishop of Tuam, a... \n", + "1905 1905 Synge was a noted scholar and a friend of the ... \n", + "1906 1906 He was a strong supporter of the Church of Ire... \n", + "1907 1907 He was also a noted collector of books and man... \n", + "\n", + " wiki_bio_text annotation \\\n", + "0 Sir John Russell Reynolds, 1st Baronet (22 May... major_inaccurate \n", + "1 Sir John Russell Reynolds, 1st Baronet (22 May... major_inaccurate \n", + "2 Sir John Russell Reynolds, 1st Baronet (22 May... major_inaccurate \n", + "3 Sir John Russell Reynolds, 1st Baronet (22 May... 
major_inaccurate \n", + "4 Sir John Russell Reynolds, 1st Baronet (22 May... major_inaccurate \n", + "... ... ... \n", + "1903 Edward Synge (1659–1741) was an Anglican clerg... major_inaccurate \n", + "1904 Edward Synge (1659–1741) was an Anglican clerg... minor_inaccurate \n", + "1905 Edward Synge (1659–1741) was an Anglican clerg... minor_inaccurate \n", + "1906 Edward Synge (1659–1741) was an Anglican clerg... minor_inaccurate \n", + "1907 Edward Synge (1659–1741) was an Anglican clerg... minor_inaccurate \n", + "\n", + " probability label prediction \n", + "0 0.869636 1 1 \n", + "1 0.928748 1 1 \n", + "2 0.931370 1 1 \n", + "3 0.982257 1 1 \n", + "4 0.221962 1 0 \n", + "... ... ... ... \n", + "1903 0.999400 1 1 \n", + "1904 0.941169 1 1 \n", + "1905 0.755755 1 1 \n", + "1906 0.677196 1 1 \n", + "1907 0.702615 1 1 \n", + "\n", + "[1908 rows x 7 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexsentencewiki_bio_textannotationprobabilitylabelprediction
00John Russell Reynolds (1820–1876) was an Engli...Sir John Russell Reynolds, 1st Baronet (22 May...major_inaccurate0.86963611
11He was born in London, the son of a barrister,...Sir John Russell Reynolds, 1st Baronet (22 May...major_inaccurate0.92874811
22He was called to the bar in 1845, and became a...Sir John Russell Reynolds, 1st Baronet (22 May...major_inaccurate0.93137011
33He was appointed a judge of the Court of Commo...Sir John Russell Reynolds, 1st Baronet (22 May...major_inaccurate0.98225711
44Reynolds was a prolific author, writing on a w...Sir John Russell Reynolds, 1st Baronet (22 May...major_inaccurate0.22196210
........................
19031903He was appointed Dean of Clonfert in 1760 and ...Edward Synge (1659–1741) was an Anglican clerg...major_inaccurate0.99940011
19041904In 1781 he was appointed Archbishop of Tuam, a...Edward Synge (1659–1741) was an Anglican clerg...minor_inaccurate0.94116911
19051905Synge was a noted scholar and a friend of the ...Edward Synge (1659–1741) was an Anglican clerg...minor_inaccurate0.75575511
19061906He was a strong supporter of the Church of Ire...Edward Synge (1659–1741) was an Anglican clerg...minor_inaccurate0.67719611
19071907He was also a noted collector of books and man...Edward Synge (1659–1741) was an Anglican clerg...minor_inaccurate0.70261511
\n", + "

1908 rows × 7 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's have a look at the distribution of hallucination scores:" + ], + "metadata": { + "id": "hZq2eUFxRlKw" + } + }, + { + "cell_type": "code", + "source": [ + "output_df.probability.hist()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 447 + }, + "id": "Rd629rPEhNxB", + "outputId": "5ddb4b39-ed8a-42a5-fa87-9cbe7df069e7" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAhvElEQVR4nO3de3BU9f3/8VcSkg3BLCE4uWmASLVcLUokLOjXVkNiQQvKVBlTJloKVoItZAYF5SLhEsxQZMAIxSroFKTailWkITEWGCRcGqGDgKiFii3dUIuwSMpmk5zfH0721zWgbNyc/Wx4Pmaccc9+9uznvLuVp7tZE2VZliUAAACDRId7AwAAAF9FoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwTqdwb6AtmpubdeLECSUmJioqKirc2wEAAJfAsiydPXtWGRkZio7++vdIIjJQTpw4oczMzHBvAwAAtMGnn36qq6+++mvXRGSgJCYmSvryAp1OZ8jO6/P5VFlZqby8PMXGxobsvAjEnO3DrO3BnO3BnO3TXrP2eDzKzMz0/zn+dSIyUFo+1nE6nSEPlISEBDmdTl787Yg524dZ24M524M526e9Z30pP57BD8kCAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4ncK9AQAAOrJeM94K9xaC5oixVDYkvHvgHRQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcYIKlKamJs2ePVtZWVnq3Lmzevfurfnz58uyLP8ay7I0Z84cpaenq3PnzsrNzdVHH30UcJ5Tp06poKBATqdTSUlJmjBhgr744ovQXBEAAIh4QQXKU089pZUrV+qZZ57R4cOH9dRTT6msrEwrVqzwrykrK9Py5cu1atUq7d69W126dFF+fr7Onz/vX1NQUKCDBw+qqqpKmzZt0vbt2zVp0qTQXRUAAIhonYJZvHPnTo0ePVqjRo2SJPXq1Usvv/yy9uzZI+nLd0+WLVumWbNmafTo0ZKkl156SampqXr99dc1btw4HT58WBUVFdq7d6+ys7MlSStWrNDIkSO1ZMkSZWRkhPL6AABABAoqUIYNG6bVq1frww8/1HXXXae//vWv2rFjh5YuXSpJOnbsmNxut3Jzc/2P6dq1q3JyclRTU6Nx48appqZGSUlJ/jiRpNzcXEVHR2v37t26++67Wz2v1+uV1+v13/Z4PJIkn88nn88X3BV/jZZzhfKcaI0524dZ24M52yNS5+yIsb55kWEc0V/uOdSzDuZ8QQXKjBkz5PF41KdPH8XExKipqUkLFy5UQUGBJMntdkuSUlNTAx6Xmprqv8/tdislJSVwE506KTk52b/mq0pLSzVv3rxWxysrK5WQkBDMJVySqqqqkJ8TrTFn+zBrezBne0TanMuGhHsHbRfqWdfX11/y2qAC5ZVXXtG6deu0fv169e/fX/v379fUqVOVkZGhwsLCoDd6qWbOnKni4mL/bY/Ho8zMTOXl5cnpdIbseXw+n6qqqjRixAjFxsaG7LwIxJztw6ztwZztEalzHvDklnBvIWiOaEvzs5tDPuuWT0AuRVCBMn36dM2YMUPjxo2TJA0cOFCffPKJSktLVVhYqLS0NElSXV2d0tPT/Y+rq6vToEGDJElpaWk6efJkwHkbGxt16tQp/+O/yuFwyOFwtDoeGxvbLi/S9jovAjFn+zBrezBne0TanL1NUeHeQpuFetbBnCuob/HU19crOjrwITExMWpubpYkZWVlKS0tTdXV1f77PR6Pdu/eLZfLJ
UlyuVw6ffq0amtr/WveeecdNTc3KycnJ5jtAACADiqod1DuuusuLVy4UD169FD//v21b98+LV26VD/96U8lSVFRUZo6daoWLFiga6+9VllZWZo9e7YyMjI0ZswYSVLfvn11xx13aOLEiVq1apV8Pp+mTJmicePG8Q0eAAAgKchAWbFihWbPnq3Jkyfr5MmTysjI0EMPPaQ5c+b41zz66KM6d+6cJk2apNOnT+vmm29WRUWF4uPj/WvWrVunKVOm6Pbbb1d0dLTGjh2r5cuXh+6qAABARAsqUBITE7Vs2TItW7bsomuioqJUUlKikpKSi65JTk7W+vXrg3lqAABwGeF38QAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADBO0IHyz3/+Uz/5yU/UvXt3de7cWQMHDtRf/vIX//2WZWnOnDlKT09X586dlZubq48++ijgHKdOnVJBQYGcTqeSkpI0YcIEffHFF9/+agAAQIcQVKB8/vnnGj58uGJjY/WnP/1Jhw4d0q9+9St169bNv6asrEzLly/XqlWrtHv3bnXp0kX5+fk6f/68f01BQYEOHjyoqqoqbdq0Sdu3b9ekSZNCd1UAACCidQpm8VNPPaXMzEytWbPGfywrK8v/95ZladmyZZo1a5ZGjx4tSXrppZeUmpqq119/XePGjdPhw4dVUVGhvXv3Kjs7W5K0YsUKjRw5UkuWLFFGRkYorgsAAESwoALljTfeUH5+vn784x9r27ZtuuqqqzR58mRNnDhRknTs2DG53W7l5ub6H9O1a1fl5OSopqZG48aNU01NjZKSkvxxIkm5ubmKjo7W7t27dffdd7d6Xq/XK6/X67/t8XgkST6fTz6fL7gr/hot5wrlOdEac7YPs7YHc7ZHpM7ZEWOFewtBc0R/uedQzzqY8wUVKEePHtXKlStVXFysxx9/XHv37tUvfvELxcXFqbCwUG63W5KUmpoa8LjU1FT/fW63WykpKYGb6NRJycnJ/jVfVVpaqnnz5rU6XllZqYSEhGAu4ZJUVVWF/JxojTnbh1nbgznbI9LmXDYk3Dtou1DPur6+/pLXBhUozc3Nys7O1qJFiyRJN9xwg95//32tWrVKhYWFwe0yCDNnzlRxcbH/tsfjUWZmpvLy8uR0OkP2PD6fT1VVVRoxYoRiY2NDdl4EYs72Ydb2YM72iNQ5D3hyS7i3EDRHtKX52c0hn3XLJyCXIqhASU9PV79+/QKO9e3bV3/4wx8kSWlpaZKkuro6paen+9fU1dVp0KBB/jUnT54MOEdjY6NOnTrlf/xXORwOORyOVsdjY2Pb5UXaXudFIOZsH2ZtD+Zsj0ibs7cpKtxbaLNQzzqYcwX1LZ7hw4fryJEjAcc+/PBD9ezZU9KXPzCblpam6upq//0ej0e7d++Wy+WSJLlcLp0+fVq1tbX+Ne+8846am5uVk5MTzHYAAEAHFdQ7KNOmTdOwYcO0aNEi3XvvvdqzZ49Wr16t1atXS
5KioqI0depULViwQNdee62ysrI0e/ZsZWRkaMyYMZK+fMfljjvu0MSJE7Vq1Sr5fD5NmTJF48aN4xs8AABAUpCBctNNN2njxo2aOXOmSkpKlJWVpWXLlqmgoMC/5tFHH9W5c+c0adIknT59WjfffLMqKioUHx/vX7Nu3TpNmTJFt99+u6KjozV27FgtX748dFcFAAAiWlCBIkl33nmn7rzzzoveHxUVpZKSEpWUlFx0TXJystavXx/sUwMAgMsEv4sHAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcb5VoCxevFhRUVGaOnWq/9j58+dVVFSk7t2764orrtDYsWNVV1cX8Ljjx49r1KhRSkhIUEpKiqZPn67GxsZvsxUAANCBtDlQ9u7dq1//+te6/vrrA45PmzZNb775pl599VVt27ZNJ06c0D333OO/v6mpSaNGjVJDQ4N27typF198UWvXrtWcOXPafhUAAKBDaVOgfPHFFyooKNBzzz2nbt26+Y+fOXNGzz//vJYuXarbbrtNgwcP1po1a7Rz507t2rVLklRZWalDhw7pt7/9rQYNGqQf/vCHmj9/vsrLy9XQ0BCaqwIAABGtU1seVFRUpFGjRik3N1cLFizwH6+trZXP51Nubq7/WJ8+fdSjRw/V1NRo6NChqqmp0cCBA5Wamupfk5+fr4cfflgHDx7UDTfc0Or5vF6vvF6v/7bH45Ek+Xw++Xy+tlzCBbWcK5TnRGvM2T7M2h7M2R6ROmdHjBXuLQTNEf3lnkM962DOF3SgbNiwQe+995727t3b6j632624uDglJSUFHE9NTZXb7fav+d84abm/5b4LKS0t1bx581odr6ysVEJCQrCX8I2qqqpCfk60xpztw6ztwZztEWlzLhsS7h20XahnXV9ff8lrgwqUTz/9VL/85S9VVVWl+Pj4oDfWVjNnzlRxcbH/tsfjUWZmpvLy8uR0OkP2PD6fT1VVVRoxYoRiY2NDdl4EYs72Ydb2YM72iNQ5D3hyS7i3EDRHtKX52c0hn3XLJyCXIqhAqa2t1cmTJ3XjjTf6jzU1NWn79u165plntGXLFjU0NOj06dMB76LU1dUpLS1NkpSWlqY9e/YEnLflWz4ta77K4XDI4XC0Oh4bG9suL9L2Oi8CMWf7MGt7MGd7RNqcvU1R4d5Cm4V61sGcK6gfkr399tt14MAB7d+/3/9Xdna2CgoK/H8fGxur6upq/2OOHDmi48ePy+VySZJcLpcOHDigkydP+tdUVVXJ6XSqX79+wWwHAAB0UEG9g5KYmKgBAwYEHOvSpYu6d+/uPz5hwgQVFxcrOTlZTqdTjzzyiFwul4YOHSpJysvLU79+/TR+/HiVlZXJ7XZr1qxZKioquuC7JAAA4PLTpm/xfJ2nn35a0dHRGjt2rLxer/Lz8/Xss8/674+JidGmTZv08MMPy+VyqUuXLiosLFRJSUmotwIAACLUtw6UrVu3BtyOj49XeXm5y
svLL/qYnj17avPmzd/2qQEAQAfF7+IBAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMbpFO4NAAAQjAFPbpG3KSrc20A74x0UAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxggqU0tJS3XTTTUpMTFRKSorGjBmjI0eOBKw5f/68ioqK1L17d11xxRUaO3as6urqAtYcP35co0aNUkJCglJSUjR9+nQ1NjZ++6sBAAAdQlCBsm3bNhUVFWnXrl2qqqqSz+dTXl6ezp07518zbdo0vfnmm3r11Ve1bds2nThxQvfcc4///qamJo0aNUoNDQ3auXOnXnzxRa1du1Zz5swJ3VUBAICI1imYxRUVFQG3165dq5SUFNXW1ur//u//dObMGT3//PNav369brvtNknSmjVr1LdvX+3atUtDhw5VZWWlDh06pLffflupqakaNGiQ5s+fr8cee0xPPvmk4uLiQnd1AAAgIn2rn0E5c+aMJCk5OVmSVFtbK5/Pp9zcXP+aPn36qEePHqqpqZEk1dTUaODAgUpNTfWvyc/Pl8fj0cGDB7/NdgAAQAcR1Dso/6u5uVlTp07V8OHDNWDAAEmS2+1WXFyckpKSAtampqbK7Xb71/xvnLTc33LfhXi9Xnm9Xv9tj8cjSfL5fPL5fG29hFZazhXKc6I15mwfZm0P5myPlvk6oq0w76Tja5lxqF/TwZyvzYFSVFSk999/Xzt27GjrKS5ZaWmp5s2b1+p4ZWWlEhISQv58VVVVIT8nWmPO9mHW9mDO9pif3RzuLVw2Qv2arq+vv+S1bQqUKVOmaNOmTdq+fbuuvvpq//G0tDQ1NDTo9OnTAe+i1NXVKS0tzb9mz549Aedr+ZZPy5qvmjlzpoqLi/23PR6PMjMzlZeXJ6fT2ZZLuCCfz6eqqiqNGDFCsbGxITsvAjFn+zBrezBne7TMefZfouVtjgr3djo0R7Sl+dnNIX9Nt3wCcimCChTLsvTII49o48aN2rp1q7KysgLuHzx4sGJjY1VdXa2xY8dKko4cOaLjx4/L5XJJklwulxYuXKiTJ08qJSVF0peF5nQ61a9fvws+r8PhkMPhaHU8Nja2Xf5h0F7nRSDmbB9mbQ/mbA9vc5S8TQSKHUL9mg7mXEEFSlFRkdavX68//vGPSkxM9P/MSNeuXdW5c2d17dpVEyZMUHFxsZKTk+V0OvXII4/I5XJp6NChkqS8vDz169dP48ePV1lZmdxut2bNmqWioqILRggAALj8BBUoK1eulCR9//vfDzi+Zs0aPfDAA5Kkp59+WtHR0Ro7dqy8Xq/y8/P17LPP+tfGxMRo06ZNevjhh+VyudSlSxcVFhaqpKTk210JAADoMIL+iOebxMfHq7y8XOXl5Rdd07NnT23evDmYpwYAAJcRfhcPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAME6bfpsxACDy9ZrxVri3EBRHjKWyIeHeBezCOygAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAw
DgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIzTKdwbAICOYsCTW+Rtigr3NoAOgXdQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxuGXBQIwTq8Zb4V7C0FxxFgqGxLuXQAdC4ECXAb4LbsAIg0f8QAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4/AtnguIxG88/H3xqHBvAQCAkCFQEDaEIADgYviIBwAAGIdAAQAAxuEjHiAI/CfYAcAevIMCAACMwzsoHUQk/Zs9/1YPAPgmvIMCAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDhhDZTy8nL16tVL8fHxysnJ0Z49e8K5HQAAYIiwBcrvfvc7FRcXa+7cuXrvvff0ve99T/n5+Tp58mS4tgQAAAwRtkBZunSpJk6cqAcffFD9+vXTqlWrlJCQoBdeeCFcWwIAAIboFI4nbWhoUG1trWbOnOk/Fh0drdzcXNXU1LRa7/V65fV6/bfPnDkjSTp16pR8Pl/I9uXz+VRfX69Ovmg1NUeF7LwI1KnZUn19M3O2AbO2B3O2B3O2T8us//Of/yg2NjZk5z179qwkybKsb95DyJ41CJ999pmampqUmpoacDw1NVUffPBBq/WlpaWaN29eq+NZWVnttke0r/vDvYHLCLO2B3O2B3O2T3vO+uzZs+ratevXrglLoARr5syZKi4u9t9ubm7WqVOn1L17d0VFha6iPR6PMjMz9emnn8rpdIbsvAjEnO3DrO3BnO3BnO3TXrO2LEtnz55VRkbGN64NS6BceeWViomJUV1dXcDxuro6paWltVrvcDjkcDgCjiUlJbXb/pxOJy9+GzBn+zBrezBnezBn+7THrL/pnZMWYfkh2bi4OA0ePFjV1dX+Y83NzaqurpbL5QrHlgAAgEHC9hFPcXGxCgsLlZ2drSFDhmjZsmU6d+6cHnzwwXBtCQAAGCJsgXLffffp3//+t+bMmSO3261BgwapoqKi1Q/O2snhcGju3LmtPk5CaDFn+zBrezBnezBn+5gw6yjrUr7rAwAAYCN+Fw8AADAOgQIAAIxDoAAAAOMQKAAAwDiXXaCUl5erV69eio+PV05Ojvbs2fO161999VX16dNH8fHxGjhwoDZv3mzTTiNbMHN+7rnndMstt6hbt27q1q2bcnNzv/F/F/x/wb6mW2zYsEFRUVEaM2ZM+26wgwh2zqdPn1ZRUZHS09PlcDh03XXX8c+PSxDsnJctW6bvfve76ty5szIzMzVt2jSdP3/ept1Gpu3bt+uuu+5SRkaGoqKi9Prrr3/jY7Zu3aobb7xRDodD3/nOd7R27dp236esy8iGDRusuLg464UXXrAOHjxoTZw40UpKSrLq6uouuP7dd9+1YmJirLKyMuvQoUPWrFmzrNjYWOvAgQM27zyyBDvn+++/3yovL7f27dtnHT582HrggQesrl27Wv/4xz9s3nnkCXbWLY4dO2ZdddVV1i233GKNHj3ans1GsGDn7PV6rezsbGvkyJHWjh07rGPHjllbt2619u/fb/POI0uwc163bp3lcDisdevWWceOHbO2bNlipaenW9OmTbN555Fl8+bN1hNPPGG99tprliRr48aNX7v+6NGjVkJCglVcXGwdOnTIWrFihRUTE2NVVFS06z4vq0AZMmSIVVRU5L/d1NRkZWRkWKWlpRdcf++991qjRo0KOJaTk2M99NBD7brPSBfsnL+qsbHRSkxMtF588cX22mKH0ZZZNzY2WsOGDbN+85vfWIWFhQTKJQh2zitXrrSuueYaq6Ghw
a4tdgjBzrmoqMi67bbbAo4VFxdbw4cPb9d9diSXEiiPPvqo1b9//4Bj9913n5Wfn9+OO7Osy+YjnoaGBtXW1io3N9d/LDo6Wrm5uaqpqbngY2pqagLWS1J+fv5F16Ntc/6q+vp6+Xw+JScnt9c2O4S2zrqkpEQpKSmaMGGCHduMeG2Z8xtvvCGXy6WioiKlpqZqwIABWrRokZqamuzadsRpy5yHDRum2tpa/8dAR48e1ebNmzVy5Ehb9ny5CNefhRHx24xD4bPPPlNTU1Or/1JtamqqPvjggws+xu12X3C92+1ut31GurbM+asee+wxZWRktPo/BAK1ZdY7duzQ888/r/3799uww46hLXM+evSo3nnnHRUUFGjz5s36+OOPNXnyZPl8Ps2dO9eObUectsz5/vvv12effaabb75ZlmWpsbFRP//5z/X444/bseXLxsX+LPR4PPrvf/+rzp07t8vzXjbvoCAyLF68WBs2bNDGjRsVHx8f7u10KGfPntX48eP13HPP6corrwz3djq05uZmpaSkaPXq1Ro8eLDuu+8+PfHEE1q1alW4t9ahbN26VYsWLdKzzz6r9957T6+99preeustzZ8/P9xbQwhcNu+gXHnllYqJiVFdXV3A8bq6OqWlpV3wMWlpaUGtR9vm3GLJkiVavHix3n77bV1//fXtuc0OIdhZ/+1vf9Pf//533XXXXf5jzc3NkqROnTrpyJEj6t27d/tuOgK15TWdnp6u2NhYxcTE+I/17dtXbrdbDQ0NiouLa9c9R6K2zHn27NkaP368fvazn0mSBg4cqHPnzmnSpEl64oknFB3Nv4OHwsX+LHQ6ne327ol0Gb2DEhcXp8GDB6u6utp/rLm5WdXV1XK5XBd8jMvlClgvSVVVVRddj7bNWZLKyso0f/58VVRUKDs7246tRrxgZ92nTx8dOHBA+/fv9//1ox/9SD/4wQ+0f/9+ZWZm2rn9iNGW1/Tw4cP18ccf+wNQkj788EOlp6cTJxfRljnX19e3ipCWKLT4NXMhE7Y/C9v1R3ANs2HDBsvhcFhr1661Dh06ZE2aNMlKSkqy3G63ZVmWNX78eGvGjBn+9e+++67VqVMna8mSJdbhw4etuXPn8jXjSxDsnBcvXmzFxcVZv//9761//etf/r/Onj0brkuIGMHO+qv4Fs+lCXbOx48ftxITE60pU6ZYR44csTZt2mSlpKRYCxYsCNclRIRg5zx37lwrMTHRevnll62jR49alZWVVu/eva177703XJcQEc6ePWvt27fP2rdvnyXJWrp0qbVv3z7rk08+sSzLsmbMmGGNHz/ev77la8bTp0+3Dh8+bJWXl/M14/awYsUKq0ePHlZcXJw1ZMgQa9euXf77br31VquwsDBg/SuvvGJdd911VlxcnNW/f3/rrbfesnnHkSmYOffs2dOS1OqvuXPn2r/xCBTsa/p/ESiXLtg579y508rJybEcDod1zTXXWAsXLrQaGxtt3nXkCWbOPp/PevLJJ63evXtb8fHxVmZmpjV58mTr888/t3/jEeTPf/7zBf+Z2zLbwsJC69Zbb231mEGDBllxcXHWNddcY61Zs6bd9xllWbwPBgAAzHLZ/AwKAACIHAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4/w/OippnTMCPvsAAAAASUVORK5CYII=\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Let's look at sentences which have a high hallucination score:" + ], + "metadata": { + "id": "-6DQwwxvRzJg" + } + }, + { + "cell_type": "code", + "source": [ + "sorted_df = 
output_df.sort_values(by='probability', ascending=False)\n", + "sorted_df" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "DvT8j2IYcZHA", + "outputId": "2d7ba580-a402-493c-8417-938da2943a3c" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " index sentence \\\n", + "1111 1111 Paul Taylor (born Paul Taylor Winger on April ... \n", + "1867 1867 Gündüz Kılıç (born 28 April 1988) is a Turkish... \n", + "1205 1205 Joe Walsh (born 28 April 1988) is an English p... \n", + "1435 1435 Stan Heal (born October 28, 1932) is an Americ... \n", + "1114 1114 Taylor was born in Cleveland, Ohio, and grew u... \n", + "... ... ... \n", + "1215 1215 He was a major benefactor of the city, donatin... \n", + "440 440 Tommy Nutter (1943–1992) was a British tailor ... \n", + "905 905 Lindsay Crouse (born May 12, 1948) is an Ameri... \n", + "1211 1211 Josiah Mason (1795–1881) was an English indust... \n", + "912 912 Crouse has appeared in numerous films and tele... \n", + "\n", + " wiki_bio_text annotation \\\n", + "1111 Paul Taylor (born June 4, 1960, San Francisco,... minor_inaccurate \n", + "1867 Baba Gündüz Kılıç (1918-1980) was a Turkish fo... minor_inaccurate \n", + "1205 For other persons named Joseph/Joe Walsh, see ... minor_inaccurate \n", + "1435 Stan \"Pops\" Heal (30 July 1920 - 15 December 2... major_inaccurate \n", + "1114 Paul Taylor (born June 4, 1960, San Francisco,... major_inaccurate \n", + "... ... ... \n", + "1215 Sir Josiah Mason (23 February 1795 - 16 June 1... accurate \n", + "440 Tommy Nutter (17 April 1943 – 17 August 1992) ... accurate \n", + "905 Lindsay Ann Crouse (born May 12, 1948) is an A... accurate \n", + "1211 Sir Josiah Mason (23 February 1795 - 16 June 1... accurate \n", + "912 Lindsay Ann Crouse (born May 12, 1948) is an A... 
accurate \n", + "\n", + " probability label prediction \n", + "1111 0.999736 1 1 \n", + "1867 0.999716 1 1 \n", + "1205 0.999713 1 1 \n", + "1435 0.999712 1 1 \n", + "1114 0.999675 1 1 \n", + "... ... ... ... \n", + "1215 0.008564 0 0 \n", + "440 0.004858 0 0 \n", + "905 0.003348 0 0 \n", + "1211 0.001838 0 0 \n", + "912 0.001714 0 0 \n", + "\n", + "[1908 rows x 7 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexsentencewiki_bio_textannotationprobabilitylabelprediction
11111111Paul Taylor (born Paul Taylor Winger on April ...Paul Taylor (born June 4, 1960, San Francisco,...minor_inaccurate0.99973611
18671867Gündüz Kılıç (born 28 April 1988) is a Turkish...Baba Gündüz Kılıç (1918-1980) was a Turkish fo...minor_inaccurate0.99971611
12051205Joe Walsh (born 28 April 1988) is an English p...For other persons named Joseph/Joe Walsh, see ...minor_inaccurate0.99971311
14351435Stan Heal (born October 28, 1932) is an Americ...Stan \"Pops\" Heal (30 July 1920 - 15 December 2...major_inaccurate0.99971211
11141114Taylor was born in Cleveland, Ohio, and grew u...Paul Taylor (born June 4, 1960, San Francisco,...major_inaccurate0.99967511
........................
12151215He was a major benefactor of the city, donatin...Sir Josiah Mason (23 February 1795 - 16 June 1...accurate0.00856400
440440Tommy Nutter (1943–1992) was a British tailor ...Tommy Nutter (17 April 1943 – 17 August 1992) ...accurate0.00485800
905905Lindsay Crouse (born May 12, 1948) is an Ameri...Lindsay Ann Crouse (born May 12, 1948) is an A...accurate0.00334800
12111211Josiah Mason (1795–1881) was an English indust...Sir Josiah Mason (23 February 1795 - 16 June 1...accurate0.00183800
912912Crouse has appeared in numerous films and tele...Lindsay Ann Crouse (born May 12, 1948) is an A...accurate0.00171400
\n", + "

1908 rows × 7 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We can see below that a sentence with a very high hallucination score (0.99) does indeed look like a hallucination, as the sentence gets two facts wrong:\n", + "- Stan Heal was born on 30 July 1920, which is quite far from the LLM-generated date of October 28, 1932!\n", + "- Stan was an Australian rules footballer, not a basketball player." + ], + "metadata": { + "id": "IaH8rwaoR4fU" + } + }, + { + "cell_type": "code", + "source": [ + "sorted_df" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "BwmfDyaWDM7b", + "outputId": "34f2fa4a-c0b4-403c-d4a6-aa4123f53f26" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " index sentence \\\n", + "1111 1111 Paul Taylor (born Paul Taylor Winger on April ... \n", + "1867 1867 Gündüz Kılıç (born 28 April 1988) is a Turkish... \n", + "1205 1205 Joe Walsh (born 28 April 1988) is an English p... \n", + "1435 1435 Stan Heal (born October 28, 1932) is an Americ... \n", + "1114 1114 Taylor was born in Cleveland, Ohio, and grew u... \n", + "... ... ... \n", + "1215 1215 He was a major benefactor of the city, donatin... \n", + "440 440 Tommy Nutter (1943–1992) was a British tailor ... \n", + "905 905 Lindsay Crouse (born May 12, 1948) is an Ameri... \n", + "1211 1211 Josiah Mason (1795–1881) was an English indust... \n", + "912 912 Crouse has appeared in numerous films and tele... \n", + "\n", + " wiki_bio_text annotation \\\n", + "1111 Paul Taylor (born June 4, 1960, San Francisco,... minor_inaccurate \n", + "1867 Baba Gündüz Kılıç (1918-1980) was a Turkish fo... minor_inaccurate \n", + "1205 For other persons named Joseph/Joe Walsh, see ... minor_inaccurate \n", + "1435 Stan \"Pops\" Heal (30 July 1920 - 15 December 2... major_inaccurate \n", + "1114 Paul Taylor (born June 4, 1960, San Francisco,... 
major_inaccurate \n", + "... ... ... \n", + "1215 Sir Josiah Mason (23 February 1795 - 16 June 1... accurate \n", + "440 Tommy Nutter (17 April 1943 – 17 August 1992) ... accurate \n", + "905 Lindsay Ann Crouse (born May 12, 1948) is an A... accurate \n", + "1211 Sir Josiah Mason (23 February 1795 - 16 June 1... accurate \n", + "912 Lindsay Ann Crouse (born May 12, 1948) is an A... accurate \n", + "\n", + " probability label prediction \n", + "1111 0.999736 1 1 \n", + "1867 0.999716 1 1 \n", + "1205 0.999713 1 1 \n", + "1435 0.999712 1 1 \n", + "1114 0.999675 1 1 \n", + "... ... ... ... \n", + "1215 0.008564 0 0 \n", + "440 0.004858 0 0 \n", + "905 0.003348 0 0 \n", + "1211 0.001838 0 0 \n", + "912 0.001714 0 0 \n", + "\n", + "[1908 rows x 7 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexsentencewiki_bio_textannotationprobabilitylabelprediction
11111111Paul Taylor (born Paul Taylor Winger on April ...Paul Taylor (born June 4, 1960, San Francisco,...minor_inaccurate0.99973611
18671867Gündüz Kılıç (born 28 April 1988) is a Turkish...Baba Gündüz Kılıç (1918-1980) was a Turkish fo...minor_inaccurate0.99971611
12051205Joe Walsh (born 28 April 1988) is an English p...For other persons named Joseph/Joe Walsh, see ...minor_inaccurate0.99971311
14351435Stan Heal (born October 28, 1932) is an Americ...Stan \"Pops\" Heal (30 July 1920 - 15 December 2...major_inaccurate0.99971211
11141114Taylor was born in Cleveland, Ohio, and grew u...Paul Taylor (born June 4, 1960, San Francisco,...major_inaccurate0.99967511
........................
12151215He was a major benefactor of the city, donatin...Sir Josiah Mason (23 February 1795 - 16 June 1...accurate0.00856400
440440Tommy Nutter (1943–1992) was a British tailor ...Tommy Nutter (17 April 1943 – 17 August 1992) ...accurate0.00485800
905905Lindsay Crouse (born May 12, 1948) is an Ameri...Lindsay Ann Crouse (born May 12, 1948) is an A...accurate0.00334800
12111211Josiah Mason (1795–1881) was an English indust...Sir Josiah Mason (23 February 1795 - 16 June 1...accurate0.00183800
912912Crouse has appeared in numerous films and tele...Lindsay Ann Crouse (born May 12, 1948) is an A...accurate0.00171400
\n", + "

1908 rows × 7 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "code", + "source": [ + "example = sorted_df.iloc[3]\n", + "example[\"sentence\"], example[\"wiki_bio_text\"], example[\"probability\"]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Qy6HCDJ5cjJk", + "outputId": "80bcddde-f54a-418f-a83a-477c02ac996b" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "('Stan Heal (born October 28, 1932) is an American former professional basketball player.',\n", + " 'Stan \"Pops\" Heal (30 July 1920 - 15 December 2010) was an Australian rules footballer who played for Melbourne in the Victorian Football League (VFL) as well as West Perth in the West Australian National Football League (WANFL) during the 1940s and early 1950s. Heal played his best football as a wingman but was also used on occasions as a rover. The Western Australian spent just one season at Melbourne, who had acquired his services while he was temporarily stationed in Victoria. Despite playing just eight games, he was a member of Melbourne\\'s 1941 premiership team. The following week, he played in another premiership, back home in Western Australia with West Perth. He was also a regular interstate representative for Western Australia, winning a Simpson Medal for his performance in a game against South Australia in 1949 and captaining his state at the 1950 Brisbane Carnival. As coach of West Perth from 1947 to 1952, he steered his club to two premierships, in 1949 and 1951. Heal was inducted into the Australian Football Hall of Fame in 2010. 
He died on 15 December 2010.',\n", + " 0.9997124433517456)" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Interpretation of results" + ], + "metadata": { + "id": "LfYaaTI1rg7c" + } + }, + { + "cell_type": "markdown", + "source": [ + "Let's now have a look at the calibrated scores of recall and precision. Calibration is key in building trust in a model. Ideally, when a model provides a 0.8 probability score that a given sample is a hallucination, one would like it to be the case that this prediction would hold true 80% of the time.\n", + "\n", + "As hallucination labels can be imbalanced, for instance if we ask the LLM to perform easy vs hard tasks, precision and recall are more relevant than accuracy.\n", + "\n", + "That is why we will look at precision and recall for different probability scores.\n", + "\n", + "Hallucination recall conveys the fraction of hallucinations that are detected by our model for a given data set. If the recall is 0.8, it means that we have properly flagged 80% of the hallucinations.\n", + "\n", + "Hallucination precision conveys how often predicted hallucinations actually are hallucinations, and not false positives. A precision of 0.8 would mean that 80% of the time when we say a sentence is a hallucination, it is indeed one." + ], + "metadata": { + "id": "lX3M6bldS0Yr" + } + }, + { + "cell_type": "markdown", + "source": [ + "So first let's plot the precision scores grouped into bins of similar hallucination score." 
+ ], + "metadata": { + "id": "nwS-7md_ZJS-" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "from sklearn.metrics import precision_score, recall_score\n", + "import matplotlib.pyplot as plt\n", + "\n", + "x = []\n", + "precisions = []\n", + "\n", + "n_bins = 10\n", + "thresholds = np.linspace(0,1,n_bins)\n", + "for i in range(len(thresholds)-1):\n", + " min = thresholds[i]\n", + " max = thresholds[i+1]\n", + " bin = output_df.loc[(output_df.probability >= min) & (output_df.probability < max)]\n", + " tp = ((bin.prediction == 1) & (bin.label == 1)).sum()\n", + " fn = ((bin.prediction == 0) & (bin.label == 1)).sum()\n", + " precision = precision_score(bin.prediction.values, bin.label.values)\n", + " x.append(min)\n", + " precisions.append(precision)\n", + "\n", + "plt.bar(x, precisions, width=0.1, color='blue', edgecolor='black', capsize=5, align='center', label='Hallucination precision')\n", + "plt.plot([0, 1], [0, 1], \"k--\", label=\"Perfectly calibrated\")\n", + "plt.xlim([0, 1])\n", + "plt.ylim([0, 1])\n", + "plt.xlabel('SelfCheckGPT - NLI')\n", + "plt.ylabel('Precision (Probability of detection actually hallucination)')\n", + "plt.title('Calibration curve of hallucination')\n", + "plt.legend(loc='lower right')\n", + "plt.grid(True)\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 472 + }, + "id": "BGOJV3XqDilZ", + "outputId": "734a42b4-856c-4b06-908f-57d08ae2eaa9" + }, + "execution_count": 17, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Now let's do the same for recall:" + ], + "metadata": { + "id": "EToxJkJxZVUs" + } + }, + { + "cell_type": "code", + "source": [ + "x = []\n", + "recalls = []\n", + "\n", + "n_bins = 10\n", + "thresholds = np.linspace(0,1,n_bins)\n", + "for i in range(len(thresholds)-1):\n", + " min = thresholds[i]\n", + " max = thresholds[i+1]\n", + " bin = output_df.loc[(output_df.probability >= min) & (output_df.probability < max)]\n", + " tp = ((bin.prediction == 1) & (bin.label == 1)).sum()\n", + " fn = ((bin.prediction == 0) & (bin.label == 1)).sum()\n", + " # precision = tp / (tp + fn)\n", + " recall = recall_score(bin.prediction.values, bin.label.values)\n", + " # x.append(f\"\\[{min}-{max}\\]\")\n", + " x.append(min)\n", + " recalls.append(recall)\n", + "\n", + "plt.bar(x, recalls, width=0.1, color='green', edgecolor='black', capsize=5, align='center', label='Hallucination recall')\n", + "plt.plot([0, 1], [0, 1], \"k--\", label=\"Perfectly calibrated\")\n", + "plt.xlim([0, 1])\n", + "plt.ylim([0, 1])\n", + "plt.xlabel('SelfCheckGPT - NLI')\n", + "plt.ylabel('Recall (Detection rate of hallucination)')\n", + "plt.title('Calibration curve of hallucination')\n", + "plt.legend(loc='lower right')\n", + "plt.grid(True)\n", + "\n", + "# Show the plot\n", + "plt.show()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 596 + }, + "id": "5FImO39OJMQr", + "outputId": "95a324dd-bceb-4c9f-b7c3-f79b3b256b3b" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We can plot the two side by side:" + ], + "metadata": { + "id": "omUxoyLiZX_c" + } + }, + { + "cell_type": "code", + "source": [ + "fig, axs = plt.subplots(1, 2, figsize=(12, 5), sharey=True)\n", + "\n", + "# Plot the first histogram\n", + "axs[0].bar(x, precisions, width=0.1, color='blue', edgecolor='black', capsize=5, align='center', label='Hallucination precision')\n", + "axs[0].plot([0, 1], [0, 1], \"k--\", label=\"Perfectly calibrated\")\n", + "axs[0].set_xlim([0, 1])\n", + "axs[0].set_ylim([0, 1])\n", + "axs[0].set_xlabel('SelfCheckGPT - NLI')\n", + "axs[0].set_title('Hallucination precision calibration curve')\n", + "axs[0].legend(loc='lower right')\n", + "axs[0].grid(True)\n", + "\n", + "# Plot the second histogram\n", + "axs[1].bar(x, recalls, width=0.1, color='green', edgecolor='black', capsize=5, align='center', label='Hallucination recall')\n", + "axs[1].plot([0, 1], [0, 1], \"k--\", label=\"Perfectly calibrated\")\n", + "axs[1].set_xlim([0, 1])\n", + "axs[1].set_ylim([0, 1])\n", + "axs[1].set_xlabel('SelfCheckGPT - NLI')\n", + "axs[1].set_title('Hallucination recall calibration curve')\n", + "axs[1].legend(loc='lower right')\n", + "axs[1].grid(True)\n", + "\n", + "# Display the plots\n", + "plt.tight_layout()\n", + "plt.show()" + ], + "metadata": { + "id": "vvJ3KubtTGxz", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 507 + }, + "outputId": "4c674c43-78c0-4341-900e-a20de7ed2510" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "So what can be an interpretation of these plots?\n", + "\n", + "We see that our model is extremely precise in detecting hallucinations once the score is above 0.5. It reaches perfect precision, which means that whenever it makes the prediction that a sentence is a hallucination, it is almost certain it is actually the case!\n", + "\n", + "But being precise is not enough, if a model is conservative and only flags a few sentences as hallucinations, then this model would not be very useful.\n", + "\n", + "That is why we need to have a look at recall too.\n", + "\n", + "Interestingly, the recall score seems to be calibrated with the probability of hallucination: the higher the probability the higher the recall!\n", + "\n", + "This means that for instance, for an NLI score of 0.8, this model will flag 80% of the hallucinations as the recall is close to 80%, and all examples flagged are actually hallucinations as the precision is 1.0.\n", + "\n", + "This is great! It means that we can have a trustworthy metric for hallucination, as it is able to both:\n", + "- Provide a calibrated ability to flag hallucinations, aka the higher the hallucination score, the higher the likelihood to find hallucinations (calibrated recall)\n", + "- Be extremely precise in its prediction, aka not falsely labelling truthful sentences as hallucinations (perfect precision)\n", + "\n", + "**Both of those properties mean that we can now reliably and automatically detect hallucinations. 
This means we could verify the trustworthiness of an answer in a chat and, when a hallucination is detected, notify the user that extra checks must be performed.**" + ], + "metadata": { + "id": "BGcRaya-WXWH" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Varying the number of samples required" + ], + "metadata": { + "id": "nRHL-y6hrmOn" + } + }, + { + "cell_type": "markdown", + "source": [ + "Before concluding, one might think that this is great, but what about the cost of such a metric?\n", + "\n", + "In the initial SelfCheckGPT paper, they sampled $N=20$ more answers, on top of the original prediction, to predict the hallucination score.\n", + "\n", + "This is therefore quite expensive and impractical as it would drastically increase cost and time.\n", + "\n", + "One could therefore ask: are that many samples needed?\n", + "\n", + "To study that, we varied the number of samples used to compute the NLI score, and plotted the same graphs with $N=3,10,20$." + ], + "metadata": { + "id": "jzqDYTz2t3RO" + } + }, + { + "cell_type": "code", + "source": [ + "!wget wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1FFCXP4zBoyr6FwYn_Ken8Ak-6IHwgENv' -O wiki_bio_gpt3_hallucination_all_samples.csv" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yy9GSyHrcjOR", + "outputId": "f3b01851-45fb-497b-eaaf-bbc82f339d81" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-12-02 07:07:31-- http://wget/\n", + "Resolving wget (wget)... failed: Name or service not known.\n", + "wget: unable to resolve host address ‘wget’\n", + "--2023-12-02 07:07:31-- https://docs.google.com/uc?export=download&id=1FFCXP4zBoyr6FwYn_Ken8Ak-6IHwgENv\n", + "Resolving docs.google.com (docs.google.com)... 142.250.103.139, 142.250.103.138, 142.250.103.102, ...\n", + "Connecting to docs.google.com (docs.google.com)|142.250.103.139|:443... 
connected.\n", + "HTTP request sent, awaiting response... 303 See Other\n", + "Location: https://doc-14-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/cjpue7bj3vuo4flmihsiem0ku1lqtfd1/1701500850000/08030308599197976876/*/1FFCXP4zBoyr6FwYn_Ken8Ak-6IHwgENv?e=download&uuid=92fa8f40-2c49-44a5-9ac3-821a30f3475c [following]\n", + "Warning: wildcards not supported in HTTP.\n", + "--2023-12-02 07:07:33-- https://doc-14-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/cjpue7bj3vuo4flmihsiem0ku1lqtfd1/1701500850000/08030308599197976876/*/1FFCXP4zBoyr6FwYn_Ken8Ak-6IHwgENv?e=download&uuid=92fa8f40-2c49-44a5-9ac3-821a30f3475c\n", + "Resolving doc-14-8c-docs.googleusercontent.com (doc-14-8c-docs.googleusercontent.com)... 173.194.196.132, 2607:f8b0:4001:c1a::84\n", + "Connecting to doc-14-8c-docs.googleusercontent.com (doc-14-8c-docs.googleusercontent.com)|173.194.196.132|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 6048623 (5.8M) [text/csv]\n", + "Saving to: ‘wiki_bio_gpt3_hallucination_all_samples.csv’\n", + "\n", + "wiki_bio_gpt3_hallu 100%[===================>] 5.77M --.-KB/s in 0.03s \n", + "\n", + "2023-12-02 07:07:33 (206 MB/s) - ‘wiki_bio_gpt3_hallucination_all_samples.csv’ saved [6048623/6048623]\n", + "\n", + "FINISHED --2023-12-02 07:07:33--\n", + "Total wall clock time: 2.1s\n", + "Downloaded: 1 files, 5.8M in 0.03s (206 MB/s)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"./wiki_bio_gpt3_hallucination_all_samples.csv\")" + ], + "metadata": { + "id": "6qlZDYBxc9zB" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import ast\n", + "import numpy as np\n", + "\n", + "total_samples = 20\n", + "n_sample = 3\n", + "\n", + "def get_scores_from_df(df, n_sample=20, total_samples=20, threshold=0.35):\n", + "\n", + " output_df = []\n", + "\n", + " for _, example in 
df.iterrows():\n", + " scores = np.frombuffer(ast.literal_eval(example[\"sent_scores_nli\"]))\n", + " sentences = ast.literal_eval(example[\"gpt3_sentences\"])\n", + " n_sentences = len(sentences)\n", + " scores = scores.reshape(n_sentences, total_samples)\n", + " scores = scores[:,:n_sample]\n", + " scores = scores.mean(axis=-1)\n", + "\n", + " annotations = ast.literal_eval(example[\"annotation\"])\n", + " for i, annotation in enumerate(annotations):\n", + " idx = len(output_df)\n", + "\n", + " output_df.append({\n", + " \"index\": idx,\n", + " \"annotation\": annotation,\n", + " \"probability\": scores[i]\n", + " })\n", + "\n", + " output_df = pd.DataFrame(output_df)\n", + " output_df[\"label\"] = output_df.annotation.apply(lambda x: 0 if x == \"accurate\" else 1) # We add the ground truth label\n", + " output_df[\"prediction\"] = output_df[\"probability\"].apply(lambda x: 1 if x > threshold else 0) # We add the predicted label\n", + " return output_df" + ], + "metadata": { + "id": "YQxguleMcibD" + }, + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn.metrics import precision_score, recall_score\n", + "\n", + "n_samples = [3, 10, 20]\n", + "total_samples = 20\n", + "\n", + "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) # Single row, two columns\n", + "\n", + "n_bins = 10\n", + "thresholds = np.linspace(0, 1, n_bins)\n", + "\n", + "colors = ['blue', 'green', 'purple', 'orange'] # Different color for each n_sample\n", + "\n", + "for j, n_sample in enumerate(n_samples):\n", + " output_df = get_scores_from_df(df, n_sample=n_sample, total_samples=total_samples)\n", + " precisions = []\n", + " recalls = []\n", + " x = []\n", + "\n", + " for i in range(len(thresholds) - 1):\n", + " min_threshold = thresholds[i]\n", + " max_threshold = thresholds[i + 1]\n", + " bin_df = output_df.loc[(output_df.probability >= min_threshold) & 
(output_df.probability < max_threshold)]\n", + " precision = precision_score(bin_df.prediction.values, bin_df.label.values)\n", + " recall = recall_score(bin_df.prediction.values, bin_df.label.values)\n", + " x.append(min_threshold)\n", + " precisions.append(precision)\n", + " recalls.append(recall)\n", + "\n", + " # Plot precision and recall for this n_sample\n", + " ax1.bar(x, precisions, width=0.1, color=colors[j], edgecolor='black', label=f'n_sample={n_sample}')\n", + " ax2.bar(x, recalls, width=0.1, color=colors[j], edgecolor='black', label=f'n_sample={n_sample}')\n", + "\n", + "# Set properties for precision plot\n", + "ax1.plot([0, 1], [0, 1], \"k--\", label=\"Perfectly calibrated\")\n", + "ax1.set_xlim([0, 1])\n", + "ax1.set_ylim([0, 1])\n", + "ax1.set_xlabel('Probability Threshold')\n", + "ax1.set_title('Precision Calibration Curve')\n", + "ax1.legend(loc='lower right')\n", + "ax1.grid(True)\n", + "\n", + "# Set properties for recall plot\n", + "ax2.plot([0, 1], [0, 1], \"k--\", label=\"Perfectly calibrated\")\n", + "ax2.set_xlim([0, 1])\n", + "ax2.set_ylim([0, 1])\n", + "ax2.set_xlabel('Probability Threshold')\n", + "ax2.set_title('Recall Calibration Curve')\n", + "ax2.legend(loc='lower right')\n", + "ax2.grid(True)\n", + "\n", + "# Display the plots\n", + "plt.tight_layout()\n", + "plt.show()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 839 + }, + "id": "_TV85iXYc5Dr", + "outputId": "380a8ee9-9713-463e-ada3-848c72f3dd7e" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. 
Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n", + "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n", + " _warn_prf(average, modifier, msg_start, len(result))\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "While we can observe slight differences, the overall behavior is the same, even for $N=3$.\n", + "\n", + "While this is still a high number that multiplies the cost by 4, this initial work provides a first lead towards a practical, generic, and automatic way to detect hallucinations to build Trustworthy AI systems." + ], + "metadata": { + "id": "Hmix48dNulZY" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Conclusion" + ], + "metadata": { + "id": "x57c91JDrw6h" + } + }, + { + "cell_type": "markdown", + "source": [ + "We have seen in this notebook that hallucinations can be detected automatically and reliably with a metric that works for any text generation task.\n", + "\n", + "This is a potentially great step towards developing Trustworthy AI, which will be necessary, but not necessarily sufficient, to build AI systems that we can rely on.\n", + "\n", + "We hope we have provided you with useful insights, whether you are a researcher or practitioner, or anything in between.\n", + "\n", + "If you are interested in Confidential and Trustworthy AI, do not hesitate to have a look at [BlindChat](https://chat.mithrilsecurity.io/), our privacy-by-design Conversational AI, or [contact us](https://www.mithrilsecurity.io/contact) directly." + ], + "metadata": { + "id": "0VUj2IFYryin" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "XHMrO1mVLF39" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file