diff --git "a/\bBuild_Knowledge_Base_From_Texts.ipynb" "b/\bBuild_Knowledge_Base_From_Texts.ipynb"
new file mode 100644
index 0000000..1bf7f14
--- /dev/null
+++ "b/\bBuild_Knowledge_Base_From_Texts.ipynb"
@@ -0,0 +1,4618 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "machine_shape": "hm",
+ "gpuType": "T4",
+ "authorship_tag": "ABX9TyNOeEAcEMmDgCQOQkSfagC/",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU",
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "dc6961e0853d4ffaa98e53b4460dd1c4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ba2b3aceabac4a0892d463c46c52f1e1",
+ "IPY_MODEL_6d92cd90a0cf48549f689cf4968e053e",
+ "IPY_MODEL_74a44928b17d4580b5ce4bd290b9fdf2"
+ ],
+ "layout": "IPY_MODEL_b3180f6272f246089bd97ec79c5393df"
+ }
+ },
+ "ba2b3aceabac4a0892d463c46c52f1e1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6592b210e8e440559eeaf820b789831c",
+ "placeholder": "",
+ "style": "IPY_MODEL_67353cfd3f2347428050c05ab437111a",
+ "value": "Downloading (…)okenizer_config.json: 100%"
+ }
+ },
+ "6d92cd90a0cf48549f689cf4968e053e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1d751558bcd342058dc576269c9f3218",
+ "max": 1225,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d96f821ee7bb4c5c8a9285abe09a15db",
+ "value": 1225
+ }
+ },
+ "74a44928b17d4580b5ce4bd290b9fdf2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_644c7bcd501d40dd90a4dbf15bcfca40",
+ "placeholder": "",
+ "style": "IPY_MODEL_70bfb1dbf1134e31b4908fde482149f7",
+ "value": " 1.23k/1.23k [00:00<00:00, 110kB/s]"
+ }
+ },
+ "b3180f6272f246089bd97ec79c5393df": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6592b210e8e440559eeaf820b789831c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "67353cfd3f2347428050c05ab437111a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "1d751558bcd342058dc576269c9f3218": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d96f821ee7bb4c5c8a9285abe09a15db": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "644c7bcd501d40dd90a4dbf15bcfca40": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "70bfb1dbf1134e31b4908fde482149f7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "142c6e961b924cb7a9558d26c21ee4d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_eecba992f0aa4cdfb192cc69f70d05f6",
+ "IPY_MODEL_9246edaf57a7426eb09807bc1764c5c3",
+ "IPY_MODEL_eacd76ddbefe44eda8af16b05e75b76c"
+ ],
+ "layout": "IPY_MODEL_3fd5679f65dc4d8cb7c0e1a0319a62bd"
+ }
+ },
+ "eecba992f0aa4cdfb192cc69f70d05f6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_28b107e08c714f41a51420c89a3b9c06",
+ "placeholder": "",
+ "style": "IPY_MODEL_d2250bcf4ff04d76998508f2f48b3354",
+ "value": "Downloading (…)olve/main/vocab.json: 100%"
+ }
+ },
+ "9246edaf57a7426eb09807bc1764c5c3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d1189d55310f401ca10991dd56e3f236",
+ "max": 798293,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_9daa4a2e54d44cdab78ca611e40932c7",
+ "value": 798293
+ }
+ },
+ "eacd76ddbefe44eda8af16b05e75b76c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_51667eec4cad49789c5ea544d173d9a8",
+ "placeholder": "",
+ "style": "IPY_MODEL_1fca4ddcf05c427aa13598d3d7362446",
+ "value": " 798k/798k [00:00<00:00, 2.47MB/s]"
+ }
+ },
+ "3fd5679f65dc4d8cb7c0e1a0319a62bd": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "28b107e08c714f41a51420c89a3b9c06": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d2250bcf4ff04d76998508f2f48b3354": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d1189d55310f401ca10991dd56e3f236": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9daa4a2e54d44cdab78ca611e40932c7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "51667eec4cad49789c5ea544d173d9a8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1fca4ddcf05c427aa13598d3d7362446": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2bf61c728f764c6abb1635566b3644d7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_dcfd7c8c4b654fb6a416b45795df34ff",
+ "IPY_MODEL_51cb4776841f4dbfbd62b69d9342de11",
+ "IPY_MODEL_06bf5a2919bb44ab9a495ec3a53031d5"
+ ],
+ "layout": "IPY_MODEL_70d344c4fb6141c89a6eb8d1c2002782"
+ }
+ },
+ "dcfd7c8c4b654fb6a416b45795df34ff": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_213756610d484e4aa62133e29b3ae175",
+ "placeholder": "",
+ "style": "IPY_MODEL_fc26f5b31dc748879103fa2353d3c56a",
+ "value": "Downloading (…)olve/main/merges.txt: 100%"
+ }
+ },
+ "51cb4776841f4dbfbd62b69d9342de11": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a26e5ed71db540cfb70fea5b82284949",
+ "max": 456356,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_ed5f6f2a8e6b41ab85a62e20fb8f94e1",
+ "value": 456356
+ }
+ },
+ "06bf5a2919bb44ab9a495ec3a53031d5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_894efc053e4a4c05bd9e3a331abdd14b",
+ "placeholder": "",
+ "style": "IPY_MODEL_0ff6b15e251c4863a57a5e0501b7ae70",
+ "value": " 456k/456k [00:00<00:00, 1.59MB/s]"
+ }
+ },
+ "70d344c4fb6141c89a6eb8d1c2002782": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "213756610d484e4aa62133e29b3ae175": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fc26f5b31dc748879103fa2353d3c56a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a26e5ed71db540cfb70fea5b82284949": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ed5f6f2a8e6b41ab85a62e20fb8f94e1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "894efc053e4a4c05bd9e3a331abdd14b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0ff6b15e251c4863a57a5e0501b7ae70": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "3f398257b98e45d994b0f26576b96a11": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_fcb2f85a9dc74b829196de8694f3c48b",
+ "IPY_MODEL_563c7fd74658459694d97b52ecaafbc0",
+ "IPY_MODEL_f87a2ec827e74a73a5d45f0dc1877f6d"
+ ],
+ "layout": "IPY_MODEL_0b7b6392ba904c60bbf74e2e48653429"
+ }
+ },
+ "fcb2f85a9dc74b829196de8694f3c48b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_24c4a9954d3f4705b001797088ae8be5",
+ "placeholder": "",
+ "style": "IPY_MODEL_d28a4282298d459dac788e9e33139ae3",
+ "value": "Downloading (…)/main/tokenizer.json: 100%"
+ }
+ },
+ "563c7fd74658459694d97b52ecaafbc0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2097d6565bca4e5a9fc8d2fac4db260b",
+ "max": 1356697,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_9cb7dfcf1c7044ea9696bcdf39ae572d",
+ "value": 1356697
+ }
+ },
+ "f87a2ec827e74a73a5d45f0dc1877f6d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b092d85c45a94eab816f5d4577481f0f",
+ "placeholder": "",
+ "style": "IPY_MODEL_9ee73a32f314438f8c3fa577c8c33766",
+ "value": " 1.36M/1.36M [00:00<00:00, 5.49MB/s]"
+ }
+ },
+ "0b7b6392ba904c60bbf74e2e48653429": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "24c4a9954d3f4705b001797088ae8be5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d28a4282298d459dac788e9e33139ae3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "2097d6565bca4e5a9fc8d2fac4db260b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9cb7dfcf1c7044ea9696bcdf39ae572d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "b092d85c45a94eab816f5d4577481f0f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9ee73a32f314438f8c3fa577c8c33766": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d15e540c70f4468e99cc2236ae21f273": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_0f91b7bcf82546d196ecdab4aa3d7845",
+ "IPY_MODEL_667c4ef7bad844c4af9721573fd8043e",
+ "IPY_MODEL_3ee1f0d8ba424166a3b280f64ed9c1d6"
+ ],
+ "layout": "IPY_MODEL_1e0f2c349d314914b5e130b21b7cb0d0"
+ }
+ },
+ "0f91b7bcf82546d196ecdab4aa3d7845": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c67a8e56474f4e5f91124e22e5da1673",
+ "placeholder": "",
+ "style": "IPY_MODEL_70208313614f44fab1986624409ffd72",
+ "value": "Downloading (…)in/added_tokens.json: 100%"
+ }
+ },
+ "667c4ef7bad844c4af9721573fd8043e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_75b3a2c5447a4e3998e4c97b0fb71c45",
+ "max": 123,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_c260660884084316bf1b2fde69077f3f",
+ "value": 123
+ }
+ },
+ "3ee1f0d8ba424166a3b280f64ed9c1d6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d7f7cf95d90d439abe22644a918ba9e8",
+ "placeholder": "",
+ "style": "IPY_MODEL_0723401924eb45bfbcef28a0eb698403",
+ "value": " 123/123 [00:00<00:00, 11.4kB/s]"
+ }
+ },
+ "1e0f2c349d314914b5e130b21b7cb0d0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c67a8e56474f4e5f91124e22e5da1673": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "70208313614f44fab1986624409ffd72": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "75b3a2c5447a4e3998e4c97b0fb71c45": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c260660884084316bf1b2fde69077f3f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d7f7cf95d90d439abe22644a918ba9e8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0723401924eb45bfbcef28a0eb698403": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e5b772b2415044b3a5d7affecfd27da2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b507b582dc134e41892f8872519216e8",
+ "IPY_MODEL_8a73d20d93f8418c9d025ac6dd1303f0",
+ "IPY_MODEL_a21d310bb53c4721b28acb922b662d9f"
+ ],
+ "layout": "IPY_MODEL_aab727e6a3c6439787f8ef484ed68b50"
+ }
+ },
+ "b507b582dc134e41892f8872519216e8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_cc30bc56fffb497db3a2b45b4eee8f49",
+ "placeholder": "",
+ "style": "IPY_MODEL_d1fe2c5f5d7a4f548b416bc881e1595d",
+ "value": "Downloading (…)cial_tokens_map.json: 100%"
+ }
+ },
+ "8a73d20d93f8418c9d025ac6dd1303f0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e132194d187e47fc89a0af9ad6e89217",
+ "max": 344,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_774cff1c76704faea7d9c86324e41ca7",
+ "value": 344
+ }
+ },
+ "a21d310bb53c4721b28acb922b662d9f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d14e3ffc451c45fabc6da24981bb71a4",
+ "placeholder": "",
+ "style": "IPY_MODEL_c33fe6c9077f4517b800f6dd38d50114",
+ "value": " 344/344 [00:00<00:00, 27.4kB/s]"
+ }
+ },
+ "aab727e6a3c6439787f8ef484ed68b50": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cc30bc56fffb497db3a2b45b4eee8f49": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d1fe2c5f5d7a4f548b416bc881e1595d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e132194d187e47fc89a0af9ad6e89217": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "774cff1c76704faea7d9c86324e41ca7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d14e3ffc451c45fabc6da24981bb71a4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c33fe6c9077f4517b800f6dd38d50114": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "526a8c8ddbdd4546bc6f82af5fb36c21": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6085482c24ed4e3fbd6fc481391bfa74",
+ "IPY_MODEL_353b89fc4dc54f56b245582a99ced712",
+ "IPY_MODEL_995ba2e88c1c41ce834dc9095e00d73f"
+ ],
+ "layout": "IPY_MODEL_013bbd8ac6774224abc5e122234e99f1"
+ }
+ },
+ "6085482c24ed4e3fbd6fc481391bfa74": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_98a6f77f778e48a9b147cbeddb54dff3",
+ "placeholder": "",
+ "style": "IPY_MODEL_a0bb322eabda46e08b9b2a696a6bee10",
+ "value": "Downloading (…)lve/main/config.json: 100%"
+ }
+ },
+ "353b89fc4dc54f56b245582a99ced712": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_05038873795c450397aaaf681164f1c7",
+ "max": 1421,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_e2b4ac2abd4148009c66bc6b828424e9",
+ "value": 1421
+ }
+ },
+ "995ba2e88c1c41ce834dc9095e00d73f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ec0a3f632ed04a5889781ff18f12ca68",
+ "placeholder": "",
+ "style": "IPY_MODEL_2add321b36f44f92bad137d4f84e754c",
+ "value": " 1.42k/1.42k [00:00<00:00, 123kB/s]"
+ }
+ },
+ "013bbd8ac6774224abc5e122234e99f1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "98a6f77f778e48a9b147cbeddb54dff3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a0bb322eabda46e08b9b2a696a6bee10": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "05038873795c450397aaaf681164f1c7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e2b4ac2abd4148009c66bc6b828424e9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "ec0a3f632ed04a5889781ff18f12ca68": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2add321b36f44f92bad137d4f84e754c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "93595250100c4a738aafaa667088343d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_323e830c365c42a4a6c18a3a144fd7da",
+ "IPY_MODEL_cea7304b4312486682aef9b4fcce6408",
+ "IPY_MODEL_44df9cd6b9aa4dbcad864fc70e129c0c"
+ ],
+ "layout": "IPY_MODEL_23155f50a29340719fe178aaa61929b1"
+ }
+ },
+ "323e830c365c42a4a6c18a3a144fd7da": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7f92c9784fdc447b85cf21c6b90400a6",
+ "placeholder": "",
+ "style": "IPY_MODEL_1159130dc01c480c910337e64cafc40b",
+ "value": "Downloading model.safetensors: 100%"
+ }
+ },
+ "cea7304b4312486682aef9b4fcce6408": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_ebfa740da3fe467a83d8e37c74ff788c",
+ "max": 1625455696,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_f1ac869947cd40eb9ca46aa0e60357ee",
+ "value": 1625455696
+ }
+ },
+ "44df9cd6b9aa4dbcad864fc70e129c0c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0a367d8cb34a464bace1e627cf79e93a",
+ "placeholder": "",
+ "style": "IPY_MODEL_7ad5fc0f649d4400a388494cc4aa2e19",
+ "value": " 1.63G/1.63G [00:04<00:00, 323MB/s]"
+ }
+ },
+ "23155f50a29340719fe178aaa61929b1": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7f92c9784fdc447b85cf21c6b90400a6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1159130dc01c480c910337e64cafc40b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ebfa740da3fe467a83d8e37c74ff788c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1ac869947cd40eb9ca46aa0e60357ee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "0a367d8cb34a464bace1e627cf79e93a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7ad5fc0f649d4400a388494cc4aa2e19": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ }
+ }
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "iMH4cIE86FRi",
+ "outputId": "6a9c2555-6ed8-4eb9-d485-366309683cd1"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Mounted at /content/drive/\n"
+ ]
+ }
+ ],
+ "source": [
+ "from google.colab import drive\n",
+ "\n",
+ "drive.mount('/content/drive/')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Versions are pinned to the ones the recorded run resolved, so a fresh runtime reproduces it.\n",
+ "# %pip (rather than !pip3) installs into the environment of the running kernel.\n",
+ "%pip install tokenizers==0.13.3 wandb==0.15.8 sentencepiece==0.1.99"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "FBTlwAz07qvf",
+ "outputId": "50a1b277-9795-4982-dcf2-def7684bd85a"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting tokenizers\n",
+ " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m30.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting wandb\n",
+ " Downloading wandb-0.15.8-py3-none-any.whl (2.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m74.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting sentencepiece\n",
+ " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m80.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: Click!=8.0.0,>=7.1 in /usr/local/lib/python3.10/dist-packages (from wandb) (8.1.6)\n",
+ "Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)\n",
+ " Downloading GitPython-3.1.32-py3-none-any.whl (188 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m188.5/188.5 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: requests<3,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (2.27.1)\n",
+ "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (5.9.5)\n",
+ "Collecting sentry-sdk>=1.0.0 (from wandb)\n",
+ " Downloading sentry_sdk-1.29.2-py2.py3-none-any.whl (215 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m215.6/215.6 kB\u001b[0m \u001b[31m27.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting docker-pycreds>=0.4.0 (from wandb)\n",
+ " Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
+ "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from wandb) (6.0.1)\n",
+ "Collecting pathtools (from wandb)\n",
+ " Downloading pathtools-0.1.2.tar.gz (11 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting setproctitle (from wandb)\n",
+ " Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb) (67.7.2)\n",
+ "Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb) (1.4.4)\n",
+ "Requirement already satisfied: protobuf!=4.21.0,<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.20.3)\n",
+ "Requirement already satisfied: six>=1.4.0 in /usr/local/lib/python3.10/dist-packages (from docker-pycreds>=0.4.0->wandb) (1.16.0)\n",
+ "Collecting gitdb<5,>=4.0.1 (from GitPython!=3.1.29,>=1.0.0->wandb)\n",
+ " Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.0.0->wandb) (1.26.16)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.0.0->wandb) (2023.7.22)\n",
+ "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.0.0->wandb) (2.0.12)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.0.0->wandb) (3.4)\n",
+ "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb)\n",
+ " Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n",
+ "Building wheels for collected packages: pathtools\n",
+ " Building wheel for pathtools (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8791 sha256=648f0855f7aaf7670ab3aeeaff7158e0e72aa1d71d84a8ddba38d3f6202b764f\n",
+ " Stored in directory: /root/.cache/pip/wheels/e7/f3/22/152153d6eb222ee7a56ff8617d80ee5207207a8c00a7aab794\n",
+ "Successfully built pathtools\n",
+ "Installing collected packages: tokenizers, sentencepiece, pathtools, smmap, setproctitle, sentry-sdk, docker-pycreds, gitdb, GitPython, wandb\n",
+ "Successfully installed GitPython-3.1.32 docker-pycreds-0.4.0 gitdb-4.0.10 pathtools-0.1.2 sentencepiece-0.1.99 sentry-sdk-1.29.2 setproctitle-1.3.2 smmap-5.0.0 tokenizers-0.13.3 wandb-0.15.8\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Versions are pinned to the ones the recorded run resolved, so a fresh runtime reproduces it.\n",
+ "# %pip (rather than !pip3) installs into the environment of the running kernel.\n",
+ "%pip install transformers==4.31.0 wikipedia==1.4.0 newspaper3k==0.2.8 GoogleNews==1.6.8 pyvis==0.3.2 huggingface-hub==0.16.4"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "df_3VUzL7vEB",
+ "outputId": "c0c383a0-e358-4973-f88b-a672b0b9bcae"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting transformers\n",
+ " Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m28.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting wikipedia\n",
+ " Downloading wikipedia-1.4.0.tar.gz (27 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting newspaper3k\n",
+ " Downloading newspaper3k-0.2.8-py3-none-any.whl (211 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.1/211.1 kB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting GoogleNews\n",
+ " Downloading GoogleNews-1.6.8-py3-none-any.whl (8.1 kB)\n",
+ "Collecting pyvis\n",
+ " Downloading pyvis-0.3.2-py3-none-any.whl (756 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m756.0/756.0 kB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting huggingface-hub\n",
+ " Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m31.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n",
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n",
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n",
+ "Collecting safetensors>=0.3.1 (from transformers)\n",
+ " Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m60.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n",
+ "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from wikipedia) (4.11.2)\n",
+ "Requirement already satisfied: Pillow>=3.3.0 in /usr/local/lib/python3.10/dist-packages (from newspaper3k) (9.4.0)\n",
+ "Collecting cssselect>=0.9.2 (from newspaper3k)\n",
+ " Downloading cssselect-1.2.0-py2.py3-none-any.whl (18 kB)\n",
+ "Requirement already satisfied: lxml>=3.6.0 in /usr/local/lib/python3.10/dist-packages (from newspaper3k) (4.9.3)\n",
+ "Requirement already satisfied: nltk>=3.2.1 in /usr/local/lib/python3.10/dist-packages (from newspaper3k) (3.8.1)\n",
+ "Collecting feedparser>=5.2.1 (from newspaper3k)\n",
+ " Downloading feedparser-6.0.10-py3-none-any.whl (81 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.1/81.1 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting tldextract>=2.0.1 (from newspaper3k)\n",
+ " Downloading tldextract-3.4.4-py3-none-any.whl (93 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.3/93.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting feedfinder2>=0.0.4 (from newspaper3k)\n",
+ " Downloading feedfinder2-0.0.4.tar.gz (3.3 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting jieba3k>=0.35.1 (from newspaper3k)\n",
+ " Downloading jieba3k-0.35.1.zip (7.4 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m99.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.10/dist-packages (from newspaper3k) (2.8.2)\n",
+ "Collecting tinysegmenter==0.3 (from newspaper3k)\n",
+ " Downloading tinysegmenter-0.3.tar.gz (16 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting dateparser (from GoogleNews)\n",
+ " Downloading dateparser-1.1.8-py2.py3-none-any.whl (293 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.8/293.8 kB\u001b[0m \u001b[31m34.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: ipython>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from pyvis) (7.34.0)\n",
+ "Requirement already satisfied: jinja2>=2.9.6 in /usr/local/lib/python3.10/dist-packages (from pyvis) (3.1.2)\n",
+ "Requirement already satisfied: jsonpickle>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from pyvis) (3.0.1)\n",
+ "Requirement already satisfied: networkx>=1.11 in /usr/local/lib/python3.10/dist-packages (from pyvis) (3.1)\n",
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (2023.6.0)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (4.7.1)\n",
+ "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->wikipedia) (2.4.1)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from feedfinder2>=0.0.4->newspaper3k) (1.16.0)\n",
+ "Collecting sgmllib3k (from feedparser>=5.2.1->newspaper3k)\n",
+ " Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (67.7.2)\n",
+ "Collecting jedi>=0.16 (from ipython>=5.3.0->pyvis)\n",
+ " Downloading jedi-0.19.0-py2.py3-none-any.whl (1.6 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m86.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (4.4.2)\n",
+ "Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (0.7.5)\n",
+ "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (5.7.1)\n",
+ "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (3.0.39)\n",
+ "Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (2.14.0)\n",
+ "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (0.2.0)\n",
+ "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (0.1.6)\n",
+ "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.3.0->pyvis) (4.8.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.9.6->pyvis) (2.1.3)\n",
+ "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.2.1->newspaper3k) (8.1.6)\n",
+ "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>=3.2.1->newspaper3k) (1.3.1)\n",
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n",
+ "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n",
+ "Collecting requests-file>=1.4 (from tldextract>=2.0.1->newspaper3k)\n",
+ " Downloading requests_file-1.5.1-py2.py3-none-any.whl (3.7 kB)\n",
+ "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from dateparser->GoogleNews) (2022.7.1)\n",
+ "Requirement already satisfied: tzlocal in /usr/local/lib/python3.10/dist-packages (from dateparser->GoogleNews) (5.0.1)\n",
+ "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=5.3.0->pyvis) (0.8.3)\n",
+ "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=5.3.0->pyvis) (0.7.0)\n",
+ "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->pyvis) (0.2.6)\n",
+ "Building wheels for collected packages: wikipedia, tinysegmenter, feedfinder2, jieba3k, sgmllib3k\n",
+ " Building wheel for wikipedia (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11680 sha256=5f95f7b897002560af12b6daf0da12ddee9a36a6c93101f94ad5eb75cb9875ad\n",
+ " Stored in directory: /root/.cache/pip/wheels/5e/b6/c5/93f3dec388ae76edc830cb42901bb0232504dfc0df02fc50de\n",
+ " Building wheel for tinysegmenter (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for tinysegmenter: filename=tinysegmenter-0.3-py3-none-any.whl size=13539 sha256=8d62524612e31b509a54305b8e0e104ab32af54eb5ad901062d4d9533a50d563\n",
+ " Stored in directory: /root/.cache/pip/wheels/c8/d6/6c/384f58df48c00b9a31d638005143b5b3ac62c3d25fb1447f23\n",
+ " Building wheel for feedfinder2 (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for feedfinder2: filename=feedfinder2-0.0.4-py3-none-any.whl size=3339 sha256=8a1d05e2c4435d58c49f5203b127482bbc7c95e94292838f7f447b578c12f87c\n",
+ " Stored in directory: /root/.cache/pip/wheels/97/02/e7/a1ff1760e12bdbaab0ac824fae5c1bc933e41c4ccd6a8f8edb\n",
+ " Building wheel for jieba3k (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for jieba3k: filename=jieba3k-0.35.1-py3-none-any.whl size=7398380 sha256=0174623e56e9194e3b4d88b4798130bf053a505dd8f696d2c9ab57045db6cdbc\n",
+ " Stored in directory: /root/.cache/pip/wheels/7a/c4/0c/12a9a314ecac499456c4c3b2fcc2f635a3b45a39dfbd240299\n",
+ " Building wheel for sgmllib3k (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6047 sha256=e3f33fb1fec62ee525f7778f73a7cfca81a04c1dd07f58208a05cedb7b075ead\n",
+ " Stored in directory: /root/.cache/pip/wheels/f0/69/93/a47e9d621be168e9e33c7ce60524393c0b92ae83cf6c6e89c5\n",
+ "Successfully built wikipedia tinysegmenter feedfinder2 jieba3k sgmllib3k\n",
+ "Installing collected packages: tinysegmenter, sgmllib3k, safetensors, jieba3k, jedi, feedparser, cssselect, wikipedia, requests-file, huggingface-hub, feedfinder2, dateparser, transformers, tldextract, pyvis, GoogleNews, newspaper3k\n",
+ "Successfully installed GoogleNews-1.6.8 cssselect-1.2.0 dateparser-1.1.8 feedfinder2-0.0.4 feedparser-6.0.10 huggingface-hub-0.16.4 jedi-0.19.0 jieba3k-0.35.1 newspaper3k-0.2.8 pyvis-0.3.2 requests-file-1.5.1 safetensors-0.3.1 sgmllib3k-1.0.0 tinysegmenter-0.3 tldextract-3.4.4 transformers-4.31.0 wikipedia-1.4.0\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Version is pinned to the one the recorded run resolved, so a fresh runtime reproduces it.\n",
+ "# %pip (rather than !pip3) installs into the environment of the running kernel.\n",
+ "%pip install datasets==2.14.3"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "gsxwyby7AUgC",
+ "outputId": "7f428bec-39c8-471c-bd20-850af08a7304"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting datasets\n",
+ " Downloading datasets-2.14.3-py3-none-any.whl (519 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.1/519.1 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.22.4)\n",
+ "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n",
+ "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n",
+ " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n",
+ "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.27.1)\n",
+ "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.65.0)\n",
+ "Collecting xxhash (from datasets)\n",
+ " Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting multiprocess (from datasets)\n",
+ " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.5)\n",
+ "Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.16.4)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n",
+ "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.0.12)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.2)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.2)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.7.1)\n",
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (1.26.16)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.7.22)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2022.7.1)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n",
+ "Installing collected packages: xxhash, dill, multiprocess, datasets\n",
+ "Successfully installed datasets-2.14.3 dill-0.3.7 multiprocess-0.70.15 xxhash-3.3.0\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Version pinned to the one this notebook was last executed with, so reruns install the same release.\n",
+ "%pip install bertviz==1.4.0"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "GxXUgy_g7qlK",
+ "outputId": "11742c1c-4adb-49b8-88b5-a4b35240725f"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting bertviz\n",
+ " Downloading bertviz-1.4.0-py3-none-any.whl (157 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.6/157.6 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: transformers>=2.0 in /usr/local/lib/python3.10/dist-packages (from bertviz) (4.31.0)\n",
+ "Requirement already satisfied: torch>=1.0 in /usr/local/lib/python3.10/dist-packages (from bertviz) (2.0.1+cu118)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from bertviz) (4.65.0)\n",
+ "Collecting boto3 (from bertviz)\n",
+ " Downloading boto3-1.28.19-py3-none-any.whl (135 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from bertviz) (2.27.1)\n",
+ "Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from bertviz) (2022.10.31)\n",
+ "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from bertviz) (0.1.99)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.0->bertviz) (3.12.2)\n",
+ "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.0->bertviz) (4.7.1)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.0->bertviz) (1.11.1)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.0->bertviz) (3.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.0->bertviz) (3.1.2)\n",
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.0->bertviz) (2.0.0)\n",
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.0->bertviz) (3.25.2)\n",
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.0->bertviz) (16.0.6)\n",
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=2.0->bertviz) (0.16.4)\n",
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=2.0->bertviz) (1.22.4)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=2.0->bertviz) (23.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=2.0->bertviz) (6.0.1)\n",
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=2.0->bertviz) (0.13.3)\n",
+ "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=2.0->bertviz) (0.3.1)\n",
+ "Collecting botocore<1.32.0,>=1.31.19 (from boto3->bertviz)\n",
+ " Downloading botocore-1.31.19-py3-none-any.whl (11.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.1/11.1 MB\u001b[0m \u001b[31m43.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3->bertviz)\n",
+ " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
+ "Collecting s3transfer<0.7.0,>=0.6.0 (from boto3->bertviz)\n",
+ " Downloading s3transfer-0.6.1-py3-none-any.whl (79 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->bertviz) (1.26.16)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->bertviz) (2023.7.22)\n",
+ "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->bertviz) (2.0.12)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->bertviz) (3.4)\n",
+ "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.10/dist-packages (from botocore<1.32.0,>=1.31.19->boto3->bertviz) (2.8.2)\n",
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=2.0->bertviz) (2023.6.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.0->bertviz) (2.1.3)\n",
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.0->bertviz) (1.3.0)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.32.0,>=1.31.19->boto3->bertviz) (1.16.0)\n",
+ "Installing collected packages: jmespath, botocore, s3transfer, boto3, bertviz\n",
+ "Successfully installed bertviz-1.4.0 boto3-1.28.19 botocore-1.31.19 jmespath-1.0.1 s3transfer-0.6.1\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import os\n",
+ "\n",
+ "# Descend one level at a time: drive/ -> My Drive -> Experiment.\n",
+ "for folder in (\"drive/\", 'My Drive', 'Experiment'):\n",
+ "    os.chdir(folder)"
+ ],
+ "metadata": {
+ "id": "D613nlta7wzS"
+ },
+ "execution_count": 6,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Output directory, relative to the current working directory.\n",
+ "OUTPUT_DIR = './transformer-anatomy-outputs/'\n",
+ "# exist_ok=True keeps the cell idempotent and avoids the check-then-create race.\n",
+ "os.makedirs(OUTPUT_DIR, exist_ok=True)"
+ ],
+ "metadata": {
+ "id": "GaCLZWGL7zzs"
+ },
+ "execution_count": 7,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Capture the lines printed by `nvidia-smi` and join them back into one string.\n",
+ "gpu_info = !nvidia-smi\n",
+ "gpu_info = '\\n'.join(gpu_info)\n",
+ "# The substring 'failed' in that output is treated as \"no GPU attached\".\n",
+ "print('Not connected to a GPU' if 'failed' in gpu_info else gpu_info)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "R2MZpfIR8ZUM",
+ "outputId": "757c27a7-dbeb-4684-ce1c-663bd5c0dd71"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Fri Aug 4 11:58:25 2023 \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 525.105.17 Driver Version: 525.105.17 CUDA Version: 12.0 |\n",
+ "|-------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|===============================+======================+======================|\n",
+ "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
+ "| N/A 39C P8 9W / 70W | 0MiB / 15360MiB | 0% Default |\n",
+ "| | | N/A |\n",
+ "+-------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=============================================================================|\n",
+ "| No running processes found |\n",
+ "+-----------------------------------------------------------------------------+\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Library"
+ ],
+ "metadata": {
+ "id": "xcOLNW9gVzSF"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# ====================================================\n",
+ "# Library\n",
+ "# ====================================================\n",
+ "import os\n",
+ "import gc\n",
+ "import re\n",
+ "import ast\n",
+ "import sys\n",
+ "import copy\n",
+ "import json\n",
+ "import time\n",
+ "import math\n",
+ "from math import sqrt\n",
+ "import shutil\n",
+ "import string\n",
+ "import pickle\n",
+ "import random\n",
+ "import joblib\n",
+ "import itertools\n",
+ "import logging\n",
+ "from pathlib import Path\n",
+ "import warnings\n",
+ "warnings.filterwarnings(\"ignore\")\n",
+ "\n",
+ "import scipy as sp\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "pd.set_option('display.max_rows', 500)\n",
+ "pd.set_option('display.max_columns', 500)\n",
+ "pd.set_option('display.width', 1000)\n",
+ "from tqdm.auto import tqdm\n",
+ "from sklearn.metrics import f1_score\n",
+ "from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from sklearn.model_selection import StratifiedGroupKFold\n",
+ "\n",
+ "import torch\n",
+ "print(f\"torch.__version__: {torch.__version__}\")\n",
+ "import torch.nn as nn\n",
+ "from torch.nn import Parameter\n",
+ "import torch.nn.functional as F\n",
+ "from torch.optim import Adam, SGD, AdamW\n",
+ "from torch.utils.data import DataLoader, Dataset\n",
+ "import torch.cuda.amp as amp\n",
+ "\n",
+ "import tokenizers\n",
+ "import transformers\n",
+ "print(f\"tokenizers.__version__: {tokenizers.__version__}\")\n",
+ "print(f\"transformers.__version__: {transformers.__version__}\")\n",
+ "from transformers import AutoTokenizer, AutoModel, AutoConfig, AutoModelForSeq2SeqLM\n",
+ "from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup\n",
+ "\n",
+ "import datasets\n",
+ "import huggingface_hub\n",
+ "import matplotlib.font_manager as font_manager\n",
+ "import matplotlib.pyplot as plt\n",
+ "from IPython.display import set_matplotlib_formats\n",
+ "\n",
+ "import wikipedia\n",
+ "from newspaper import Article, ArticleException\n",
+ "from GoogleNews import GoogleNews\n",
+ "import IPython\n",
+ "from pyvis.network import Network\n",
+ "\n",
+ "from bertviz.transformers_neuron_view import BertModel\n",
+ "from bertviz.neuron_view import show\n",
+ "\n",
+ "%env TOKENIZERS_PARALLELISM=true\n",
+ "\n",
+ "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "H8dPVpoF8qXy",
+ "outputId": "5ba49d09-b143-418c-ca39-a2564b3252a9"
+ },
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "torch.__version__: 2.0.1+cu118\n",
+ "tokenizers.__version__: 0.13.3\n",
+ "transformers.__version__: 4.31.0\n",
+ "env: TOKENIZERS_PARALLELISM=true\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Load model and tokenizer"
+ ],
+ "metadata": {
+ "id": "XeA9XZMsWTUy"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Tokenizer and seq2seq model are loaded from the same checkpoint name.\n",
+ "REBEL_CHECKPOINT = \"Babelscape/rebel-large\"\n",
+ "tokenizer = AutoTokenizer.from_pretrained(REBEL_CHECKPOINT)\n",
+ "model = AutoModelForSeq2SeqLM.from_pretrained(REBEL_CHECKPOINT)"
+ ],
+ "metadata": {
+ "id": "ARcvAmTgNpKb",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 273,
+ "referenced_widgets": [
+ "dc6961e0853d4ffaa98e53b4460dd1c4",
+ "ba2b3aceabac4a0892d463c46c52f1e1",
+ "6d92cd90a0cf48549f689cf4968e053e",
+ "74a44928b17d4580b5ce4bd290b9fdf2",
+ "b3180f6272f246089bd97ec79c5393df",
+ "6592b210e8e440559eeaf820b789831c",
+ "67353cfd3f2347428050c05ab437111a",
+ "1d751558bcd342058dc576269c9f3218",
+ "d96f821ee7bb4c5c8a9285abe09a15db",
+ "644c7bcd501d40dd90a4dbf15bcfca40",
+ "70bfb1dbf1134e31b4908fde482149f7",
+ "142c6e961b924cb7a9558d26c21ee4d4",
+ "eecba992f0aa4cdfb192cc69f70d05f6",
+ "9246edaf57a7426eb09807bc1764c5c3",
+ "eacd76ddbefe44eda8af16b05e75b76c",
+ "3fd5679f65dc4d8cb7c0e1a0319a62bd",
+ "28b107e08c714f41a51420c89a3b9c06",
+ "d2250bcf4ff04d76998508f2f48b3354",
+ "d1189d55310f401ca10991dd56e3f236",
+ "9daa4a2e54d44cdab78ca611e40932c7",
+ "51667eec4cad49789c5ea544d173d9a8",
+ "1fca4ddcf05c427aa13598d3d7362446",
+ "2bf61c728f764c6abb1635566b3644d7",
+ "dcfd7c8c4b654fb6a416b45795df34ff",
+ "51cb4776841f4dbfbd62b69d9342de11",
+ "06bf5a2919bb44ab9a495ec3a53031d5",
+ "70d344c4fb6141c89a6eb8d1c2002782",
+ "213756610d484e4aa62133e29b3ae175",
+ "fc26f5b31dc748879103fa2353d3c56a",
+ "a26e5ed71db540cfb70fea5b82284949",
+ "ed5f6f2a8e6b41ab85a62e20fb8f94e1",
+ "894efc053e4a4c05bd9e3a331abdd14b",
+ "0ff6b15e251c4863a57a5e0501b7ae70",
+ "3f398257b98e45d994b0f26576b96a11",
+ "fcb2f85a9dc74b829196de8694f3c48b",
+ "563c7fd74658459694d97b52ecaafbc0",
+ "f87a2ec827e74a73a5d45f0dc1877f6d",
+ "0b7b6392ba904c60bbf74e2e48653429",
+ "24c4a9954d3f4705b001797088ae8be5",
+ "d28a4282298d459dac788e9e33139ae3",
+ "2097d6565bca4e5a9fc8d2fac4db260b",
+ "9cb7dfcf1c7044ea9696bcdf39ae572d",
+ "b092d85c45a94eab816f5d4577481f0f",
+ "9ee73a32f314438f8c3fa577c8c33766",
+ "d15e540c70f4468e99cc2236ae21f273",
+ "0f91b7bcf82546d196ecdab4aa3d7845",
+ "667c4ef7bad844c4af9721573fd8043e",
+ "3ee1f0d8ba424166a3b280f64ed9c1d6",
+ "1e0f2c349d314914b5e130b21b7cb0d0",
+ "c67a8e56474f4e5f91124e22e5da1673",
+ "70208313614f44fab1986624409ffd72",
+ "75b3a2c5447a4e3998e4c97b0fb71c45",
+ "c260660884084316bf1b2fde69077f3f",
+ "d7f7cf95d90d439abe22644a918ba9e8",
+ "0723401924eb45bfbcef28a0eb698403",
+ "e5b772b2415044b3a5d7affecfd27da2",
+ "b507b582dc134e41892f8872519216e8",
+ "8a73d20d93f8418c9d025ac6dd1303f0",
+ "a21d310bb53c4721b28acb922b662d9f",
+ "aab727e6a3c6439787f8ef484ed68b50",
+ "cc30bc56fffb497db3a2b45b4eee8f49",
+ "d1fe2c5f5d7a4f548b416bc881e1595d",
+ "e132194d187e47fc89a0af9ad6e89217",
+ "774cff1c76704faea7d9c86324e41ca7",
+ "d14e3ffc451c45fabc6da24981bb71a4",
+ "c33fe6c9077f4517b800f6dd38d50114",
+ "526a8c8ddbdd4546bc6f82af5fb36c21",
+ "6085482c24ed4e3fbd6fc481391bfa74",
+ "353b89fc4dc54f56b245582a99ced712",
+ "995ba2e88c1c41ce834dc9095e00d73f",
+ "013bbd8ac6774224abc5e122234e99f1",
+ "98a6f77f778e48a9b147cbeddb54dff3",
+ "a0bb322eabda46e08b9b2a696a6bee10",
+ "05038873795c450397aaaf681164f1c7",
+ "e2b4ac2abd4148009c66bc6b828424e9",
+ "ec0a3f632ed04a5889781ff18f12ca68",
+ "2add321b36f44f92bad137d4f84e754c",
+ "93595250100c4a738aafaa667088343d",
+ "323e830c365c42a4a6c18a3a144fd7da",
+ "cea7304b4312486682aef9b4fcce6408",
+ "44df9cd6b9aa4dbcad864fc70e129c0c",
+ "23155f50a29340719fe178aaa61929b1",
+ "7f92c9784fdc447b85cf21c6b90400a6",
+ "1159130dc01c480c910337e64cafc40b",
+ "ebfa740da3fe467a83d8e37c74ff788c",
+ "f1ac869947cd40eb9ca46aa0e60357ee",
+ "0a367d8cb34a464bace1e627cf79e93a",
+ "7ad5fc0f649d4400a388494cc4aa2e19"
+ ]
+ },
+ "outputId": "77dd16bf-588b-4c4b-94d3-047d8e048df4"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading (…)okenizer_config.json: 0%| | 0.00/1.23k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "dc6961e0853d4ffaa98e53b4460dd1c4"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading (…)olve/main/vocab.json: 0%| | 0.00/798k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "142c6e961b924cb7a9558d26c21ee4d4"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading (…)olve/main/merges.txt: 0%| | 0.00/456k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "2bf61c728f764c6abb1635566b3644d7"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading (…)/main/tokenizer.json: 0%| | 0.00/1.36M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "3f398257b98e45d994b0f26576b96a11"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading (…)in/added_tokens.json: 0%| | 0.00/123 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "d15e540c70f4468e99cc2236ae21f273"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading (…)cial_tokens_map.json: 0%| | 0.00/344 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "e5b772b2415044b3a5d7affecfd27da2"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading (…)lve/main/config.json: 0%| | 0.00/1.42k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "526a8c8ddbdd4546bc6f82af5fb36c21"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Downloading model.safetensors: 0%| | 0.00/1.63G [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "93595250100c4a738aafaa667088343d"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## From short text to Knowledge Base"
+ ],
+ "metadata": {
+ "id": "Uep9V0oqWdyK"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def extract_relations_from_model_output(text):\n",
+ "    \"\"\"Parse one decoded REBEL output string into relation triplets.\n",
+ "\n",
+ "    The expected linearised form is\n",
+ "    ``<triplet> head <subj> tail <obj> relation type``. Several\n",
+ "    ``<subj> ... <obj> ...`` pairs may follow a single ``<triplet>`` and then\n",
+ "    share its head.\n",
+ "\n",
+ "    Args:\n",
+ "        text: Model output decoded with the special tokens kept, so that the\n",
+ "            marker tokens are still present.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of ``{'head': ..., 'type': ..., 'tail': ...}`` dicts, in order\n",
+ "        of appearance in ``text``.\n",
+ "    \"\"\"\n",
+ "    relations = []\n",
+ "    subject, relation, object_ = '', '', ''\n",
+ "    # 'x': no marker seen yet; 't' / 's' / 'o': following tokens extend the\n",
+ "    # head / tail / relation type respectively.\n",
+ "    current = 'x'\n",
+ "    # Remove the sequence-level special tokens; only the structural markers remain.\n",
+ "    text_replaced = text.strip().replace(\"<s>\", \"\").replace(\"<pad>\", \"\").replace(\"</s>\", \"\")\n",
+ "    for token in text_replaced.split():\n",
+ "        if token == \"<triplet>\":\n",
+ "            current = 't'\n",
+ "            # A new head starts: flush the relation collected so far, if any.\n",
+ "            if relation != '':\n",
+ "                relations.append({\n",
+ "                    'head': subject.strip(),\n",
+ "                    'type': relation.strip(),\n",
+ "                    'tail': object_.strip()\n",
+ "                })\n",
+ "                relation = ''\n",
+ "            subject = ''\n",
+ "        elif token == \"<subj>\":\n",
+ "            current = 's'\n",
+ "            # A new tail for the same head: flush the previous (tail, type) pair.\n",
+ "            if relation != '':\n",
+ "                relations.append({\n",
+ "                    'head': subject.strip(),\n",
+ "                    'type': relation.strip(),\n",
+ "                    'tail': object_.strip()\n",
+ "                })\n",
+ "            object_ = ''\n",
+ "        elif token == \"<obj>\":\n",
+ "            current = 'o'\n",
+ "            relation = ''\n",
+ "        else:\n",
+ "            if current == 't':\n",
+ "                subject += ' ' + token\n",
+ "            elif current == 's':\n",
+ "                object_ += ' ' + token\n",
+ "            elif current == 'o':\n",
+ "                relation += ' ' + token\n",
+ "    # Flush the last triplet, which has no following marker to trigger it.\n",
+ "    if subject != '' and relation != '' and object_ != '':\n",
+ "        relations.append({\n",
+ "            'head': subject.strip(),\n",
+ "            'type': relation.strip(),\n",
+ "            'tail': object_.strip()\n",
+ "        })\n",
+ "    return relations"
+ ],
+ "metadata": {
+ "id": "vzZmJ7NnWZNQ"
+ },
+ "execution_count": 12,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "class KB():\n",
+ "    \"\"\"In-memory knowledge base holding entities, relations and article sources.\"\"\"\n",
+ "\n",
+ "    def __init__(self):\n",
+ "        self.entities = {} # { entity_title: {...} }\n",
+ "        self.relations = [] # [ head: entity_title, type: ..., tail: entity_title,\n",
+ "                            #   meta: { article_url: { spans: [...] } } ]\n",
+ "        self.sources = {} # { article_url: {...} }\n",
+ "\n",
+ "    def are_relations_equal(self, r1, r2):\n",
+ "        \"\"\"Return True when both relations have the same head, type and tail.\"\"\"\n",
+ "        return all(r1[attr] == r2[attr] for attr in [\"head\", \"type\", \"tail\"])\n",
+ "\n",
+ "    def exists_relation(self, r1):\n",
+ "        \"\"\"Return True when a relation equal to ``r1`` is already stored.\"\"\"\n",
+ "        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)\n",
+ "\n",
+ "\n",
+ "    def merge_with_kb(self, kb2):\n",
+ "        \"\"\"Add every relation of ``kb2`` to this KB via ``add_relation``.\"\"\"\n",
+ "        for r in kb2.relations:\n",
+ "            # Only the first article URL in the relation's meta is used for the source lookup.\n",
+ "            article_url = list(r[\"meta\"].keys())[0]\n",
+ "            source_data = kb2.sources[article_url]\n",
+ "            self.add_relation(r, source_data[\"article_title\"],\n",
+ "                              source_data[\"article_publish_date\"])\n",
+ "\n",
+ "\n",
+ "    def merge_relations(self, r2):\n",
+ "        \"\"\"Fold the meta of ``r2`` into the stored relation equal to it.\n",
+ "\n",
+ "        Raises IndexError when no equal relation is stored.\n",
+ "        \"\"\"\n",
+ "        r1 = [r for r in self.relations\n",
+ "              if self.are_relations_equal(r2, r)][0]\n",
+ "\n",
+ "        # if different article\n",
+ "        article_url = list(r2[\"meta\"].keys())[0]\n",
+ "        if article_url not in r1[\"meta\"]:\n",
+ "            r1[\"meta\"][article_url] = r2[\"meta\"][article_url]\n",
+ "\n",
+ "        # if existing article\n",
+ "        else:\n",
+ "            spans_to_add = [span for span in r2[\"meta\"][article_url][\"spans\"]\n",
+ "                            if span not in r1[\"meta\"][article_url][\"spans\"]]\n",
+ "            r1[\"meta\"][article_url][\"spans\"] += spans_to_add\n",
+ "\n",
+ "\n",
+ "    def add_entity(self, e):\n",
+ "        \"\"\"Store entity ``e`` under its title; the remaining keys become its data.\"\"\"\n",
+ "        self.entities[e[\"title\"]] = {k:v for k,v in e.items() if k != \"title\"}\n",
+ "\n",
+ "\n",
+ "    def add_relation(self, r, article_title, article_publish_date):\n",
+ "        \"\"\"Add relation ``r`` if both of its entities resolve on Wikipedia.\n",
+ "\n",
+ "        ``r`` is modified in place: head and tail are replaced by the Wikipedia\n",
+ "        page titles. The relation is silently dropped when either lookup fails.\n",
+ "        \"\"\"\n",
+ "        # check on wikipedia\n",
+ "        candidate_entities = [r[\"head\"], r[\"tail\"]]\n",
+ "        entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]\n",
+ "\n",
+ "        # if one entity does not exist, stop\n",
+ "        if any(ent is None for ent in entities):\n",
+ "            return\n",
+ "\n",
+ "        # manage new entities\n",
+ "        for e in entities:\n",
+ "            self.add_entity(e)\n",
+ "\n",
+ "        # rename relation entities with their wikipedia titles\n",
+ "        r[\"head\"] = entities[0][\"title\"]\n",
+ "        r[\"tail\"] = entities[1][\"title\"]\n",
+ "\n",
+ "        # add source if not in kb\n",
+ "        article_url = list(r[\"meta\"].keys())[0]\n",
+ "        if article_url not in self.sources:\n",
+ "            self.sources[article_url] = {\n",
+ "                \"article_title\": article_title,\n",
+ "                \"article_publish_date\": article_publish_date\n",
+ "            }\n",
+ "\n",
+ "        # manage new relation\n",
+ "        if not self.exists_relation(r):\n",
+ "            self.relations.append(r)\n",
+ "        else:\n",
+ "            self.merge_relations(r)\n",
+ "\n",
+ "\n",
+ "    def get_wikipedia_data(self, candidate_entity):\n",
+ "        \"\"\"Return ``{title, url, summary}`` for the Wikipedia page, or None on any error.\"\"\"\n",
+ "        try:\n",
+ "            page = wikipedia.page(candidate_entity, auto_suggest=False)\n",
+ "            entity_data = {\n",
+ "                \"title\": page.title,\n",
+ "                \"url\": page.url,\n",
+ "                \"summary\": page.summary\n",
+ "            }\n",
+ "            return entity_data\n",
+ "        # Best-effort lookup: any failure means \"no entity\". Exception (not a bare\n",
+ "        # except) so that KeyboardInterrupt / SystemExit still stop the cell.\n",
+ "        except Exception:\n",
+ "            return None\n",
+ "\n",
+ "    def print(self):\n",
+ "        \"\"\"Print entities, relations and sources, one item per line under its heading.\"\"\"\n",
+ "        print(\"Entities:\")\n",
+ "        for e in self.entities.items():\n",
+ "            print(f\" {e}\")\n",
+ "        print(\"Relations:\")\n",
+ "        for r in self.relations:\n",
+ "            print(f\" {r}\")\n",
+ "        print(\"Sources:\")\n",
+ "        for s in self.sources.items():\n",
+ "            print(f\" {s}\")"
+ ],
+ "metadata": {
+ "id": "XT3wsLA2WZMY"
+ },
+ "execution_count": 13,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## From long text to Knowledge Base"
+ ],
+ "metadata": {
+ "id": "h5vGlLksX5vl"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# 1. Initialize an empty knowledge base KB object.\n",
+ "# 2. Tokenize the input text.\n",
+ "# 3. Use REBEL to generate relations from the text.\n",
+ "# 4. Parse REBEL output and store relation triplets into the knowledge base object.\n",
+ "# 5. Return the knowledge base object.\n",
+ "\n",
+ "def from_text_to_kb(text, article_url, span_length=128, article_title=None, article_publish_date=None, verbose=False):\n",
+ "    \"\"\"Build a KB from ``text`` by generating relations over overlapping token spans.\n",
+ "\n",
+ "    The text is tokenized once with the notebook-level ``tokenizer``, cut into\n",
+ "    spans of ``span_length`` tokens, and all spans are passed to ``model.generate``\n",
+ "    as one batch. Every decoded sequence is parsed into relations, each tagged\n",
+ "    with ``article_url`` and the boundaries of the span it came from.\n",
+ "\n",
+ "    Args:\n",
+ "        text: Input text.\n",
+ "        article_url: Key under which the span metadata and the source are stored.\n",
+ "        span_length: Number of tokens per span.\n",
+ "        article_title: Passed through to ``KB.add_relation``.\n",
+ "        article_publish_date: Passed through to ``KB.add_relation``.\n",
+ "        verbose: When True, print token count, span count and span boundaries.\n",
+ "\n",
+ "    Returns:\n",
+ "        The populated ``KB``.\n",
+ "    \"\"\"\n",
+ "    # tokenize whole text\n",
+ "    encoded = tokenizer([text], return_tensors=\"pt\")\n",
+ "    token_ids = encoded[\"input_ids\"][0]\n",
+ "    attention = encoded[\"attention_mask\"][0]\n",
+ "\n",
+ "    # compute span boundaries\n",
+ "    num_tokens = len(token_ids)\n",
+ "    if verbose:\n",
+ "        print(f\"Input has {num_tokens} tokens\")\n",
+ "    num_spans = math.ceil(num_tokens / span_length)\n",
+ "    if verbose:\n",
+ "        print(f\"Input has {num_spans} spans\")\n",
+ "    overlap = math.ceil((num_spans * span_length - num_tokens) /\n",
+ "                        max(num_spans - 1, 1))\n",
+ "    # Consecutive spans start (span_length - overlap) tokens apart.\n",
+ "    stride = span_length - overlap\n",
+ "    spans_boundaries = [[k * stride, k * stride + span_length]\n",
+ "                        for k in range(num_spans)]\n",
+ "    if verbose:\n",
+ "        print(f\"Span boundaries are {spans_boundaries}\")\n",
+ "\n",
+ "    # transform input with spans\n",
+ "    inputs = {\n",
+ "        \"input_ids\": torch.stack([token_ids[lo:hi] for lo, hi in spans_boundaries]),\n",
+ "        \"attention_mask\": torch.stack([attention[lo:hi] for lo, hi in spans_boundaries])\n",
+ "    }\n",
+ "\n",
+ "    # generate relations\n",
+ "    num_return_sequences = 3\n",
+ "    generated_tokens = model.generate(\n",
+ "        **inputs,\n",
+ "        max_length=256,\n",
+ "        length_penalty=0,\n",
+ "        num_beams=3,\n",
+ "        num_return_sequences=num_return_sequences,\n",
+ "    )\n",
+ "\n",
+ "    # decode relations\n",
+ "    decoded_preds = tokenizer.batch_decode(generated_tokens,\n",
+ "                                           skip_special_tokens=False)\n",
+ "\n",
+ "    # create kb\n",
+ "    kb = KB()\n",
+ "    for pred_index, sentence_pred in enumerate(decoded_preds):\n",
+ "        # Each span yields num_return_sequences consecutive predictions.\n",
+ "        span = spans_boundaries[pred_index // num_return_sequences]\n",
+ "        for relation in extract_relations_from_model_output(sentence_pred):\n",
+ "            relation[\"meta\"] = {\n",
+ "                article_url: {\n",
+ "                    \"spans\": [span]\n",
+ "                }\n",
+ "            }\n",
+ "            kb.add_relation(relation, article_title, article_publish_date)\n",
+ "\n",
+ "    return kb"
+ ],
+ "metadata": {
+ "id": "SqqaGqEmX4Pt"
+ },
+ "execution_count": 14,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def get_article(url):\n",
+ "    \"\"\"Download and parse the article at ``url`` and return the ``Article`` object.\"\"\"\n",
+ "    fetched = Article(url)\n",
+ "    fetched.download()\n",
+ "    fetched.parse()\n",
+ "    return fetched\n",
+ "\n",
+ "def from_url_to_kb(url):\n",
+ "    \"\"\"Fetch the article at ``url`` and build a KB from its text, title and publish date.\"\"\"\n",
+ "    article = get_article(url)\n",
+ "    return from_text_to_kb(\n",
+ "        article.text,\n",
+ "        article.url,\n",
+ "        article_title=article.title,\n",
+ "        article_publish_date=article.publish_date,\n",
+ "    )"
+ ],
+ "metadata": {
+ "id": "AABCk9WcX4MK"
+ },
+ "execution_count": 16,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Build a knowledge base from one article URL, then print its entities, relations and sources.\n",
+ "url = \"https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html\"\n",
+ "kb = from_url_to_kb(url)\n",
+ "kb.print()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "AvdpKQk0X4KS",
+ "outputId": "d5e34e75-1256-424a-e0ae-4689bf6bf5d6"
+ },
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Entities:\n",
+ " ('MicroStrategy', {'url': 'https://en.wikipedia.org/wiki/MicroStrategy', 'summary': \"MicroStrategy Incorporated is an American company that provides business intelligence (BI), mobile software, and cloud-based services. Founded in 1989 by Michael J. Saylor, Sanju Bansal, and Thomas Spahr, the firm develops software to analyze internal and external data in order to make business decisions and to develop mobile apps. It is a public company headquartered in Tysons Corner, Virginia, in the Washington metropolitan area. Its primary business analytics competitors include SAP AG Business Objects, IBM Cognos, and Oracle Corporation's BI Platform. Saylor is the Executive Chairman and, from 1989 to 2022, was the CEO.\\n\\n\"})\n",
+ " ('Michael J. Saylor', {'url': 'https://en.wikipedia.org/wiki/Michael_J._Saylor', 'summary': \"Michael J. Saylor (born February 4, 1965) is an American entrepreneur and business executive. He is the executive chairman and a co-founder of MicroStrategy, a company that provides business intelligence, mobile software, and cloud-based services. Saylor served as MicroStrategy's chief executive officer from 1989 to 2022. He authored the 2012 book The Mobile Wave: How Mobile Intelligence Will Change Everything. He is also the sole trustee of Saylor Academy, a provider of free online education. As of 2016, Saylor had been granted 31 patents and had 9 additional applications under review.\\n\\n\"})\n",
+ " ('Terra (blockchain)', {'url': 'https://en.wikipedia.org/wiki/Terra_(blockchain)', 'summary': 'Terra is a blockchain protocol and payment platform used for algorithmic stablecoins. The project was created in 2018 by Terraform Labs, a startup co-founded by Do Kwon and Daniel Shin. It is most known for its Terra stablecoin and the associated Luna reserve asset cryptocurrency.\\nIn May 2022, the Terra blockchain was temporarily halted after the collapse of the stablecoin TerraUSD (UST) and Luna, in an event that wiped out almost $45 billion in market capitalisation within a week.\\n\\n'})\n",
+ " ('Stablecoin', {'url': 'https://en.wikipedia.org/wiki/Stablecoin', 'summary': 'A stablecoin is a type of cryptocurrency where the value of the digital asset is supposed to be pegged to a reference asset, which is either fiat money, exchange-traded commodities (such as precious metals or industrial metals), or another cryptocurrency.In theory, 1:1 backing by a reference asset could make a stablecoin value track the value of the peg and not be subject to the radical changes in value common in the market for many digital assets. In practice, however, stablecoin issuers have yet to be proven to maintain adequate reserves to support a stable value.'})\n",
+ " ('Bitcoin', {'url': 'https://en.wikipedia.org/wiki/Bitcoin', 'summary': 'Bitcoin (abbreviation: BTC or XBT; sign: ₿) is a protocol which implements a public, permanent, and decentralized ledger. \\nBitcoin transactions are verified by network nodes through cryptography and recorded in a public distributed ledger called a blockchain. The cryptocurrency was invented in 2008 by an unknown person or group of people using the name Satoshi Nakamoto. The currency began use in 2009, when its implementation was released as open-source software.:\\u200ach. 1\\u200a\\nThe word \"bitcoin\" was defined in a white paper published on October 31, 2008. It is a compound of the words bit and coin.The Library of Congress reports that, as of November 2021, nine countries have fully banned bitcoin use, and a further forty-two have implicitly banned it. A few governments have used bitcoin in some capacity. El Salvador has adopted Bitcoin as legal tender, although use by merchants remains low. Ukraine has accepted cryptocurrency donations to fund the resistance to the 2022 Russian invasion. Iran has used bitcoin to bypass sanctions.\\nIn 2018, Bitcoin has been described as an economic bubble by at least eight recipients of the Nobel Memorial Prize in Economic Sciences.The environmental effects of bitcoin are substantial. Its proof-of-work algorithm for bitcoin mining is designed to be computationally difficult, which requires the consumption of increasing quantities of electricity, the generation of which has contributed to climate change. According to the University of Cambridge, bitcoin has emitted an estimated 200 million tonnes of carbon dioxide since its launch, or about 0.04% of all carbon dioxide released since 2009.'})\n",
+ " ('Cryptocurrency', {'url': 'https://en.wikipedia.org/wiki/Cryptocurrency', 'summary': 'A cryptocurrency, crypto-currency, or crypto is a digital currency designed to work as a medium of exchange through a computer network that is not reliant on any central authority, such as a government or bank, to uphold or maintain it. It is a decentralized system for verifying that the parties to a transaction have the money they claim to have, eliminating the need for traditional intermediaries, such as banks, when funds are being transferred between two entities.Individual coin ownership records are stored in a digital ledger, which is a computerized database using strong cryptography to secure transaction records, control the creation of additional coins, and verify the transfer of coin ownership. Despite their name, cryptocurrencies are not considered to be currencies in the traditional sense, and while varying treatments have been applied to them, including classification as commodities, securities, and currencies, cryptocurrencies are generally viewed as a distinct asset class in practice. Some crypto schemes use validators to maintain the cryptocurrency. In a proof-of-stake model, owners put up their tokens as collateral. In return, they get authority over the token in proportion to the amount they stake. Generally, these token stakers get additional ownership in the token over time via network fees, newly minted tokens, or other such reward mechanisms.Cryptocurrency does not exist in physical form (like paper money) and is typically not issued by a central authority. Cryptocurrencies typically use decentralized control as opposed to a central bank digital currency (CBDC). When a cryptocurrency is minted, or created prior to issuance, or issued by a single issuer, it is generally considered centralized. 
When implemented with decentralized control, each cryptocurrency works through distributed ledger technology, typically a blockchain, that serves as a public financial transaction database.The first cryptocurrency was Bitcoin, which was first released as open-source software in 2009. As of March 2022, there were more than 9,000 other cryptocurrencies in the marketplace, of which more than 70 had a market capitalization exceeding $1 billion.'})\n",
+ " ('Lightning', {'url': 'https://en.wikipedia.org/wiki/Lightning', 'summary': 'Lightning is a natural phenomenon formed by electrostatic discharges through the atmosphere between two electrically charged regions, either both in the atmosphere or with one in the atmosphere and on the ground, temporarily neutralizing these in a near-instantaneous release of an average of one gigajoule of energy. This discharge may produce a wide range of electromagnetic radiation, from heat created by the rapid movement of electrons, to brilliant flashes of visible light in the form of black-body radiation. Lightning causes thunder, a sound from the shock wave which develops as gases in the vicinity of the discharge experience a sudden increase in pressure. Lightning occurs commonly during thunderstorms as well as other types of energetic weather systems, but volcanic lightning can also occur during volcanic eruptions. Lightning is an atmospheric electrical phenomenon and contributes to the global atmospheric electrical circuit.\\nThe three main kinds of lightning are distinguished by where they occur: either inside a single thundercloud (intra-cloud), between two clouds (cloud-to-cloud), or between a cloud and the ground (cloud-to-ground), in which case it is referred to as a lightning strike. Many other observational variants are recognized, including \"heat lightning\", which can be seen from a great distance but not heard; dry lightning, which can cause forest fires; and ball lightning, which is rarely observed scientifically.\\nHumans have deified lightning for millennia. Idiomatic expressions derived from lightning, such as the English expression \"bolt from the blue\", are common across languages. At all times people have been fascinated by the sight and difference of lightning. The fear of lightning is called astraphobia.\\nThe first known photograph of lightning is from 1847, by Thomas Martin Easterly. 
The first surviving photograph is from 1882, by William Nicholson Jennings, a photographer who spent half his life capturing pictures of lightning and proving its diversity.\\nThere is growing evidence that lightning activity is increased by particulate emissions (a form of air pollution). However, lightning may also improve air quality and clean greenhouse gases such as methane from the atmosphere, while creating nitrogen oxide and ozone at the same time. Lightning is also the major cause of wildfire, and wildfire can contribute to climate change as well. More studies are warranted to clarify their relationshhip.'})\n",
+ "Relations:\n",
+ " {'head': 'MicroStrategy', 'type': 'founded by', 'tail': 'Michael J. Saylor', 'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': {'spans': [[0, 128]]}}}\n",
+ " {'head': 'Michael J. Saylor', 'type': 'employer', 'tail': 'MicroStrategy', 'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': {'spans': [[0, 128]]}}}\n",
+ " {'head': 'MicroStrategy', 'type': 'owned by', 'tail': 'Michael J. Saylor', 'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': {'spans': [[0, 128]]}}}\n",
+ " {'head': 'Terra (blockchain)', 'type': 'instance of', 'tail': 'Stablecoin', 'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': {'spans': [[112, 240]]}}}\n",
+ " {'head': 'Bitcoin', 'type': 'instance of', 'tail': 'Cryptocurrency', 'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': {'spans': [[224, 352], [448, 576]]}}}\n",
+ " {'head': 'Bitcoin', 'type': 'uses', 'tail': 'Lightning', 'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': {'spans': [[336, 464]]}}}\n",
+ " {'head': 'Lightning', 'type': 'used by', 'tail': 'Bitcoin', 'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': {'spans': [[336, 464]]}}}\n",
+ " {'head': 'Lightning', 'type': 'instance of', 'tail': 'Cryptocurrency', 'meta': {'https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html': {'spans': [[448, 576]]}}}\n",
+ "Sources:\n",
+ " ('https://finance.yahoo.com/news/microstrategy-bitcoin-millions-142143795.html', {'article_title': \"Microstrategy chief: 'Bitcoin is going to go into the millions'\", 'article_publish_date': None})\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def get_news_links(query, lang=\"en\", region=\"US\", pages=1, max_links=100000):\n",
+ "    \"\"\"Return up to `max_links` distinct Google News result URLs for `query`.\n",
+ "\n",
+ "    Args:\n",
+ "        query: search string handed to `GoogleNews.search`.\n",
+ "        lang: language code forwarded to the `GoogleNews` constructor.\n",
+ "        region: region code forwarded to the `GoogleNews` constructor.\n",
+ "        pages: number of result pages to collect (1 = first page only;\n",
+ "            0 or less yields an empty list).\n",
+ "        max_links: upper bound on the number of URLs returned.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of unique URLs, in the order the search returned them.\n",
+ "    \"\"\"\n",
+ "    googlenews = GoogleNews(lang=lang, region=region)\n",
+ "    # Per the GoogleNews README, search() already loads result page 1 and\n",
+ "    # get_page() is 1-indexed, so only pages 2..pages need an extra fetch.\n",
+ "    googlenews.search(query)\n",
+ "    all_urls = []\n",
+ "    for page in range(1, pages + 1):\n",
+ "        if page > 1:\n",
+ "            googlenews.get_page(page)\n",
+ "        all_urls += googlenews.get_links()\n",
+ "    # dict.fromkeys() drops duplicates but keeps first-seen order, so the\n",
+ "    # max_links cut keeps the top-ranked links and is the same on every run\n",
+ "    # (the order of list(set(...)) changes between interpreter sessions).\n",
+ "    return list(dict.fromkeys(all_urls))[:max_links]\n",
+ "\n",
+ "def from_urls_to_kb(urls, verbose=False):\n",
+ "    \"\"\"Build one knowledge base by merging the KB extracted from each URL.\n",
+ "\n",
+ "    Args:\n",
+ "        urls: article URLs; each one is passed to `from_url_to_kb`.\n",
+ "        verbose: when true, print progress and skipped URLs.\n",
+ "\n",
+ "    Returns:\n",
+ "        A `KB` holding everything merged from the URLs that succeeded.\n",
+ "        URLs that raise `ArticleException` are skipped.\n",
+ "    \"\"\"\n",
+ "    merged_kb = KB()\n",
+ "    if verbose:\n",
+ "        print(f\"{len(urls)} links to visit\")\n",
+ "    for url in urls:\n",
+ "        if verbose:\n",
+ "            print(f\"Visiting {url}...\")\n",
+ "        try:\n",
+ "            # Extraction and merge share the try so a failed article leaves\n",
+ "            # the accumulated KB untouched and the loop simply moves on.\n",
+ "            merged_kb.merge_with_kb(from_url_to_kb(url))\n",
+ "        except ArticleException:\n",
+ "            if verbose:\n",
+ "                print(f\" Couldn't download article at url {url}\")\n",
+ "    return merged_kb"
+ ],
+ "metadata": {
+ "id": "iP5Rp9yINpDW"
+ },
+ "execution_count": 18,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Demo: build a KB from the first three Google News links for a query.\n",
+ "news_links = get_news_links(query=\"Google\", pages=1, max_links=3)\n",
+ "kb = from_urls_to_kb(urls=news_links, verbose=True)\n",
+ "kb.print()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "FVOcXub-Zlgy",
+ "outputId": "56649e12-9d6d-478c-89ce-f6ceec92beab"
+ },
+ "execution_count": 19,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "3 links to visit\n",
+ "Visiting https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images...\n",
+ "Visiting https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Token indices sequence length is longer than the specified maximum sequence length for this model (2047 > 1024). Running this sequence through the model will result in indexing errors\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Visiting https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold...\n",
+ "Entities:\n",
+ " ('Google', {'url': 'https://en.wikipedia.org/wiki/Google', 'summary': 'Google LLC ( (listen)) is an American multinational technology company focusing on artificial intelligence, online advertising, search engine technology, cloud computing, computer software, quantum computing, e-commerce, and consumer electronics. It has been referred to as \"the most powerful company in the world\" and as one of the world\\'s most valuable brands due to its market dominance, data collection, and technological advantages in the field of artificial intelligence. Google\\'s parent company Alphabet Inc. is one of the five Big Tech companies, alongside Amazon, Apple Inc., Meta Platforms, and Microsoft.\\nGoogle was founded on September 4, 1998, by computer scientists Larry Page and Sergey Brin while they were PhD students at Stanford University in California. Together they own about 14% of its publicly listed shares and control 56% of its stockholder voting power through super-voting stock. The company went public via an initial public offering (IPO) in 2004. In 2015, Google was reorganized as a wholly owned subsidiary of Alphabet Inc. Google is Alphabet\\'s largest subsidiary and is a holding company for Alphabet\\'s internet properties and interests. Sundar Pichai was appointed CEO of Google on October 24, 2015, replacing Larry Page, who became the CEO of Alphabet. On December 3, 2019, Pichai also became the CEO of Alphabet.The company has since rapidly grown to offer a multitude of products and services beyond Google Search, many of which hold dominant market positions. 
These products address a wide range of use cases, including email (Gmail), navigation (Waze & Maps), cloud computing (Cloud), web browsing (Chrome), video sharing (YouTube), productivity (Workspace), operating systems (Android), cloud storage (Drive), language translation (Translate), photo storage (Photos), video calling (Meet), smart home (Nest), smartphones (Pixel), wearable technology (Pixel Watch & Fitbit), music streaming (YouTube Music), video on demand (YouTube TV), artificial intelligence (Google Assistant), machine learning APIs (TensorFlow), AI chips (TPU), and more. Discontinued Google products include gaming (Stadia), Glass, Google+, Reader, Play Music, Nexus, Hangouts, and Inbox by Gmail.Google\\'s other ventures outside of Internet services and consumer electronics include quantum computing (Sycamore), self-driving cars (Waymo, formerly the Google Self-Driving Car Project), smart cities (Sidewalk Labs), and transformer models (Google Brain).Google and YouTube are the two most visited websites worldwide followed by Facebook and Twitter. Google is also the largest search engine, mapping and navigation application, email provider, office suite, video sharing platform, photo and cloud storage provider, mobile operating system, web browser, ML framework, and AI virtual assistant provider in the world as measured by market share. On the list of most valuable brands, Google is ranked second by Forbes and fourth by Interbrand. It has received significant criticism involving issues such as privacy concerns, tax avoidance, censorship, search neutrality, antitrust and abuse of its monopoly position.'})\n",
+ " ('Google Search', {'url': 'https://en.wikipedia.org/wiki/Google_Search', 'summary': 'Google Search (also known simply as Google or Google.com) is a search engine provided and operated by Google. Handling more than 3.5 billion searches per day, it has a 92% share of the global search engine market. It is the most-visited website in the world. Additionally, it is the most searched and used search engine in the entire world.\\nThe order of search results returned by Google is based, in part, on a priority rank system called \"PageRank\". Google Search also provides many different options for customized searches, using symbols to include, exclude, specify or require certain search behavior, and offers specialized interactive experiences, such as flight status and package tracking, weather forecasts, currency, unit, and time conversions, word definitions, and more.\\nThe main purpose of Google Search is to search for text in publicly accessible documents offered by web servers, as opposed to other data, such as images or data contained in databases. It was originally developed in 1996 by Larry Page, Sergey Brin, and Scott Hassan. In 2011, Google introduced \"Google Voice Search\" to search for spoken, rather than typed, words. In 2012, Google introduced a Knowledge Graph semantic search feature.\\nAnalysis of the frequency of search terms may indicate economic, social and health trends. Data about the frequency of use of search terms on Google can be openly inquired via Google Trends and have been shown to correlate with flu outbreaks and unemployment levels, and provide the information faster than traditional reporting methods and surveys. As of mid-2016, Google\\'s search engine has begun to rely on deep neural networks.'})\n",
+ " ('SafeSearch', {'url': 'https://en.wikipedia.org/wiki/SafeSearch', 'summary': \"SafeSearch is a feature in Google Search and Google Images that acts as an automated filter of pornography and potentially offensive and inappropriate content.On November 11, 2009, Google introduced the ability for users with Google Accounts to lock on the SafeSearch level in Google's web and image searches. Once configured, a password is required to change the setting.On December 12, 2012, Google removed the option to turn off the filter entirely, requiring users to enter more specific search queries to access adult content.SafeSearch is also often used on school computers, to prevent schoolchildren from accessing pornographic content.Government and internet companies can enforce SafeSearch.\\n\\n\"})\n",
+ " ('United States', {'url': 'https://en.wikipedia.org/wiki/United_States', 'summary': \"The United States of America (U.S.A. or USA), commonly known as the United States (U.S. or US) or informally as America, is a country primarily located in North America consisting of 50 states, a federal district, five major unincorporated territories, nine Minor Outlying Islands, and 326 Indian reservations. It is the world's third-largest country by both land and total area. It shares land borders with Canada to its north and with Mexico to its south and has maritime borders with the Bahamas, Cuba, Russia, and other nations. With a population of over 333 million, it is the most populous country in the Americas and the third-most populous in the world. The national capital of the United States is Washington, D.C., and its most populous city and principal financial center is New York City.\\nIndigenous peoples have inhabited the Americas for thousands of years. Beginning in 1607, British colonization led to the establishment of the Thirteen Colonies in what is now the Eastern United States. They quarreled with the British Crown over taxation and political representation, leading to the American Revolution and ensuing Revolutionary War. The United States declared independence on July 4, 1776, becoming the first nation-state founded on Enlightenment principles of unalienable natural rights, consent of the governed, and liberal democracy. The country began expanding across North America, spanning the continent by 1848. Sectional division over slavery led to the secession of the Confederate States of America, which fought the remaining states of the Union during the American Civil War (1861–1865). With the Union's victory and preservation, slavery was abolished nationally.\\nBy 1900, the United States had established itself as a great power, becoming the world's largest economy. After Japan's attack on Pearl Harbor in 1941, the U.S. entered World War II on the Allied side. 
The aftermath of the war left the United States and the Soviet Union as the world's two superpowers and led to the Cold War. During the Cold War, both countries engaged in a struggle for ideological dominance but avoided direct military conflict. They also competed in the Space Race, which culminated in the 1969 landing of Apollo 11, making the U.S. the first and only nation to land humans on the Moon. With the Soviet Union's collapse and the subsequent end of the Cold War in 1991, the United States emerged as the world's sole superpower.\\nThe United States government is a federal republic and a representative democracy with three separate branches of government. It has a bicameral national legislature composed of the House of Representatives, a lower house; and the Senate, an upper house based on equal representation for each state. Many policy issues are decentralized at a state or local level, with widely differing laws by jurisdiction. The U.S. ranks highly in international measures of quality of life, income and wealth, economic competitiveness, human rights, innovation, and education; it has low levels of perceived corruption. It has higher levels of incarceration and inequality than most other developed nations, and is the only developed nation without universal healthcare. As a melting pot of cultures and ethnicities, the U.S. has been shaped by the world's largest immigrant population.\\nThe United States is a highly developed country that has the highest median income of any polity in the world. Its economy accounts for approximately a quarter of global GDP and is the world's largest by GDP at market exchange rates. It is the world's largest importer and second-largest exporter, and possesses the largest amount of wealth of any country. 
The United States is a founding member of the United Nations, World Bank, International Monetary Fund, Organization of American States, NATO, World Health Organization, and is a permanent member of the United Nations Security Council. It is the world's foremost political, cultural, economic, military, and scientific force.\"})\n",
+ " ('Artificial intelligence arms race', {'url': 'https://en.wikipedia.org/wiki/Artificial_intelligence_arms_race', 'summary': 'A military artificial intelligence arms race is an arms race between two or more states to develop and deploy lethal autonomous weapons systems (LAWS). Since the mid-2010s, many analysts have noted the emergence of such an arms race between global superpowers for better military AI, driven by increasing geopolitical and military tensions. An AI arms race is sometimes placed in the context of an AI Cold War between the US and China.'})\n",
+ " ('Artificial intelligence', {'url': 'https://en.wikipedia.org/wiki/Artificial_intelligence', 'summary': \"Artificial intelligence (AI) is the intelligence of machines or software, as opposed to the intelligence of human beings or animals. AI applications include advanced web search engines (e.g., Google Search), recommendation systems (used by YouTube, Amazon, and Netflix), understanding human speech (such as Siri and Alexa), self-driving cars (e.g., Waymo), generative or creative tools (ChatGPT and AI art), and competing at the highest level in strategic games (such as chess and Go).Artificial intelligence was founded as an academic discipline in 1956. The field went through multiple cycles of optimism followed by disappointment and loss of funding, but after 2012, when deep learning surpassed all previous AI techniques, there was a vast increase in funding and interest.\\nThe various sub-fields of AI research are centered around particular goals and the use of particular tools. The traditional goals of AI research include reasoning, knowledge representation, planning, learning, natural language processing, perception, and support for robotics. General intelligence (the ability to solve an arbitrary problem) is among the field's long-term goals.\\nTo solve these problems, AI researchers have adapted and integrated a wide range of problem-solving techniques, including search and mathematical optimization, formal logic, artificial neural networks, and methods based on statistics, probability, and economics. AI also draws upon psychology, linguistics, philosophy, neuroscience and many other fields.\"})\n",
+ " ('Arms race', {'url': 'https://en.wikipedia.org/wiki/Arms_race', 'summary': 'An arms race occurs when two or more groups compete in military superiority. It consists of a competition between two or more states to have superior armed forces, concerning production of weapons, the growth of a military, and the aim of superior military technology. Unlike a sporting race, which constitutes a specific event with winning interpretable as the outcome of a singular project, arms races constitute spiralling systems of on-going and potentially open-ended behavior.The existing scholarly literature is divided as to whether arms races correlate with war. International-relations scholars explain arms races in terms of the security dilemma, engineering spiral models, states with revisionist aims, and deterrence models.'})\n",
+ " ('Michelle Donelan', {'url': 'https://en.wikipedia.org/wiki/Michelle_Donelan', 'summary': 'Michelle Emma May Elizabeth Donelan (born 8 April 1984) is a British politician serving as Secretary of State for Science, Innovation and Technology since July 2023, having served in the position from February to April 2023 also. Donelan previously served in the Johnson government as Minister of State for Higher and Further Education from 2020 to 2022 and as Secretary of State for Education for two days during the July 2022 government crisis. She also served under Liz Truss and Rishi Sunak as Secretary of State for Digital, Culture, Media and Sport from September 2022 to February 2023. She is a member of the Conservative Party and has been Member of Parliament (MP) for Chippenham in Wiltshire since 2015.'})\n",
+ " ('The Guardian', {'url': 'https://en.wikipedia.org/wiki/The_Guardian', 'summary': 'The Guardian is a British daily newspaper. It was founded in 1821 as The Manchester Guardian, and changed its name in 1959. Along with its sister papers, The Observer and The Guardian Weekly, The Guardian is part of the Guardian Media Group, owned by the Scott Trust Limited. The trust was created in 1936 to \"secure the financial and editorial independence of The Guardian in perpetuity and to safeguard the journalistic freedom and liberal values of The Guardian free from commercial or political interference\". The trust was converted into a limited company in 2008, with a constitution written so as to maintain for The Guardian the same protections as were built into the structure of the Scott Trust by its creators. Profits are reinvested in its journalism rather than distributed to owners or shareholders. It is considered a newspaper of record in the UK.The editor-in-chief Katharine Viner succeeded Alan Rusbridger in 2015. Since 2018, the paper\\'s main newsprint sections have been published in tabloid format. As of July 2021, its print edition had a daily circulation of 105,134. The newspaper has an online edition, TheGuardian.com, as well as two international websites, Guardian Australia (founded in 2013) and Guardian US (founded in 2011). The paper\\'s readership is generally on the mainstream left of British political opinion, and the term \"Guardian reader\" is used to imply a stereotype of liberal, left-wing or \"politically correct\" views. 
Frequent typographical errors during the age of manual typesetting led Private Eye magazine to dub the paper the \"Grauniad\" in the 1970s, a nickname still occasionally used by the editors for self-mockery.In an Ipsos MORI research poll in September 2018 designed to interrogate the public\\'s trust of specific titles online, The Guardian scored highest for digital-content news, with 84% of readers agreeing that they \"trust what [they] see in it\". A December 2018 report of a poll by the Publishers Audience Measurement Company stated that the paper\\'s print edition was found to be the most trusted in the UK in the period from October 2017 to September 2018. It was also reported to be the most-read of the UK\\'s \"quality newsbrands\", including digital editions; other \"quality\" brands included The Times, The Daily Telegraph, The Independent, and the i. While The Guardian\\'s print circulation is in decline, the report indicated that news from The Guardian, including that reported online, reaches more than 23 million UK adults each month.Chief among the notable \"scoops\" obtained by the paper was the 2011 News International phone-hacking scandal—and in particular the hacking of the murdered English teenager Milly Dowler\\'s phone. The investigation led to the closure of the News of the World, the UK\\'s best-selling Sunday newspaper and one of the highest-circulation newspapers in history. In June 2013, The Guardian broke news of the secret collection by the Obama administration of Verizon telephone records, and subsequently revealed the existence of the surveillance program PRISM after knowledge of it was leaked to the paper by the whistleblower and former National Security Agency contractor Edward Snowden. In 2016, The Guardian led an investigation into the Panama Papers, exposing then–Prime Minister David Cameron\\'s links to offshore bank accounts. 
It has been named \"newspaper of the year\" four times at the annual British Press Awards: most recently in 2014, for its reporting on government surveillance.'})\n",
+ " ('Google DeepMind', {'url': 'https://en.wikipedia.org/wiki/Google_DeepMind', 'summary': \"DeepMind Technologies Limited, doing business as Google DeepMind, is a British-American artificial intelligence research laboratory which serves as a subsidiary of Google. Founded in the UK in 2010, it was acquired by Google in 2014, becoming a wholly owned subsidiary of Google parent company Alphabet Inc. after Google's corporate restructuring in 2015. The company is based in London, with research centres in Canada, France, and the United States.\\nGoogle DeepMind has created a neural network that learns how to play video games in a fashion similar to that of humans, as well as a Neural Turing machine, or a neural network that may be able to access an external memory like a conventional Turing machine, resulting in a computer that mimics the short-term memory of the human brain.DeepMind made headlines in 2016 after its AlphaGo program beat a human professional Go player Lee Sedol, a world champion, in a five-game match, which was the subject of a documentary film. A more general program, AlphaZero, beat the most powerful programs playing go, chess and shogi (Japanese chess) after a few days of play against itself using reinforcement learning. In 2020, DeepMind made significant advances in the problem of protein folding with AlphaFold. In July 2022, it was announced that over 200 million predicted protein structures, representing virtually all known proteins, would be released on the AlphaFold database.DeepMind posted a blog post on 28 April 2022 on a single visual language model (VLM) named Flamingo that can accurately describe a picture of something with just a few training images. In July 2022, DeepMind announced the development of DeepNash, a model-free multi-agent reinforcement learning system capable of playing the board game Stratego at the level of a human expert. 
The company merged with Google AI's Google Brain division to become Google DeepMind in April 2023.\"})\n",
+ " ('London', {'url': 'https://en.wikipedia.org/wiki/London', 'summary': 'London ( (listen)) is the capital and largest city of England and the United Kingdom, with a population of just under 9 million. It stands on the River Thames in south-east England at the head of a 50-mile (80 km) estuary down to the North Sea and has been a major settlement for two millennia. The City of London, its ancient core and financial centre, was founded by the Romans as Londinium and retains its mediaeval boundaries. The City of Westminster, to the west of the City of London, has for centuries hosted the national government and parliament. Since the 19th century, the name \"London\" also refers to the metropolis around this core, historically split among the counties of Middlesex, Essex, Surrey, Kent, and Hertfordshire, which since 1965 has largely comprised Greater London, which is governed by 33 local authorities and the Greater London Authority.As one of the world\\'s major global cities, London exerts a strong influence on its arts, entertainment, fashion, commerce and finance, education, health care, media, science and technology, tourism, transport, and communications. Its GDP (€801.66 billion in 2017) makes it the largest urban economy in Europe, and it is one of the major financial centres in the world. With Europe\\'s largest concentration of higher education institutions, it is home to some of the highest-ranked academic institutions in the world—Imperial College London in natural and applied sciences, the London School of Economics in social sciences, and the comprehensive University College London. London is the most visited city in Europe and has the busiest city airport system in the world. The London Underground is the oldest rapid transit system in the world.London\\'s diverse cultures encompass over 300 languages. 
The mid-2018 population of Greater London of about 9 million made it Europe\\'s third-most populous city, accounting for 13.4% of the population of the United Kingdom and over 16% of the population of England. The Greater London Built-up Area is the fourth-most populous in Europe, with about 9.8 million inhabitants at the 2011 census. The London metropolitan area is the third-most populous in Europe, with about 14 million inhabitants in 2016, granting London the status of a megacity.\\nLondon has four World Heritage Sites: the Tower of London; Kew Gardens; the combined Palace of Westminster, Westminster Abbey, and St Margaret\\'s Church; and also the historic settlement in Greenwich, where the Royal Observatory, Greenwich, defines the prime meridian (0° longitude) and Greenwich Mean Time. Other landmarks include Buckingham Palace, the London Eye, Piccadilly Circus, St Paul\\'s Cathedral, Tower Bridge, and Trafalgar Square. London has many museums, galleries, libraries, and cultural venues, including the British Museum, National Gallery, Natural History Museum, Tate Modern, British Library, and numerous West End theatres. Important sporting events held in London include the FA Cup Final, the Wimbledon Tennis Championships, and the London Marathon. In 2012, London became the first city to host three Summer Olympic Games.'})\n",
+ " ('United Kingdom', {'url': 'https://en.wikipedia.org/wiki/United_Kingdom', 'summary': 'The United Kingdom of Great Britain and Northern Ireland, simply known as the United Kingdom (UK) or Britain, is a country in Northwestern Europe, off the north-western coast of the continental mainland. It comprises England, Scotland, Wales, and Northern Ireland. It includes the island of Great Britain, the north-eastern part of the island of Ireland, and most of the smaller islands within the British Isles. Northern Ireland shares a land border with the Republic of Ireland; otherwise, the United Kingdom is surrounded by the Atlantic Ocean, the North Sea, the English Channel, the Celtic Sea and the Irish Sea. The total area of the United Kingdom is 242,495 square kilometres (93,628 sq mi), with an estimated 2023 population of over 68 million people.\\nThe United Kingdom has evolved from a series of annexations, unions and separations of constituent countries over several hundred years. The Treaty of Union between the Kingdom of England (which also included Wales) and the Kingdom of Scotland in 1707 resulted in their unification to become the Kingdom of Great Britain. Its union in 1801 with the Kingdom of Ireland created the United Kingdom of Great Britain and Ireland. Most of Ireland seceded from the UK in 1922, leaving the present United Kingdom of Great Britain and Northern Ireland, which formally adopted its name in 1927. The nearby Isle of Man, Guernsey and Jersey are not part of the UK, being Crown Dependencies, but the British government is responsible for their defence and international representation. The UK became the first industrialised country and was the world\\'s foremost power for the majority of the 19th and early 20th centuries, particularly during the \"Pax Britannica\" between 1815 and 1914. 
The British Empire, at its height in the 1920s, encompassed almost a quarter of the world\\'s landmass and population, and was the largest empire in history; however, its involvement in World War I and World War II, the cumulative crisis and the loss of prestige led to the decolonization of most of the British colonies and the eventual end of the Empire. A part of the core Anglophonic world, British influence can be observed in the language, culture, legal and political systems of many of its former colonies. \\nThe United Kingdom is a constitutional monarchy and parliamentary democracy. The capital and largest city of the United Kingdom (as well as the capital of England) is London, a megacity which (alongside New York City) is one of the world\\'s two main financial centres. The cities of Edinburgh, Cardiff, and Belfast are respectively the national capitals of Scotland, Wales, and Northern Ireland. Other major cities include Birmingham, Manchester, Glasgow, and Leeds. The UK consists of three distinct legal jurisdictions: England and Wales, Scotland, and Northern Ireland. This is due to these areas retaining their existing legal systems even after joining the UK. Since 1998, Scotland, Wales, and Northern Ireland also have their own devolved governments and legislatures, each with varying powers.The UK has the world\\'s sixth-largest economy by nominal gross domestic product (GDP), and the tenth-largest by purchasing power parity. It is a recognised nuclear state and is ranked fourth globally in military expenditure. The UK has been a permanent member of the UN Security Council since its first session in 1946. It is a member of the Commonwealth of Nations, the Council of Europe, the G7, the OECD, NATO, the Five Eyes, AUKUS and the CPTPP.'})\n",
+ " ('2011', {'url': 'https://en.wikipedia.org/wiki/2011', 'summary': '2011 (MMXI) was a common year starting on Saturday of the Gregorian calendar, the 2011th year of the Common Era (CE) and Anno Domini (AD) designations, the 11th year of the 3rd millennium and the 21st century, and the 2nd year of the 2010s decade. \\nThe most notable event of the year was the Arab Spring that involved the killing of Muammar Gaddafi in Libya as part of a series of protests and government overthrows that swept through the Middle East.\\n2011 was designated as: \\n\\nInternational Year of Forests\\nInternational Year of Chemistry\\nInternational Year for People of African DescentIn 2011, the nation of Samoa only had 364 days as it moved across the International Date Line skipping December 30, 2011; it is now 24 hours ahead of American Samoa.\\n\\n'})\n",
+ " ('Demis Hassabis', {'url': 'https://en.wikipedia.org/wiki/Demis_Hassabis', 'summary': 'Demis Hassabis (born 27 July 1976) is a British artificial intelligence researcher and entrepreneur. In his early career he was a video game AI programmer and designer, and an expert board games player. He is the chief executive officer and co-founder of DeepMind and Isomorphic Labs, and a UK Government AI Advisor.'})\n",
+ " ('Chess', {'url': 'https://en.wikipedia.org/wiki/Chess', 'summary': \"Chess is a board game for two players, called White and Black, each controlling an army of chess pieces in their color, with the objective to checkmate the opponent's king. It is sometimes called international chess or Western chess to distinguish it from related games, such as xiangqi (Chinese chess) and shogi (Japanese chess). The recorded history of chess goes back at least to the emergence of a similar game, chaturanga, in seventh century India. The rules of chess as they are known today emerged in Europe at the end of the 15th century, with standardization and universal acceptance by the end of the 19th century. Today, chess is one of the world's most popular games, played by millions of people worldwide.\\nChess is an abstract strategy game that involves no hidden information and no elements of chance. It is played on a chessboard with 64 squares arranged in an 8×8 grid. At the start, each player controls sixteen pieces: one king, one queen, two rooks, two bishops, two knights, and eight pawns. White moves first, followed by Black. The game is won by checkmating the opponent's king, i.e. threatening it with inescapable capture. There are also several ways a game can end in a draw.\\nOrganized chess arose in the 19th century. Chess competition today is governed internationally by FIDE (the International Chess Federation). The first universally recognized World Chess Champion, Wilhelm Steinitz, claimed his title in 1886; Ding Liren is the current World Champion. A huge body of chess theory has developed since the game's inception. Aspects of art are found in chess composition, and chess in its turn influenced Western culture and the arts, and has connections with other fields such as mathematics, computer science, and psychology.\\nOne of the goals of early computer scientists was to create a chess-playing machine. 
In 1997, Deep Blue became the first computer to beat the reigning World Champion in a match when it defeated Garry Kasparov. Today's chess engines are significantly stronger than the best human players and have deeply influenced the development of chess theory; however, chess is not a solved game.\"})\n",
+ " ('ChatGPT', {'url': 'https://en.wikipedia.org/wiki/ChatGPT', 'summary': 'ChatGPT (Chat Generative Pre-Trained Transformer) is a large language model-based chatbot developed by OpenAI and launched on November 30, 2022, notable for enabling users to refine and steer a conversation towards a desired length, format, style, level of detail, and language used. Successive prompts and replies, known as prompt engineering, are taken into account at each stage of the conversation as a context.ChatGPT is built upon GPT-3.5 and GPT-4, from OpenAI\\'s proprietary series of foundational GPT models, fine-tuned for conversational applications using a combination of supervised and reinforcement learning techniques. ChatGPT was released as a freely available research preview, but due to its popularity, OpenAI now operates the service on a freemium model. It allows users on its free tier to access the GPT-3.5 based version, while the more advanced GPT-4 based version, as well as priority access to newer features, are provided to paid subscribers under the commercial name \"ChatGPT Plus\".\\nBy January 2023, it had become what was then the fastest-growing consumer software application in history, gaining over 100 million users and contributing to OpenAI\\'s valuation growing to US$29 billion. Within months, Google, Baidu, and Meta accelerated the development of their competing products: Bard, Ernie Bot, and LLaMA. Some observers expressed concern over the potential of ChatGPT to displace or atrophy human intelligence, and its potential to enable plagiarism or fuel misinformation.\\n\\n'})\n",
+ " ('OpenAI', {'url': 'https://en.wikipedia.org/wiki/OpenAI', 'summary': 'OpenAI is an American artificial intelligence (AI) research laboratory consisting of the non-profit OpenAI, Inc. and its for-profit subsidiary corporation OpenAI, L.P.. OpenAI conducts research on artificial intelligence with the declared intention of developing \"safe and beneficial\" artificial general intelligence, which it defines as \"highly autonomous systems that outperform humans at most economically valuable work\".OpenAI was founded in 2015 by Ilya Sutskever, Greg Brockman, Trevor Blackwell, Vicki Cheung, Andrej Karpathy, Durk Kingma, Jessica Livingston, John Schulman, Pamela Vagata, and Wojciech Zaremba, with Sam Altman and Elon Musk serving as the initial board members. Microsoft provided OpenAI LP with a $1 billion investment in 2019 and a $10 billion investment in 2023.'})\n",
+ " ('Bard', {'url': 'https://en.wikipedia.org/wiki/Bard', 'summary': 'In Celtic cultures, a bard is a professional story teller, verse-maker, music composer, oral historian and genealogist, employed by a patron (such as a monarch or chieftain) to commemorate one or more of the patron\\'s ancestors and to praise the patron\\'s own activities.\\nWith the decline of a living bardic tradition in the modern period, the term has loosened to mean a generic minstrel or author (especially a famous one). For example, William Shakespeare and Rabindranath Tagore are respectively known as \"the Bard of Avon\" (often simply \"the Bard\") and \"the Bard of Bengal\". In 16th-century Scotland, it turned into a derogatory term for an itinerant musician; nonetheless it was later romanticised by Sir Walter Scott (1771–1832).'})\n",
+ " ('Chatbot', {'url': 'https://en.wikipedia.org/wiki/Chatbot', 'summary': \"A chatbot (originally chatterbot) is a software application that aims to mimic human conversation through text or voice interactions, typically online. Modern chatbots are artificial intelligence (AI) systems that are capable of maintaining a conversation with a user in natural language and simulating the way a human would behave as a conversational partner. Such technologies often utilize aspects of deep learning and natural language processing.\\nRecently this field has gained widespread attention due to the popularity of OpenAI's ChatGPT, followed by alternatives such as Microsoft's Bing Chat (which uses OpenAI's GPT-4) and Google's Bard. Such examples reflect the recent practice of such products being built based upon broad foundational large language models that get fine-tuned so as to target specific tasks or applications (i.e. simulating human conversation, in the case of chatbots). Chatbots can also be designed or customized to further target even more specific situations and/or particular subject-matter domains.A major area where chatbots have long been used is in customer service and support, such as with various sorts of virtual assistants. Recently, companies spanning various industries have begun using the latest generative artificial intelligence technologies to power more advanced developments in such areas.\"})\n",
+ " ('Microsoft Bing', {'url': 'https://en.wikipedia.org/wiki/Microsoft_Bing', 'summary': 'Microsoft Bing (commonly known as Bing) is a web search engine owned and operated by Microsoft. The service has its origins in Microsoft\\'s previous search engines: MSN Search, Windows Live Search and later Live Search. Bing provides a variety of search services, including web, video, image and map search products. It is developed using ASP.NET.\\nBing, Microsoft\\'s replacement for Live Search, was unveiled by Microsoft CEO Steve Ballmer on May 28, 2009, at the All Things Digital conference in San Diego, California, for release on June 3, 2009. Notable new features at the time included the listing of search suggestions while queries are entered and a list of related searches (called \"Explore pane\") based on semantic technology from Powerset, which Microsoft had acquired in 2008.In July 2009, Microsoft and Yahoo! announced a deal in which Bing would power Yahoo! Search. Yahoo! finished the transition in 2012.In October 2011, Microsoft stated that they were working on new back-end search infrastructure with the goal of delivering faster and slightly more relevant search results for users. Known as \"Tiger\", the new index-serving technology had been incorporated into Bing globally since August that year. In May 2012, Microsoft announced another redesign of its search engine that includes \"Sidebar\", a social feature that searches users\\' social networks for information relevant to the search query.The BitFunnel search engine indexing algorithm and various components of the search engine were made open source by Microsoft in 2016.In February 2023, Microsoft introduced Bing Chat, an artificial intelligence chatbot experience based on GPT-4, integrated into the search engine. Bing reached 100 million active users the following month. 
As of March 2023, (Microsoft) Bing is the second largest search engine globally, with a query volume of 12%, behind Google\\'s 79%; Baidu is at 5% and Yahoo! Search, which Bing largely powers, has 2%.'})\n",
+ " ('Microsoft', {'url': 'https://en.wikipedia.org/wiki/Microsoft', 'summary': 'Microsoft Corporation is an American multinational technology corporation headquartered in Redmond, Washington. Microsoft\\'s best-known software products are the Windows line of operating systems, the Microsoft 365 suite of productivity applications, and the Internet Explorer and Edge web browsers. Its flagship hardware products are the Xbox video game consoles and the Microsoft Surface lineup of touchscreen personal computers. Microsoft ranked No. 14 in the 2022 Fortune 500 rankings of the largest United States corporations by total revenue; it was the world\\'s largest software maker by revenue as of 2022. It is considered one of the Big Five American information technology companies, alongside Alphabet (parent company of Google), Amazon, Apple, and Meta Platforms (formerly Facebook, Inc.).\\nMicrosoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters for the Altair 8800. It rose to dominate the personal computer operating system market with MS-DOS in the mid-1980s, followed by Windows. The company\\'s 1986 initial public offering (IPO) and subsequent rise in its share price created three billionaires and an estimated 12,000 millionaires among Microsoft employees. Since the 1990s, it has increasingly diversified from the operating system market and has made a number of corporate acquisitions, the largest being the acquisition of LinkedIn for $26.2 billion in December 2016, followed by their acquisition of Skype Technologies for $8.5 billion in May 2011.As of 2015, Microsoft is market-dominant in the IBM PC compatible operating system market and the office software suite market, although it has lost the majority of the overall operating system market to Android. 
The company also produces a wide range of other consumer and enterprise software for desktops, laptops, tabs, gadgets, and servers, including Internet search (with Bing), the digital services market (through MSN), mixed reality (HoloLens), cloud computing (Azure), and software development (Visual Studio).\\nSteve Ballmer replaced Gates as CEO in 2000 and later envisioned a \"devices and services\" strategy. This unfolded with Microsoft acquiring Danger Inc. in 2008, entering the personal computer production market for the first time in June 2012 with the launch of the Microsoft Surface line of tablet computers, and later forming Microsoft Mobile through the acquisition of Nokia\\'s devices and services division. Since Satya Nadella took over as CEO in 2014, the company has scaled back on hardware and instead focused on cloud computing, a move that helped the company\\'s shares reach their highest value since December 1999.Earlier dethroned by Apple in 2010, in 2018, Microsoft reclaimed its position as the most valuable publicly traded company in the world. In April 2019, Microsoft reached a trillion-dollar market cap, becoming the third U.S. public company to be valued at over $1 trillion after Apple and Amazon, respectively. As of 2022, Microsoft has the fourth-highest global brand valuation.\\nMicrosoft has been criticized for its monopolistic practices and the company\\'s software has been criticized for problems with ease of use, robustness, and security.'})\n",
+ " ('Keir Starmer', {'url': 'https://en.wikipedia.org/wiki/Keir_Starmer', 'summary': 'Sir Keir Rodney Starmer ( (listen); born 2 September 1962) is a British politician and barrister who has served as Leader of the Opposition and Leader of the Labour Party since 2020. He has been Member of Parliament (MP) for Holborn and St Pancras since 2015. He was previously Director of Public Prosecutions from 2008 to 2013.\\nStarmer was born in London and raised in Surrey, where he attended the selective state Reigate Grammar School, which became a private school while he was a student. He graduated with a Bachelor of Laws degree from the University of Leeds in 1985 and gained a postgraduate Bachelor of Civil Law degree at St Edmund Hall at the University of Oxford in 1986. After being called to the Bar, Starmer practised predominantly in criminal defence work, specialising in human rights matters. Becoming a member of Doughty Street Chambers in 1990, he was appointed as Queen\\'s Counsel (QC) in 2002. In 2008, he became Director of Public Prosecutions (DPP) and Head of the Crown Prosecution Service, holding these positions until 2013. On conclusion of his five-year term as DPP, he was appointed Knight Commander of the Order of the Bath (KCB) in the 2014 New Year Honours.\\nElected to the House of Commons at the 2015 general election, Starmer was appointed Shadow Minister for Immigration by party leader Jeremy Corbyn in September 2015. He resigned in 2016 as part of the wider June 2016 British shadow cabinet resignations in protest at Corbyn\\'s leadership, but accepted a new post under Corbyn later that year as Shadow Secretary of State for Exiting the European Union following the EU membership referendum. 
Starmer advocated a second referendum on Brexit, in which he stated he would vote to \"remain\"; this policy was ultimately included in the 2019 Labour election platform.\\nAfter Corbyn resigned following Labour\\'s 2019 general election defeat, Starmer won the party\\'s 2020 leadership election. His tenure as leader has seen him move towards the political centre and abandon the left-wing platform of his leadership campaign. His leadership has been characterised by opposition to some of the government response to the COVID-19 pandemic and issues such as Partygate, the September 2022 mini-budget, and the cost of living crisis. Starmer has emphasised the importance of eliminating antisemitism in the Labour Party. The party has seen varied results in local elections and by-elections under Starmer\\'s leadership, but since late 2021 has maintained leads in opinion polling over the governing Conservative Party.'})\n",
+ " ('Labour Leader', {'url': 'https://en.wikipedia.org/wiki/Labour_Leader', 'summary': 'The Labour Leader was a British socialist newspaper published for almost one hundred years. It was later renamed New Leader and Socialist Leader, before finally taking the name Labour Leader again.'})\n",
+ " ('Chartered Management Institute', {'url': 'https://en.wikipedia.org/wiki/Chartered_Management_Institute', 'summary': \"The Chartered Management Institute (CMI) is a professional institution for management based in the United Kingdom. It was founded as the British Institute of Management (BIM) in 1947 or 1948, merged with the Institution of Industrial Managers (IIM) in 1992 to form the Institute of Management (IM), and gained a royal charter, and its present name, in 2002.\\nThe major membership classes are Member, Fellow - for those with significant expertise - and Companion - the most senior grade.\\nIn addition to supporting its members, the organisation encourages management development, carries out research, produces a wide variety of publications on management interests, and publishes the official members' magazine, Professional Manager. The institute also engages with government and other public bodies concerning policy on management and business related issues. Professional Manager magazine is circulated to over 80,000 members of the CMI.\\n\\n\"})\n",
+ " ('Samsung Galaxy Z Fold 5', {'url': 'https://en.wikipedia.org/wiki/Samsung_Galaxy_Z_Fold_5', 'summary': 'The Samsung Galaxy Z Fold 5 (stylized as Samsung Galaxy Z Fold5) is an Android-based foldable smartphone going to be created by Samsung Electronics. It was announced on July 26, 2023.\\n\\n'})\n",
+ " ('Foldable smartphone', {'url': 'https://en.wikipedia.org/wiki/Foldable_smartphone', 'summary': 'A foldable smartphone (also known as a foldable phone or simply foldable) is a smartphone with a folding form factor. It is reminiscent of the clamshell (or \"flip phone\") design of many earlier feature phones. Some variants of the concept use multiple touchscreen panels on a hinge, while other designs utilise a flexible display. Concepts of such devices date back as early as Nokia\\'s \"Morph\" concept in 2008, and a concept presented by Samsung Electronics in 2013 (as part of a larger set of concepts utilizing flexible OLED displays), while the first commercially available folding smartphones with OLED displays began to emerge in November 2018.\\nSome devices may fold out on a vertical axis to into a wider, tablet-like form, but are still usable in a smaller, folded state; the display may either wrap around to the back of the device when folded (as with the Royole FlexPai and Huawei Mate X), or use a booklet-like design where the larger, folded screen is located on the interior, and a screen on its \"cover\" allows the user to interact with the device without opening it (such as the Samsung Galaxy Fold series). Horizontally-folding smartphones have also been produced, typically using a clamshell form factor.\\nThe first generation of commercially released foldable smartphones faced concerns over their durability, as well as their high prices.\\n\\n'})\n",
+ " ('Pixel Fold', {'url': 'https://en.wikipedia.org/wiki/Pixel_Fold', 'summary': \"The Pixel Fold is an Android-powered foldable smartphone designed, developed, and marketed by Google as part of the Google Pixel product line. It was officially announced on May 10, 2023, at the annual Google I/O keynote, and was released in the United States on June 28. Reception was mixed, with many critics praising the phone's cameras and overall design but criticizing the price, durability, weight, and inner display.\"})\n",
+ " ('Samsung', {'url': 'https://en.wikipedia.org/wiki/Samsung', 'summary': \"Samsung Group, or simply Samsung (Korean: 삼성; RR: samseong [samsʌŋ]) (stylized as SΛMSUNG), is a South Korean multinational manufacturing conglomerate headquartered in Samsung Town, Seoul, South Korea. It comprises numerous affiliated businesses, most of them united under the Samsung brand, and is the largest South Korean chaebol (business conglomerate). As of 2020, Samsung has the eighth highest global brand value.Samsung was founded by Lee Byung-chul in 1938 as a trading company. Over the next three decades, the group diversified into areas including food processing, textiles, insurance, securities, and retail. Samsung entered the electronics industry in the late 1960s and the construction and shipbuilding industries in the mid-1970s; these areas would drive its subsequent growth. Following Lee's death in 1987, Samsung was separated into five business groups – Samsung Group, Shinsegae Group, CJ Group and Hansol Group, and JoongAng Group.\\nNotable Samsung industrial affiliates include Samsung Electronics (the world's largest information technology company, consumer electronics maker and chipmaker measured by 2017 revenues), Samsung Heavy Industries (the world's second largest shipbuilder measured by 2010 revenues), and Samsung Engineering and Samsung C&T Corporation (respectively the world's 13th and 36th largest construction companies). Other notable subsidiaries include Samsung Life Insurance (the world's 14th largest life insurance company), Samsung Everland (operator of Everland Resort, the oldest theme park in South Korea) and Cheil Worldwide (the world's 15th largest advertising agency, as measured by 2012 revenues).\"})\n",
+ " ('Samsung Galaxy', {'url': 'https://en.wikipedia.org/wiki/Samsung_Galaxy', 'summary': 'Samsung Galaxy (Korean: 삼성 갤럭시, stylized as SΛMSUNG Galaxy since 2015 (except Japan where it omits the Samsung branding), previously stylized as Samsung GALAXY; abbreviated as SG) is a series of computing and mobile computing devices that are designed, manufactured and marketed by Samsung Electronics. The product line includes the Samsung Galaxy S series of high-end smartphones, the Samsung Galaxy Tab series of tablets, the Samsung Galaxy Note series of tablets and phablets with the added functionality of a stylus, the foldable Samsung Galaxy Z series, and smartwatches including the first version of the Samsung Galaxy Gear, with later versions dropping the Galaxy branding, until the release of the Samsung Galaxy Watch in 2018.\\nSamsung Galaxy devices use the Android operating system produced by Google, with a custom user interface called One UI (with previous versions being known as Samsung Experience and TouchWiz). However, the Galaxy TabPro S is the first Galaxy-branded Windows 10 device that was announced in CES 2016.\\nThe Galaxy Watch is the first Galaxy-branded smartwatch since the release of later iterations of the Gear smartwatch from 2014 to 2017. In 2020, Samsung added the Samsung Galaxy Chromebook 2-in-1 laptop running ChromeOS to the Galaxy branding lineup. The follow-on Samsung Galaxy Chromebook 2 was released in 2021.'})\n",
+ " ('Cream', {'url': 'https://en.wikipedia.org/wiki/Cream', 'summary': 'Cream is a dairy product composed of the higher-fat layer skimmed from the top of milk before homogenization. In un-homogenized milk, the fat, which is less dense, eventually rises to the top. In the industrial production of cream, this process is accelerated by using centrifuges called \"separators\". In many countries, it is sold in several grades depending on the total butterfat content. It can be dried to a powder for shipment to distant markets, and contains high levels of saturated fat.Cream skimmed from milk may be called \"sweet cream\" to distinguish it from cream skimmed from whey, a by-product of cheese-making. Whey cream has a lower fat content and tastes more salty, tangy and \"cheesy\". In many countries partially fermented cream is also sold: sour cream, crème fraîche, and so on. Both forms have many culinary uses in both sweet and savoury dishes.\\nCream produced by cattle (particularly Jersey cattle) grazing on natural pasture often contains some carotenoid pigments derived from the plants they eat; traces of these intensely colored pigments give milk a slightly yellow tone, hence the name of the yellowish-white color: cream. Carotenoids are also the origin of butter\\'s yellow color. Cream from goat\\'s milk, water buffalo milk, or from cows fed indoors on grain or grain-based pellets, is white.'})\n",
+ " ('Color', {'url': 'https://en.wikipedia.org/wiki/Color', 'summary': \"Color (American English) or colour (Commonwealth English) is the visual perception based on the electromagnetic spectrum. Though color is not an inherent property of matter, color perception is related to an object's light absorption, reflection, emission spectra and interference. For most humans, color are perceived in the visible light spectrum with three types of cone cells (trichromacy). Other animals may have a different number of cone cell types or have eyes sensitive to different wavelength, such as bees that can distinguish ultraviolet, and thus have a different color sensitivity range. Animal perception of color originates from different light wavelength or spectral sensitivity in cone cell types, which is then processed by the brain.\\nColors have perceived properties such as hue, colorfulness (saturation) and luminance. Colors can also be additively mixed (commonly used for actual light) or subtractively mixed (commonly used for materials). If the colors are mixed in the right proportions, because of metamerism, they may look the same as a single-wavelength light. For convenience, colors can be organized in a color space, which when being abstracted as a mathematical color model can assign each region of color with a corresponding set of numbers. As such, color spaces are an essential tool for color reproduction in print, photography, computer monitors and television. The most well-known color models are RGB, CMYK, YUV, HSL and HSV.\\nBecause the perception of color is an important aspect of human life, different colors have been associated with emotions, activity, and nationality. Names of color regions in different cultures can have different, sometimes overlapping areas. In visual arts, color theory is used to govern the use of colors in an aesthetically pleasing and harmonious way. 
The theory of color includes the color complements; color balance; and classification of primary colors (traditionally red, yellow, blue), secondary colors (traditionally orange, green, purple) and tertiary colors. The study of colors in general is called color science.\"})\n",
+ " ('DCI-P3', {'url': 'https://en.wikipedia.org/wiki/DCI-P3', 'summary': 'DCI-P3 is an RGB color space first defined in 2005 as part of the Digital Cinema Initiative, to be used for digital theatrical motion picture distribution (DCDM). Display P3 is a variant developed by Apple Inc. for wide-gamut displays.'})\n",
+ " ('Color space', {'url': 'https://en.wikipedia.org/wiki/Color_space', 'summary': 'A color space is a specific organization of colors. In combination with color profiling supported by various physical devices, it supports reproducible representations of color – whether such representation entails an analog or a digital representation. A color space may be arbitrary, i.e. with physically realized colors assigned to a set of physical color swatches with corresponding assigned color names (including discrete numbers in – for example – the Pantone collection), or structured with mathematical rigor (as with the NCS System, Adobe RGB and sRGB). A \"color space\" is a useful conceptual tool for understanding the color capabilities of a particular device or digital file. When trying to reproduce color on another device, color spaces can show whether shadow/highlight detail and color saturation can be retained, and by how much either will be compromised.\\nA \"color model\" is an abstract mathematical model describing the way colors can be represented as tuples of numbers (e.g. triples in RGB or quadruples in CMYK); however, a color model with no associated mapping function to an absolute color space is a more or less arbitrary color system with no connection to any globally understood system of color interpretation. Adding a specific mapping function between a color model and a reference color space establishes within the reference color space a definite \"footprint\", known as a gamut, and for a given color model, this defines a color space. For example, Adobe RGB and sRGB are two different absolute color spaces, both based on the RGB color model. 
When defining a color space, the usual reference standard is the CIELAB or CIEXYZ color spaces, which were specifically designed to encompass all colors the average human can see.\\nSince \"color space\" identifies a particular combination of the color model and the mapping function, the word is often used informally to identify a color model. However, even though identifying a color space automatically identifies the associated color model, this usage is incorrect in a strict sense. For example, although several specific color spaces are based on the RGB color model, there is no such thing as the singular RGB color space.'})\n",
+ " ('Stylus', {'url': 'https://en.wikipedia.org/wiki/Stylus', 'summary': 'A stylus (plural styli or styluses) is a writing utensil or a small tool for some other form of marking or shaping, for example, in pottery. It can also be a computer accessory that is used to assist in navigating or providing more precision when using touchscreens. It usually refers to a narrow elongated staff, similar to a modern ballpoint pen. Many styluses are heavily curved to be held more easily. Another widely used writing tool is the stylus used by blind users in conjunction with the slate for punching out the dots in Braille.'})\n",
+ " ('Samsung Galaxy Z Fold 4', {'url': 'https://en.wikipedia.org/wiki/Samsung_Galaxy_Z_Fold_4', 'summary': 'The Samsung Galaxy Z Fold 4 (stylized as Samsung Galaxy Z Fold4) is a foldable smartphone that is part of the Samsung Galaxy Z series. It was announced at the August 2022 edition of Galaxy Unpacked alongside the Galaxy Z Flip 4. It was released on August 25, 2022. It is the successor to the Galaxy Z Fold 3.'})\n",
+ " ('Globe', {'url': 'https://en.wikipedia.org/wiki/Globe', 'summary': 'A globe is a spherical model of Earth, of some other celestial body, or of the celestial sphere. Globes serve purposes similar to maps, but unlike maps, they do not distort the surface that they portray except to scale it down. A model globe of Earth is called a terrestrial globe. A model globe of the celestial sphere is called a celestial globe.\\nA globe shows details of its subject. A terrestrial globe shows landmasses and water bodies. It might show nations and major cities and the network of latitude and longitude lines. Some have raised relief to show mountains and other large landforms. A celestial globe shows notable stars, and may also show positions of other prominent astronomical objects. Typically, it will also divide the celestial sphere into constellations.\\nThe word globe comes from the Latin word globus, meaning \"sphere\". Globes have a long history. The first known mention of a globe is from Strabo, describing the Globe of Crates from about 150 BC. The oldest surviving terrestrial globe is the Erdapfel, made by Martin Behaim in 1492. The oldest surviving celestial globe sits atop the Farnese Atlas, carved in the 2nd century Roman Empire.\\n\\n'})\n",
+ " ('Amaranth', {'url': 'https://en.wikipedia.org/wiki/Amaranth', 'summary': 'Amaranthus is a cosmopolitan genus of annual or short-lived perennial plants collectively known as amaranths. Some amaranth species are cultivated as leaf vegetables, pseudocereals, and ornamental plants. Catkin-like cymes of densely packed flowers grow in summer or fall. Amaranth varies in flower, leaf, and stem color with a range of striking pigments from the spectrum of maroon to crimson and can grow longitudinally from 1 to 2.5 metres (3 to 8 feet) tall with a cylindrical, succulent, fibrous stem that is hollow with grooves and bracteoles when mature.\\nThere are approximately 75 species in the genus, 10 of which are dioecious and native to North America with the remaining 65 monoecious species endemic to every continent (except Antarctica) from tropical lowlands to the Himalayas. Members of this genus share many characteristics and uses with members of the closely related genus Celosia. Amaranth grain is collected from the genus. The leaves of some species are also eaten.'})\n",
+ " ('IPhone 7', {'url': 'https://en.wikipedia.org/wiki/IPhone_7', 'summary': \"The iPhone 7 and iPhone 7 Plus are smartphones that were designed, developed, and marketed by Apple Inc. They are the tenth generation of the iPhone. They were announced on September 7, 2016, at the Bill Graham Civic Auditorium in San Francisco by Apple CEO Tim Cook, and were released on September 16, 2016, succeeding the iPhone 6, iPhone 6 Plus, iPhone 6S and iPhone 6S Plus as the flagship devices in the iPhone series. Apple also released the iPhone 7 and iPhone 7 Plus in numerous countries worldwide throughout September and October 2016. They were succeeded as flagship devices by the iPhone 8 and iPhone 8 Plus on September 12, 2017, and were discontinued with the announcement of the iPhone 11 and iPhone 11 Pro on September 10, 2019.\\n\\nThe iPhone 7's overall design is similar to the iPhone 6 and iPhone 6S. Changes introduced included new color options (Matte Black and Jet Black), water and dust resistance, a new capacitive, static home button, revised antenna bands, and the controversial removal of the 3.5 mm headphone jack. The device's internal hardware received upgrades, including a heterogeneous quad-core system-on-chip with improved system and graphics performance, upgraded 12 megapixel rear-facing cameras with optical image stabilization on all models, and an additional telephoto lens exclusive to the iPhone 7 Plus to provide enhanced (2x) optical zoom capabilities and portrait mode. The front camera is the first in the series with 1080p (Full HD) video resolution. The iPhone 7 & 7 Plus are supported from iOS 10 to iOS 15, and they are the third to support six versions of iOS before support was terminated, after the iPhone 5S.\"})\n",
+ " ('IPhone 6', {'url': 'https://en.wikipedia.org/wiki/IPhone_6', 'summary': 'The iPhone 6 and iPhone 6 Plus are smartphones that were designed and marketed by Apple Inc. They are the eighth generation of the iPhone, succeeding the iPhone 5, iPhone 5C and iPhone 5S, and were announced on September 9, 2014, and released on September 19, 2014. The iPhone 6 and iPhone 6 Plus jointly were themselves replaced as the flagship devices of the iPhone series by the iPhone 6S and iPhone 6S Plus on September 9, 2015. The iPhone 6 and 6 Plus include larger 4.7 and 5.5 inches (120 and 140 mm) displays, a faster processor, upgraded cameras, improved LTE and Wi-Fi connectivity and support for a near field communications-based mobile payments offering.The iPhone 6 and 6 Plus received positive reviews, with critics regarding their redesign, specifications, camera, and battery life as being improvements over previous iPhone models. However, aspects of the design of iPhone 6 were also criticized, including plastic strips on the rear of the device for its antenna that disrupted the otherwise metal exterior, and the screen resolution of the standard-sized iPhone 6 being lower than other devices in its class. The iPhone 6 sold extremely well, making it the best-selling iPhone model and the most successful smartphone to date.\\nThe iPhone 6 and 6 Plus have been the subject of several hardware issues, including most prominently, being susceptible to bending under hard pressure (dubbed \"Bendgate\"), and as a byproduct of this lack of rigidity, the touchscreen\\'s internal hardware being susceptible to losing its connection to the phone\\'s logic board (nicknamed \"Touch Disease\"). 
Additionally, some iPhone 6 Plus models were the subject of camera issues, including some with malfunctioning optical image stabilization or otherwise defects on rear cameras.\\nThe iPhone 6 and 6 Plus were moved to the midrange spot in Apple\\'s iPhone lineup when the iPhone 6S and 6S Plus were released in September 2015. The iPhone 6 and 6 Plus were discontinued in most markets on September 7, 2016, when Apple announced the iPhone 7 and iPhone 7 Plus. Their spot as the entry-level iPhone was replaced by the first-generation iPhone SE, which was released earlier on March 31, 2016. The iPhone 6 was relaunched with 32 GB of storage in Asian markets in February 2017 as a midrange/budget iPhone. It was later expanded to Europe, before hitting the US markets in May 2017, and Canada in July 2017. The iPhone 6 and 6 Plus supported iOS 8, 9, 10, 11 and 12 before being dropped by iOS 13, and they are the third to support five versions of iOS after the iPhone 4S and the iPhone 5.\\n\\n'})\n",
+ " ('Asphalt 9: Legends', {'url': 'https://en.wikipedia.org/wiki/Asphalt_9:_Legends', 'summary': 'Asphalt 9: Legends is a 2018 racing game developed by Gameloft Barcelona and published by Gameloft. Released on July 25, 2018, it\\'s the fifteenth major game of Asphalt series. In comparison to previous entries, there are several new and improved features, such as a prestigious car lineup, new control schemes, including the autopilot mode called \"TouchDrive\", and race modes, and the reimplemented \"shockwave nitro\" from Asphalt 6: Adrenaline. The graphics are considered significantly improved compared to its 2013 predecessor, Asphalt 8: Airborne.\\n\\n'})\n",
+ " ('Game', {'url': 'https://en.wikipedia.org/wiki/Game', 'summary': 'A game is a structured form of play, usually undertaken for entertainment or fun, and sometimes used as an educational tool. Many games are also considered to be work (such as professional players of spectator sports or games) or art (such as jigsaw puzzles or games involving an artistic layout such as Mahjong, solitaire, or some video games).\\nGames are sometimes played purely for enjoyment, sometimes for achievement or reward as well. They can be played alone, in teams, or online; by amateurs or by professionals. The players may have an audience of non-players, such as when people are entertained by watching a chess championship. On the other hand, players in a game may constitute their own audience as they take their turn to play. Often, part of the entertainment for children playing a game is deciding who is part of their audience and who is a player. A toy and a game are not the same. Toys generally allow for unrestricted play whereas games present rules for the player to follow. \\nKey components of games are goals, rules, challenge, and interaction. Games generally involve mental or physical stimulation, and often both. Many games help develop practical skills, serve as a form of exercise, or otherwise perform an educational, simulational, or psychological role.\\nAttested as early as 2600 BC, games are a universal part of human experience and present in all cultures. The Royal Game of Ur, Senet, and Mancala are some of the oldest known games.\\n\\n'})\n",
+ " ('Single-core', {'url': 'https://en.wikipedia.org/wiki/Single-core', 'summary': 'A single-core processor is a microprocessor with a single core on its die. It performs the fetch-decode-execute cycle once per clock-cycle, as it only runs on one thread. A computer using a single core CPU is generally slower than a multi-core system. \\nSingle core processors used to be widespread in desktop computers, but as applications demanded more processing power, the slower speed of single core systems became a detriment to performance. Windows supported single-core processors up until the release of Windows 11, where a dual-core processor is required. Single core processors are still in use in some niche circumstances. Some older legacy systems like those running antiquated operating systems (e.g. Windows 98) cannot gain any benefit from multi-core processors. Single core processors are also used in hobbyist computers like the Raspberry Pi and Single-board microcontrollers. The production of single-core desktop processors ended in 2013 with the Celeron G470.\\n\\n'})\n",
+ " ('Multi-core processor', {'url': 'https://en.wikipedia.org/wiki/Multi-core_processor', 'summary': \"A multi-core processor is a microprocessor on a single integrated circuit with two or more separate processing units, called cores, each of which reads and executes program instructions. The instructions are ordinary CPU instructions (such as add, move data, and branch) but the single processor can run instructions on separate cores at the same time, increasing overall speed for programs that support multithreading or other parallel computing techniques. Manufacturers typically integrate the cores onto a single integrated circuit die (known as a chip multiprocessor or CMP) or onto multiple dies in a single chip package. The microprocessors currently used in almost all personal computers are multi-core.\\nA multi-core processor implements multiprocessing in a single physical package. Designers may couple cores in a multi-core device tightly or loosely. For example, cores may or may not share caches, and they may implement message passing or shared-memory inter-core communication methods. Common network topologies used to interconnect cores include bus, ring, two-dimensional mesh, and crossbar. Homogeneous multi-core systems include only identical cores; heterogeneous multi-core systems have cores that are not identical (e.g. big.LITTLE have heterogeneous cores that share the same instruction set, while AMD Accelerated Processing Units have cores that do not share the same instruction set). Just as with single-processor systems, cores in multi-core systems may implement architectures such as VLIW, superscalar, vector, or multithreading.\\nMulti-core processors are widely used across many application domains, including general-purpose, embedded, network, digital signal processing (DSP), and graphics (GPU). Core count goes up to even dozens, and for specialized chips over 10,000, and in supercomputers (i.e. 
clusters of chips) the count can go over 10 million (and in one case up to 20 million processing elements total in addition to host processors).The improvement in performance gained by the use of a multi-core processor depends very much on the software algorithms used and their implementation. In particular, possible gains are limited by the fraction of the software that can run in parallel simultaneously on multiple cores; this effect is described by Amdahl's law. In the best case, so-called embarrassingly parallel problems may realize speedup factors near the number of cores, or even more if the problem is split up enough to fit within each core's cache(s), avoiding use of much slower main-system memory. Most applications, however, are not accelerated as much unless programmers invest effort in refactoring.The parallelization of software is a significant ongoing topic of research. Cointegration of multiprocessor applications provides flexibility in network architecture design. Adaptability within parallel models is an additional feature of systems utilizing these protocols.\"})\n",
+ " ('Electric battery', {'url': 'https://en.wikipedia.org/wiki/Electric_battery', 'summary': 'A battery is a source of electric power consisting of one or more electrochemical cells with external connections for powering electrical devices. When a battery is supplying power, its positive terminal is the cathode and its negative terminal is the anode. The terminal marked negative is the source of electrons that will flow through an external electric circuit to the positive terminal. When a battery is connected to an external electric load, a redox reaction converts high-energy reactants to lower-energy products, and the free-energy difference is delivered to the external circuit as electrical energy. Historically the term \"battery\" specifically referred to a device composed of multiple cells; however, the usage has evolved to include devices composed of a single cell.Primary (single-use or \"disposable\") batteries are used once and discarded, as the electrode materials are irreversibly changed during discharge; a common example is the alkaline battery used for flashlights and a multitude of portable electronic devices. Secondary (rechargeable) batteries can be discharged and recharged multiple times using an applied electric current; the original composition of the electrodes can be restored by reverse current. Examples include the lead–acid batteries used in vehicles and lithium-ion batteries used for portable electronics such as laptops and mobile phones.\\nBatteries come in many shapes and sizes, from miniature cells used to power hearing aids and wristwatches to, at the largest extreme, huge battery banks the size of rooms that provide standby or emergency power for telephone exchanges and computer data centers. Batteries have much lower specific energy (energy per unit mass) than common fuels such as gasoline. 
In automobiles, this is somewhat offset by the higher efficiency of electric motors in converting electrical energy to mechanical work, compared to combustion engines.'})\n",
+ " ('Hour', {'url': 'https://en.wikipedia.org/wiki/Hour', 'summary': \"An hour (symbol: h; also abbreviated hr) is a unit of time historically reckoned as 1⁄24 of a day and defined contemporarily as exactly 3,600 seconds (SI). There are 60 minutes in an hour, and 24 hours in a day.\\nThe hour was initially established in the ancient Near East as a variable measure of 1⁄12 of the night or daytime. Such seasonal hours, also known as temporal hours or unequal hours, varied by season and latitude.\\nEqual hours or equinoctial hours were taken as 1⁄24 of the day as measured from noon to noon; the minor seasonal variations of this unit were eventually smoothed by making it 1⁄24 of the mean solar day. Since this unit was not constant due to long term variations in the Earth's rotation, the hour was finally separated from the Earth's rotation and defined in terms of the atomic or physical second.\\nIn the modern metric system, hours are an accepted unit of time defined as 3,600 atomic seconds. However, on rare occasions an hour may incorporate a positive or negative leap second, effectively making it appear to last 3,599 or 3,601 seconds, in order to keep UTC within 0.9 seconds of UT1, the latter of which is based on measurements of the mean solar day.\"})\n",
+ " ('5G', {'url': 'https://en.wikipedia.org/wiki/5G', 'summary': 'In telecommunications, 5G is the fifth-generation technology standard for broadband cellular networks, which cellular phone companies began deploying worldwide in 2019, and is the planned successor to the 4G networks which provide connectivity to most current cellphones.\\nLike its predecessors, 5G networks are cellular networks, in which the service area is divided into small geographical areas called cells. All 5G wireless devices in a cell are connected to the Internet and telephone network by radio waves through a local antenna in the cell. The new networks have higher download speeds, peak speed of 10 gigabits per second (Gbit/s) when there is only one user in the network. 5G has higher bandwidth to deliver faster speeds than 4G and can thus connect more different devices, improving the quality of Internet services in crowded areas. Due to the increased bandwidth, it is expected the 5G networks will increasingly be used as general internet service providers (ISPs), competing with existing ISPs such as cable internet, and also will make possible new applications in internet-of-things (IoT) and machine-to-machine areas. Cellphones with 4G capability alone are not able to use the 5G networks.'})\n",
+ " (\"Tom's Hardware\", {'url': 'https://en.wikipedia.org/wiki/Tom%27s_Hardware', 'summary': \"Tom's Hardware is an online publication owned by Future plc and focused on technology. It was founded in 1996 by Thomas Pabst. It provides articles, news, price comparisons, videos and reviews on computer hardware and high technology. The site features coverage on CPUs, motherboards, RAM, PC cases, graphic cards, display technology, power supplies and displays, storage, smartphones, tablets, gaming, consoles, and computer peripherals.\\nTom's Hardware has a forum and featured blogs.\"})\n",
+ "Relations:\n",
+ " {'head': 'Google', 'type': 'owner of', 'tail': 'Google Search', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images': {'spans': [[0, 128]]}}}\n",
+ " {'head': 'Google Search', 'type': 'owned by', 'tail': 'Google', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images': {'spans': [[0, 128]]}}}\n",
+ " {'head': 'Google', 'type': 'product or material produced', 'tail': 'Google Search', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images': {'spans': [[0, 128]]}}}\n",
+ " {'head': 'SafeSearch', 'type': 'owned by', 'tail': 'Google', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images': {'spans': [[114, 242], [228, 356]]}}}\n",
+ " {'head': 'SafeSearch', 'type': 'country', 'tail': 'United States', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images': {'spans': [[114, 242]]}}}\n",
+ " {'head': 'SafeSearch', 'type': 'developer', 'tail': 'Google', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images': {'spans': [[114, 242]]}}}\n",
+ " {'head': 'Google', 'type': 'owner of', 'tail': 'SafeSearch', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images': {'spans': [[228, 356]]}}}\n",
+ " {'head': 'Artificial intelligence arms race', 'type': 'facet of', 'tail': 'Artificial intelligence', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[127, 255]]}}}\n",
+ " {'head': 'Artificial intelligence', 'type': 'instance of', 'tail': 'Arms race', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[127, 255]]}}}\n",
+ " {'head': 'Michelle Donelan', 'type': 'employer', 'tail': 'The Guardian', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[254, 382]]}}}\n",
+ " {'head': 'Google DeepMind', 'type': 'headquarters location', 'tail': 'London', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[381, 509]]}}}\n",
+ " {'head': 'Google DeepMind', 'type': 'country', 'tail': 'United Kingdom', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[381, 509]]}}}\n",
+ " {'head': 'Google DeepMind', 'type': 'inception', 'tail': '2011', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[508, 636]]}}}\n",
+ " {'head': 'Google DeepMind', 'type': 'founded by', 'tail': 'Demis Hassabis', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[508, 636]]}}}\n",
+ " {'head': 'Google DeepMind', 'type': 'owned by', 'tail': 'Google', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[508, 636]]}}}\n",
+ " {'head': 'Google', 'type': 'subsidiary', 'tail': 'Google DeepMind', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[508, 636]]}}}\n",
+ " {'head': 'Demis Hassabis', 'type': 'sport', 'tail': 'Chess', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[508, 636]]}}}\n",
+ " {'head': 'Demis Hassabis', 'type': 'employer', 'tail': 'Google DeepMind', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[508, 636]]}}}\n",
+ " {'head': 'ChatGPT', 'type': 'developer', 'tail': 'OpenAI', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[889, 1017]]}}}\n",
+ " {'head': 'Bard', 'type': 'instance of', 'tail': 'Chatbot', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[889, 1017]]}}}\n",
+ " {'head': 'ChatGPT', 'type': 'instance of', 'tail': 'Chatbot', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[889, 1017]]}}}\n",
+ " {'head': 'Microsoft Bing', 'type': 'owned by', 'tail': 'Microsoft', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[1016, 1144]]}}}\n",
+ " {'head': 'Google', 'type': 'location of formation', 'tail': 'London', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[1143, 1271]]}}}\n",
+ " {'head': 'Keir Starmer', 'type': 'position held', 'tail': 'Labour Leader', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[1651, 1779]]}}}\n",
+ " {'head': 'Chartered Management Institute', 'type': 'country', 'tail': 'United Kingdom', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[1778, 1906]]}}}\n",
+ " {'head': 'Chatbot', 'type': 'subclass of', 'tail': 'Artificial intelligence', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[1778, 1906]]}}}\n",
+ " {'head': 'ChatGPT', 'type': 'developer', 'tail': 'Google', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[1905, 2033]]}}}\n",
+ " {'head': 'Google', 'type': 'owner of', 'tail': 'ChatGPT', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[1905, 2033]]}}}\n",
+ " {'head': 'ChatGPT', 'type': 'owned by', 'tail': 'Google', 'meta': {'https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race': {'spans': [[1905, 2033]]}}}\n",
+ " {'head': 'Samsung Galaxy Z Fold 5', 'type': 'subclass of', 'tail': 'Foldable smartphone', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[0, 128], [254, 382]]}}}\n",
+ " {'head': 'Samsung Galaxy Z Fold 5', 'type': 'instance of', 'tail': 'Foldable smartphone', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[0, 128]]}}}\n",
+ " {'head': 'Pixel Fold', 'type': 'manufacturer', 'tail': 'Google', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[127, 255], [381, 509], [1270, 1398], [2032, 2160]]}}}\n",
+ " {'head': 'Samsung Galaxy Z Fold 5', 'type': 'manufacturer', 'tail': 'Samsung', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[254, 382], [508, 636], [1270, 1398], [2540, 2668]]}}}\n",
+ " {'head': 'Samsung Galaxy', 'type': 'manufacturer', 'tail': 'Samsung', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[508, 636]]}}}\n",
+ " {'head': 'Cream', 'type': 'instance of', 'tail': 'Color', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[889, 1017]]}}}\n",
+ " {'head': 'Samsung', 'type': 'product or material produced', 'tail': 'Samsung Galaxy Z Fold 5', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[1016, 1144]]}}}\n",
+ " {'head': 'DCI-P3', 'type': 'instance of', 'tail': 'Color space', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[1397, 1525]]}}}\n",
+ " {'head': 'Pixel Fold', 'type': 'followed by', 'tail': 'Samsung Galaxy Z Fold 5', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[1524, 1652]]}}}\n",
+ " {'head': 'Samsung Galaxy Z Fold 5', 'type': 'different from', 'tail': 'Pixel Fold', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[1651, 1779], [2921, 3049]]}}}\n",
+ " {'head': 'Pixel Fold', 'type': 'different from', 'tail': 'Samsung Galaxy Z Fold 5', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[1651, 1779], [2921, 3049]]}}}\n",
+ " {'head': 'Samsung Galaxy', 'type': 'subclass of', 'tail': 'Stylus', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[1905, 2033]]}}}\n",
+ " {'head': 'Samsung Galaxy Z Fold 4', 'type': 'followed by', 'tail': 'Samsung Galaxy Z Fold 5', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[1905, 2033]]}}}\n",
+ " {'head': 'Globe', 'type': 'has part', 'tail': 'Amaranth', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2159, 2287]]}}}\n",
+ " {'head': 'Globe', 'type': 'parent taxon', 'tail': 'Amaranth', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2159, 2287]]}}}\n",
+ " {'head': 'Amaranth', 'type': 'part of', 'tail': 'Globe', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2159, 2287]]}}}\n",
+ " {'head': 'IPhone 7', 'type': 'manufacturer', 'tail': 'Samsung', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2286, 2414]]}}}\n",
+ " {'head': 'IPhone 6', 'type': 'manufacturer', 'tail': 'Samsung', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2286, 2414]]}}}\n",
+ " {'head': 'Asphalt 9: Legends', 'type': 'instance of', 'tail': 'Game', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2667, 2795]]}}}\n",
+ " {'head': 'Single-core', 'type': 'followed by', 'tail': 'Multi-core processor', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2794, 2922]]}}}\n",
+ " {'head': 'Single-core', 'type': 'opposite of', 'tail': 'Multi-core processor', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2794, 2922]]}}}\n",
+ " {'head': 'Single-core', 'type': 'subclass of', 'tail': 'Multi-core processor', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2794, 2922]]}}}\n",
+ " {'head': 'Electric battery', 'type': 'subclass of', 'tail': 'Hour', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[2921, 3049]]}}}\n",
+ " {'head': '5G', 'type': 'uses', 'tail': \"Tom's Hardware\", 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[3048, 3176]]}}}\n",
+ " {'head': 'Samsung Galaxy Z Fold 5', 'type': 'followed by', 'tail': 'Pixel Fold', 'meta': {'https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold': {'spans': [[3175, 3303]]}}}\n",
+ "Sources:\n",
+ " ('https://www.theguardian.com/technology/2023/aug/03/google-to-launch-privacy-tools-which-remove-unwanted-personal-images', {'article_title': 'Google to launch privacy tools which remove unwanted personal images', 'article_publish_date': datetime.datetime(2023, 8, 3, 0, 0)})\n",
+ " ('https://www.theguardian.com/technology/2023/aug/03/so-important-uk-minister-endorses-googles-training-drive-in-ai-arms-race', {'article_title': 'AI for all? Google ups the ante with free UK training courses for firms', 'article_publish_date': datetime.datetime(2023, 8, 3, 0, 0)})\n",
+ " ('https://www.tomsguide.com/face-off/samsung-galaxy-z-fold-5-vs-google-pixel-fold', {'article_title': \"Samsung Galaxy Z Fold 5 vs Google Pixel Fold: What's the best foldable phone?\", 'article_publish_date': datetime.datetime(2023, 8, 3, 5, 30, 11, tzinfo=tzlocal())})\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Visualize the Knowledge Base"
+ ],
+ "metadata": {
+ "id": "ubGNrbCrZ6f4"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "news_links = get_news_links(\"Google\", pages=5, max_links=20)\n",
+ "kb = from_urls_to_kb(news_links, verbose=True)\n",
+ "filename = \"network_3_google.html\""
+ ],
+ "metadata": {
+ "id": "YXrG2XY4ZleN"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def save_network_html(kb, filename=\"network.html\", use_notebook:bool=True):\n",
+ "    \"\"\"Render a knowledge base as a directed pyvis network and write it to an HTML file.\n",
+ "\n",
+ "    Args:\n",
+ "        kb: knowledge base exposing `entities` (one node is added per item yielded\n",
+ "            when iterating it) and `relations` (an iterable of dicts with \"head\",\n",
+ "            \"tail\" and \"type\" keys; one labelled edge is added per relation).\n",
+ "        filename: path of the HTML file handed to `Network.show`.\n",
+ "        use_notebook: when True (default), build the network in notebook mode with\n",
+ "            `cdn_resources='in_line'`; when False, build it with pyvis defaults.\n",
+ "    \"\"\"\n",
+ "    # settings shared by both modes\n",
+ "    net_options = dict(directed=True, width=\"700px\", height=\"700px\", bgcolor=\"#eeeeee\")\n",
+ "    if use_notebook:\n",
+ "        # 'in_line' asks pyvis to embed its JS/CSS resources in the generated page\n",
+ "        net_options.update(notebook=True, cdn_resources='in_line')\n",
+ "\n",
+ "    # create network\n",
+ "    net = Network(**net_options)\n",
+ "\n",
+ "    # nodes\n",
+ "    color_entity = \"#00FF00\"\n",
+ "    for entity in kb.entities:\n",
+ "        net.add_node(entity, shape=\"circle\", color=color_entity)\n",
+ "\n",
+ "    # edges: the relation type is used both as hover title and as visible label\n",
+ "    for relation in kb.relations:\n",
+ "        net.add_edge(relation[\"head\"], relation[\"tail\"],\n",
+ "                     title=relation[\"type\"], label=relation[\"type\"])\n",
+ "\n",
+ "    # layout\n",
+ "    net.repulsion(\n",
+ "        node_distance=200,\n",
+ "        central_gravity=0.2,\n",
+ "        spring_length=200,\n",
+ "        spring_strength=0.05,\n",
+ "        damping=0.09\n",
+ "    )\n",
+ "    net.set_edge_smooth('dynamic')\n",
+ "\n",
+ "    # save network\n",
+ "    # Pass the mode through explicitly so the rendering mode matches how the network\n",
+ "    # was built; relying on show()'s own default breaks the use_notebook=False path\n",
+ "    # when that default is notebook=True.\n",
+ "    # NOTE(review): needs a pyvis release whose show() accepts `notebook` - confirm.\n",
+ "    net.show(filename, notebook=use_notebook)"
+ ],
+ "metadata": {
+ "id": "gHtGi05rZlf6"
+ },
+ "execution_count": 27,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "save_network_html(kb, filename=filename)\n",
+ "IPython.display.HTML(filename=filename)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 751
+ },
+ "id": "9z4dTONkgfA-",
+ "outputId": "19168f6e-3223-4fe7-fc85-524d30815fe7"
+ },
+ "execution_count": 28,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "network_3_google.html\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "