From 98deabb3fd078ea704f3b3a9329c9551a83aec79 Mon Sep 17 00:00:00 2001 From: James Wexler Date: Wed, 8 May 2019 15:10:10 -0400 Subject: [PATCH] fix notebooks to work in firefox --- colab_facets.ipynb | 464 +++++++++++++--------------- facets_dive/Dive_demo.ipynb | 14 +- facets_overview/Overview_demo.ipynb | 53 +--- 3 files changed, 243 insertions(+), 288 deletions(-) diff --git a/colab_facets.ipynb b/colab_facets.ipynb index 879628e..caafa53 100644 --- a/colab_facets.ipynb +++ b/colab_facets.ipynb @@ -1,253 +1,227 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "name": "Facets Dive and Overview Colab Example", - "version": "0.3.2", - "views": {}, - "default_view": {}, - "provenance": [] + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - "kernelspec": { - "name": "python2", - "display_name": "Python 2" - } + "colab_type": "code", + "id": "blPpZw5R3Bb4" + }, + "outputs": [], + "source": [ + "# Load UCI census train and test data into dataframes.\n", + "import pandas as pd\n", + "features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\n", + " \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n", + " \"Hours per week\", \"Country\", \"Target\"]\n", + "train_data = pd.read_csv(\n", + " \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n", + " names=features,\n", + " sep=r'\\s*,\\s*',\n", + " engine='python',\n", + " na_values=\"?\")\n", + "test_data = pd.read_csv(\n", + " \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", + " names=features,\n", + " sep=r'\\s*,\\s*',\n", + " skiprows=[0],\n", + " engine='python',\n", + " na_values=\"?\")" + ] }, - "cells": [ - { - "metadata": { - "id": "blPpZw5R3Bb4", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# Load UCI census train and test data into dataframes.\n", - "import pandas as pd\n", - "features = [\"Age\", \"Workclass\", \"fnlwgt\", \"Education\", \"Education-Num\", \"Marital Status\",\n", - " \"Occupation\", \"Relationship\", \"Race\", \"Sex\", \"Capital Gain\", \"Capital Loss\",\n", - " \"Hours per week\", \"Country\", \"Target\"]\n", - "train_data = pd.read_csv(\n", - " \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n", - " names=features,\n", - " sep=r'\\s*,\\s*',\n", - " engine='python',\n", - " na_values=\"?\")\n", - "test_data = pd.read_csv(\n", - " \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test\",\n", - " names=features,\n", - " sep=r'\\s*,\\s*',\n", - " skiprows=[0],\n", - " engine='python',\n", - " na_values=\"?\")" - ], - "execution_count": 0, - "outputs": [] + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "base_uri": "https://localhost:8080/", + "height": 617 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 4749, + "status": "ok", + "timestamp": 1532523415979, + "user": { + "displayName": "James Wexler", + "photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", + "userId": "104529426628068202733" + }, + "user_tz": 240 }, - { - "metadata": { - "id": "XtOzRy8Z3M36", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "base_uri": "https://localhost:8080/", - "height": 617 - }, - "outputId": "9efa442d-1e11-416e-d57f-e57b6e7e16e4", - "executionInfo": { - "status": "ok", - "timestamp": 1532523415979, - "user_tz": 240, - "elapsed": 4749, - "user": { - "displayName": "James Wexler", - "photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", - "userId": "104529426628068202733" - } - } - }, - "cell_type": "code", - "source": [ - "\n", - "# Display the Dive visualization for the training data.\n", - "from IPython.core.display import display, HTML\n", - "\n", - "jsonstr = train_data.to_json(orient='records')\n", - "HTML_TEMPLATE = \"\"\"\n", - " \n", - " \"\"\"\n", - "html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", - "display(HTML(html))" - ], - "execution_count": 14, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - " \n", - " " - ] - }, - "metadata": { - "tags": [] - } - } - ] + "id": "XtOzRy8Z3M36", + "outputId": "9efa442d-1e11-416e-d57f-e57b6e7e16e4" + }, + "outputs": [], + "source": [ + "\n", + "# Display the Dive visualization for the training data.\n", + "from IPython.core.display import display, HTML\n", + "\n", + "jsonstr = train_data.to_json(orient='records')\n", + "HTML_TEMPLATE = \"\"\"\n", + " \n", + " \n", + " \n", + " \"\"\"\n", + "html = HTML_TEMPLATE.format(jsonstr=jsonstr)\n", + "display(HTML(html))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "base_uri": "https://localhost:8080/", + "height": 125 }, - { - "metadata": { - "id": "B22HH9kyeyQd", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "base_uri": "https://localhost:8080/", - "height": 125 - }, - "outputId": "323a4d74-8d40-480b-ac9f-58ccf7a4f990", - "executionInfo": { - "status": "ok", - "timestamp": 1532522957138, - "user_tz": 240, - "elapsed": 3967, - "user": { - "displayName": "James Wexler", - "photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", - "userId": "104529426628068202733" - } - } - }, - "cell_type": "code", - "source": [ - "# Clone the facets github repo to get access to the python feature stats generation code\n", - "!git clone https://github.com/pair-code/facets.git" - ], - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Cloning into 'facets'...\n", - "remote: Counting objects: 900, done.\u001b[K\n", - "remote: Compressing objects: 100% (33/33), done.\u001b[K\n", - "remote: Total 900 (delta 21), reused 14 (delta 7), pack-reused 860\u001b[K\n", - "Receiving objects: 100% (900/900), 17.44 MiB | 21.33 MiB/s, done.\n", - "Resolving deltas: 100% (530/530), done.\n" - ], - "name": "stdout" - } - ] + "colab_type": "code", + "executionInfo": { + "elapsed": 3967, + "status": "ok", + "timestamp": 1532522957138, + "user": { + "displayName": "James Wexler", + "photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", + "userId": "104529426628068202733" + }, + "user_tz": 240 }, - { - "metadata": { - "id": "mjv5Kr1Mflq7", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# Add the path to the feature stats generation code.\n", - "import sys\n", - "sys.path.insert(0, '/content/facets/facets_overview/python/')\n", - "\n", - "# Create the feature stats for the datasets and stringify it.\n", - "import base64\n", - "from generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n", - "\n", - "gfsg = GenericFeatureStatisticsGenerator()\n", - "proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': train_data},\n", - " {'name': 'test', 'table': test_data}])\n", - "protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")" - ], - "execution_count": 0, - "outputs": [] + "id": "B22HH9kyeyQd", + "outputId": "323a4d74-8d40-480b-ac9f-58ccf7a4f990" + }, + "outputs": [], + "source": [ + "# Clone the facets github repo to get access to the python feature stats generation code\n", + "!git clone https://github.com/pair-code/facets.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!ls /content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "b7zs2p2_goJa", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - }, - "base_uri": "https://localhost:8080/", - "height": 1028 - }, - "outputId": "22e211df-972f-49b9-f271-75e0d4ba68ee", - "executionInfo": { - "status": "ok", - "timestamp": 1532523370507, - "user_tz": 240, - "elapsed": 369, - "user": { - "displayName": "James Wexler", - "photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", - "userId": "104529426628068202733" - } - } - }, - "cell_type": "code", - "source": [ - "# Display the facets overview visualization for this data\n", - "from IPython.core.display import display, HTML\n", - "\n", - "HTML_TEMPLATE = \"\"\"\n", - " \n", - " \"\"\"\n", - "html = HTML_TEMPLATE.format(protostr=protostr)\n", - "display(HTML(html))" - ], - "execution_count": 13, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - " \n", - " " - ] - }, - "metadata": { - "tags": [] - } - } - ] - } - ] -} \ No newline at end of file + "colab_type": "code", + "id": "mjv5Kr1Mflq7" + }, + "outputs": [], + "source": [ + "# Add the path to the feature stats generation code.\n", + "import sys\n", + "sys.path.insert(0, '/content/facets/facets_overview/python/')\n", + "\n", + "# Create the feature stats for the datasets and stringify it.\n", + "import base64\n", + "from generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n", + "\n", + "gfsg = GenericFeatureStatisticsGenerator()\n", + "proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': train_data},\n", + " {'name': 'test', 'table': test_data}])\n", + "protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "base_uri": "https://localhost:8080/", + "height": 1028 + }, + "colab_type": "code", + "executionInfo": { + "elapsed": 369, + "status": "ok", + "timestamp": 1532523370507, + "user": { + "displayName": "James Wexler", + "photoUrl": "//lh4.googleusercontent.com/-TJBPojJ2kd8/AAAAAAAAAAI/AAAAAAAAABE/YrSFlsiqR80/s50-c-k-no/photo.jpg", + "userId": "104529426628068202733" + }, + "user_tz": 240 + }, + "id": "b7zs2p2_goJa", + "outputId": "22e211df-972f-49b9-f271-75e0d4ba68ee" + }, + "outputs": [], + "source": [ + "# Display the facets overview visualization for this data\n", + "from IPython.core.display import display, HTML\n", + "\n", + "HTML_TEMPLATE = \"\"\"\n", + " \n", + " \n", + " \n", + " \"\"\"\n", + "html = HTML_TEMPLATE.format(protostr=protostr)\n", + "display(HTML(html))" + ] + } + ], + "metadata": { + "colab": { + "default_view": {}, + "name": "Facets Dive and Overview Colab Example", + "provenance": [], + "version": "0.3.2", + "views": {} + }, + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.16" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/facets_dive/Dive_demo.ipynb b/facets_dive/Dive_demo.ipynb index 6881886..11d9aff 100644 --- a/facets_dive/Dive_demo.ipynb +++ b/facets_dive/Dive_demo.ipynb @@ -57,7 +57,9 @@ "from IPython.core.display import display, HTML\n", "\n", "# Create Facets template \n", - "HTML_TEMPLATE = \"\"\"\n", + "HTML_TEMPLATE = \"\"\"\n", + " \n", + " \n", " \n", " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Display the facets overview visualization for this data\n", "from IPython.core.display import display, HTML\n", "\n", - "HTML_TEMPLATE = \"\"\"\n", + "HTML_TEMPLATE = \"\"\"\n", + " \n", + " \n", " \n", "