Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

masakhane audit notebook #163

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/MasakhaneAudit(1).ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"MasakhaneAudit.ipynb","provenance":[{"file_id":"https://github.com/u20772077/masakhane-mt/blob/master/benchmarks/MasakhaneAudit.ipynb","timestamp":1628494944201}],"collapsed_sections":[],"authorship_tag":"ABX9TyNrFhO//FLD/Che6PZtFVmU"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"id":"8sjn6OuYaXhZ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1628863876164,"user_tz":-120,"elapsed":87267,"user":{"displayName":"Thapelo Sindane","photoUrl":"","userId":"08710202319891453084"}},"outputId":"da69e384-498c-49c6-c096-bf050e9cd810"},"source":["# Mount google drive\n","from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"5sbVCxWn2BHh","executionInfo":{"status":"ok","timestamp":1628863885302,"user_tz":-120,"elapsed":451,"user":{"displayName":"Thapelo Sindane","photoUrl":"","userId":"08710202319891453084"}}},"source":["# TODO: Set your source and target languages. 
Keep in mind, these traditionally use ISO 639 language codes.\n","# These will also become the suffixes of all vocab and corpus files used throughout\n","import os\n","source_language = \"fr\"\n","target_language = \"swc\" \n","lc = False # If True, lowercase the data.\n","seed = 42 # Random seed for shuffling.\n","tag = \"baseline\" # Give a unique name to your folder - this is to ensure you don't overwrite any models you've already submitted\n","\n","os.environ[\"src\"] = source_language # Sets them in bash as well, since we often use bash scripts\n","os.environ[\"tgt\"] = target_language\n","os.environ[\"tag\"] = tag\n","\n","# This will save it to a folder in our gdrive instead!\n","!mkdir -p \"/content/drive/My Drive/masakhane/$src-$tgt-$tag\"\n","os.environ[\"gdrive_path\"] = \"/content/drive/My Drive/masakhane/%s-%s-%s\" % (source_language, target_language, tag)"],"execution_count":3,"outputs":[]},{"cell_type":"code","metadata":{"id":"m1RJVSRF2LST","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1628684730795,"user_tz":-120,"elapsed":357,"user":{"displayName":"Thapelo Sindane","photoUrl":"","userId":"08710202319891453084"}},"outputId":"204e8b9e-1e88-4386-d46d-856366ec45a3"},"source":["!echo $gdrive_path"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/content/drive/My Drive/masakhane/en-zu-baseline\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"zYTiLeqo2XOP","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1628864052248,"user_tz":-120,"elapsed":162591,"user":{"displayName":"Thapelo Sindane","photoUrl":"","userId":"08710202319891453084"}},"outputId":"888b0f0d-d6e7-4bb0-8f8a-7f07850ea32a"},"source":["# Install JoeyNMT\n","! git clone https://github.com/joeynmt/joeynmt.git\n","! 
cd joeynmt; pip3 install ."],"execution_count":4,"outputs":[{"output_type":"stream","text":["Cloning into 'joeynmt'...\n","remote: Enumerating objects: 3127, done.\u001b[K\n","remote: Counting objects: 100% (176/176), done.\u001b[K\n","remote: Compressing objects: 100% (85/85), done.\u001b[K\n","remote: Total 3127 (delta 101), reused 142 (delta 91), pack-reused 2951\u001b[K\n","Receiving objects: 100% (3127/3127), 8.09 MiB | 3.04 MiB/s, done.\n","Resolving deltas: 100% (2130/2130), done.\n","Processing /content/joeynmt\n","\u001b[33m DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.\n"," pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.\u001b[0m\n","Requirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from joeynmt==1.3) (0.16.0)\n","Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from joeynmt==1.3) (7.1.2)\n","Collecting numpy==1.20.1\n"," Downloading numpy-1.20.1-cp37-cp37m-manylinux2010_x86_64.whl (15.3 MB)\n","\u001b[K |████████████████████████████████| 15.3 MB 94 kB/s \n","\u001b[?25hRequirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.7/dist-packages (from joeynmt==1.3) (57.2.0)\n","Collecting torch==1.8.0\n"," Downloading torch-1.8.0-cp37-cp37m-manylinux1_x86_64.whl (735.5 MB)\n","\u001b[K |████████████████████████████████| 735.5 MB 13 kB/s \n","\u001b[?25hRequirement already satisfied: tensorboard>=1.15 in /usr/local/lib/python3.7/dist-packages (from joeynmt==1.3) (2.5.0)\n","Collecting torchtext==0.9.0\n"," Downloading torchtext-0.9.0-cp37-cp37m-manylinux1_x86_64.whl (7.1 MB)\n","\u001b[K |████████████████████████████████| 7.1 MB 16.9 MB/s \n","\u001b[?25hCollecting sacrebleu>=1.3.6\n"," 
Downloading sacrebleu-2.0.0-py3-none-any.whl (90 kB)\n","\u001b[K |████████████████████████████████| 90 kB 7.8 MB/s \n","\u001b[?25hCollecting subword-nmt\n"," Downloading subword_nmt-0.3.7-py2.py3-none-any.whl (26 kB)\n","Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from joeynmt==1.3) (3.2.2)\n","Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from joeynmt==1.3) (0.11.1)\n","Collecting pyyaml>=5.1\n"," Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)\n","\u001b[K |████████████████████████████████| 636 kB 57.6 MB/s \n","\u001b[?25hCollecting pylint\n"," Downloading pylint-2.9.6-py3-none-any.whl (375 kB)\n","\u001b[K |████████████████████████████████| 375 kB 49.7 MB/s \n","\u001b[?25hCollecting six==1.12\n"," Downloading six-1.12.0-py2.py3-none-any.whl (10 kB)\n","Collecting wrapt==1.11.1\n"," Downloading wrapt-1.11.1.tar.gz (27 kB)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch==1.8.0->joeynmt==1.3) (3.7.4.3)\n","Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from torchtext==0.9.0->joeynmt==1.3) (2.23.0)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from torchtext==0.9.0->joeynmt==1.3) (4.41.1)\n","Collecting portalocker\n"," Downloading portalocker-2.3.0-py2.py3-none-any.whl (15 kB)\n","Requirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.7/dist-packages (from sacrebleu>=1.3.6->joeynmt==1.3) (0.8.9)\n","Collecting colorama\n"," Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n","Requirement already satisfied: regex in /usr/local/lib/python3.7/dist-packages (from sacrebleu>=1.3.6->joeynmt==1.3) (2019.12.20)\n","Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (1.34.1)\n","Requirement already satisfied: google-auth<2,>=1.6.3 in 
/usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (1.32.1)\n","Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (0.6.1)\n","Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (1.0.1)\n","Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (0.4.4)\n","Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (3.3.4)\n","Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (1.8.0)\n","Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (0.12.0)\n","Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (3.17.3)\n","Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=1.15->joeynmt==1.3) (0.36.2)\n","Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.15->joeynmt==1.3) (4.2.2)\n","Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.15->joeynmt==1.3) (4.7.2)\n","Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=1.15->joeynmt==1.3) (0.2.8)\n","Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=1.15->joeynmt==1.3) (1.3.0)\n","Requirement already satisfied: importlib-metadata in 
/usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard>=1.15->joeynmt==1.3) (4.6.1)\n","Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard>=1.15->joeynmt==1.3) (0.4.8)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0->joeynmt==1.3) (3.0.4)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0->joeynmt==1.3) (2021.5.30)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0->joeynmt==1.3) (2.10)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0->joeynmt==1.3) (1.24.3)\n","Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=1.15->joeynmt==1.3) (3.1.1)\n","Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard>=1.15->joeynmt==1.3) (3.5.0)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->joeynmt==1.3) (1.3.1)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->joeynmt==1.3) (0.10.0)\n","Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->joeynmt==1.3) (2.4.7)\n","Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->joeynmt==1.3) (2.8.1)\n","Collecting isort<6,>=4.2.5\n"," Downloading isort-5.9.3-py3-none-any.whl (106 kB)\n","\u001b[K |████████████████████████████████| 106 kB 57.8 
MB/s \n","\u001b[?25hCollecting mccabe<0.7,>=0.6\n"," Downloading mccabe-0.6.1-py2.py3-none-any.whl (8.6 kB)\n","Requirement already satisfied: toml>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from pylint->joeynmt==1.3) (0.10.2)\n","Collecting astroid<2.7,>=2.6.5\n"," Downloading astroid-2.6.6-py3-none-any.whl (231 kB)\n","\u001b[K |████████████████████████████████| 231 kB 60.3 MB/s \n","\u001b[?25hCollecting typed-ast<1.5,>=1.4.0\n"," Downloading typed_ast-1.4.3-cp37-cp37m-manylinux1_x86_64.whl (743 kB)\n","\u001b[K |████████████████████████████████| 743 kB 55.4 MB/s \n","\u001b[?25hCollecting lazy-object-proxy>=1.4.0\n"," Downloading lazy_object_proxy-1.6.0-cp37-cp37m-manylinux1_x86_64.whl (55 kB)\n","\u001b[K |████████████████████████████████| 55 kB 3.1 MB/s \n","\u001b[?25hRequirement already satisfied: pandas>=0.23 in /usr/local/lib/python3.7/dist-packages (from seaborn->joeynmt==1.3) (1.1.5)\n","Requirement already satisfied: scipy>=1.0 in /usr/local/lib/python3.7/dist-packages (from seaborn->joeynmt==1.3) (1.4.1)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.23->seaborn->joeynmt==1.3) (2018.9)\n","Building wheels for collected packages: joeynmt, wrapt\n"," Building wheel for joeynmt (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for joeynmt: filename=joeynmt-1.3-py3-none-any.whl size=85116 sha256=ebd954f3e3fa0d0255410ed79d5a197bd50b75d50fbaa64af1658ccd6ae85898\n"," Stored in directory: /tmp/pip-ephem-wheel-cache-xyemcf06/wheels/0a/f4/bf/6c9d3b8efbfece6cd209f865be37382b02e7c3584df2e28ca4\n"," Building wheel for wrapt (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for wrapt: filename=wrapt-1.11.1-cp37-cp37m-linux_x86_64.whl size=68437 sha256=4a73682adff84c22d3626cbfbdab8fa38b0487815daa6c0b1345b1385c2a37ea\n"," Stored in directory: /root/.cache/pip/wheels/4e/58/9d/da8bad4545585ca52311498ff677647c95c7b690b3040171f8\n","Successfully built joeynmt wrapt\n","Installing collected packages: six, wrapt, typed-ast, numpy, lazy-object-proxy, torch, portalocker, mccabe, isort, colorama, astroid, torchtext, subword-nmt, sacrebleu, pyyaml, pylint, joeynmt\n"," Attempting uninstall: six\n"," Found existing installation: six 1.15.0\n"," Uninstalling six-1.15.0:\n"," Successfully uninstalled six-1.15.0\n"," Attempting uninstall: wrapt\n"," Found existing installation: wrapt 1.12.1\n"," Uninstalling wrapt-1.12.1:\n"," Successfully uninstalled wrapt-1.12.1\n"," Attempting uninstall: numpy\n"," Found existing installation: numpy 1.19.5\n"," Uninstalling numpy-1.19.5:\n"," Successfully uninstalled numpy-1.19.5\n"," Attempting uninstall: torch\n"," Found existing installation: torch 1.9.0+cu102\n"," Uninstalling torch-1.9.0+cu102:\n"," Successfully uninstalled torch-1.9.0+cu102\n"," Attempting uninstall: torchtext\n"," Found existing installation: torchtext 0.10.0\n"," Uninstalling torchtext-0.10.0:\n"," Successfully uninstalled torchtext-0.10.0\n"," Attempting uninstall: pyyaml\n"," Found existing installation: PyYAML 3.13\n"," Uninstalling PyYAML-3.13:\n"," Successfully uninstalled PyYAML-3.13\n","\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n","torchvision 0.10.0+cu102 requires torch==1.9.0, but you have torch 1.8.0 which is incompatible.\n","tensorflow 2.5.0 requires numpy~=1.19.2, but you have numpy 1.20.1 which is incompatible.\n","tensorflow 2.5.0 requires six~=1.15.0, but you have six 1.12.0 which is incompatible.\n","tensorflow 2.5.0 requires wrapt~=1.12.1, but you have wrapt 1.11.1 which is incompatible.\n","google-colab 1.0.0 requires six~=1.15.0, but you have six 1.12.0 which is incompatible.\n","google-api-python-client 1.12.8 requires six<2dev,>=1.13.0, but you have six 1.12.0 which is incompatible.\n","google-api-core 1.26.3 requires six>=1.13.0, but you have six 1.12.0 which is incompatible.\n","datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\n","albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.\u001b[0m\n","Successfully installed astroid-2.6.6 colorama-0.4.4 isort-5.9.3 joeynmt-1.3 lazy-object-proxy-1.6.0 mccabe-0.6.1 numpy-1.20.1 portalocker-2.3.0 pylint-2.9.6 pyyaml-5.4.1 sacrebleu-2.0.0 six-1.12.0 subword-nmt-0.3.7 torch-1.8.0 torchtext-0.9.0 typed-ast-1.4.3 wrapt-1.11.1\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"R2io82oo2boq","executionInfo":{"status":"ok","timestamp":1628864082289,"user_tz":-120,"elapsed":9001,"user":{"displayName":"Thapelo Sindane","photoUrl":"","userId":"08710202319891453084"}}},"source":["# copy files from drive to joeynmt\n","!cp -r \"$gdrive_path/models/${src}${tgt}_transformer/\" /content/joeynmt/"],"execution_count":5,"outputs":[]},{"cell_type":"code","metadata":{"id":"UJJAq5rJ298H","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1628864098346,"user_tz":-120,"elapsed":2738,"user":{"displayName":"Thapelo Sindane","photoUrl":"","userId":"08710202319891453084"}},"outputId":"77962a11-414f-480a-bf7d-8c8342f460b1"},"source":["\n","# Test our 
model\n","! cd joeynmt; python3 -m joeynmt test \"$gdrive_path/models/${src}${tgt}_transformer/config.yaml\""],"execution_count":6,"outputs":[{"output_type":"stream","text":["2021-08-13 14:14:57,463 - INFO - root - Hello! This is Joey-NMT (version 1.3).\n","2021-08-13 14:14:57,466 - INFO - joeynmt.data - Building vocabulary...\n","2021-08-13 14:14:57,816 - INFO - joeynmt.data - Loading dev data...\n","Traceback (most recent call last):\n"," File \"/usr/lib/python3.7/runpy.py\", line 193, in _run_module_as_main\n"," \"__main__\", mod_spec)\n"," File \"/usr/lib/python3.7/runpy.py\", line 85, in _run_code\n"," exec(code, run_globals)\n"," File \"/content/joeynmt/joeynmt/__main__.py\", line 48, in <module>\n"," main()\n"," File \"/content/joeynmt/joeynmt/__main__.py\", line 38, in main\n"," output_path=args.output_path, save_attention=args.save_attention)\n"," File \"/content/joeynmt/joeynmt/prediction.py\", line 293, in test\n"," data_cfg=cfg[\"data\"], datasets=[\"dev\", \"test\"])\n"," File \"/content/joeynmt/joeynmt/data.py\", line 122, in load_data\n"," fields=(src_field, trg_field))\n"," File \"/usr/local/lib/python3.7/dist-packages/torchtext/legacy/datasets/translation.py\", line 34, in __init__\n"," with io.open(src_path, mode='r', encoding='utf-8') as src_file, \\\n","FileNotFoundError: [Errno 2] No such file or directory: 'data/frswc/dev.bpe.fr'\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"2PLtdcA58BLp"},"source":[""],"execution_count":null,"outputs":[]}]}
Loading