diff --git a/notebooks/EnglishTTS.ipynb b/notebooks/EnglishTTS.ipynb index 0adba40..7133018 100644 --- a/notebooks/EnglishTTS.ipynb +++ b/notebooks/EnglishTTS.ipynb @@ -16,11 +16,11 @@ }, "cells": [ { + "cell_type": "markdown", "metadata": { "id": "YmNSTzuGFiDj", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "# English Text To Speech\n", "\n", @@ -35,11 +35,11 @@ ] }, { + "cell_type": "markdown", "metadata": { "id": "N4vqXzZI2fro", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "## Setup\n", "\n", @@ -47,56 +47,42 @@ ] }, { + "cell_type": "code", "metadata": { "id": "G4-CKFr22uBB", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ "import os\n", "from os.path import exists, join, expanduser\n", "\n", "project_name = \"pytorch-dc-tts\"\n", "if not exists(project_name):\n", - " ! git clone --quiet https://github.com/tugstugi/$project_name" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "DXTz7S0H3K9r", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "! cd /content/$project_name && pip install -q -r requirements.txt" + " ! git clone --quiet https://github.com/tugstugi/{project_name}\n", + " ! cd {project_name} && pip install -q -r requirements.txt" ], "execution_count": 0, "outputs": [] }, { + "cell_type": "markdown", "metadata": { "id": "n5XKzALS3P1r", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "### Download pretrained models" ] }, { + "cell_type": "code", "metadata": { "id": "Dc5b0-HV3eh7", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ - "! cd /content/$project_name\n", - "\n", "# download text2mel\n", "if not exists(\"ljspeech-text2mel.pth\"):\n", " ! wget -q -O ljspeech-text2mel.pth https://www.dropbox.com/s/4t13ugxzzgnocbj/step-300K.pth\n", @@ -109,11 +95,11 @@ "outputs": [] }, { + "cell_type": "markdown", "metadata": { "id": "TiZA7qgn7kCj", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "## Synthesize\n", "\n", @@ -121,15 +107,15 @@ ] }, { + "cell_type": "code", "metadata": { "id": "LjqMQn2y6j58", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ "import sys\n", - "sys.path.append(join(\"/content\", project_name))\n", + "sys.path.append(project_name)\n", "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\") # ignore warnings in this notebook\n", @@ -150,26 +136,30 @@ "outputs": [] }, { + "cell_type": "code", "metadata": { "id": "eVxfJD6I7yca", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ "torch.set_grad_enabled(False)\n", - "text2mel = torch.load(\"ljspeech-text2mel.pth\").eval()\n", - "ssrn = torch.load(\"ljspeech-ssrn.pth\").eval()" + "text2mel = Text2Mel(vocab)\n", + "text2mel.load_state_dict(torch.load(\"ljspeech-text2mel.pth\").state_dict())\n", + "text2mel = text2mel.eval()\n", + "ssrn = SSRN()\n", + "ssrn.load_state_dict(torch.load(\"ljspeech-ssrn.pth\").state_dict())\n", + "ssrn = ssrn.eval()" ], "execution_count": 0, "outputs": [] }, { + "cell_type": "markdown", "metadata": { "id": "h_VB9q8q_Wq1", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "### Allowed characters\n", "\n", @@ -179,12 +169,12 @@ ] }, { + "cell_type": "code", "metadata": { "id": "GtgxZbfG_DgM", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ "SENTENCES = [\n", " \"The birch canoe slid on the smooth planks.\",\n", @@ -213,26 +203,26 @@ "outputs": [] }, { + "cell_type": "markdown", "metadata": { "id": "yT7GG7OA_0Tp", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "### Synthetize on CPU" ] }, { + "cell_type": "code", "metadata": { "id": "jLU2p4Gq_12d", "colab_type": "code", - "outputId": "b8ec835c-70c8-41f8-9c50-680870836daf", + "outputId": "67c50529-6922-4258-9cbd-1717c737e575", "colab": { "base_uri": "https://localhost:8080/", - "height": 1517 + "height": 1550 } }, - "cell_type": "code", "source": [ "# synthetize by one by one because there is a batch processing bug!\n", "for i in range(len(SENTENCES)):\n", @@ -276,7 +266,7 @@ "text/html": [ "\n", " \n", " " @@ -302,7 +292,7 @@ "text/html": [ "\n", " \n", " " @@ -328,7 +318,7 @@ "text/html": [ "\n", " \n", " " @@ -354,7 +344,7 @@ "text/html": [ "\n", " \n", " " @@ -380,7 +370,7 @@ "text/html": [ "\n", " \n", " " @@ -406,7 +396,7 @@ "text/html": [ "\n", " \n", " " @@ -432,7 +422,7 @@ "text/html": [ "\n", " \n", " " @@ -458,7 +448,7 @@ "text/html": [ "\n", " \n", " " @@ -484,7 +474,7 @@ "text/html": [ "\n", " \n", " " @@ -510,7 +500,7 @@ "text/html": [ "\n", " \n", " " @@ -536,7 +526,7 @@ "text/html": [ "\n", " \n", " " @@ -562,7 +552,7 @@ "text/html": [ "\n", " \n", " " @@ -588,7 +578,7 @@ "text/html": [ "\n", " \n", " " @@ -614,7 +604,7 @@ "text/html": [ "\n", " \n", " " @@ -640,7 +630,7 @@ "text/html": [ "\n", " \n", " " @@ -666,7 +656,7 @@ "text/html": [ "\n", " \n", " " @@ -692,7 +682,7 @@ "text/html": [ "\n", " \n", " " @@ -718,7 +708,7 @@ "text/html": [ "\n", " \n", " " @@ -744,7 +734,7 @@ "text/html": [ "\n", " \n", " " @@ -770,7 +760,7 @@ "text/html": [ "\n", " \n", " " diff --git a/notebooks/MongolianTTS.ipynb b/notebooks/MongolianTTS.ipynb index ebd0868..c260ca2 100644 --- a/notebooks/MongolianTTS.ipynb +++ b/notebooks/MongolianTTS.ipynb @@ -5,7 +5,8 @@ "colab": { "name": "MongolianTTS.ipynb", "version": "0.3.2", - "provenance": [] + "provenance": [], + "collapsed_sections": [] }, "kernelspec": { "name": "python3", @@ -15,11 +16,11 @@ }, "cells": [ { + "cell_type": "markdown", "metadata": { "id": "YmNSTzuGFiDj", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "# Mongolian Text To Speech\n", "\n", @@ -34,11 +35,11 @@ ] }, { + "cell_type": "markdown", "metadata": { "id": "N4vqXzZI2fro", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "## Setup\n", "\n", @@ -46,56 +47,42 @@ ] }, { + "cell_type": "code", "metadata": { "id": "G4-CKFr22uBB", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ "import os\n", "from os.path import exists, join, expanduser\n", "\n", "project_name = \"pytorch-dc-tts\"\n", "if not exists(project_name):\n", - " ! git clone --quiet https://github.com/tugstugi/$project_name" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "DXTz7S0H3K9r", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "! cd /content/$project_name && pip install -q -r requirements.txt" + " ! git clone --quiet https://github.com/tugstugi/{project_name}\n", + " ! cd {project_name} && pip install -q -r requirements.txt" ], "execution_count": 0, "outputs": [] }, { + "cell_type": "markdown", "metadata": { "id": "n5XKzALS3P1r", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "### Download pretrained models" ] }, { + "cell_type": "code", "metadata": { "id": "Dc5b0-HV3eh7", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ - "! cd /content/$project_name\n", - "\n", "# download text2mel\n", "if not exists(\"mbspeech-text2mel.pth\"):\n", " ! wget -q -O mbspeech-text2mel.pth https://www.dropbox.com/s/wu26k6tu5hz8hq1/step-200K.pth\n", @@ -108,11 +95,11 @@ "outputs": [] }, { + "cell_type": "markdown", "metadata": { "id": "TiZA7qgn7kCj", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "## Synthesize\n", "\n", @@ -120,15 +107,15 @@ ] }, { + "cell_type": "code", "metadata": { "id": "LjqMQn2y6j58", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ "import sys\n", - "sys.path.append(join(\"/content\", project_name))\n", + "sys.path.append(project_name)\n", "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\") # ignore warnings in this notebook\n", @@ -149,26 +136,30 @@ "outputs": [] }, { + "cell_type": "code", "metadata": { "id": "eVxfJD6I7yca", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ "torch.set_grad_enabled(False)\n", - "text2mel = torch.load(\"mbspeech-text2mel.pth\").eval()\n", - "ssrn = torch.load(\"mbspeech-ssrn.pth\").eval()" + "text2mel = Text2Mel(vocab)\n", + "text2mel.load_state_dict(torch.load(\"mbspeech-text2mel.pth\").state_dict())\n", + "text2mel = text2mel.eval()\n", + "ssrn = SSRN()\n", + "ssrn.load_state_dict(torch.load(\"mbspeech-ssrn.pth\").state_dict())\n", + "ssrn = ssrn.eval()" ], "execution_count": 0, "outputs": [] }, { + "cell_type": "markdown", "metadata": { "id": "h_VB9q8q_Wq1", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "### Allowed characters\n", "\n", @@ -178,12 +169,12 @@ ] }, { + "cell_type": "code", "metadata": { "id": "GtgxZbfG_DgM", "colab_type": "code", "colab": {} }, - "cell_type": "code", "source": [ "SENTENCES = [\n", " \"Хэнтий, Хангай, Соёны өндөр сайхан нуруунууд. Хойд зүгийн чимэг болсон ой хөвч уулнууд.\",\n", @@ -195,26 +186,26 @@ "outputs": [] }, { + "cell_type": "markdown", "metadata": { "id": "yT7GG7OA_0Tp", "colab_type": "text" }, - "cell_type": "markdown", "source": [ "### Synthetize on CPU" ] }, { + "cell_type": "code", "metadata": { "id": "jLU2p4Gq_12d", "colab_type": "code", - "outputId": "1c102d58-90d5-4593-e71d-d02554035d17", + "outputId": "4af92580-0186-46a1-d5f6-aecd4ab846f1", "colab": { "base_uri": "https://localhost:8080/", - "height": 242 + "height": 246 } }, - "cell_type": "code", "source": [ "# synthetize by one by one because there is a batch processing bug!\n", "for i in range(len(SENTENCES)):\n", @@ -258,7 +249,7 @@ "text/html": [ "\n", " \n", " " @@ -284,7 +275,7 @@ "text/html": [ "\n", " \n", " " @@ -310,7 +301,7 @@ "text/html": [ "\n", " \n", " "