Commit: modify readme file
purveshpatel511 committed May 5, 2020
1 parent 911dda8 commit ceb1648
Showing 5 changed files with 242 additions and 9 deletions.
233 changes: 233 additions & 0 deletions .ipynb_checkpoints/imageCaptioning-checkpoint.ipynb
@@ -0,0 +1,233 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "JDAgp_sTX_st"
},
"outputs": [],
"source": [
"######## Install Tensorflow version 1.13.2\n",
"!pip install tensorflow==1.13.2"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "LH2Ikk20YiH_"
},
"outputs": [],
"source": [
"######## Official git repo\n",
"##\n",
"## https://www.github.com/purvesh-linux11/imageCaptioning\n",
"##\n",
"##### repo is made private. import project files from drive.\n",
"\n",
"######## Copy Project from drive\n",
"!cp -r /driv/path /content/"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "cannot import name 'configuration' from 'im2txt' (/home/matrix11/.local/lib/python3.8/site-packages/im2txt/__init__.py)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-96434638bdaf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mim2txt\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mconfiguration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mim2txt\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0minference_wrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mim2txt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minference_utils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcaption_generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mim2txt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minference_utils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mvocabulary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mImportError\u001b[0m: cannot import name 'configuration' from 'im2txt' (/home/matrix11/.local/lib/python3.8/site-packages/im2txt/__init__.py)"
]
}
],
"source": [
"from im2txt import configuration\n",
"from im2txt import inference_wrapper\n",
"from im2txt.inference_utils import caption_generator\n",
"from im2txt.inference_utils import vocabulary"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "-ErYFuETaEGX"
},
"outputs": [],
"source": [
"from __future__ import absolute_import\n",
"from __future__ import division\n",
"from __future__ import print_function\n",
"\n",
"import math\n",
"import os\n",
"from os import listdir, path\n",
"\n",
"import tensorflow as tf\n",
"import cv2\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from IPython import display\n",
"\n",
"import PIL.Image\n",
"import PIL.ImageOps\n",
"import PIL.ImageFont\n",
"import PIL.ImageDraw\n",
"import textwrap\n",
"from io import BytesIO\n",
"\n",
"from im2txt import configuration\n",
"from im2txt import inference_wrapper\n",
"from im2txt.inference_utils import caption_generator\n",
"from im2txt.inference_utils import vocabulary"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "fN-tgbJMbNHt"
},
"outputs": [],
"source": [
"# Choose the trained model --> current is 2\n",
"model_number = \"2\"\n",
"model_path = \"/content/model.ckpt-\"+model_number+\"000000\" # Give model path\n",
"vocab_path = \"/content/word_counts\"+model_number+\".txt\" # Give word_counts file path\n",
"tf.logging.set_verbosity(tf.logging.INFO)\n",
"\n",
"# Build the inference graph.\n",
"g = tf.Graph()\n",
"with g.as_default():\n",
" model = inference_wrapper.InferenceWrapper()\n",
" restore_fn = model.build_graph_from_config(configuration.ModelConfig(), model_path)\n",
"g.finalize()\n",
"\n",
"# Create the vocabulary.\n",
"vocab = vocabulary.Vocabulary(vocab_path) \n",
"\n",
"#######################################################\n",
"### if tensorflow version is <1.13.2 then you have to check variables name as per tensorflow version\n",
"\n",
"OLD_CHECKPOINT_FILE = \"/content/model.ckpt-2000000\"\n",
"NEW_CHECKPOINT_FILE = \"/content/model.ckpt-2000000\"\n",
"\n",
"import tensorflow as tf\n",
"vars_to_rename = {\n",
" \"lstm/BasicLSTMCell/Linear/Matrix\": \"lstm/basic_lstm_cell/kernel\",\n",
" \"lstm/BasicLSTMCell/Linear/Bias\": \"lstm/basic_lstm_cell/bias\",\n",
"}\n",
"new_checkpoint_vars = {}\n",
"reader = tf.train.NewCheckpointReader(OLD_CHECKPOINT_FILE)\n",
"for old_name in reader.get_variable_to_shape_map():\n",
" if old_name in vars_to_rename:\n",
" new_name = vars_to_rename[old_name]\n",
" else:\n",
" new_name = old_name\n",
" new_checkpoint_vars[new_name] = tf.Variable(reader.get_tensor(old_name))\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver(new_checkpoint_vars)\n",
"\n",
"with tf.Session() as sess:\n",
" sess.run(init)\n",
" saver.save(sess, NEW_CHECKPOINT_FILE)\n",
"\n",
"#######################################################\n",
"\n",
"sess = tf.Session(graph=g)\n",
"# Load the model from checkpoint.\n",
"restore_fn(sess)\n",
"\n",
"# Prepare the caption generator. Here we are implicitly using the default\n",
"# beam search parameters. See caption_generator.py for a description of the\n",
"# available beam search parameters.\n",
"generator = caption_generator.CaptionGenerator(model, vocab)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "DIZLzNZIbgU-"
},
"outputs": [],
"source": [
"image_path = \"./photos/\" #### provide path where image is stored\n",
"filenames = listdir(image_path)\n",
"font = PIL.ImageFont.truetype(\"./fonts/Ubuntu-B.ttf\", 16)\n",
"\n",
"for file in filenames:\n",
" try:\n",
" print(image_path+file)\n",
" img = PIL.Image.open(image_path+file).convert('RGBA')\n",
" box = PIL.Image.new('RGBA', img.size, (255,255,255,0))\n",
" draw = PIL.ImageDraw.Draw(box)\n",
" image = open(image_path+file,'rb').read() # Read the image as bytes\n",
" captions = generator.beam_search(sess, image)\n",
" for i, caption in enumerate(captions):\n",
" # Ignore begin and end words.\n",
" sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]\n",
" sentence = \" \".join(sentence)\n",
" print(\" %d) %s (p=%f)\" % (i, sentence, math.exp(caption.logprob)))\n",
" if i==0:\n",
" margin = offset = 0\n",
" for line in textwrap.wrap(sentence, width=52):\n",
" offset_a = offset\n",
" offset += font.getsize(line)[1]\n",
" draw.rectangle(((0, offset_a), (img.size[0], offset)), fill=(0,0,0,128))\n",
" draw.text((margin, offset_a), line, (255,255,255),font=font)\n",
" out = PIL.Image.alpha_composite(img, box)\n",
" out.show()\n",
" out.save(\"./results/model_\"+model_number+\"-image_\"+file) # Provide destination path to store captioned image. ### In colab, comment this.\n",
" except KeyboardInterrupt:\n",
" break"
]
}
],
"metadata": {
"accelerator": "TPU",
"colab": {
"collapsed_sections": [],
"name": "imageCaptioning.ipynb",
"private_outputs": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
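
A note on the ImportError shown in the third cell: the `im2txt` package installed from PyPI does not provide `configuration` or `inference_wrapper`; those modules come from the im2txt research code in the tensorflow/models repository. The sketch below is one possible workaround and is not part of this commit — the clone destination and directory layout are assumptions based on the historical tensorflow/models layout (the im2txt code may have moved or been archived in newer revisions), so adjust the paths for your environment.

```python
# Hypothetical fix for the ImportError above (paths are assumptions): clone the
# tensorflow/models repository and put its research/im2txt directory on
# sys.path so that `from im2txt import configuration` resolves.
import sys

!git clone https://github.com/tensorflow/models.git /content/models
sys.path.append("/content/models/research/im2txt")

from im2txt import configuration
from im2txt import inference_wrapper
from im2txt.inference_utils import caption_generator
from im2txt.inference_utils import vocabulary
```

Because of the `!git clone` shell escape, this is meant to be run as a notebook cell rather than as a plain Python script.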
10 changes: 5 additions & 5 deletions README.md
@@ -17,12 +17,12 @@ Run ```imageCaptioning.ipynb``` file for generating caption.


### Example
-![dog.jpg](photos/dog.jpg)
+![plane.jpg](photos/plane.jpg)

caption generated for this image is,
```
-./photos/dog.jpg
-0): a dog with a red collar is sitting on a couch .
-1): a dog with a red collar is sitting on the floor .
-2): a dog with a red collar is sitting on a bench .
+./photos/plane.jpeg
+0): a plane is flying in blue sky .
+1): a plane is flying in cloudy sky .
+2): a plane is flying in blue and cloudy sky .
```
Binary file removed photos/dog.jpg
Binary file not shown.
Binary file added photos/plane.jpeg
8 changes: 4 additions & 4 deletions results/logs.txt
@@ -1,4 +1,4 @@
-./photos/dog.jpg
-0): a dog with a red collar is sitting on a couch .
-1): a dog with a red collar is sitting on the floor .
-2): a dog with a red collar is sitting on a bench .
+./photos/plane.jpeg
+0): a plane is flying in blue sky .
+1): a plane is flying in cloudy sky .
+2): a plane is flying in blue and cloudy sky .

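For reference, the caption lines in logs.txt (and in the README example above) are produced by the beam-search loop in the notebook. Below is a minimal sketch of that step, assuming the `generator`, `sess`, and `vocab` objects have already been built as in the notebook and that `image_bytes` holds the raw contents of one image file.

```python
import math

# Each beam-search result is a candidate sentence with a log-probability.
captions = generator.beam_search(sess, image_bytes)
for i, caption in enumerate(captions):
    # Skip the begin/end sentinel words, as the notebook does.
    words = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
    print("  %d) %s (p=%f)" % (i, " ".join(words), math.exp(caption.logprob)))
```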