Commit: modify readme file
purveshpatel511 committed May 5, 2020
1 parent 911dda8 commit ceb1648
Showing 5 changed files with 242 additions and 9 deletions.
233 changes: 233 additions & 0 deletions .ipynb_checkpoints/imageCaptioning-checkpoint.ipynb
@@ -0,0 +1,233 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "JDAgp_sTX_st"
},
"outputs": [],
"source": [
"######## Install Tensorflow version 1.13.2\n",
"!pip install tensorflow==1.13.2"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "LH2Ikk20YiH_"
},
"outputs": [],
"source": [
"######## Official git repo\n",
"##\n",
"## https://www.github.com/purvesh-linux11/imageCaptioning\n",
"##\n",
"##### repo is made private. import project files from drive.\n",
"\n",
"######## Copy Project from drive\n",
"!cp -r /driv/path /content/"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "cannot import name 'configuration' from 'im2txt' (/home/matrix11/.local/lib/python3.8/site-packages/im2txt/__init__.py)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-96434638bdaf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mim2txt\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mconfiguration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mim2txt\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0minference_wrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mim2txt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minference_utils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcaption_generator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mim2txt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minference_utils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mvocabulary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mImportError\u001b[0m: cannot import name 'configuration' from 'im2txt' (/home/matrix11/.local/lib/python3.8/site-packages/im2txt/__init__.py)"
]
}
],
"source": [
"from im2txt import configuration\n",
"from im2txt import inference_wrapper\n",
"from im2txt.inference_utils import caption_generator\n",
"from im2txt.inference_utils import vocabulary"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "-ErYFuETaEGX"
},
"outputs": [],
"source": [
"from __future__ import absolute_import\n",
"from __future__ import division\n",
"from __future__ import print_function\n",
"\n",
"import math\n",
"import os\n",
"from os import listdir, path\n",
"\n",
"import tensorflow as tf\n",
"import cv2\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from IPython import display\n",
"\n",
"import PIL.Image\n",
"import PIL.ImageOps\n",
"import PIL.ImageFont\n",
"import PIL.ImageDraw\n",
"import textwrap\n",
"from io import BytesIO\n",
"\n",
"from im2txt import configuration\n",
"from im2txt import inference_wrapper\n",
"from im2txt.inference_utils import caption_generator\n",
"from im2txt.inference_utils import vocabulary"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "fN-tgbJMbNHt"
},
"outputs": [],
"source": [
"# Choose the trained model --> current is 2\n",
"model_number = \"2\"\n",
"model_path = \"/content/model.ckpt-\"+model_number+\"000000\" # Give model path\n",
"vocab_path = \"/content/word_counts\"+model_number+\".txt\" # Give word_counts file path\n",
"tf.logging.set_verbosity(tf.logging.INFO)\n",
"\n",
"# Build the inference graph.\n",
"g = tf.Graph()\n",
"with g.as_default():\n",
" model = inference_wrapper.InferenceWrapper()\n",
" restore_fn = model.build_graph_from_config(configuration.ModelConfig(), model_path)\n",
"g.finalize()\n",
"\n",
"# Create the vocabulary.\n",
"vocab = vocabulary.Vocabulary(vocab_path) \n",
"\n",
"#######################################################\n",
"### if tensorflow version is <1.13.2 then you have to check variables name as per tensorflow version\n",
"\n",
"OLD_CHECKPOINT_FILE = \"/content/model.ckpt-2000000\"\n",
"NEW_CHECKPOINT_FILE = \"/content/model.ckpt-2000000\"\n",
"\n",
"import tensorflow as tf\n",
"vars_to_rename = {\n",
" \"lstm/BasicLSTMCell/Linear/Matrix\": \"lstm/basic_lstm_cell/kernel\",\n",
" \"lstm/BasicLSTMCell/Linear/Bias\": \"lstm/basic_lstm_cell/bias\",\n",
"}\n",
"new_checkpoint_vars = {}\n",
"reader = tf.train.NewCheckpointReader(OLD_CHECKPOINT_FILE)\n",
"for old_name in reader.get_variable_to_shape_map():\n",
" if old_name in vars_to_rename:\n",
" new_name = vars_to_rename[old_name]\n",
" else:\n",
" new_name = old_name\n",
" new_checkpoint_vars[new_name] = tf.Variable(reader.get_tensor(old_name))\n",
"\n",
"init = tf.global_variables_initializer()\n",
"saver = tf.train.Saver(new_checkpoint_vars)\n",
"\n",
"with tf.Session() as sess:\n",
" sess.run(init)\n",
" saver.save(sess, NEW_CHECKPOINT_FILE)\n",
"\n",
"#######################################################\n",
"\n",
"sess = tf.Session(graph=g)\n",
"# Load the model from checkpoint.\n",
"restore_fn(sess)\n",
"\n",
"# Prepare the caption generator. Here we are implicitly using the default\n",
"# beam search parameters. See caption_generator.py for a description of the\n",
"# available beam search parameters.\n",
"generator = caption_generator.CaptionGenerator(model, vocab)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "DIZLzNZIbgU-"
},
"outputs": [],
"source": [
"image_path = \"./photos/\" #### provide path where image is stored\n",
"filenames = listdir(image_path)\n",
"font = PIL.ImageFont.truetype(\"./fonts/Ubuntu-B.ttf\", 16)\n",
"\n",
"for file in filenames:\n",
" try:\n",
" print(image_path+file)\n",
" img = PIL.Image.open(image_path+file).convert('RGBA')\n",
" box = PIL.Image.new('RGBA', img.size, (255,255,255,0))\n",
" draw = PIL.ImageDraw.Draw(box)\n",
" image = open(image_path+file,'rb').read() # Read the image as bytes\n",
" captions = generator.beam_search(sess, image)\n",
" for i, caption in enumerate(captions):\n",
" # Ignore begin and end words.\n",
" sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]\n",
" sentence = \" \".join(sentence)\n",
" print(\" %d) %s (p=%f)\" % (i, sentence, math.exp(caption.logprob)))\n",
" if i==0:\n",
" margin = offset = 0\n",
" for line in textwrap.wrap(sentence, width=52):\n",
" offset_a = offset\n",
" offset += font.getsize(line)[1]\n",
" draw.rectangle(((0, offset_a), (img.size[0], offset)), fill=(0,0,0,128))\n",
" draw.text((margin, offset_a), line, (255,255,255),font=font)\n",
" out = PIL.Image.alpha_composite(img, box)\n",
" out.show()\n",
" out.save(\"./results/model_\"+model_number+\"-image_\"+file) # Provide destination path to store captioned image. ### In colab, comment this.\n",
" except KeyboardInterrupt:\n",
" break"
]
}
],
"metadata": {
"accelerator": "TPU",
"colab": {
"collapsed_sections": [],
"name": "imageCaptioning.ipynb",
"private_outputs": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
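
A note on the ImportError shown in the third cell: the `im2txt` package installed from PyPI does not provide `configuration` or `inference_wrapper`; those modules come from the im2txt research code in the tensorflow/models repository. The sketch below is one possible workaround and is not part of this commit — the clone destination and directory layout are assumptions based on the historical tensorflow/models layout (the im2txt code may have moved or been archived in newer revisions), so adjust the paths for your environment.

```python
# Hypothetical fix for the ImportError above (paths are assumptions): clone the
# tensorflow/models repository and put its research/im2txt directory on
# sys.path so that `from im2txt import configuration` resolves.
import sys

!git clone https://github.com/tensorflow/models.git /content/models
sys.path.append("/content/models/research/im2txt")

from im2txt import configuration
from im2txt import inference_wrapper
from im2txt.inference_utils import caption_generator
from im2txt.inference_utils import vocabulary
```

Because of the `!git clone` shell escape, this is meant to be run as a notebook cell rather than as a plain Python script.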
10 changes: 5 additions & 5 deletions README.md
@@ -17,12 +17,12 @@ Run ```imageCaptioning.ipynb``` file for generating caption.


### Example
-![dog.jpg](photos/dog.jpg)
+![plane.jpg](photos/plane.jpg)

caption generated for this image is,
```
-./photos/dog.jpg
-0): a dog with a red collar is sitting on a couch .
-1): a dog with a red collar is sitting on the floor .
-2): a dog with a red collar is sitting on a bench .
+./photos/plane.jpeg
+0): a plane is flying in blue sky .
+1): a plane is flying in cloudy sky .
+2): a plane is flying in blue and cloudy sky .
```
Binary file removed photos/dog.jpg
Binary file not shown.
Binary file added photos/plane.jpeg
8 changes: 4 additions & 4 deletions results/logs.txt
@@ -1,4 +1,4 @@
-./photos/dog.jpg
-0): a dog with a red collar is sitting on a couch .
-1): a dog with a red collar is sitting on the floor .
-2): a dog with a red collar is sitting on a bench .
+./photos/plane.jpeg
+0): a plane is flying in blue sky .
+1): a plane is flying in cloudy sky .
+2): a plane is flying in blue and cloudy sky .

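For reference, the caption lines in logs.txt (and in the README example above) are produced by the beam-search loop in the notebook. Below is a minimal sketch of that step, assuming the `generator`, `sess`, and `vocab` objects have already been built as in the notebook and that `image_bytes` holds the raw contents of one image file.

```python
import math

# Each beam-search result is a candidate sentence with a log-probability.
captions = generator.beam_search(sess, image_bytes)
for i, caption in enumerate(captions):
    # Skip the begin/end sentinel words, as the notebook does.
    words = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
    print("  %d) %s (p=%f)" % (i, " ".join(words), math.exp(caption.logprob)))
```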