308 changes: 308 additions & 0 deletions reuters_mlflow.ipynb
@@ -0,0 +1,308 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "c24f79d3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading data...\n",
"Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz\n",
"\u001b[1m2110848/2110848\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 0us/step\n",
"8982 train sequences\n",
"2246 test sequences\n",
"46 classes\n",
"Vectorizing sequence data...\n",
"x_train shape: (8982, 1000)\n",
"x_test shape: (2246, 1000)\n",
"Convert class vector to binary class matrix (for use with categorical_crossentropy)\n",
"y_train shape: (8982, 46)\n",
"y_test shape: (2246, 46)\n"
]
}
],
"source": [
"from __future__ import print_function\n",
"\n",
"import numpy as np\n",
"import keras\n",
"from keras.datasets import reuters\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Activation\n",
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
"\n",
"import mlflow\n",
"\n",
"max_words = 1000\n",
"batch_size = 32\n",
"epochs = 5\n",
"\n",
"print('Loading data...')\n",
"(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,\n",
" test_split=0.2)\n",
"print(len(x_train), 'train sequences')\n",
"print(len(x_test), 'test sequences')\n",
"\n",
"num_classes = np.max(y_train) + 1\n",
"print(num_classes, 'classes')\n",
"\n",
"print('Vectorizing sequence data...')\n",
"tokenizer = Tokenizer(num_words=max_words)\n",
"x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')\n",
"x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')\n",
"print('x_train shape:', x_train.shape)\n",
"print('x_test shape:', x_test.shape)\n",
"\n",
"print('Convert class vector to binary class matrix '\n",
" '(for use with categorical_crossentropy)')\n",
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
"y_test = keras.utils.to_categorical(y_test, num_classes)\n",
"print('y_train shape:', y_train.shape)\n",
"print('y_test shape:', y_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "39bf87b7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025/11/04 14:05:21 INFO mlflow.tracking.fluent: Experiment with name 'Iron' does not exist. Creating a new experiment.\n"
]
},
{
"data": {
"text/plain": [
"<Experiment: artifact_location='mlflow-artifacts:/195699277622829892', creation_time=1762261521516, experiment_id='195699277622829892', last_update_time=1762261521516, lifecycle_stage='active', name='Iron', tags={}>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mlflow.set_tracking_uri(\"http://127.0.0.1:5000\")\n",
"mlflow.set_experiment(\"Iron\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4d8e57f0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025/11/04 14:05:22 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '108204e58f894339b74c6380b8125882', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building model...\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/5\n",
"\u001b[1m248/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.5868 - loss: 1.8561"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 6ms/step - accuracy: 0.6798 - loss: 1.4193 - val_accuracy: 0.7597 - val_loss: 1.0717\n",
"Epoch 2/5\n",
"\u001b[1m248/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.8058 - loss: 0.8170"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - accuracy: 0.8116 - loss: 0.7831 - val_accuracy: 0.7976 - val_loss: 0.9072\n",
"Epoch 3/5\n",
"\u001b[1m252/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.8686 - loss: 0.5380"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - accuracy: 0.8659 - loss: 0.5439 - val_accuracy: 0.8076 - val_loss: 0.8660\n",
"Epoch 4/5\n",
"\u001b[1m242/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.9074 - loss: 0.4091"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - accuracy: 0.9000 - loss: 0.4229 - val_accuracy: 0.8120 - val_loss: 0.8558\n",
"Epoch 5/5\n",
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - accuracy: 0.9205 - loss: 0.3258 - val_accuracy: 0.8031 - val_loss: 0.9003\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 49ms/step\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025/11/04 14:05:29 WARNING mlflow.models.model: `artifact_path` is deprecated. Please use `name` instead.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"🏃 View run aged-sow-379 at: http://127.0.0.1:5000/#/experiments/195699277622829892/runs/108204e58f894339b74c6380b8125882\n",
"🧪 View experiment at: http://127.0.0.1:5000/#/experiments/195699277622829892\n",
"\u001b[1m71/71\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.7947 - loss: 0.8667\n",
"Test score: 0.8667305111885071\n",
"Test accuracy: 0.7947462201118469\n"
]
}
],
"source": [
"mlflow.tensorflow.autolog()\n",
"print('Building model...')\n",
"model = Sequential()\n",
"model.add(Dense(512, input_shape=(max_words,)))\n",
"model.add(Activation('relu'))\n",
"model.add(Dropout(0.5))\n",
"model.add(Dense(num_classes))\n",
"model.add(Activation('softmax'))\n",
"\n",
"model.compile(loss='categorical_crossentropy',\n",
" optimizer='adam',\n",
" metrics=['accuracy'])\n",
"\n",
"history = model.fit(x_train, y_train,\n",
" batch_size=batch_size,\n",
" epochs=epochs,\n",
" verbose=1,\n",
" validation_split=0.1)\n",
"score = model.evaluate(x_test, y_test,\n",
" batch_size=batch_size, verbose=1)\n",
"print('Test score:', score[0])\n",
"print('Test accuracy:', score[1])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "84e441c8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.01"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mlflow.log_param(\"batch_size\", batch_size)\n",
"mlflow.log_param(\"learning_rate\", 0.01)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "cb7f9454",
"metadata": {},
"outputs": [],
"source": [
"mlflow.log_metric(\"value\", score[1])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "75f8483b",
"metadata": {},
"outputs": [],
"source": [
"mlflow.set_tag(\"project\", \"reuters_classification\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv (3.12.7)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}