From fc6c6496e114dd36ed875afa9536f0bca75d605d Mon Sep 17 00:00:00 2001 From: martijnooo Date: Tue, 4 Nov 2025 14:13:50 +0100 Subject: [PATCH] exercise solved --- reuters_mlflow.ipynb | 308 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 308 insertions(+) create mode 100644 reuters_mlflow.ipynb diff --git a/reuters_mlflow.ipynb b/reuters_mlflow.ipynb new file mode 100644 index 0000000..ccf751f --- /dev/null +++ b/reuters_mlflow.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "c24f79d3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading data...\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz\n", + "\u001b[1m2110848/2110848\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 0us/step\n", + "8982 train sequences\n", + "2246 test sequences\n", + "46 classes\n", + "Vectorizing sequence data...\n", + "x_train shape: (8982, 1000)\n", + "x_test shape: (2246, 1000)\n", + "Convert class vector to binary class matrix (for use with categorical_crossentropy)\n", + "y_train shape: (8982, 46)\n", + "y_test shape: (2246, 46)\n" + ] + } + ], + "source": [ + "from __future__ import print_function\n", + "\n", + "import numpy as np\n", + "import keras\n", + "from keras.datasets import reuters\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense, Dropout, Activation\n", + "from tensorflow.keras.preprocessing.text import Tokenizer\n", + "\n", + "import mlflow\n", + "\n", + "max_words = 1000\n", + "batch_size = 32\n", + "epochs = 5\n", + "\n", + "print('Loading data...')\n", + "(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,\n", + " test_split=0.2)\n", + "print(len(x_train), 'train sequences')\n", + "print(len(x_test), 'test sequences')\n", + "\n", + "num_classes = np.max(y_train) + 1\n", + "print(num_classes, 'classes')\n", 
+ "\n", + "print('Vectorizing sequence data...')\n", + "tokenizer = Tokenizer(num_words=max_words)\n", + "x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')\n", + "x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')\n", + "print('x_train shape:', x_train.shape)\n", + "print('x_test shape:', x_test.shape)\n", + "\n", + "print('Convert class vector to binary class matrix '\n", + " '(for use with categorical_crossentropy)')\n", + "y_train = keras.utils.to_categorical(y_train, num_classes)\n", + "y_test = keras.utils.to_categorical(y_test, num_classes)\n", + "print('y_train shape:', y_train.shape)\n", + "print('y_test shape:', y_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "39bf87b7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/11/04 14:05:21 INFO mlflow.tracking.fluent: Experiment with name 'Iron' does not exist. Creating a new experiment.\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mlflow.set_tracking_uri(\"http://127.0.0.1:5000\")\n", + "mlflow.set_experiment(\"Iron\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4d8e57f0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025/11/04 14:05:22 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '108204e58f894339b74c6380b8125882', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building model...\n" + ] + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/5\n",
+      "\u001b[1m248/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.5868 - loss: 1.8561"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 6ms/step - accuracy: 0.6798 - loss: 1.4193 - val_accuracy: 0.7597 - val_loss: 1.0717\n",
+      "Epoch 2/5\n",
+      "\u001b[1m248/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.8058 - loss: 0.8170"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - accuracy: 0.8116 - loss: 0.7831 - val_accuracy: 0.7976 - val_loss: 0.9072\n",
+      "Epoch 3/5\n",
+      "\u001b[1m252/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.8686 - loss: 0.5380"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - accuracy: 0.8659 - loss: 0.5439 - val_accuracy: 0.8076 - val_loss: 0.8660\n",
+      "Epoch 4/5\n",
+      "\u001b[1m242/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.9074 - loss: 0.4091"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - accuracy: 0.9000 - loss: 0.4229 - val_accuracy: 0.8120 - val_loss: 0.8558\n",
+      "Epoch 5/5\n",
+      "\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - accuracy: 0.9205 - loss: 0.3258 - val_accuracy: 0.8031 - val_loss: 0.9003\n",
+      "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 49ms/step\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025/11/04 14:05:29 WARNING mlflow.models.model: `artifact_path` is deprecated. Please use `name` instead.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "🏃 View run aged-sow-379 at: http://127.0.0.1:5000/#/experiments/195699277622829892/runs/108204e58f894339b74c6380b8125882\n",
+      "🧪 View experiment at: http://127.0.0.1:5000/#/experiments/195699277622829892\n",
+      "\u001b[1m71/71\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.7947 - loss: 0.8667\n",
+      "Test score: 0.8667305111885071\n",
+      "Test accuracy: 0.7947462201118469\n"
+     ]
+    }
+   ],
+   "source": [
+    "mlflow.tensorflow.autolog()\n",
+    "print('Building model...')\n",
+    "model = Sequential()\n",
+    "model.add(keras.Input(shape=(max_words,)))  # Keras 3 deprecates input_shape= on layers\n",
+    "model.add(Dense(512))\n",
+    "model.add(Activation('relu'))\n",
+    "model.add(Dropout(0.5))\n",
+    "model.add(Dense(num_classes))\n",
+    "model.add(Activation('softmax'))\n",
+    "model.compile(loss='categorical_crossentropy',\n",
+    "              optimizer='adam', metrics=['accuracy'])\n",
+    "\n",
+    "mlflow.end_run()  # no-op unless an earlier cell execution left a run open\n",
+    "# One explicit run spans fit AND evaluate, so test metrics join the autologged ones.\n",
+    "with mlflow.start_run():\n",
+    "    history = model.fit(x_train, y_train, batch_size=batch_size,\n",
+    "                        epochs=epochs, verbose=1, validation_split=0.1)\n",
+    "    score = model.evaluate(x_test, y_test,\n",
+    "                           batch_size=batch_size, verbose=1)\n",
+    "    mlflow.log_metrics({'test_loss': score[0], 'test_accuracy': score[1]})\n",
+    "print('Test score:', score[0])\n",
+    "print('Test accuracy:', score[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "84e441c8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.001"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mlflow.log_param(\"batch_size\", batch_size)  # same value that was passed to model.fit\n",
+    "mlflow.log_param(\"learning_rate\", round(float(model.optimizer.get_config()[\"learning_rate\"]), 6))  # read the real optimizer rate instead of a guessed constant"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "cb7f9454",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mlflow.log_metric(key=\"value\", value=score[1])  # score[1] is the accuracy returned by model.evaluate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "75f8483b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mlflow.set_tag(key=\"project\", value=\"reuters_classification\")  # label the run with its project name"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv (3.12.7)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}