308 changes: 308 additions & 0 deletions reuters_mlflow.ipynb
@@ -0,0 +1,308 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "c24f79d3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading data...\n",
"Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz\n",
"\u001b[1m2110848/2110848\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 0us/step\n",
"8982 train sequences\n",
"2246 test sequences\n",
"46 classes\n",
"Vectorizing sequence data...\n",
"x_train shape: (8982, 1000)\n",
"x_test shape: (2246, 1000)\n",
"Convert class vector to binary class matrix (for use with categorical_crossentropy)\n",
"y_train shape: (8982, 46)\n",
"y_test shape: (2246, 46)\n"
]
}
],
"source": [
"from __future__ import print_function\n",
"\n",
"import numpy as np\n",
"import keras\n",
"from keras.datasets import reuters\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Activation\n",
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
"\n",
"import mlflow\n",
"\n",
"max_words = 1000\n",
"batch_size = 32\n",
"epochs = 5\n",
"\n",
"print('Loading data...')\n",
"(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,\n",
" test_split=0.2)\n",
"print(len(x_train), 'train sequences')\n",
"print(len(x_test), 'test sequences')\n",
"\n",
"num_classes = np.max(y_train) + 1\n",
"print(num_classes, 'classes')\n",
"\n",
"print('Vectorizing sequence data...')\n",
"tokenizer = Tokenizer(num_words=max_words)\n",
"x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')\n",
"x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')\n",
"print('x_train shape:', x_train.shape)\n",
"print('x_test shape:', x_test.shape)\n",
"\n",
"print('Convert class vector to binary class matrix '\n",
" '(for use with categorical_crossentropy)')\n",
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
"y_test = keras.utils.to_categorical(y_test, num_classes)\n",
"print('y_train shape:', y_train.shape)\n",
"print('y_test shape:', y_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "39bf87b7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025/11/04 14:05:21 INFO mlflow.tracking.fluent: Experiment with name 'Iron' does not exist. Creating a new experiment.\n"
]
},
{
"data": {
"text/plain": [
"<Experiment: artifact_location='mlflow-artifacts:/195699277622829892', creation_time=1762261521516, experiment_id='195699277622829892', last_update_time=1762261521516, lifecycle_stage='active', name='Iron', tags={}>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mlflow.set_tracking_uri(\"http://127.0.0.1:5000\")\n",
"mlflow.set_experiment(\"Iron\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4d8e57f0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025/11/04 14:05:22 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '108204e58f894339b74c6380b8125882', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building model...\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/5\n",
"\u001b[1m248/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.5868 - loss: 1.8561"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 6ms/step - accuracy: 0.6798 - loss: 1.4193 - val_accuracy: 0.7597 - val_loss: 1.0717\n",
"Epoch 2/5\n",
"\u001b[1m248/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.8058 - loss: 0.8170"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - accuracy: 0.8116 - loss: 0.7831 - val_accuracy: 0.7976 - val_loss: 0.9072\n",
"Epoch 3/5\n",
"\u001b[1m252/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.8686 - loss: 0.5380"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - accuracy: 0.8659 - loss: 0.5439 - val_accuracy: 0.8076 - val_loss: 0.8660\n",
"Epoch 4/5\n",
"\u001b[1m242/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.9074 - loss: 0.4091"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - accuracy: 0.9000 - loss: 0.4229 - val_accuracy: 0.8120 - val_loss: 0.8558\n",
"Epoch 5/5\n",
"\u001b[1m253/253\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - accuracy: 0.9205 - loss: 0.3258 - val_accuracy: 0.8031 - val_loss: 0.9003\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 49ms/step\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025/11/04 14:05:29 WARNING mlflow.models.model: `artifact_path` is deprecated. Please use `name` instead.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"🏃 View run aged-sow-379 at: http://127.0.0.1:5000/#/experiments/195699277622829892/runs/108204e58f894339b74c6380b8125882\n",
"🧪 View experiment at: http://127.0.0.1:5000/#/experiments/195699277622829892\n",
"\u001b[1m71/71\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - accuracy: 0.7947 - loss: 0.8667\n",
"Test score: 0.8667305111885071\n",
"Test accuracy: 0.7947462201118469\n"
]
}
],
"source": [
"mlflow.tensorflow.autolog()\n",
"print('Building model...')\n",
"model = Sequential()\n",
"model.add(Dense(512, input_shape=(max_words,)))\n",
"model.add(Activation('relu'))\n",
"model.add(Dropout(0.5))\n",
"model.add(Dense(num_classes))\n",
"model.add(Activation('softmax'))\n",
"\n",
"model.compile(loss='categorical_crossentropy',\n",
" optimizer='adam',\n",
" metrics=['accuracy'])\n",
"\n",
"history = model.fit(x_train, y_train,\n",
" batch_size=batch_size,\n",
" epochs=epochs,\n",
" verbose=1,\n",
" validation_split=0.1)\n",
"score = model.evaluate(x_test, y_test,\n",
" batch_size=batch_size, verbose=1)\n",
"print('Test score:', score[0])\n",
"print('Test accuracy:', score[1])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "84e441c8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.01"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mlflow.log_param(\"batch_size\", batch_size)\n",
"mlflow.log_param(\"learning_rate\", 0.01)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "cb7f9454",
"metadata": {},
"outputs": [],
"source": [
"mlflow.log_metric(\"value\", score[1])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "75f8483b",
"metadata": {},
"outputs": [],
"source": [
"mlflow.set_tag(\"project\", \"reuters_classification\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv (3.12.7)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}