ironhack-labs · Aminmoh9 · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+# Ignore MLflow SQLite database
+data/mlflow.db
diff --git a/data/reuters_mlflow.py b/data/reuters_mlflow.py
@@ -0,0 +1,106 @@
+'''Train and evaluate a simple MLP on the Reuters newswire topic classification
+task, tracking parameters and metrics with MLflow.
+
+The script loads the Reuters dataset, encodes each newswire as a binary
+bag-of-words vector, trains a dense network with two hidden layers, and logs
+the hyperparameters plus the final test loss and accuracy to MLflow.
+'''
+
+from __future__ import print_function
+
+import warnings
+
+import numpy as np
+import keras
+from keras.datasets import reuters
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Activation
+from tensorflow.keras.preprocessing.text import Tokenizer
+import mlflow
+import mlflow.tensorflow
+
+# Silence library warnings so the console output stays readable.
+warnings.filterwarnings('ignore')
+
+# Runs are sent to an MLflow tracking server expected at this address.
+mlflow.set_tracking_uri("http://127.0.0.1:5000")
+
+print(f"Tracking URI: {mlflow.get_tracking_uri()}")
+
+# Hyperparameters
+max_words = 4000
+batch_size = 128
+epochs = 20
+learning_rate = 0.001  # passed to the Adam optimizer and logged to MLflow
+
+print('Loading data...')
+(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,
+                                                         test_split=0.2)
+print(len(x_train), 'train sequences')
+print(len(x_test), 'test sequences')
+
+# Labels are used as 0-based class indices, so the class count is max + 1.
+num_classes = np.max(y_train) + 1
+print(num_classes, 'classes')
+
+# Set experiment name
+mlflow.set_experiment("reuters_classification")
+
+with mlflow.start_run():
+    # Manual logging of parameters
+    mlflow.log_param('learning_rate', learning_rate)
+    mlflow.log_param('batch_size', batch_size)
+    mlflow.log_param('epochs', epochs)
+    mlflow.log_param('max_words', max_words)
+    mlflow.set_tag('project', 'reuters_classification')
+
+    print('Vectorizing sequence data...')
+    # Turn each sequence of word indices into a fixed-length binary vector.
+    tokenizer = Tokenizer(num_words=max_words)
+    x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
+    x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
+    print('x_train shape:', x_train.shape)
+    print('x_test shape:', x_test.shape)
+
+    print('Convert class vector to binary class matrix '
+          '(for use with categorical_crossentropy)')
+    y_train = keras.utils.to_categorical(y_train, num_classes)
+    y_test = keras.utils.to_categorical(y_test, num_classes)
+    print('y_train shape:', y_train.shape)
+    print('y_test shape:', y_test.shape)
+
+    print('Building model...')
+    model = Sequential()
+    model.add(Dense(256, input_shape=(max_words,)))
+    model.add(Activation('relu'))
+    model.add(Dropout(0.5))
+    model.add(Dense(128))
+    model.add(Activation('relu'))
+    model.add(Dropout(0.5))
+    model.add(Dense(num_classes))
+    model.add(Activation('softmax'))
+
+    # Enable MLflow autologging before compiling
+    mlflow.tensorflow.autolog()
+
+    # Build the optimizer explicitly so that the logged learning rate is the
+    # one actually used for training.
+    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=optimizer,
+                  metrics=['accuracy'])
+
+    history = model.fit(x_train, y_train,
+                        batch_size=batch_size,
+                        epochs=epochs,
+                        verbose=1,
+                        validation_split=0.1)
+    score = model.evaluate(x_test, y_test,
+                           batch_size=batch_size, verbose=1)
+
+    # Manual logging of final metrics
+    mlflow.log_metric('final_test_loss', score[0])
+    mlflow.log_metric('final_test_accuracy', score[1])
+
+    print('Test score:', score[0])
+    print('Test accuracy:', score[1])
diff --git a/screenshots/best_model.png b/screenshots/best_model.png
diff --git a/screenshots/experiment.png b/screenshots/experiment.png
diff --git a/screenshots/mterics_comparison.png b/screenshots/mterics_comparison.png
diff --git a/screenshots/runs.png b/screenshots/runs.png
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Ignore MLflow SQLite database
		data/mlflow.db