jax_cifar10_cnn_classification.py

# File name: jax_cifar10_cnn_classification.py

import jax
import jax.numpy as jnp
import numpy as np
import tensorflow_datasets as tfds

# Define the CNN model architecture
def jax_cnn_model(inputs):
    """
    Define a simple CNN model for CIFAR-10 classification.
    
    Parameters:
    inputs (jax.numpy.DeviceArray): Input data

    Returns:
    jax.numpy.DeviceArray: Output logits for each class
    """
    # Convolutional layer 1
    x = jax.lax.conv_general_dilated(inputs, jax.random.normal(jax.random.PRNGKey(0), (3, 3, 3, 32)), (1, 1), 'SAME')
    x = jax.nn.relu(x)
    x = jax.lax.avg_pool(x, (2, 2), (2, 2), 'VALID')

    # Convolutional layer 2
    x = jax.lax.conv_general_dilated(x, jax.random.normal(jax.random.PRNGKey(1), (3, 3, 32, 64)), (1, 1), 'SAME')
    x = jax.nn.relu(x)
    x = jax.lax.avg_pool(x, (2, 2), (2, 2), 'VALID')

    # Flatten the feature maps
    x = x.reshape((x.shape[0], -1))

    # Fully connected layer
    x = jax.nn.relu(jax.lax.dot_general(x, jax.random.normal(jax.random.PRNGKey(2), (x.shape[-1], 256)), (((1,), (0,)), ((), ()))))

    # Output layer
    x = jax.lax.dot_general(x, jax.random.normal(jax.random.PRNGKey(3), (256, 10)), (((1,), (0,)), ((), ())))
    return x

# Define the loss function
def jax_loss_fn(params, inputs, labels):
    """
    Compute the loss for the CNN model.
    
    Parameters:
    params (dict): Model parameters
    inputs (jax.numpy.DeviceArray): Input data
    labels (jax.numpy.DeviceArray): True labels

    Returns:
    jax.numpy.DeviceArray: Computed loss
    """
    logits = jax_cnn_model(inputs)
    one_hot_labels = jax.nn.one_hot(labels, num_classes=10)
    loss = jnp.mean(jax.nn.softmax_cross_entropy_with_logits(logits, one_hot_labels))
    return loss

# Define the accuracy metric
def jax_accuracy(params, inputs, labels):
    """
    Compute the accuracy of the CNN model.
    
    Parameters:
    params (dict): Model parameters
    inputs (jax.numpy.DeviceArray): Input data
    labels (jax.numpy.DeviceArray): True labels

    Returns:
    float: Computed accuracy
    """
    logits = jax_cnn_model(inputs)
    predictions = jnp.argmax(logits, axis=-1)
    return jnp.mean(predictions == labels)

# Load and preprocess the CIFAR-10 dataset
def jax_load_data():
    """
    Load and preprocess the CIFAR-10 dataset.
    
    Returns:
    tuple: Preprocessed training and test datasets
    """
    ds_train, ds_test = tfds.load('cifar10', split=['train', 'test'], as_supervised=True)
    ds_train = ds_train.map(lambda x, y: (x / 255.0, y)).batch(32).prefetch(1)
    ds_test = ds_test.map(lambda x, y: (x / 255.0, y)).batch(32).prefetch(1)
    return ds_train, ds_test

# Train the model
def jax_train(params, optimizer, ds_train, num_epochs):
    """
    Train the CNN model.
    
    Parameters:
    params (dict): Model parameters
    optimizer (jax.experimental.optimizers.Optimizer): Optimizer for training
    ds_train (tf.data.Dataset): Training dataset
    num_epochs (int): Number of epochs to train

    Returns:
    dict: Trained model parameters
    """
    for epoch in range(num_epochs):
        for batch in ds_train:
            inputs, labels = batch
            loss_value, grads = jax.value_and_grad(jax_loss_fn)(params, inputs, labels)
            params = optimizer.update(grads, params)
        print(f"Epoch {epoch + 1}, Loss: {loss_value:.4f}")
    return params

# Evaluate the model
def jax_evaluate(params, ds_test):
    """
    Evaluate the CNN model.
    
    Parameters:
    params (dict): Model parameters
    ds_test (tf.data.Dataset): Test dataset

    Returns:
    float: Computed test accuracy
    """
    accuracies = []
    for batch in ds_test:
        inputs, labels = batch
        acc = jax_accuracy(params, inputs, labels)
        accuracies.append(acc)
    return jnp.mean(jnp.array(accuracies))

# Main function
def main():
    """
    Main function to run the training and evaluation of the CNN model.
    """
    # Initialize model parameters
    rng = jax.random.PRNGKey(0)
    params = jax.random.normal(rng, (3, 32, 32, 32))

    # Initialize optimizer
    optimizer = jax.optim.Adam(learning_rate=0.001)

    # Load and preprocess data
    ds_train, ds_test = jax_load_data()

    # Train the model
    params = jax_train(params, optimizer, ds_train, num_epochs=10)

    # Evaluate the model
    accuracy = jax_evaluate(params, ds_test)
    print(f"Test Accuracy: {accuracy:.4f}")

if __name__ == "__main__":
    main()

# Possible Errors and Solutions:
# 1. ImportError: No module named 'tensorflow_datasets'.
#    Solution: Ensure you have TensorFlow Datasets installed. Use `pip install tensorflow-datasets`.

# 2. AttributeError: module 'jax' has no attribute 'lax'.
#    Solution: Ensure you have the correct version of JAX installed. Use `pip install --upgrade jax jaxlib`.

# 3. RuntimeError: CUDA out of memory.
#    Solution: Reduce the batch size or the model size to fit the available GPU memory.

# 4. TypeError: 'float' object is not iterable.
#    Solution: Check the data pipeline and ensure that the input data is batched correctly.

# 5. ValueError: operands could not be broadcast together with shapes.
#    Solution: Ensure that the shapes of the arrays involved in operations are compatible.