# ====================================
# Common imports used across classification models
# ====================================

import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay
)

from pathlib import Path
import matplotlib.pyplot as plt
import joblib

# ====================================
# TensorFlow / Keras imports
# ====================================

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# ====================================
# Dataset loading
# ====================================

# Load the bundled breast-cancer dataset as pandas objects (as_frame=True),
# then pull out the feature table and the target column.
data = load_breast_cancer(as_frame=True)
X, y = data.data, data.target

# ====================================
# Train-test split
# ====================================

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

# ====================================
# Feature scaling
# ====================================

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both partitions so no test-set statistics leak in.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ====================================
# Model definition (Keras)
# ====================================

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling are repeatable, matching the fixed random_state used for the
# train-test split.
keras.utils.set_random_seed(42)

# Feed-forward classifier: two ReLU hidden layers (64 and 32 units) followed
# by a 2-unit layer with no activation, i.e. one raw logit per class.
model = keras.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(64, activation="relu"),
    layers.Dense(32, activation="relu"),
    layers.Dense(2),  # logits for 2 classes
])

# ====================================
# Model compilation
# ====================================

# Adam with a 1e-3 learning rate; the loss takes integer class labels and raw
# logits (from_logits=True), so the model itself needs no softmax layer.
adam = keras.optimizers.Adam(learning_rate=0.001)
sparse_ce = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer=adam, loss=sparse_ce, metrics=["accuracy"])

# ====================================
# Model training
# ====================================

# Train silently (verbose=0) for 50 epochs in mini-batches of 32; the object
# returned by fit() exposes the per-epoch metrics via its .history attribute.
fit_options = {"epochs": 50, "batch_size": 32, "verbose": 0}
history = model.fit(X_train_scaled, y_train, **fit_options)

# ====================================
# Training history visualization
# ====================================

# Per-epoch training metrics recorded by model.fit().
history_dict = history.history
epochs = range(1, len(history_dict["loss"]) + 1)

# One row, two panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    (loss_ax, "loss", "Training Loss", "Loss"),
    (acc_ax, "accuracy", "Training Accuracy", "Accuracy"),
)
for ax, metric_key, panel_title, y_label in panels:
    ax.plot(epochs, history_dict[metric_key], marker="o")
    ax.set_title(panel_title)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(y_label)
    ax.grid(True)

fig.tight_layout()
plt.show()

# ====================================
# Predictions with TensorFlow / Keras
# ====================================

# Forward pass over the scaled test set: one row of two raw logits per sample.
logits = model.predict(X_test_scaled)

# Softmax turns each row of logits into class probabilities that sum to 1.
y_pred_proba = tf.nn.softmax(logits, axis=1).numpy()

# Hard label (0 or 1) = index of the largest logit in each row.
y_pred = logits.argmax(axis=1)

4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step

# ====================================
# Accuracy
# ====================================

# Fraction of test samples whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.9736842105263158

# ====================================
# Confusion matrix
# ====================================

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[41,  2],
       [ 1, 70]])

# ====================================
# Confusion matrix visualization
# ====================================

# Render the confusion matrix as a heat map, using the raw class ids as tick
# labels.
class_ids = [0, 1]
disp = ConfusionMatrixDisplay(cm, display_labels=class_ids)

disp.plot(cmap="Blues")
plt.title("Confusion Matrix – Deep Learning (TensorFlow / Keras)")
plt.show()

# ====================================
# Classification report
# ====================================

# Per-class precision, recall and F1, plus the overall averages.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

# ====================================
# Model persistence (TensorFlow / Keras)
# ====================================

# Directory that holds every artifact needed to reuse the model.
model_dir = Path("models/supervised_learning/classification/deep_learning_tensorflow")

# Create the directory (and any missing parents); a no-op if it already exists.
model_dir.mkdir(parents=True, exist_ok=True)

# Save the trained network in the native Keras format. The explicit ".keras"
# extension is required: Keras 3 raises ValueError for a path without one.
model.save(model_dir / "tensorflow_keras_model.keras")

# Save the fitted scaler too: new inputs must be standardised with the same
# training-set statistics before they are passed to the model.
joblib.dump(scaler, model_dir / "scaler.joblib")

# ====================================
# Imports
# ====================================

import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay
)

from pathlib import Path
import matplotlib.pyplot as plt
import joblib

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


# ====================================
# Dataset loading
# ====================================

# Load the bundled breast-cancer dataset as pandas objects (as_frame=True),
# then pull out the feature table and the target column.
data = load_breast_cancer(as_frame=True)
X, y = data.data, data.target


# ====================================
# Train-test split
# ====================================

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits


# ====================================
# Feature scaling
# ====================================

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both partitions so no test-set statistics leak in.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# ====================================
# Model definition (Keras)
# ====================================

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling are repeatable, matching the fixed random_state used for the
# train-test split.
keras.utils.set_random_seed(42)

# Feed-forward classifier: two ReLU hidden layers (64 and 32 units) followed
# by a 2-unit layer with no activation, i.e. one raw logit per class.
model = keras.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(64, activation="relu"),
    layers.Dense(32, activation="relu"),
    layers.Dense(2),  # logits for 2 classes
])


# ====================================
# Model compilation
# ====================================

# Adam with a 1e-3 learning rate; the loss takes integer class labels and raw
# logits (from_logits=True), so the model itself needs no softmax layer.
adam = keras.optimizers.Adam(learning_rate=0.001)
sparse_ce = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer=adam, loss=sparse_ce, metrics=["accuracy"])


# ====================================
# Model training
# ====================================

# Train silently (verbose=0) for 50 epochs in mini-batches of 32; the object
# returned by fit() exposes the per-epoch metrics via its .history attribute.
fit_options = {"epochs": 50, "batch_size": 32, "verbose": 0}
history = model.fit(X_train_scaled, y_train, **fit_options)


# ====================================
# Training history visualization
# ====================================

# Per-epoch training metrics recorded by model.fit().
history_dict = history.history
epochs = range(1, len(history_dict["loss"]) + 1)

# One row, two panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    (loss_ax, "loss", "Training Loss", "Loss"),
    (acc_ax, "accuracy", "Training Accuracy", "Accuracy"),
)
for ax, metric_key, panel_title, y_label in panels:
    ax.plot(epochs, history_dict[metric_key], marker="o")
    ax.set_title(panel_title)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(y_label)
    ax.grid(True)

fig.tight_layout()
plt.show()


# ====================================
# Predictions
# ====================================

# Forward pass over the scaled test set: one row of two raw logits per sample.
logits = model.predict(X_test_scaled)

# Softmax turns each row of logits into class probabilities that sum to 1.
y_pred_proba = tf.nn.softmax(logits, axis=1).numpy()

# Hard label (0 or 1) = index of the largest logit in each row.
y_pred = logits.argmax(axis=1)


# ====================================
# Model evaluation
# ====================================

# Overall accuracy and the confusion matrix (rows = true class, columns =
# predicted class) on the held-out test set.
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Bare expressions are only echoed when they end a notebook cell, and never
# when the file runs as a script, so print the metrics explicitly.
print(f"Accuracy: {accuracy:.4f}")
print("Confusion matrix:")
print(cm)

# Per-class precision, recall and F1, plus the overall averages.
print(classification_report(y_test, y_pred))


# ====================================
# Confusion matrix visualization
# ====================================

# Render the confusion matrix as a heat map, using the raw class ids as tick
# labels.
class_ids = [0, 1]
disp = ConfusionMatrixDisplay(cm, display_labels=class_ids)

disp.plot(cmap="Blues")
plt.title("Confusion Matrix – Deep Learning (TensorFlow / Keras)")
plt.show()


# ====================================
# Model persistence
# ====================================

# Directory that holds every artifact needed to reuse the model; create it
# (and any missing parents) if it does not exist yet.
model_dir = Path("models/supervised_learning/classification/deep_learning_tensorflow")
model_dir.mkdir(parents=True, exist_ok=True)

# Save the trained network in the native Keras format. The explicit ".keras"
# extension is required: Keras 3 raises ValueError for a path without one.
model.save(model_dir / "tensorflow_keras_model.keras")

# Save the fitted scaler too: new inputs must be standardised with the same
# training-set statistics before they are passed to the model.
joblib.dump(scaler, model_dir / "scaler.joblib")

Deep Learning – Classification (TensorFlow / Keras)¶

Notebook Roadmap (standard ML-Methods)¶

How this notebook should be read¶

What is Deep Learning (in this context)?¶

Why TensorFlow / Keras?¶

Execution model: eager and graph¶

What you should expect from the results¶

1. Project setup and common pipeline¶

What changes with TensorFlow / Keras¶

2. Dataset loading¶

What we have after this step¶

3. Train-test split¶

What we have after this step¶

4. Feature scaling (why we do it)¶

Why we use standardization here¶

5. What is this model? (Deep Learning with TensorFlow / Keras)¶

What do we want to achieve?¶

How does a neural network solve this problem?¶

What happens inside the model?¶

Why multiple layers?¶

What does Keras abstract away?¶

How learning happens conceptually¶

Key takeaway¶

6. Model training (TensorFlow / Keras)¶

What we just did (step by step)¶

1. Defining the model structure¶

2. Why the output layer has 2 neurons¶

3. Compiling the model¶

4. Loss function choice¶

5. Training with .fit()¶

Key takeaway¶

7. Model behavior and key parameters¶

Model capacity and architecture¶

Depth and non-linearity¶

Training dynamics observed in practice¶

Overfitting behavior¶

Role of training parameters¶

Key takeaway¶

8. Predictions¶

What the model outputs¶

From logits to class labels¶

From logits to probabilities¶

Important consistency note¶

9. Model evaluation¶

How to interpret these results¶

Confusion matrix¶

Classification report¶

Framework independence¶

Key takeaway¶

10. When to use it and when not to¶

When to use TensorFlow / Keras¶

When NOT to use TensorFlow / Keras¶

Practical warning signs¶

Key takeaway¶

11. Model persistence¶

What exactly we saved¶

Why we also save the scaler¶

How the model will be reused¶

12. Mathematical formulation (deep dive)¶

Representation of the data¶

Linear layers in Keras¶

Non-linear activation¶

Layer composition¶

Output layer and logits¶

Softmax and class probabilities¶

Loss function¶

Gradient-based optimization¶

Learning perspective¶

Final takeaway¶

13. Final summary – Code only¶

5. Training with `.fit()`¶