Deep Learning – Classification (PyTorch)¶

This notebook is part of the ML-Methods project.

It introduces Deep Learning for supervised classification using PyTorch, a flexible deep learning framework that exposes low-level control over model definition and training.

As with the other classification notebooks, the first sections focus on data preparation and are intentionally repeated.

This ensures consistency across models and allows fair comparison of results.


Notebook Roadmap (standard ML-Methods)¶

  1. Project setup and common pipeline
  2. Dataset loading
  3. Train-test split
  4. Feature scaling (why we do it)
  5. What is this model? (Intuition)
  6. Model training
  7. Model behavior and key parameters
  8. Predictions
  9. Model evaluation
  10. When to use it and when not to
  11. Model persistence
  12. Mathematical formulation (deep dive)
  13. Final summary – Code only

How this notebook should be read¶

This notebook is designed to be read top to bottom.

Before every code cell, you will find a short explanation describing:

  • what we are about to do
  • why this step is necessary
  • how it fits into the overall process

Compared to scikit-learn, this notebook exposes more internal details of how a Deep Learning model is trained.

The goal is not only to run the code, but to understand what happens during training and how neural networks learn step by step.


What is Deep Learning (in this context)?¶

Deep Learning refers to a class of models based on neural networks with multiple layers.

These models are designed to:

  • learn complex, non-linear relationships
  • build internal representations of the data
  • improve performance as data complexity increases

In this notebook, we focus on Deep Learning for tabular classification using fully connected neural networks.


Why PyTorch?¶

PyTorch is a low-level deep learning framework that provides explicit control over:

  • model architecture
  • forward pass
  • loss computation
  • backpropagation
  • parameter updates

Unlike scikit-learn:

  • nothing is hidden
  • every step must be defined explicitly

This makes PyTorch ideal for:

  • learning how neural networks actually work
  • understanding gradient-based optimization
  • experimenting with custom architectures

Execution model: eager execution¶

PyTorch uses eager execution by default.

This means:

  • operations are executed immediately
  • tensors behave like regular Python objects
  • debugging is straightforward

Eager execution makes PyTorch:

  • intuitive to learn
  • flexible to experiment with
  • closer to the mathematical description of the model
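As a small illustration (assuming only that torch is installed), an operation returns a concrete tensor the moment it is called, with no separate graph-compilation step:

import torch

# Operations run immediately and return concrete values
a = torch.tensor([1.0, 2.0, 3.0])
b = a * 2 + 1            # evaluated right away
print(b)                 # tensor([3., 5., 7.])
print(b.sum().item())    # 15.0 — converts to a plain Python number on demand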

What you should expect from the results¶

With Deep Learning (PyTorch), you should expect:

  • non-linear decision boundaries
  • strong performance on complex data
  • behavior similar to scikit-learn neural networks
  • higher transparency during training

However:

  • more code is required
  • implementation errors are easier to make
  • careful design is necessary

1. Project setup and common pipeline¶

In this section we set up the common pipeline used across classification models in this project.

Although this notebook uses PyTorch, the overall workflow remains identical to the scikit-learn Deep Learning notebook.

This allows us to:

  • reuse the same data preparation steps
  • compare models fairly
  • isolate the effect of the framework choice
In [1]:
# Common imports used across classification models

import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay
)

from pathlib import Path
import matplotlib.pyplot as plt

# ====================================
# PyTorch imports
# ====================================

import torch
import torch.nn as nn
import torch.optim as optim

What changes with PyTorch¶

Compared to scikit-learn:

  • the pipeline structure remains the same
  • data preparation and evaluation stay unchanged
  • only the model implementation differs

With PyTorch, we explicitly define:

  • how the model processes the input
  • how the loss is computed
  • how parameters are updated

Nothing is hidden.

Every step of the learning process is written manually in code.

This makes PyTorch ideal for understanding what neural networks are actually doing during training.

In the next section, we will load the dataset and prepare it for PyTorch training.


2. Dataset loading¶

In this section we load the dataset used for the Deep Learning classification task.

We intentionally use the same dataset adopted in previous classification notebooks.

This ensures:

  • direct comparison with classical ML models
  • fair comparison across deep learning frameworks
  • focus on implementation differences, not on data
In [2]:
# ====================================
# Dataset loading
# ====================================

data = load_breast_cancer(as_frame=True)

X = data.data
y = data.target

What we have after this step¶

  • X contains the input features
  • y contains the target labels

This is a binary classification problem, where each sample belongs to one of two classes.

At this stage:

  • data is still in NumPy / pandas format
  • this is intentional for consistency
  • conversion to PyTorch tensors will happen later

In the next section, we will split the dataset into training and test sets.


3. Train-test split¶

In this section we split the dataset into training and test sets.

This step allows us to evaluate how well the neural network generalizes to unseen data.

In [3]:
# ====================================
# Train-test split
# ====================================

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

What we have after this step¶

After splitting the data:

  • the training set is used to learn model parameters
  • the test set is kept completely unseen
  • evaluation reflects real-world performance

An 80 / 20 split is a common default for medium-sized datasets.

In the next section, we will apply feature scaling.

For Deep Learning models, this step is mandatory.


4. Feature scaling (why we do it)¶

In this section we apply feature scaling to the input data.

For Deep Learning models, feature scaling is mandatory.

Neural networks rely on gradient-based optimization, which is highly sensitive to feature scale.

In [4]:
# Feature scaling

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Why we use standardization here¶

We use standardization for feature scaling because neural networks are trained using gradient-based optimization.

Standardization:

  • centers features around zero
  • ensures comparable feature variance
  • improves numerical stability during training

This helps:

  • gradients behave more predictably
  • optimization converge faster
  • training remain stable across layers

For Deep Learning models, this preprocessing choice is part of the model design, not just a data transformation.
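Concretely, standardization applies the z-score transformation, using statistics estimated on the training set only:

$$ x' = \frac{x - \mu_{\text{train}}}{\sigma_{\text{train}}} $$

where $\mu_{\text{train}}$ and $\sigma_{\text{train}}$ are the per-feature mean and standard deviation computed when the scaler is fitted on the training data.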

Why scaling is essential here¶

Without proper scaling:

  • gradients may vanish or explode
  • optimization becomes unstable
  • training may converge very slowly or fail

By scaling the features:

  • all inputs are brought to a comparable range
  • gradient descent becomes more stable
  • learning is faster and more reliable

At this point:

  • data is numerically ready
  • but still in NumPy format

In the next section, we will explain what this model is and how a neural network performs classification in PyTorch.


5. What is this model? (Deep Learning with PyTorch)¶

Before writing any PyTorch code, it is important to understand what the model does in practice and what we are manually controlling.

PyTorch does not hide the learning process. Everything that happens during training is explicitly written by us.

What do we want to achieve?¶

We want to build a model that:

  • receives a vector of input features
  • processes them through multiple transformations
  • outputs a class prediction

Each input sample can be seen as:

  • a list of numerical measurements
  • describing a single object
  • represented as a vector

The goal of the model is to learn how to transform this vector into the correct class label.

What does the model do, step by step?¶

A neural network for classification performs the following operations:

  1. Take the input feature vector
  2. Multiply it by learnable weights
  3. Add a bias term
  4. Apply a non-linear function
  5. Repeat this process across multiple layers
  6. Produce an output used to decide the class

Each step is simple. The power comes from repeating them many times in sequence.

What is a neuron, technically?¶

A neuron is a very simple computational unit.

It answers one basic question:

Is a specific pattern present in the input?

Technically, a neuron:

  • combines input features linearly
  • applies a non-linear transformation
  • outputs a single value

During training, the neuron learns which patterns matter by adjusting its weights.
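A minimal sketch of this computation in PyTorch terms (the weights and bias below are arbitrary illustrative numbers, not values learned by our model):

import torch

x = torch.tensor([0.5, -1.2, 3.0])    # input features
w = torch.tensor([0.8,  0.1, -0.4])   # learnable weights (arbitrary here)
b = torch.tensor(0.2)                 # learnable bias (arbitrary here)

z = torch.dot(w, x) + b               # linear combination of the inputs
a = torch.relu(z)                     # non-linear transformation -> single output value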

Why multiple layers?¶

A single layer can only learn simple patterns.

By stacking layers:

  • early layers learn basic feature combinations
  • deeper layers learn more abstract patterns
  • the final layer focuses on class separation

Each layer builds on the output of the previous one.

This is how the model gradually constructs a useful internal representation of the data.

What makes PyTorch different here?¶

With PyTorch:

  • we explicitly define the network structure
  • we manually control the training loop
  • we decide how loss is computed
  • we decide how parameters are updated

Nothing is automatic or hidden.

This allows us to:

  • see how predictions are produced
  • understand how errors drive learning
  • connect code directly to theory

How learning happens conceptually¶

Learning follows a simple cycle:

  1. The model makes a prediction
  2. The prediction is compared to the true label
  3. An error value is computed
  4. The model parameters are adjusted
  5. The process repeats

Each iteration slightly improves the alignment between predictions and true labels.

This gradual improvement is what we call training.

Key takeaway¶

A PyTorch neural network classifier:

  • processes data step by step
  • learns by repeatedly correcting its mistakes
  • builds internal representations through layers
  • requires explicit definition of every step

PyTorch forces us to understand how learning actually happens, not just what the final result is.

In the next section, we will implement this process manually by defining the model and writing the training loop.


6. Model training (PyTorch)¶

In this section we define and train a neural network classifier using PyTorch.

Unlike scikit-learn, PyTorch requires us to explicitly write:

  • the model architecture
  • the loss function
  • the optimizer
  • the training loop

This section shows what “training a neural network” really means in practice.

In [5]:
# ====================================
# Convert data to PyTorch tensors
# ====================================

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
In [6]:
# ====================================
# Define the neural network model
# ====================================

class NeuralNetwork(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.out = nn.Linear(32, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.out(x)
        return x


model = NeuralNetwork(input_dim=X_train_tensor.shape[1])
In [7]:
# ====================================
# Loss function and optimizer
# ====================================

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
In [8]:
# ====================================
# Training loop
# ====================================

num_epochs = 50

for epoch in range(num_epochs):
    model.train()

    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")
Epoch [10/50], Loss: 0.5893
Epoch [20/50], Loss: 0.4426
Epoch [30/50], Loss: 0.2967
Epoch [40/50], Loss: 0.1967
Epoch [50/50], Loss: 0.1361

What we just did (step by step, in detail)¶

In this section we manually implemented the entire training process of a neural network.

Nothing was automatic. Every step was explicitly written.

Let’s break it down carefully.

1. Converting data to PyTorch tensors¶

PyTorch models and their training machinery operate on tensors, not directly on NumPy arrays or pandas objects.

For this reason:

  • input features were converted to float32 tensors
  • target labels were converted to integer tensors

This step is purely technical, but it is mandatory: without tensors, PyTorch cannot compute gradients or perform optimization.

2. Defining the neural network architecture¶

We defined a neural network by creating a class that inherits from nn.Module.

Inside this class:

  • each layer is explicitly declared
  • each layer has its own learnable parameters
  • weights and biases are initialized automatically

The architecture defines:

  • how many transformations are applied
  • how information flows from input to output

At this stage, the model knows how to process data, but it does not yet know how to solve the problem.

3. Defining the forward pass¶

The forward method specifies what happens when data flows through the network.

In practice:

  • input data enters the first layer
  • linear transformations are applied
  • non-linear activations are applied
  • this process repeats across layers
  • the final layer produces raw class scores

The forward pass defines the computation graph that PyTorch uses to track operations and compute gradients automatically.
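As a quick sanity check (a sketch reusing the model and X_train_tensor defined in the code cells above), calling the model on a few samples runs the forward pass and returns one raw score per class:

with torch.no_grad():
    sample_scores = model(X_train_tensor[:3])

print(sample_scores.shape)   # torch.Size([3, 2]) — 3 samples, 2 class scores each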

4. Choosing the loss function¶

A neural network cannot learn without a way to measure its mistakes.

The loss function:

  • compares model predictions to true labels
  • outputs a single scalar value
  • represents how wrong the model is

CrossEntropyLoss is used because:

  • this is a classification problem
  • the output consists of class scores
  • it combines softmax and log-loss internally

The loss is the signal that drives learning.
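A small sketch of that internal combination, using arbitrary logits: CrossEntropyLoss gives the same value as applying log-softmax followed by the negative log-likelihood loss.

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.tensor([[2.0, -1.0], [0.5, 1.5]])   # arbitrary raw class scores
targets = torch.tensor([0, 1])                      # true class indices

loss_ce = nn.CrossEntropyLoss()(logits, targets)
loss_manual = F.nll_loss(F.log_softmax(logits, dim=1), targets)

print(loss_ce.item(), loss_manual.item())           # identical values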

5. Choosing the optimizer¶

The optimizer defines how the model learns.

It takes:

  • the gradients of the loss
  • the current model parameters

and updates the parameters to reduce the error.

Adam is used because:

  • it adapts learning rates automatically
  • it is stable in practice
  • it works well for most deep learning tasks

6. Writing the training loop¶

The training loop is the heart of Deep Learning.

For each iteration:

  1. Gradients are reset
  2. The model makes predictions on training data
  3. The loss is computed
  4. Gradients are calculated via backpropagation
  5. Parameters are updated

This process is repeated many times.

Each iteration:

  • slightly reduces the error
  • slightly improves the model
  • moves predictions closer to true labels

7. What learning means in practice¶

Learning does not happen all at once.

Instead:

  • the model starts with random parameters
  • predictions are initially poor
  • each update corrects the model slightly
  • performance improves gradually

This slow, iterative correction process is what makes neural networks powerful.

Key takeaway¶

Training a neural network in PyTorch means:

  • explicitly defining how data flows
  • explicitly measuring prediction error
  • explicitly updating model parameters
  • repeating this process many times

PyTorch exposes the full learning mechanism.

Nothing is hidden.

Understanding this loop means understanding Deep Learning at its core.


7. Model behavior and key parameters¶

In this section we analyze how the PyTorch neural network behaves and which parameters most strongly influence its performance.

Unlike classical models, the behavior of a neural network is not described by a single formula or coefficient.

Instead, it emerges from:

  • architecture choices
  • activation functions
  • training dynamics

Architecture and model capacity¶

The architecture defines:

  • how many layers the model has
  • how many neurons are in each layer

In our case:

  • input layer → input_dim features
  • first hidden layer → 64 neurons
  • second hidden layer → 32 neurons
  • output layer → 2 neurons (one per class)

More neurons and layers mean:

  • higher capacity
  • ability to learn more complex patterns

However:

  • too much capacity increases overfitting
  • too little capacity leads to underfitting
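One concrete way to quantify capacity (a small sketch using the model defined earlier) is to count learnable parameters: (30·64 + 64) + (64·32 + 32) + (32·2 + 2) = 4,130 for the 30 input features of this dataset.

# Total number of learnable parameters (weights and biases)
n_params = sum(p.numel() for p in model.parameters())
print(n_params)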

Role of hidden layers¶

Hidden layers allow the model to build intermediate representations.

Conceptually:

  • early layers combine raw features
  • deeper layers combine patterns of patterns
  • the final layer separates classes

Each layer transforms the data into a representation that is easier to classify than the previous one.

Activation functions and non-linearity¶

ReLU is used after each hidden layer.

Its role is to:

  • introduce non-linearity
  • allow complex decision boundaries
  • keep training stable

Without activation functions:

  • the network would behave like a linear model
  • depth would provide no benefit

Optimization behavior¶

The optimizer controls how the model learns from errors.

Adam:

  • adapts learning rates automatically
  • handles noisy gradients well
  • works reliably across many problems

The learning rate controls:

  • how fast the model changes
  • stability of training

If the learning rate is too high:

  • training becomes unstable

If it is too low:

  • training is very slow

Training dynamics¶

During training:

  • the loss should gradually decrease
  • parameter updates become smaller over time
  • predictions become more consistent

Learning is not instantaneous. The model improves through many small corrections.

This iterative refinement is a defining characteristic of neural networks.

Generalization behavior¶

Neural networks tend to:

  • fit training data very well
  • risk overfitting if not controlled

Good generalization requires:

  • appropriate model size
  • enough training data
  • careful monitoring of performance

Evaluation on unseen data is therefore essential.

Key takeaway¶

The behavior of a PyTorch neural network is determined by:

  • how the network is built
  • how non-linearity is introduced
  • how learning is controlled

Understanding these elements allows us to reason about model performance, instead of treating the network as a black box.

In the next section, we will use the trained model to generate predictions and analyze its outputs.


8. Predictions¶

In this section we use the trained PyTorch model to generate predictions on unseen test data.

As with other classification models, it is important to distinguish between:

  • raw model outputs
  • predicted class labels
  • predicted class probabilities
In [9]:
# ====================================
# Predictions with PyTorch
# ====================================

model.eval()
with torch.no_grad():

    logits = model(X_test_tensor)

    y_pred = torch.argmax(logits, dim=1)

    y_pred_proba = torch.softmax(logits, dim=1)

What the model outputs¶

The PyTorch model produces logits:

  • raw scores
  • one value per class
  • not probabilities

These values indicate how strongly the model favors each class.

From logits to class labels¶

To obtain predicted class labels:

  • we select the index of the largest logit
  • this corresponds to the most likely class

This is done using argmax.

From logits to probabilities¶

To obtain probabilities:

  • we apply the softmax function
  • values are normalized to sum to 1

Probabilities are useful for:

  • measuring confidence
  • analyzing borderline cases
  • setting custom decision thresholds

Important difference from training¶

During training:

  • CrossEntropyLoss applies softmax internally
  • logits are passed directly to the loss

During prediction:

  • we explicitly apply softmax
  • because we want interpretable probabilities

At this point, we have:

  • predicted class labels
  • predicted class probabilities

In the next section, we will evaluate these predictions using standard classification metrics.


9. Model evaluation¶

In this section we evaluate the performance of the trained PyTorch neural network on unseen test data.

Evaluation allows us to:

  • measure overall performance
  • analyze classification errors
  • compare results with other models
In [10]:
# ====================================
# Convert PyTorch tensors to NumPy
# ====================================

# sklearn metrics expect NumPy arrays
y_test_np = y_test_tensor.numpy()
y_pred_np = y_pred.numpy()
In [11]:
# ====================================
# Accuracy
# ====================================

accuracy = accuracy_score(y_test_np, y_pred_np)
accuracy
Out[11]:
0.9824561403508771
In [12]:
# ====================================
# Confusion matrix
# ====================================

cm = confusion_matrix(y_test_np, y_pred_np)
cm
Out[12]:
array([[42,  1],
       [ 1, 70]])
In [13]:
# ====================================
# Confusion matrix visualization
# ====================================

disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=[0, 1]
)

disp.plot(cmap="Blues")
plt.title("Confusion Matrix – Deep Learning (PyTorch)")
plt.show()
[Figure: confusion matrix heatmap – Deep Learning (PyTorch)]

How to read these results¶

Accuracy provides a high-level summary:

  • the proportion of correctly classified samples

However, accuracy alone does not describe the full error structure.

Confusion matrix¶

The confusion matrix shows:

  • correct predictions
  • false positives
  • false negatives

It allows us to understand how the model makes mistakes, not just how many.

Classification report¶

The classification report summarizes:

  • precision
  • recall
  • F1-score
  • support for each class

These metrics are especially important when class distributions are uneven.
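The report itself can be generated with the scikit-learn utilities already imported (a short sketch, reusing the NumPy arrays from the evaluation cells above):

# Precision, recall, F1-score and support for each class
print(classification_report(y_test_np, y_pred_np))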

Important note¶

The evaluation process is identical to the one used for classical ML models.

This allows:

  • direct comparison of results
  • objective assessment of model behavior
  • fair benchmarking across approaches

Key takeaway¶

PyTorch models integrate seamlessly with standard evaluation tools.

Once predictions are generated, model assessment is framework-independent.

In the next section, we will discuss when to use this type of model and when not to.


10. When to use it and when not to¶

Deep Learning with PyTorch is a powerful approach, but it is not always the best choice.

Choosing this model depends on:

  • problem complexity
  • data size
  • need for flexibility and control

When to use Deep Learning with PyTorch¶

PyTorch is a good choice when:

  • the relationship between features and classes is complex
  • non-linear patterns are important
  • you want full control over the training process
  • custom architectures are required
  • transparency of the learning process matters

It is especially useful for:

  • research
  • experimentation
  • advanced model customization

When NOT to use Deep Learning with PyTorch¶

PyTorch may not be ideal when:

  • a simpler model achieves similar performance
  • interpretability is a primary requirement
  • development time must be minimal
  • the dataset is small and simple

In these cases, classical machine learning models may be more efficient and easier to maintain.

Practical warning signs¶

You should reconsider using PyTorch if:

  • training is unstable or hard to debug
  • performance gains are marginal
  • model complexity grows unnecessarily
  • simpler pipelines are easier to manage

Complexity should always be justified by a clear performance or flexibility benefit.

Key takeaway¶

PyTorch provides:

  • maximum flexibility
  • explicit control
  • deep insight into the learning process

It is an excellent choice when understanding and customizing neural networks is as important as achieving high performance.

In the next section, we will cover model persistence and save the trained model for reuse.


11. Model persistence¶

In this section we save the trained PyTorch model and the preprocessing steps used during training.

Model persistence allows us to:

  • reuse the model without retraining
  • ensure reproducibility
  • separate training from inference
In [21]:
# ====================================
# Model persistence (PyTorch)
# ====================================

# Define model directory
model_dir = Path("models/supervised_learning/classification/deep_learning_pytorch")

# Create directory if it does not exist
model_dir.mkdir(parents=True, exist_ok=True)

# Save model parameters (state_dict)
torch.save(model.state_dict(), model_dir / "pytorch_model_state_dict.pt")

# Save scaler (part of the preprocessing pipeline)
import joblib
joblib.dump(scaler, model_dir / "scaler.joblib")
Out[21]:
['models\\supervised_learning\\classification\\deep_learning_pytorch\\scaler.joblib']

What exactly we saved¶

For PyTorch models, we do not save the entire model object.

Instead, we save:

  • the model's state_dict
  • which contains all learned parameters (weights and biases)

This is the recommended and safest approach in PyTorch.

Why we also save the scaler¶

Neural networks are highly sensitive to the scale of input features.

The scaler is therefore:

  • an essential part of the pipeline
  • required to preprocess new data correctly

Saving the scaler guarantees that future inputs are transformed in exactly the same way as during training.

How the model will be reused¶

To reuse the model later:

  1. recreate the model architecture
  2. load the saved state_dict
  3. load the scaler
  4. apply preprocessing
  5. run predictions

This ensures full consistency between training and inference.
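A minimal reload sketch, assuming the same NeuralNetwork class definition, the model_dir path used above, and the 30 input features of this dataset (X_new below is a placeholder for new, unseen samples):

import joblib
import torch

# 1. Recreate the architecture with the same input dimension
loaded_model = NeuralNetwork(input_dim=30)

# 2. Load the learned parameters
loaded_model.load_state_dict(torch.load(model_dir / "pytorch_model_state_dict.pt"))
loaded_model.eval()

# 3. Load the fitted scaler
loaded_scaler = joblib.load(model_dir / "scaler.joblib")

# 4.-5. Preprocess new data and predict
# X_new_scaled = loaded_scaler.transform(X_new)
# X_new_tensor = torch.tensor(X_new_scaled, dtype=torch.float32)
# with torch.no_grad():
#     predictions = torch.argmax(loaded_model(X_new_tensor), dim=1)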

At this point, the PyTorch classification model is fully trained, evaluated, and saved.


12. Mathematical formulation (deep dive)¶

This section provides a mathematical interpretation of the PyTorch neural network used for classification.

The goal is to connect the code we wrote with the underlying mathematical operations, without introducing unnecessary complexity.

Representation of the data¶

Each input sample is represented as a vector:

$$ x_i \in \mathbb{R}^d $$

where:

  • $d$ is the number of input features
  • each sample corresponds to one row in the dataset

The corresponding target label is:

$$ y_i \in \{0, 1\} $$

The supervised dataset is:

$$ \{(x_1, y_1), (x_2, y_2), \dots, (x_n, y_n)\} $$

Linear transformation in a neural network layer¶

Each fully connected layer applies a linear transformation of the form:

$$ z = W x + b $$

where:

  • $x$ is the input vector
  • $W$ is the weight matrix
  • $b$ is the bias vector
  • $z$ is the pre-activation output

This operation is implemented in PyTorch using nn.Linear.

Non-linear activation¶

After each linear transformation, a non-linear activation function is applied:

$$ a = \phi(z) $$

In this notebook, we use ReLU:

$$ \phi(z) = \max(0, z) $$

Non-linearity allows the network to model complex decision boundaries.

Layer composition¶

By stacking multiple layers, the network performs a sequence of transformations:

$$ x \rightarrow h_1 \rightarrow h_2 \rightarrow \dots \rightarrow \hat{y} $$

Each hidden layer learns a more abstract representation of the input data.

Output layer and logits¶

The output layer produces raw scores, called logits:

$$ \hat{y} = f(x) $$

Each component of $\hat{y}$ corresponds to one class.

These values are not probabilities.

From logits to probabilities¶

Class probabilities are obtained by applying the softmax function:

$$ P(y = c \mid x) = \frac{e^{\hat{y}_c}}{\sum_k e^{\hat{y}_k}} $$

Softmax converts logits into a probability distribution over classes.
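For example, with two logits $\hat{y} = (2, 0)$:

$$ P(y = 0 \mid x) = \frac{e^{2}}{e^{2} + e^{0}} \approx 0.88, \qquad P(y = 1 \mid x) = \frac{e^{0}}{e^{2} + e^{0}} \approx 0.12 $$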

Loss function¶

Training requires measuring prediction error.

This is done using a loss function:

$$ \mathcal{L}(y, \hat{y}) $$

For classification, the loss penalizes:

  • incorrect class predictions
  • low confidence in correct predictions

The loss is minimized during training.

Gradient-based optimization¶

Model parameters are updated iteratively using gradient descent:

$$ W \leftarrow W - \eta \nabla_W \mathcal{L} $$

where:

  • $\eta$ is the learning rate
  • gradients indicate how parameters should change

PyTorch computes gradients automatically using backpropagation.
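A minimal sketch of one such update on a single weight matrix (a toy example with a squared-error loss and random data; in the notebook, the optimizer performs the equivalent step for every parameter of the network):

import torch

W = torch.randn(2, 3, requires_grad=True)   # one weight matrix: 2 outputs, 3 inputs
x = torch.randn(3)                          # random input vector
target = torch.tensor([1.0, 0.0])           # arbitrary target
eta = 0.1                                   # learning rate

loss = ((W @ x - target) ** 2).mean()       # toy squared-error loss
loss.backward()                             # backpropagation fills W.grad

with torch.no_grad():
    W -= eta * W.grad                       # gradient descent update
    W.grad.zero_()                          # reset gradients for the next step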

Learning perspective¶

From a learning perspective:

  • the model starts with random parameters
  • predictions are initially inaccurate
  • each update slightly improves the model

Training is a gradual process of reducing prediction error.

Final takeaway¶

The PyTorch neural network classifier:

  • applies linear and non-linear transformations
  • produces logits for classification
  • learns by minimizing a loss function
  • updates parameters via gradient-based optimization

This mathematical structure directly corresponds to the code written in the training loop.


13. Final summary – Code only¶

The following cell contains the complete classification pipeline from data loading to model persistence.

No explanations are provided here on purpose.

This section is intended for:

  • quick execution
  • reference
  • reuse in scripts or applications
In [ ]:
# ====================================
# Imports
# ====================================

import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay
)

from pathlib import Path
import matplotlib.pyplot as plt
import joblib

import torch
import torch.nn as nn
import torch.optim as optim


# ====================================
# Dataset loading
# ====================================

data = load_breast_cancer(as_frame=True)

X = data.data
y = data.target


# ====================================
# Train-test split
# ====================================

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)


# ====================================
# Feature scaling
# ====================================

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# ====================================
# Convert to PyTorch tensors
# ====================================

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)


# ====================================
# Model definition
# ====================================

class NeuralNetwork(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.out = nn.Linear(32, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.out(x)
        return x


model = NeuralNetwork(input_dim=X_train_tensor.shape[1])


# ====================================
# Loss and optimizer
# ====================================

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


# ====================================
# Training loop
# ====================================

num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


# ====================================
# Predictions
# ====================================

model.eval()
with torch.no_grad():
    logits = model(X_test_tensor)
    y_pred = torch.argmax(logits, dim=1)
    y_pred_proba = torch.softmax(logits, dim=1)


# ====================================
# Model evaluation
# ====================================

y_test_np = y_test_tensor.numpy()
y_pred_np = y_pred.numpy()

accuracy = accuracy_score(y_test_np, y_pred_np)
cm = confusion_matrix(y_test_np, y_pred_np)

print("Accuracy:", accuracy)
print(cm)
print(classification_report(y_test_np, y_pred_np))


# ====================================
# Confusion matrix visualization
# ====================================

disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=[0, 1]
)

disp.plot(cmap="Blues")
plt.title("Confusion Matrix – Deep Learning (PyTorch)")
plt.show()


# ====================================
# Model persistence
# ====================================

model_dir = Path("models/supervised_learning/classification/deep_learning_pytorch")
model_dir.mkdir(parents=True, exist_ok=True)

torch.save(model.state_dict(), model_dir / "pytorch_model_state_dict.pt")
joblib.dump(scaler, model_dir / "scaler.joblib")