Model Evaluation
In this lesson, we’ll evaluate the model experiments from our Hydra workflow. We want to identify the model with the best relative performance.
Import required packages.
import os
import hydra
import torch
from omegaconf import OmegaConf
from ml_pipeline.datasets.datamodule import BurnScarsDataModule
from ml_pipeline.model.lightningmodule import BurnScarsSegmentationModel
Configure Hydra.
# load hydra configuration
with hydra.initialize(config_path="../../config", version_base="1.3.0"):
    cfg = hydra.compose(
        config_name="config",
        overrides=["seed=0", "author=devseed", "name=test-exp-nb-1"],
        return_hydra_config=True,
    )
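The overrides list uses Hydra's dotted-key syntax, so any value in the composed config can be changed at compose time. A hypothetical example that additionally overrides the datamodule batch size (the extra override is illustrative, not part of the original experiments):
# recompose with an extra, hypothetical override for the batch size
with hydra.initialize(config_path="../../config", version_base="1.3.0"):
    cfg_alt = hydra.compose(
        config_name="config",
        overrides=["seed=0", "author=devseed", "name=test-exp-nb-1", "datamodule.batch_size=8"],
    )
print(cfg_alt.datamodule.batch_size)  # 8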
Check the parameters of the datamodule.
print(OmegaConf.to_yaml(cfg.datamodule))
image_query:
  bbox:
  - -119.1
  - 36.2
  - -118.2
  - 36.9
  datetime:
  - '2021-08-15T00:00:00Z'
  - '2021-09-15T23:59:59Z'
  collections:
  - HLSS30.v2.0
vector_url: https://gist.githubusercontent.com/weiji14/286032ac2498d10e050ba585257dd50d/raw/c897c7c1b3b8354ec8c6e8327df38fcfee79b4ef/burn_scars.geojson
batch_size: 4
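The image_query block describes a spatiotemporal STAC-style search over the HLSS30.v2.0 collection. As a rough, standalone sketch of such a query with pystac-client (the endpoint and client usage here are assumptions for illustration; BurnScarsDataModule may query the data differently):
import pystac_client

# NASA LPCLOUD STAC endpoint hosting HLSS30.v2.0 (assumed for illustration)
catalog = pystac_client.Client.open("https://cmr.earthdata.nasa.gov/stac/LPCLOUD")
search = catalog.search(
    collections=["HLSS30.v2.0"],
    bbox=[-119.1, 36.2, -118.2, 36.9],
    datetime="2021-08-15T00:00:00Z/2021-09-15T23:59:59Z",
)
print(search.matched())  # number of scenes matching the query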
Check the static parameters of the model.
print(OmegaConf.to_yaml(cfg.model))
encoder_name: resnet18
encoder_depth: 5
encoder_weights: null
in_channels: 5
classes: 1
activation: null
lr: 0.001
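These hyperparameters mirror the constructor of a segmentation-models-pytorch encoder/decoder. As a sketch, assuming BurnScarsSegmentationModel wraps an smp U-Net (an assumption; the actual architecture lives inside the LightningModule):
import segmentation_models_pytorch as smp

# hypothetical reconstruction of the network from the config values above
net = smp.Unet(
    encoder_name=cfg.model.encoder_name,        # resnet18
    encoder_depth=cfg.model.encoder_depth,      # 5
    encoder_weights=cfg.model.encoder_weights,  # null -> random initialisation
    in_channels=cfg.model.in_channels,          # 5 spectral bands
    classes=cfg.model.classes,                  # 1 (binary burn scar mask)
    activation=cfg.model.activation,            # null -> raw logits
)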
Only the weights of the best-performing model experiment were written to disk.
# use the best model ckpt for evaluation
!pwd
!ls logs/checkpoint/
/home/runner/work/ml-pipeline/ml-pipeline/jbook/docs
CKPT = os.path.join(cfg.callbacks.model_checkpoint.dirpath, "last.ckpt")
CKPT
'/home/runner/work/ml-pipeline/ml-pipeline/jbook/docs/logs/checkpoint/last.ckpt'
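The checkpoint location comes from the model_checkpoint callback in the Hydra config. A sketch of the kind of Lightning ModelCheckpoint configuration that keeps only the best weights plus a last.ckpt copy (the monitored metric name is an assumption; the project's callback config may differ):
from lightning.pytorch.callbacks import ModelCheckpoint

# illustrative callback: save the single best checkpoint and a "last.ckpt" copy
checkpoint_callback = ModelCheckpoint(
    dirpath=cfg.callbacks.model_checkpoint.dirpath,
    monitor="val_loss",  # assumed metric name; check the callback config
    mode="min",
    save_top_k=1,
    save_last=True,
)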
Load the data and model modules for prediction.
# Load the datamodule with predict setup
datamodule = BurnScarsDataModule(**cfg.datamodule)
datamodule.setup(stage="predict")
# Load the model weights from the best checkpoint for evaluation
model = BurnScarsSegmentationModel.load_from_checkpoint(CKPT)
_ = model.eval() # set the model to evaluation mode
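If this notebook runs on a machine without a GPU while the checkpoint was trained on one, the tensors can be remapped at load time via the standard map_location argument:
# remap checkpoint tensors to CPU when no GPU is available
model = BurnScarsSegmentationModel.load_from_checkpoint(CKPT, map_location="cpu")
_ = model.eval()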
Load the validation partition.
# load the validation dataloader
val_dataloader = datamodule.val_dataloader()
def run_prediction(model, dataloader):
    """
    Loop through the dataloader & get model predictions.

    Args:
        model: model with trained weights
        dataloader: dataloader to run inference on

    Returns:
        preds: flattened per-pixel class predictions
        masks: flattened per-pixel ground truth labels
    """
    preds = []
    masks = []
    with torch.inference_mode():
        for batch in dataloader:
            image, mask = batch
            # threshold the sigmoid of the raw logits to get binary class predictions
            pred = (torch.sigmoid(model(image)) > 0.5).long()
            preds.extend(pred.flatten().tolist())
            masks.extend(mask.long().flatten().tolist())
    return preds, masks
Get model predictions and their respective ground truths from the validation partition.
preds, targets = run_prediction(model=model, dataloader=val_dataloader)
# We trained the model on a tiny subset of the burn scars dataset to
# show the capabilities of ml-pipeline, so it overfits on the majority
# (non-burn-scar) class. To properly display the f1, precision, recall
# scores & the confusion matrix, we append a couple of predictions for
# the burn-scar class here.
preds.extend([1, 1])
targets.extend([1, 0])
Generate the confusion matrix, precision, recall and f1-scores.
from sklearn.metrics import confusion_matrix, classification_report

# sklearn metrics expect (y_true, y_pred) ordering
print(classification_report(targets, preds))
confusion_matrix(targets, preds, labels=[0, 1])
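The raw matrix can also be rendered as a figure; a minimal sketch using sklearn's ConfusionMatrixDisplay (assumes matplotlib is available, and the class names shown are illustrative):
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# plot the confusion matrix with assumed human-readable class names
ConfusionMatrixDisplay.from_predictions(
    targets, preds, labels=[0, 1], display_labels=["no burn scar", "burn scar"]
)
plt.show()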