# Source code for medsegpy.evaluation.sem_seg_evaluation

import copy
import logging
import os
import time
from typing import Sequence, Union

import h5py
import numpy as np
from fvcore.common.file_io import PathManager

from medsegpy.config import Config
from medsegpy.data import MetadataCatalog

from .build import EVALUATOR_REGISTRY
from .evaluator import DatasetEvaluator
from .metrics import Metric, MetricsManager


def get_stats_string(manager: MetricsManager):
    """Returns a formatted metrics manager summary string.

    Args:
        manager (MetricsManager): The manager whose results to format.

    Returns:
        str: A formatted string detailing manager results.
    """
    inference_runtimes = np.asarray(manager.runtimes)
    s = "============ Overall Summary ============\n"
    s += "%s\n" % manager.summary()
    s += "Inference time (Mean +/- Std. Dev.): {:0.2f} +/- {:0.2f} seconds.\n".format(
        np.mean(inference_runtimes), np.std(inference_runtimes)
    )
    return s
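
# Illustrative output (the middle portion comes from ``manager.summary()`` and
# varies with the configured metrics; the timing numbers are placeholders):
#
#   ============ Overall Summary ============
#   <manager.summary() output>
#   Inference time (Mean +/- Std. Dev.): 1.23 +/- 0.45 seconds.
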
@EVALUATOR_REGISTRY.register()
class SemSegEvaluator(DatasetEvaluator):
    """Evaluator for semantic segmentation-related tasks."""

    def __init__(
        self,
        dataset_name: str,
        cfg: Config,
        output_folder: str = None,
        save_raw_data: bool = False,
        stream_evaluation: bool = True,
    ):
        """
        Args:
            dataset_name (str): Name of the dataset to be evaluated.
            cfg (Config): The experiment config.
            output_folder (str): An output directory to dump results.
            save_raw_data (:obj:`bool`, optional): Save probabilities, labels,
                and ground truth masks to an h5 file.
            stream_evaluation (:obj:`bool`, optional): If `True`, evaluates
                data as it comes in to avoid holding too many objects in
                memory.
        """
        self._config = cfg
        self._dataset_name = dataset_name
        self._output_folder = (
            output_folder
            if output_folder
            else os.path.join(cfg.OUTPUT_DIR, "test_results")
        )
        PathManager.mkdirs(self._output_folder)
        self._num_classes = cfg.get_num_classes()
        self._ignore_label = 0
        self._logger = logging.getLogger(__name__)

        meta = MetadataCatalog.get(dataset_name)
        self._meta = meta

        cat_ids = cfg.CATEGORIES
        contiguous_id_map = meta.get("category_id_to_contiguous_id")
        contiguous_ids = [
            contiguous_id_map[tuple(x) if isinstance(x, list) else x]
            for x in cat_ids
        ]
        categories = meta.get("categories")
        categories = [categories[c_id] for c_id in contiguous_ids]
        self.spacing = meta.get("spacing", None)

        self._categories = categories
        self._metrics_manager = None
        self._predictions = None
        self._scan_cnt = 0
        self._results_str = ""
        self._save_raw_data = save_raw_data
        self.stream_evaluation = stream_evaluation
        self._output_activation = cfg.LOSS[1]
        self._output_includes_background = cfg.INCLUDE_BACKGROUND

    def _get_metrics(self) -> Sequence[Union[str, Metric]]:
        """Returns metrics to compute, as specified in the config."""
        return self._config.TEST_METRICS

    def reset(self):
        self._metrics_manager = MetricsManager(
            class_names=self._categories, metrics=self._get_metrics()
        )
        self._predictions = []
        self._scan_cnt = 0
        self._results_str = ""
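
    # Construction sketch (the dataset name is hypothetical; ``cfg`` must be a
    # populated medsegpy Config and the dataset registered in MetadataCatalog):
    #
    #   evaluator = SemSegEvaluator("oai_2d_test", cfg, save_raw_data=True)
    #   evaluator.reset()  # required before processing: allocates the manager
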
    def process(self, inputs, outputs):
        """
        See :class:`DatasetEvaluator` in evaluator.py for argument details.
        """
        output_activation = self._output_activation
        includes_bg = self._output_includes_background

        for input, output in zip(inputs, outputs):
            # Copy because we may modify below.
            input = copy.deepcopy(input)
            output = copy.deepcopy(output)

            y_pred = output["y_pred"]
            if output_activation == "sigmoid":
                labels = (y_pred > 0.5).astype(np.uint8)
            elif output_activation == "softmax":
                # One-hot encode the argmax class along the channel axis.
                labels = np.zeros_like(y_pred, dtype=np.uint8)
                l_argmax = np.argmax(y_pred, axis=-1)
                for c in range(labels.shape[-1]):
                    labels[l_argmax == c, c] = 1
            else:
                raise ValueError(
                    "output activation {} not supported".format(output_activation)
                )

            # Background is always excluded from analysis.
            if includes_bg:
                y_true = output["y_true"][..., 1:]
                y_pred = output["y_pred"][..., 1:]
                labels = labels[..., 1:]
                # if y_true.ndim == 3:
                #     y_true = y_true[..., np.newaxis]
                #     y_pred = y_pred[..., np.newaxis]
                #     labels = labels[..., np.newaxis]
                output["y_true"] = y_true
                output["y_pred"] = y_pred

            time_elapsed = output["time_elapsed"]
            if self.stream_evaluation:
                self.eval_single_scan(input, output, labels, time_elapsed)
            else:
                self._predictions.append((input, output, labels, time_elapsed))

    def eval_single_scan(self, input, output, labels, time_elapsed):
        metrics_manager = self._metrics_manager
        spacing = input["scan_spacing"] if "scan_spacing" in input else self.spacing
        logger = self._logger
        save_raw_data = self._save_raw_data
        output_dir = self._output_folder

        self._scan_cnt += 1
        scan_cnt = self._scan_cnt
        scan_id = input["scan_id"]

        y_true: np.ndarray = output["y_true"]
        x: np.ndarray = np.squeeze(input["x"])

        metrics_kwargs = {"spacing": spacing} if spacing is not None else {}
        summary = metrics_manager(
            scan_id,
            y_true=y_true,
            y_pred=labels,
            x=x,
            runtime=time_elapsed,
            **metrics_kwargs
        )

        logger_info_str = "Scan #{:03d} (name = {}, {:0.2f}s) = {}".format(
            scan_cnt, scan_id, time_elapsed, summary
        )
        self._results_str = self._results_str + logger_info_str + "\n"
        logger.info(logger_info_str)

        if output_dir and save_raw_data:
            save_name = os.path.join(output_dir, "{}.pred".format(scan_id))
            with h5py.File(save_name, "w") as h5f:
                h5f.create_dataset("probs", data=output["y_pred"])
                h5f.create_dataset("labels", data=labels)
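
    # Reading back a saved prediction file (a sketch; the path is hypothetical,
    # but the "probs"/"labels" keys match the datasets written above):
    #
    #   with h5py.File("test_results/scan_001.pred", "r") as h5f:
    #       probs = h5f["probs"][()]    # per-class probabilities
    #       labels = h5f["labels"][()]  # binarized predictions
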
    def evaluate(self):
        """Evaluates popular medical segmentation metrics specified in the config.

        * Evaluates on popular medical segmentation metrics. For supported
          segmentation metrics, see :class:`MetricsManager`.
        * Saves overlay images.
        * Saves probability predictions.

        Note that, by default, the coefficient of variation (CV) is calculated
        as a root-mean-squared quantity rather than a mean.
        """
        output_dir = self._output_folder
        logger = self._logger

        if self._predictions:
            # Non-streaming mode: evaluate all buffered scans now.
            for input, output, labels, time_elapsed in self._predictions:
                self.eval_single_scan(input, output, labels, time_elapsed)

        results_str = self._results_str
        stats_string = get_stats_string(self._metrics_manager)
        logger.info("--" * 20)
        logger.info("\n" + stats_string)
        logger.info("--" * 20)

        if output_dir:
            test_results_summary_path = os.path.join(output_dir, "results.txt")

            # Write details to test file.
            with open(test_results_summary_path, "w+") as f:
                f.write("Results generated on %s\n" % time.strftime("%X %x %Z"))
                f.write(
                    "Weights Loaded: %s\n"
                    % os.path.basename(self._config.TEST_WEIGHT_PATH)
                )
                f.write("--" * 20)
                f.write("\n")
                f.write(results_str)
                f.write("--" * 20)
                f.write("\n")
                f.write(stats_string)

            # df = self._metrics_manager.data_frame()
            # df.to_csv(os.path.join(output_dir, "metrics.csv"), header=True, index=True)  # noqa
            df = self._metrics_manager.data()["scan_data"]
            df.to_csv(os.path.join(output_dir, "metrics.csv"), header=True, index=True)

        # TODO: Convert segmentation metrics to valid results matrix.
        return {}
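

# End-to-end sketch of the evaluator protocol implemented above. The dict keys
# mirror what ``process`` and ``eval_single_scan`` read; the arrays, config,
# and timing value are placeholders, not a real inference pipeline:
#
#   evaluator = SemSegEvaluator(dataset_name, cfg)
#   evaluator.reset()
#   evaluator.process(
#       inputs=[{"scan_id": "scan_001", "x": x_volume}],
#       outputs=[{"y_pred": probs, "y_true": gt_one_hot, "time_elapsed": 2.1}],
#   )
#   results = evaluator.evaluate()  # writes results.txt / metrics.csv; returns {}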