Metric Code Examples

The Python code of the metrics available in the system can be viewed from the application interface:

On the metrics catalog page (Control Panel > Metrics), select the desired metric, hover over the menu symbol (three dots) on the right side of the corresponding row and click "View".
A screen form with information about the metric will open. Scroll down the page and click the "Open code" button in the bottom right corner of the screen form.

Example of a Scalar Metric with a Traffic Light without a Chart

The flags is_scalar = True and is_signal = True are specified. The methods scalar and signal are declared and implemented. The methods __call__ and save are declared, but the implementation is omitted.

from typing import Literal

import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error


class rvc_3_MAPE:
    """
    Mean Absolute Percentage Error (MAPE). Average absolute error in percentage

    Attributes:
        __desc__ (str): Description of the class.
        __tags__ (list[str]): List of tags associated with the class.
        is_scalar (bool): Whether the metric is scalar or not.
        is_signal (bool): Whether the metric has signal or not.

    """

    __desc__ = "Mean Absolute Percentage Error (MAPE). Average absolute error in percentage"
    __tags__ = ["core", "regression", "scalar"]
    is_scalar = True
    is_signal = True

    def __init__(
        self,
        df: pd.DataFrame,
        predict_column: str,
        target_column: str,
        threshold_yellow: float = 0.3,
        threshold_red: float = 0.4,
    ):
        if df.empty:
            raise Exception("Dataframe is empty")
        if target_column not in df:
            raise ValueError(f"Field {target_column} does not exist in the dataframe")
        if predict_column not in df:
            raise ValueError(f"Field {predict_column} does not exist in the dataframe")

        self.predict_column = predict_column
        self.target_column = target_column
        self.df = df.astype({self.predict_column: "float", self.target_column: "float"})
        self.threshold_yellow = threshold_yellow
        self.threshold_red = threshold_red

    def __call__(self) -> None:
        pass

    def scalar(self) -> int | float:
        df = self.df.loc[:, [self.target_column, self.predict_column]].dropna()[
            abs(self.df[self.target_column]) > 0
        ]

        self.scalar_value = mean_absolute_percentage_error(
            y_pred=df[self.predict_column],
            y_true=df[self.target_column],
        )

        return self.scalar_value

    def signal(self) -> Literal["red", "yellow", "green"]:
        signal_light = "green"

        if self.scalar_value > self.threshold_red:
            signal_light = "red"
        elif self.scalar_value > self.threshold_yellow:
            signal_light = "yellow"

        return signal_light

    def save(self, output_dir: str) -> dict[str, str] | None:
        pass

Result:

Example of a Scalar Metric with a Traffic Light and a Chart

The flags is_scalar = True and is_signal = True are specified. The methods scalar and signal are declared and implemented. The methods __call__ and save are declared and implemented.

from typing import Any, Dict, Literal, Optional

import numpy as np
import pandas as pd
import plotly.graph_objects as go


class r_2_5_KS_on_scale:
    """
    Kolmogorov-Smirnov Test

    Shows how well the model's score separates "good" clients from "bad" ones in terms of the rating scale.

    Attributes:
        __desc__ (str): Description of the class.
        __tags__ (list[str]): List of tags associated with the class.
        is_scalar (bool): Whether the metric is scalar or not.
        is_signal (bool): Whether the metric has signal or not.

    """

    __desc__ = "KS-test on scale. Kolmogorov-Smirnov Test"
    __tags__ = ["risk", "scalar"]

    is_scalar = True
    is_signal = True

    def __init__(
        self,
        df: pd.DataFrame,
        scale_column: str,
        target_column: str,
        threshold_yellow: float = 10,
        threshold_red: float = 30,
    ):
        self.scale_column = scale_column
        self.target_column = target_column
        self.df = df.astype({self.target_column: "float"})
        self.threshold_yellow = threshold_yellow
        self.threshold_red = threshold_red

        if self.df.empty:
            raise Exception("Dataframe is empty")
        if self.target_column not in self.df:
            raise ValueError(f"Field {self.target_column} does not exist in the dataframe")
        if self.scale_column not in self.df:
            raise ValueError(f"Field {self.scale_column} does not exist in the dataframe")
        if self.df[self.scale_column].nunique() > 100:
            raise Exception("Error: the scale variable is not categorical")

    def __call__(self) -> None:
        dataset = self.df.loc[:, [self.target_column, self.scale_column]].dropna()

        # rating scale digit number
        # (the method of obtaining depends on the data format in the self.scale column)
        dataset["bin_number"] = dataset[self.scale_column].map(
            lambda x: int(x.split("_")[-1])
        )  # dataset[self.scale].astype('category').cat.codes#

        dataset = dataset.sort_values(by=["bin_number"], ascending=False)

        good_cnt = dataset[dataset[self.target_column] == 0].shape[0]
        bad_cnt = dataset[dataset[self.target_column] == 1].shape[0]

        gr_bad = (
            pd.DataFrame(
                dataset.groupby("bin_number", observed=False)[self.target_column].sum()
            ).cumsum()
            / bad_cnt
        )
        dataset["target_inverse"] = np.where(dataset[self.target_column] == 1, 0, 1)
        gr_good = (
            pd.DataFrame(
                dataset.groupby("bin_number", observed=False)["target_inverse"].sum()
            ).cumsum()
            / good_cnt
        )

        ks_calc_temp = pd.merge(gr_good, gr_bad, how="left", left_index=True, right_index=True)
        ks_calc_temp["diff"] = 0
        ks_calc_temp["diff"] = abs(
            ks_calc_temp.iloc[:, 0:1].values - ks_calc_temp.iloc[:, 1:2].values
        )

        self.scalar_value = 100 * ks_calc_temp["diff"].max()
        ks_result = round(self.scalar_value, 2)
        result_idx = ks_calc_temp["diff"].argmax()

        x_value1 = gr_bad.index.values.tolist()
        y_value1 = gr_bad[self.target_column].values.astype("float").tolist()
        x_value2 = gr_good.index.values.tolist()
        y_value2 = gr_good["target_inverse"].values.astype("float").tolist()
        x_value3 = [
            float(gr_bad.index.values[result_idx]),
            float(gr_bad.index.values[result_idx]),
        ]
        y_value3 = [
            float(gr_bad[self.target_column].values[result_idx]),
            float(gr_good["target_inverse"].values[result_idx]),
        ]

        line1 = go.Scatter(
            mode="lines",
            x=x_value1,
            y=y_value1,
            name="bad",
            line={"width": 3},
            marker={"color": "#63666A"},
        )
        line2 = go.Scatter(
            mode="lines",
            x=x_value2,
            y=y_value2,
            name="good",
            line={"width": 3},
            marker={"color": "#3eb489"},
        )
        line3 = go.Scatter(
            mode="lines",
            x=x_value3,
            y=y_value3,
            name=f"KS-statistic = {ks_result.astype('float')}",
            marker={"color": "black"},
        )
        self.fig = go.Figure(data=[line1, line2, line3])
        self.fig.layout = self.custom_layout()

    def scalar(self) -> int | float:
        return self.scalar_value

    def signal(self) -> Literal["red", "yellow", "green"]:
        signal_light = "green"

        if self.scalar_value > self.threshold_red:
            signal_light = "red"
        elif self.scalar_value > self.threshold_yellow:
            signal_light = "yellow"

        return signal_light

    def custom_layout(self) -> Optional[Dict[str, Any]]:
        return {
            "title": {"text": "<b>Kolmogorov-Smirnov Test</b>", "x": 0.1, "y": 0.97},
            "legend": {"yanchor": "bottom", "y": 0.05, "xanchor": "right", "x": 1},
            "yaxis": {"title": "Cumulative Share", "side": "left"},
            "xaxis": {
                "title": "Rating Scale Digit",
                "side": "left",
                "type": "category",
                "domain": [0, 0.8],
            },
            "margin": {"t": 35, "b": 5, "l": 5, "r": 5},
        }

    def save(self, output_dir: str) -> dict[str, str] | None:
        self.fig.write_html(
            f"{output_dir}/data.html",
            config={"displaylogo": False},  # remove the plotly logo
        )
        return {f"scale_{self.scale_column}": f"{output_dir}/data.html"}

Result:

Example of Metric Code with Chart, but without Scalar and Signal

The flags is_scalar = False and is_signal = False are specified. The methods scalar and signal are not declared. The methods __call__ and save are declared and implemented.

from typing import Any, Dict, Optional

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.stats import norm


class r_6_2_Binomial_test:
    """
    Checks the average default level in the rating scale bucket against
    the confidence interval built on the model scores.

    Attributes:
        __desc__ (str): Description of the class.
        __tags__ (list[str]): List of tags associated with the class.
        is_scalar (bool): Whether the metric is scalar or not.
        is_signal (bool): Whether the metric has signal or not.

    """

    __desc__ = "Binomial Test. Биномиальный тест"
    __tags__ = ["risk"]

    is_scalar = False
    is_signal = False

    def __init__(
        self,
        df: pd.DataFrame,
        predict_column: str,
        target_column: str,
        scale_column: str,
        confidence_level: float = 0.99,
    ):
        if predict_column not in df.columns:
            raise ValueError(
                f"Invalid column name for 'predict_column'. "
                f"There is not colomn '{predict_column}' in the dataframe"
            )
        if target_column not in df.columns:
            raise ValueError(
                f"Invalid column name for 'target_column'. "
                f"There is not colomn '{target_column}' in the dataframe"
            )
        if scale_column not in df.columns:
            raise ValueError(
                f"Invalid column name for 'scale_column'. "
                f"There is not colomn '{scale_column}' in the dataframe"
            )

        self.predict_column = predict_column
        self.target_column = target_column
        self.scale_column = scale_column
        self.df = df.astype({self.predict_column: "float", self.target_column: "float"})
        self.confidence_level = confidence_level

        if self.df.empty:
            raise ValueError("Dataframe is empty")
        if self.df[self.scale_column].nunique() > 100:
            raise Exception("Error: scale variable is not categorical")

    def __call__(self) -> None:
        data_gr = (
            self.df[[self.scale_column, self.target_column, self.predict_column]]
            .groupby([self.scale_column], observed=False)
            .agg({self.target_column: ["sum", "count"], self.predict_column: ["mean"]})
            .reset_index()
        )

        data_gr.columns = [self.scale_column, self.target_column, "cnt_all", self.predict_column]
        data_gr["target_prc"] = data_gr[self.target_column] / data_gr["cnt_all"]

        data_gr["CI_LEFT"] = data_gr[self.predict_column] - norm.ppf(
            self.confidence_level
        ) * np.sqrt(
            (data_gr[self.predict_column] * (1 - data_gr[self.predict_column])) / data_gr["cnt_all"]
        )
        data_gr["CI_RIGHT"] = data_gr[self.predict_column] + norm.ppf(
            self.confidence_level
        ) * np.sqrt(
            (data_gr[self.predict_column] * (1 - data_gr[self.predict_column])) / data_gr["cnt_all"]
        )

        data_gr["color"] = data_gr.apply(
            lambda x: "green"
            if (x["target_prc"] >= x["CI_LEFT"]) & (x["target_prc"] <= x["CI_RIGHT"])
            else "red",
            axis=1,
        )

        data_gr = data_gr.sort_values(self.scale_column, key=lambda x: x.str[-3:])
        # sorting above is specific to the particular df,
        # pay attention to the format of entries in the scale column
        # when running on new data

        line1 = go.Scatter(
            mode="lines",
            x=data_gr[self.scale_column].tolist(),
            y=data_gr["CI_RIGHT"].tolist(),
            name="Upper CI Bound",
            marker={"color": "#23654D"},
            xaxis="x1",
            yaxis="y1",
        )
        line2 = go.Scatter(
            mode="lines",
            x=data_gr[self.scale_column].tolist(),
            y=data_gr["CI_LEFT"].tolist(),
            name="Lower CI Bound",
            marker={"color": "#23654D"},
            xaxis="x1",
            yaxis="y1",
        )
        line3 = go.Scatter(
            mode="markers",
            x=data_gr[self.scale_column].tolist(),
            y=data_gr["target_prc"].tolist(),
            name="Actual Default Probability",
            marker={"color": data_gr["color"].tolist(), "size": 24},
            xaxis="x1",
            yaxis="y1",
        )

        self.fig = go.Figure(data=[line1, line2, line3])
        self.fig.layout = self.custom_layout()

    def custom_layout(self) -> Optional[Dict[str, Any]]:
        return {
            "title": {"text": "<b>Binomial Test</b>", "x": 0.1, "y": 0.97},
            "legend": {"yanchor": "bottom", "y": 0.01, "xanchor": "left", "x": 1},
            "yaxis": {"title": "Default Probability", "side": "left"},
            "xaxis": {
                "title": "Rating Scale Tier",
                "side": "right",
                "type": "category",
                "domain": [0, 1],
            },
            "margin": {"t": 35, "b": 5, "l": 5, "r": 5},
        }

    def save(self, output_dir: str) -> dict[str, str] | None:
        self.fig.write_html(
            f"{output_dir}/data.html",
            config={"displaylogo": False},  # remove the plotly logo
        )
        return {f"scale_{self.scale_column}": f"{output_dir}/data.html"}

Result:

Example of Metric Code with Multiple Charts

The methods __call__ and save are declared and implemented.

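The charts are built in the __call__ method; in the save method they are written to files in a loop (one HTML file per column when split_charts is enabled).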

from typing import Any, Dict, Optional

import pandas as pd
import plotly.graph_objects as go


class cd_2_4_Density_Distr_features:
    """
    Density Distribution for Selected Columns
    Attributes:
        __desc__ (str): Description of the class.
        __tags__ (list[str]): List of tags associated with the class.
        is_scalar (bool): Whether the metric is scalar or not.
        is_signal (bool): Whether the metric has signal or not.

    """

    __desc__ = (
        "Density Distribution for Selected Columns. Density distribution for selected fields"
    )
    __tags__ = ["core", "data"]

    is_scalar = False
    is_signal = False

    def __init__(
        self,
        df: pd.DataFrame,
        field_columns: str,
        categorial_threshold: int = 10,
        split_charts: bool = False,
    ):
        self.df = df
        self.categorial_threshold = categorial_threshold
        self.split_charts = split_charts
        self.field_columns = [x.strip() for x in field_columns.split(",")]
        if self.df.empty:
            raise Exception("Dataframe is empty")
        for field in self.field_columns:
            if field not in self.df:
                raise ValueError(f"Field {field} does not exist in the dataframe")

    def __call__(self) -> None:
        self.df = self.df[self.field_columns]
        charts_dict = self.create_fields_charts()

        if self.split_charts:
            self.figs = {
                column_name: go.Figure(
                    data=[chart], layout=self.custom_layout(column_name=column_name)
                )
                for column_name, chart in charts_dict.items()
            }
        else:
            self.fig = go.Figure(data=list(charts_dict.values()), layout=self.custom_layout())

    def create_fields_charts(self):
        signal = {}
        self.min_x = 0
        self.max_x = 0
        counted_labels = []

        # loop through all columns in df
        for columnName, columnData in self.df.items():
            # if the data in the column is not numeric, skip it
            if not pd.api.types.is_numeric_dtype(columnData):
                print(f'Column "{columnName}" type is not numeric')
                continue

            counted_labels.append(columnName)
            visible_mode = (
                "legendonly" if columnName != counted_labels[0] and not self.split_charts else True
            )

            # if the data in the column is categorical, build a histogram
            if columnData.nunique() <= self.categorial_threshold:
                freq_df = (
                    columnData.value_counts(normalize=True, sort=False, dropna=True)
                    .reset_index()
                    .sort_values(columnName)
                )

                freq_df["percent"] = freq_df["proportion"] * 100
                if self.split_charts:
                    freq_df[columnName] = freq_df[columnName].astype("string")

                elem = go.Bar(
                    x=freq_df[columnName].tolist(),
                    y=freq_df["percent"].tolist(),
                    name=columnName,
                    opacity=0.7,
                    marker=dict(line=dict(color="black", width=1.0)),
                    visible=visible_mode,
                )
                signal[columnName] = elem
                continue

            # otherwise - linear density distribution graph
            vals = columnData.dropna().values
            nbucket = int(len(vals) / 10) + 1
            den_x = []
            den_y = []
            wgth = (max(vals) - min(vals)) / nbucket  # width of one interval
            minval = min(vals)
            self.max_x = max(max(vals), self.max_x)
            self.min_x = min(min(vals), self.min_x)
            self.max_pos = 0

            for i in range(0, nbucket):
                count = 0
                for j in vals:
                    if (minval + i * wgth) <= j < (minval + (i * wgth) + wgth):
                        count = count + 1
                den_x.append(round((minval + i * wgth + wgth / 2), 6))
                den_y.append(round(count * 100 / (len(vals)), 6))

            elem = go.Scatter(
                x=den_x,
                y=den_y,
                name=columnName,
                mode="lines",
                line_width=4,
                line_dash="solid",
                visible=visible_mode,
            )

            self.max_pos = max(max(den_y), self.max_pos)

            signal[columnName] = elem

        return signal

    def custom_layout(self, column_name: str | None = None) -> Optional[Dict[str, Any]]:
        column_info = (
            " of selected columns" if column_name is None else f" for column <b>{column_name}</b>"
        )
        return {
            "title": {"text": f"<b>Density Distribution</b>{column_info}", "x": 0.1, "y": 0.98},
            "legend": {"yanchor": "bottom", "y": 0.05, "xanchor": "right", "x": 1},
            "xaxis": {
                "title": "Value",
                "side": "left",
                "showgrid": True,
                "zeroline": True,
                "gridcolor": "#bdbdbd",
                "gridwidth": 1.5,
                "zerolinecolor": "#969696",
                "zerolinewidth": 3,
            },
            "yaxis": {
                "title": "Probability, %",
                "side": "left",
                "showgrid": True,
                "zeroline": True,
                "gridcolor": "#bdbdbd",
                "gridwidth": 1.5,
                "zerolinecolor": "#969696",
                "zerolinewidth": 3,
            },
            "margin": {"t": 45, "b": 5, "l": 5, "r": 5},
        }

    def save(self, output_dir: str) -> dict[str, str] | None:
        if self.split_charts:
            result = {}
            for column_name, fig in self.figs.items():
                file_path = f"{output_dir}/data_{column_name}.html"
                fig.write_html(
                    file_path,
                    config={"displaylogo": False},  # remove the plotly logo
                )

                result[column_name] = file_path
            return result
        else:
            self.fig.write_html(
                f"{output_dir}/data.html",
                config={"displaylogo": False},  # remove the plotly logo
            )

            return {"fig_name": f"{output_dir}/data.html"}

Result:

Example of Metric Code that Saves the Result as an Image

The methods __call__ and save are declared and implemented.

In the save method, the graphs are saved as images, not HTML files.

from typing import Any, Dict, Literal, Optional

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt


class ROC_AUC_img:
    """
    ROC-AUC Value and ROC Curve Plot

    Attributes:
        __desc__ (str): Description of the class.
        __tags__ (list[str]): List of tags associated with the class.
        is_scalar (bool): Whether the metric is scalar or not.
        is_signal (bool): Whether the metric has signal or not.

    """

    __desc__ = "ROC Curve Plot, ROC-AUC Value"
    __tags__ = ["core", "classification", "scalar"]

    is_scalar = True
    is_signal = True

    def __init__(
        self,
        df: pd.DataFrame,
        predict_column: str,
        target_column: str,
        threshold_yellow: float = 0.75,
        threshold_red: float = 0.65,
    ):
        self.predict_column = predict_column
        self.target_column = target_column
        self.df = df.astype({self.predict_column: "float", self.target_column: "float"})
        self.threshold_yellow = threshold_yellow
        self.threshold_red = threshold_red

        if self.df.empty:
            raise Exception("Dataframe is empty")
        if self.target_column not in self.df:
            raise ValueError(f"Field {self.target_column} does not exist in the dataframe")
        if self.predict_column not in self.df:
            raise ValueError(f"Field {self.predict_column} does not exist in the dataframe")
        if self.predict_column == self.target_column:
            raise Exception("Error. Check the selection of columns for calculation")

    def __call__(self) -> None:
        temp = self.df.loc[:, [self.target_column, self.predict_column]].dropna()
        preds = temp[self.predict_column]
        y_test = temp[self.target_column]

        fpr, tpr, threshold = roc_curve(y_test, preds)
        fpr = np.around(fpr, decimals=4).tolist()
        tpr = np.around(tpr, decimals=4).tolist()
        base_roc = np.around(np.linspace(0, 1, 10), decimals=2).tolist()

        self.scalar_value = float(
            roc_auc_score(temp[self.target_column], temp[self.predict_column])
        )
        self.fig, ax = plt.subplots()
        ax.plot(fpr, tpr)
        ax.set(xlabel='False Positive Rate', ylabel='True Positive Rate', title='ROC Curve')
        ax.grid()

    def scalar(self) -> int | float:
        return self.scalar_value

    def signal(self) -> Literal["red", "yellow", "green"]:
        signal_light = "green"

        if self.scalar_value < self.threshold_red:
            signal_light = "red"
        elif self.scalar_value < self.threshold_yellow:
            signal_light = "yellow"

        return signal_light

    def save(self, output_dir: str) -> dict[str, str] | None:
        self.fig.savefig(f"{output_dir}/data.svg")
        return {"svg": f"{output_dir}/data.svg"}

Result: