Source code for rarity.features.feat_loss_clusters

# Copyright 2021 AI Singapore. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union, List, Dict
import math
import pandas as pd

import dash
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
import dash_table
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc

from rarity.app import app
from rarity.data_loader import CSVDataLoader, DataframeLoader
from rarity.interpreters.structured_data import IntLossClusterer
from rarity.visualizers import loss_clusters as viz_clusters
from rarity.visualizers import shared_viz_component as viz_shared
from rarity.utils import style_configs
from rarity.utils.common_functions import (is_active_trace, is_reset, is_regression, is_classification,
                                            detected_legend_filtration, detected_single_xaxis, detected_single_yaxis,
                                            detected_bimodal, get_min_max_offset, get_min_max_cluster, get_effective_xaxis_cluster,
                                            get_adjusted_dfs_based_on_legend_filtration, conditional_sliced_df, insert_index_col,
                                            dataframe_prep_on_model_count_by_yaxis_slice, new_dataframe_prep_based_on_effective_index)


def fig_plot_offset_clusters_reg(data_loader: Union[CSVDataLoader, DataframeLoader], num_cluster: int):
    '''
    For use in regression task only.
    Run the loss clusterer on the loaded data and assemble everything the loss-cluster tab needs:
    the clustered dataframe, the cluster figure, the silhouette scores and the elbow-method figure.

    Arguments:
        data_loader (:class:`~rarity.data_loader.CSVDataLoader` or :class:`~rarity.data_loader.DataframeLoader`):
            Class object from data_loader module
        num_cluster (int):
            Number of clusters to form

    Returns:

             Compact outputs, in this order

            - df (:obj:`~pd.DataFrame`): dataframe for overview visualization with offset values included
            - fig_obj_cluster (:obj:`~plotly.graph_objects.Figure`): figure displaying violin plot outlining cluster groups by offset values
            - ls_cluster_score (:obj:`List[str]`): list of silhouette scores, indication of clustering quality
            - fig_obj_elbow (:obj:`~plotly.graph_objects.Figure`): figure displaying line plot outlining the change in sum of squared distances \
                along the cluster range
    '''
    # no logarithm function is needed for regression, and the whole dataset ('All') is always clustered
    clusterer_outputs = IntLossClusterer(data_loader).xform(num_cluster, None, 'All')
    df, ls_cluster_score, ls_cluster_range, ls_ssd = clusterer_outputs

    model_names = data_loader.get_model_list()
    task_type = data_loader.get_analysis_type()

    fig_obj_cluster = viz_clusters.plot_offset_clusters(df, task_type)
    fig_obj_elbow = viz_clusters.plot_optimum_cluster_via_elbow_method(ls_cluster_range, ls_ssd, model_names)
    return df, fig_obj_cluster, ls_cluster_score, fig_obj_elbow


def fig_plot_logloss_clusters_cls(data_loader: Union[CSVDataLoader, DataframeLoader],
                                    num_cluster: int,
                                    log_func: math.log = math.log,
                                    specific_dataset: str = 'All'):
    '''
    For use in classification task only.
    Run the loss clusterer on the miss-predictions and assemble everything the loss-cluster tab needs:
    per-model dataframes, the cluster figure, the silhouette scores, the elbow-method figure,
    the class-label lists and the feature dataframe.

    Arguments:
        data_loader (:class:`~rarity.data_loader.CSVDataLoader` or :class:`~rarity.data_loader.DataframeLoader`):
            Class object from data_loader module
        num_cluster (int):
            Number of clusters to form
        log_func (:obj:`math.log`):
            Mathematical logarithm function used to calculate log-loss between yTrue and yPred
        specific_dataset (str):
            Default to 'All' indicating to include all miss-predict labels. Other options flexibly expand depending on class labels

    Returns:

             Compact outputs, in this order

            - ls_dfs_viz (:obj:`List[~pd.DataFrame]`): dataframes for overview visualization with loss values included
            - fig_obj_cluster (:obj:`~plotly.graph_objects.Figure`): figure displaying violin plot outlining cluster groups by loss values
            - ls_cluster_score (:obj:`List[str]`): list of silhouette scores, indication of clustering quality
            - fig_obj_elbow (:obj:`~plotly.graph_objects.Figure`): figure displaying line plot outlining the change in sum of squared distances \
                along the cluster range
            - ls_class_labels (:obj:`List[str]`): list of all class labels
            - ls_class_labels_misspred (:obj:`List[str]`): list of class labels with minimum of 1 miss-prediction
            - df_features (:obj:`~pandas.DataFrame`): dataframe storing all features used in dataset
    '''
    clusterer_outputs = IntLossClusterer(data_loader).xform(num_cluster, log_func, specific_dataset)
    # only the first six entries of the clusterer output are consumed here
    (ls_dfs_viz, ls_class_labels, ls_class_labels_misspred,
     ls_cluster_score, ls_cluster_range, ls_ssd) = clusterer_outputs[:6]

    df_features = data_loader.get_features()
    task_type = data_loader.get_analysis_type()
    model_names = data_loader.get_model_list()

    fig_obj_cluster = viz_clusters.plot_logloss_clusters(ls_dfs_viz, task_type)
    fig_obj_elbow = viz_clusters.plot_optimum_cluster_via_elbow_method(ls_cluster_range, ls_ssd, model_names)
    return (ls_dfs_viz, fig_obj_cluster, ls_cluster_score, fig_obj_elbow,
            ls_class_labels, ls_class_labels_misspred, df_features)


def table_with_relayout_datapoints(data: dash_table.DataTable, customized_cols: List[str], header: Dict, exp_format: str):
    '''
    Build the data table shown for the datapoints selected on a cluster figure.
    Thin wrapper that forwards all arguments unchanged to the shared table builder in ``shared_viz_component``.

    Arguments:
        data (:obj:`~dash_table.DataTable`):
            table content; the regression callback in this module passes the output of ``DataFrame.to_dict('records')``
        customized_cols (:obj:`List[str]`):
            list of customized column names
        header (:obj:`Dict`):
            dictionary format storing the style info for table header
        exp_format (str):
            text info indicating the export format

    Returns:
        :obj:`~dash_table.DataTable`:
            table object outlining the dataframe content with specific styles
    '''
    return viz_shared.reponsive_table_to_filtered_datapoints(data, customized_cols, header, exp_format)


def convert_cluster_relayout_data_to_df_reg(relayout_data: Dict, df: pd.DataFrame, models: List[str]):
    '''
    For use in regression task only.
    Translate the selection range returned by the plotly graph into the slice of ``df`` to show in the data table.
    Three selection shapes are handled: x-axis only, y-axis only, and a full x-y range.

    Arguments:
        relayout_data (:obj:`Dict`):
            dictionary like data containing selection range indices returned from plotly graph
        df (:obj:`~pandas.DataFrame`):
            dataframe tap-out from interpreters pipeline; expected to hold one ``cluster_<model>`` column per model
        models (:obj:`List[str]`):
            model names defined by user during spin-up of the app

    Returns:
        :obj:`~pandas.DataFrame`:
            dataframe fit for the responsive table-graph filtering
    '''
    def _rows_in_cluster(cluster_id):
        # rows placed in cluster_id by the first model, plus (when two models are compared)
        # rows placed in the same cluster by the second model, without duplicates
        picked = df[df[f'cluster_{models[0]}'] == cluster_id]
        if detected_bimodal(models):
            picked_m2 = df[df[f'cluster_{models[1]}'] == cluster_id]
            picked = pd.concat([picked, picked_m2]).drop_duplicates()
        return picked

    if detected_single_xaxis(relayout_data):
        # only a cluster was selected: the y-range is taken from the offsets found in that cluster
        cluster_rows = _rows_in_cluster(get_effective_xaxis_cluster(relayout_data))
        y_low, y_high = get_min_max_offset(cluster_rows, models)
        return dataframe_prep_on_model_count_by_yaxis_slice(cluster_rows, models, y_low, y_high)

    if detected_single_yaxis(relayout_data):
        # only an offset range was selected: the cluster range is derived from the rows inside it
        y_low = relayout_data['yaxis.range[0]']
        y_high = relayout_data['yaxis.range[1]']
        rows_in_y_range = dataframe_prep_on_model_count_by_yaxis_slice(df, models, y_low, y_high)

        cluster_low, cluster_high = get_min_max_cluster(rows_in_y_range, models, y_low, y_high)
        # keep the derived cluster range inside the fixed bounds 1..8
        if not cluster_low >= 1:
            cluster_low = 1
        if not cluster_high <= 8:
            cluster_high = 8

        at_least_low = rows_in_y_range[f'cluster_{models[0]}'] >= cluster_low
        at_most_high = rows_in_y_range[f'cluster_{models[0]}'] <= cluster_high
        sliced = conditional_sliced_df(rows_in_y_range, at_least_low, at_most_high)

        if detected_bimodal(models):
            at_least_low_m2 = rows_in_y_range[f'cluster_{models[1]}'] >= cluster_low
            at_most_high_m2 = rows_in_y_range[f'cluster_{models[1]}'] <= cluster_high
            sliced_m2 = conditional_sliced_df(rows_in_y_range, at_least_low_m2, at_most_high_m2)
            sliced = pd.concat([sliced, sliced_m2]).drop_duplicates()
        return sliced

    # a complete range is provided by user (with proper x-y coordinates)
    cluster_id = get_effective_xaxis_cluster(relayout_data)
    y_low = relayout_data['yaxis.range[0]']
    y_high = relayout_data['yaxis.range[1]']
    cluster_rows = _rows_in_cluster(cluster_id)
    return dataframe_prep_on_model_count_by_yaxis_slice(cluster_rows, models, y_low, y_high)


def convert_cluster_relayout_data_to_df_cls(relayout_data: Dict, dfs_viz: List[pd.DataFrame], df_features: pd.DataFrame, models: List[str]):
    '''
    For use in classification task only.
    Translate the selection range returned by the plotly graph into the matching probability rows
    and the feature rows that belong to them.

    Arguments:
        relayout_data (:obj:`Dict`):
            dictionary like data containing selection range indices returned from plotly graph
        dfs_viz (:obj:`List[~pd.DataFrame]`):
            list of dataframes (one per model) for overview visualization; the ``cluster``, ``lloss`` and ``index``
            columns are read here
        df_features (:obj:`~pandas.DataFrame`):
            dataframe storing all features used in dataset
        models (:obj:`List[str]`):
            model names defined by user during spin-up of the app

    Returns:

             Compact outputs, in this order

            - df_final_features (:obj:`~pd.DataFrame`): dataframe storing all features based on slicing info from relayout_data
            - df_final_probs (:obj:`~pd.DataFrame`): dataframe storing probability values by class label corresponding to \
                the slicing relayout_data
    '''
    def _within_loss_range(frame, low, high):
        # rows of frame whose log-loss lies between low and high
        return conditional_sliced_df(frame, frame['lloss'] >= low, frame['lloss'] <= high)

    def _merge_models(probs_m1, probs_m2):
        merged = pd.concat([probs_m1, probs_m2]).drop_duplicates()
        return merged.sort_values('index')  # so that index of different models will appear together row-row

    if detected_single_xaxis(relayout_data):
        cluster_id = get_effective_xaxis_cluster(relayout_data)
        df_final_probs = dfs_viz[0][dfs_viz[0]['cluster'] == cluster_id]

        if detected_bimodal(models):
            cluster_rows_m2 = dfs_viz[1][dfs_viz[1]['cluster'] == cluster_id]
            df_final_probs = _merge_models(df_final_probs, cluster_rows_m2)

    elif detected_single_yaxis(relayout_data):
        loss_low = relayout_data['yaxis.range[0]']
        loss_high = relayout_data['yaxis.range[1]']
        df_final_probs = _within_loss_range(dfs_viz[0], loss_low, loss_high)

        if detected_bimodal(models):
            in_range_m2 = _within_loss_range(dfs_viz[1], loss_low, loss_high)
            df_final_probs = _merge_models(df_final_probs, in_range_m2)

    else:
        # A complete range is provided by user (with proper x-y coordinates).
        # Original author's note: this gives the same result as the x-axis-only case because of how
        # dfs_viz is set up for classification (loss values are tied to the cluster group).
        cluster_id = get_effective_xaxis_cluster(relayout_data)
        cluster_rows_m1 = dfs_viz[0][dfs_viz[0]['cluster'] == cluster_id]

        loss_low = relayout_data['yaxis.range[0]']
        loss_high = relayout_data['yaxis.range[1]']
        df_final_probs = _within_loss_range(cluster_rows_m1, loss_low, loss_high)

        if detected_bimodal(models):
            cluster_rows_m2 = dfs_viz[1][dfs_viz[1]['cluster'] == cluster_id]
            in_range_m2 = _within_loss_range(cluster_rows_m2, loss_low, loss_high)
            df_final_probs = _merge_models(df_final_probs, in_range_m2)

    # every branch ends the same way: look up the feature rows for the selected probability rows
    df_final_features = new_dataframe_prep_based_on_effective_index(df_features, df_final_probs)
    return df_final_features, df_final_probs


def _display_score(ls_cluster_score: List[float], models: List[str]):
    '''
    Internal function to compose the silhouette-score text.
    One score is shown for a single model; for two models each score is followed by its model name.
    '''
    first_score = ls_cluster_score[0]
    if not detected_bimodal(models):
        return f'Silhouette score: {first_score}'
    return f'Silhouette score: {first_score} [ {models[0]} ] {ls_cluster_score[1]} [ {models[1]} ]'


class LossClusters:
    '''
    Main integration for feature component on Loss Clusters.

    Arguments:
        data_loader (:class:`~rarity.data_loader.CSVDataLoader` or :class:`~rarity.data_loader.DataframeLoader`):
            Class object from data_loader module

    Important Attributes:

        analysis_type (str):
            Analysis type defined by user during initial inputs preparation via data_loader stage.
        model_names (:obj:`List[str]`):
            model names defined by user during initial inputs preparation via data_loader stage.
        is_bimodal (bool):
            to indicate if analysis involves 2 models
        num_cluster (int):
            Number of clusters to form
        log_func (:obj:`math.log`):
            Mathematical logarithm function used to calculate log-loss between yTrue and yPred
        specific_dataset (str):
            Default to 'All' indicating to include all miss-predict labels. Other options flexibly expand depending on class labels

    Returns:
        :obj:`~dash_bootstrap_components.Container`:
            styled dash components displaying graph and/or table objects
    '''
    def __init__(self, data_loader: Union[CSVDataLoader, DataframeLoader]):
        '''
        Store the loader details and pre-compute the default (4-cluster) figures and score text
        for the detected analysis type, so that ``show`` and the callbacks can reuse them.
        '''
        self.data_loader = data_loader
        self.analysis_type = data_loader.get_analysis_type()
        self.model_names = data_loader.get_model_list()
        self.is_bimodal = len(self.model_names) > 1
        self.num_cluster = 4

        # instantiate at this stage due to shared use with callbacks
        if is_regression(self.analysis_type):
            self.compact_outputs_reg = fig_plot_offset_clusters_reg(self.data_loader, self.num_cluster)
            (self.df, self.offset_clusters_reg,
             self.ls_cluster_score, self.optimum_elbow_reg) = self.compact_outputs_reg
            # table headers: dataframe column names with underscores shown as spaces
            self.cols_table_reg = [col_name.replace('_', ' ') for col_name in self.df.columns]
            self.score_text = _display_score(self.ls_cluster_score, self.model_names)

        elif is_classification(self.analysis_type):
            self.log_func = math.log
            self.specific_dataset = 'All'

            self.compact_outputs_cls = fig_plot_logloss_clusters_cls(self.data_loader, self.num_cluster,
                                                                     self.log_func, self.specific_dataset)
            # the trailing feature dataframe of the compact outputs is not stored as an attribute
            (self.ls_dfs_viz, self.lloss_clusters_cls,
             self.ls_cluster_score, self.optimum_elbow_cls,
             self.ls_class_labels, self.ls_class_labels_misspred) = self.compact_outputs_cls[:6]
            self.score_text = _display_score(self.ls_cluster_score, self.model_names)

    def show(self):
        '''
        Build the styled layout for the Loss Clusters tab.

        The regression layout holds the elbow-method figure, a cluster-count selector with its update button,
        the silhouette-score text, the offset-cluster figure and placeholders for the alert and the data table.
        The classification layout additionally holds a miss-predict dataset selector and a logarithm-method selector,
        and has separate placeholders for the feature table and the probability table.

        Returns:
            :obj:`~dash_bootstrap_components.Container`:
                fluid container for the detected analysis type. No value is returned explicitly (i.e. ``None``)
                when the analysis type is neither regression nor classification.
        '''
        if is_regression(self.analysis_type):
            lloss_clusters = dbc.Container([
                                    # section 1: elbow-method figure pre-computed in __init__
                                    dbc.Row(html.Div(
                                        html.H5('Optimum Cluster via Elbow Method', className='h5__cluster-section-title'))),
                                    dbc.Row(
                                        dcc.Graph(id='fig-optimum-cluster-reg',
                                                figure=self.optimum_elbow_reg,),
                                        justify='center', className='border__optimum-cluster'),
                                    # section 2: cluster-count selector, loading spinner and update button
                                    dbc.Row(html.H5('Log-Loss Clustering via KMean', className='h5__cluster-section-title')),
                                    dbc.Row([
                                            dbc.Col([
                                                dbc.Row(html.Div(html.H6('Select No. of Cluster'), className='h6__cluster-instruction')),
                                                # initial value '4' mirrors self.num_cluster set in __init__
                                                dbc.Row(dbc.Select(id='select-num-cluster-reg',
                                                            options=style_configs.OPTIONS_NO_OF_CLUSTERS,
                                                            value='4'), className='params__select-cluster')
                                            ], width=6),
                                            dbc.Col(width=4),
                                            # spinner; its children are an output of the cluster-update callback
                                            dbc.Col(
                                                dbc.Row(dcc.Loading(id='loading-output-loss-cluster-reg',
                                                                    type='circle', color='#a80202'),
                                                        justify='right', className='loading__loss-cluster'), width=1),
                                            dbc.Col(
                                                dbc.Row(dbc.Button("Update",
                                                                    id='button-num-cluster-update-reg',
                                                                    n_clicks=0,
                                                                    color="info",
                                                                    className='button__update-dataset'),
                                                        justify='right'))], className='border__select-dataset'),
                                    # silhouette-score text, refreshed by the cluster-update callback
                                    dbc.Row(dbc.Col(dbc.Row(
                                        html.Div(self.score_text,
                                                id='text-score-cluster-reg',
                                                className='text__score-cluster-reg'), justify='right'))),
                                    # cluster figure; its relayoutData / restyleData drive the table callback
                                    dbc.Row(
                                        dcc.Graph(id='fig-loss-cluster-reg',
                                                    figure=self.offset_clusters_reg),
                                        justify='center', className='border__common-cluster-plot-reg'),

                                    # alert and data-table placeholders, filled by the range-selection callback
                                    html.Div(html.H6(style_configs.INSTRUCTION_TEXT_SHARED), className='h6__dash-table-instruction-cluster-reg'),
                                    html.Div(id='alert-to-reset-cluster-reg'),
                                    html.Div(id='table-feat-prob-cluster-reg', className='div__table-proba-misspred'),
                                    html.Br()], fluid=True)
            return lloss_clusters

        elif is_classification(self.analysis_type):
            # dropdown options: 'All' plus one 'class <label>' entry per label that has miss-predictions
            options_misspred_dataset = [{'label': 'All', 'value': 'All'}] + \
                                        [{'label': f'class {label}', 'value': f'class {label}'} for label in self.ls_class_labels_misspred]

            lloss_clusters = dbc.Container([
                                    # section 1: miss-predict dataset selector, loading spinner and update button
                                    dbc.Row(html.H5('Loss Cluster Analysis for ALL Miss Predictions',
                                        id='title-after-misspred-dataset-selection-cls',
                                        className='h5__cluster-section-title')),
                                    dbc.Row([
                                        dbc.Col([
                                            dbc.Row(html.Div(html.H6('Select Miss-Predict Dataset'), className='h6__cluster-instruction')),
                                            dbc.Row(dbc.Select(id='select-misspred-dataset-cls',
                                                        options=options_misspred_dataset,
                                                        value='All'), className='params__select-cluster')
                                        ], width=4),
                                        dbc.Col(width=6),
                                        dbc.Col(
                                                dbc.Row(dcc.Loading(id='loading-output-misspred-dataset-cls',
                                                                    type='circle', color='#a80202'),
                                                        justify='right', className='loading__loss-cluster'), width=1),
                                        dbc.Col(
                                            dbc.Row(dbc.Button("Update",
                                                                id='button-misspred-dataset-update-cls',
                                                                n_clicks=0,
                                                                color="info",
                                                                className='button__update-dataset'),
                                                    justify='right'))], className='border__select-dataset'),
                                    # placeholder for the clustering-error alert (an output of the params callback)
                                    html.Div(id='alert-clustering-error-cls'),
                                    # elbow-method figure pre-computed in __init__
                                    dbc.Row(dcc.Graph(id='fig-cls-optimum-cluster', figure=self.optimum_elbow_cls,),
                                        justify='center', className='border__optimum-cluster'),
                                    # section 2: cluster-count and logarithm-method selectors with their update button
                                    dbc.Row(html.H5('Log-Loss Clustering via KMean on ALL Miss Predictions',
                                                    id='title-after-losscluster-params-selection-cls',
                                                    className='h5__cluster-section-title-kmean')),
                                    dbc.Row([
                                        dbc.Col([
                                            dbc.Row(html.Div(html.H6('Select No. of Cluster'), className='h6__cluster-instruction')),
                                            # initial value '4' mirrors self.num_cluster set in __init__
                                            dbc.Row(dbc.Select(id='select-num-cluster-cls',
                                                        options=style_configs.OPTIONS_NO_OF_CLUSTERS,
                                                        value='4'), className='params__select-cluster')
                                        ], width=4),
                                        dbc.Col([
                                            dbc.Row(html.Div(html.H6('Select Logarithm Method'), className='h6__cluster-instruction')),
                                            # initial value 'log' mirrors self.log_func (math.log) set in __init__
                                            dbc.Row(dbc.Select(id='select-log-method-cls',
                                                        options=[{'label': 'LOG', 'value': 'log'},
                                                                {'label': 'LOG1P', 'value': 'log1p'},
                                                                {'label': 'LOG2', 'value': 'log2'},
                                                                {'label': 'LOG10', 'value': 'log10'}],
                                                        value='log'), className='params__select-cluster')
                                        ], width=4),
                                        dbc.Col(width=2),
                                        dbc.Col(
                                                dbc.Row(dcc.Loading(id='loading-output-loss-cluster-cls',
                                                                    type='circle', color='#a80202'),
                                                        justify='right', className='loading__loss-cluster'), width=1),
                                        dbc.Col(
                                            dbc.Row(dbc.Button("Update",
                                                                id='button-logloss-update-cls',
                                                                n_clicks=0,
                                                                color="info",
                                                                className='button__update-dataset'),
                                                    justify='right'))], className='border__select-dataset'),
                                    # silhouette-score text (an output of the params callback)
                                    dbc.Row(dbc.Col(dbc.Row(
                                        html.Div(self.score_text,
                                                id='text-score-cluster-cls',
                                                className='text__score-cluster-cls'), justify='right'))),
                                    dbc.Row(
                                        dcc.Graph(id='fig-loss-cluster-cls',
                                                    figure=self.lloss_clusters_cls,),
                                        justify='center', className='border__common-cluster-plot-cls'),

                                    # data-table, appeared only after data range selection on fig-loss-cluster-cls by user
                                    # NOTE(review): the callback filling the placeholders below is not visible in this part of the file
                                    html.Div(html.H6(style_configs.INSTRUCTION_TEXT_SHARED), className='h6__dash-table-instruction-cls'),
                                    html.Div(id='alert-to-reset-loss-cluster-cls'),
                                    html.Div(id='table-title-features-loss-cluster'),
                                    html.Div(id='show-feat-table-loss-cluster', className='div__table-proba-misspred'),
                                    html.Br(),
                                    html.Div(id='table-title-probs-loss-cluster'),
                                    html.Div(id='show-prob-table-loss-cluster', className='div__table-proba-misspred'),
                                    html.Br()], fluid=True)
            return lloss_clusters

    def callbacks(self):
        @app.callback(
            Output('loading-output-loss-cluster-reg', 'children'),
            Output('text-score-cluster-reg', 'children'),
            Output('fig-loss-cluster-reg', 'figure'),
            Input('button-num-cluster-update-reg', 'n_clicks'),
            State('select-num-cluster-reg', 'value'))
        def update_fig_based_on_selected_num_cluster(click_count, selected_no_cluster):
            '''
            Callback for the regression "Update" button: re-run the clustering with the selected
            number of clusters and refresh the silhouette-score text and the cluster figure.
            Nothing is updated until the button has been clicked at least once.
            '''
            if not click_count > 0:
                raise PreventUpdate

            refreshed_outputs = fig_plot_offset_clusters_reg(self.data_loader, int(selected_no_cluster))
            _, fig_obj_cluster_reg, ls_cluster_score_reg, _ = refreshed_outputs
            score_text_reg = _display_score(ls_cluster_score_reg, self.model_names)
            # the empty string is the new content of the loading spinner
            return '', score_text_reg, fig_obj_cluster_reg

        @app.callback(
            Output('alert-to-reset-cluster-reg', 'children'),
            Output('table-feat-prob-cluster-reg', 'children'),
            Input('fig-loss-cluster-reg', 'relayoutData'),
            Input('fig-loss-cluster-reg', 'restyleData'),
            State('select-num-cluster-reg', 'value'))
        def display_table_based_on_selected_range_reg(relayout_data, restyle_data, selected_no_cluster):
            '''
            Callback linking the regression cluster figure to the data table.

            Arguments:
                relayout_data: selection / zoom info emitted by 'fig-loss-cluster-reg'
                restyle_data: legend interaction info emitted by 'fig-loss-cluster-reg'
                selected_no_cluster: current value of the cluster-count selector

            Returns:
                alert component and table component; both are ``None`` when the figure was reset.

            Raises:
                PreventUpdate: when there is no relayout data yet, or when the relayout data is neither
                    a reset nor an active trace, so the current alert and table are left untouched.
            '''
            if relayout_data is None:
                raise PreventUpdate

            if is_reset(relayout_data):
                # figure was reset: clear both the alert and the table
                return None, None

            if not is_active_trace(relayout_data):
                # Neither a reset nor a usable selection. Previously this fell through to the return
                # statement with both outputs unassigned (UnboundLocalError); keep the display as is.
                raise PreventUpdate

            models = self.model_names
            # restyle_data looks like [{'visible': ['legendonly']}, [1]] when a trace is hidden via the legend
            if restyle_data is not None and detected_legend_filtration(restyle_data):
                model_to_exclude_from_view = self.model_names[restyle_data[1][0]]
                models = [model for model in self.model_names if model != model_to_exclude_from_view]

            # the clustering is recomputed only here, where its result is actually used
            df_usr_select_cluster, _, _, _ = fig_plot_offset_clusters_reg(self.data_loader, int(selected_no_cluster))
            try:
                # to limit table cell having values with long decimals for better viz purpose
                df_usr_select_cluster = df_usr_select_cluster.round(2)
            except TypeError:
                # best effort only: fall back to the unrounded dataframe
                pass

            default_header = style_configs.default_header_style()
            alert_obj_reg = style_configs.activate_alert()

            df_final = convert_cluster_relayout_data_to_df_reg(relayout_data, df_usr_select_cluster, models)
            df_final.columns = self.cols_table_reg  # to have customized column names displayed on table

            data_relayout_reg = df_final.to_dict('records')
            table_obj_reg = table_with_relayout_datapoints(data_relayout_reg, self.cols_table_reg, default_header, 'csv')
            return alert_obj_reg, table_obj_reg

        @app.callback(
            Output('loading-output-misspred-dataset-cls', 'children'),
            Output('loading-output-loss-cluster-cls', 'children'),
            Output('alert-clustering-error-cls', 'children'),
            Output('title-after-misspred-dataset-selection-cls', 'children'),
            Output('fig-cls-optimum-cluster', 'figure'),
            Output('title-after-losscluster-params-selection-cls', 'children'),
            Output('text-score-cluster-cls', 'children'),
            Output('fig-loss-cluster-cls', 'figure'),
            Input('button-misspred-dataset-update-cls', 'n_clicks'),
            Input('button-logloss-update-cls', 'n_clicks'),
            State('select-misspred-dataset-cls', 'value'),
            State('select-num-cluster-cls', 'value'),
            State('select-log-method-cls', 'value'))
        def update_loss_cluster_tab_based_on_selected_misspred_dataset(click_count_dataset,
                                                                        click_count_params,
                                                                        selected_dataset,
                                                                        selected_cluster,
                                                                        selected_method):
            '''
            Refresh the classification loss-cluster tab after one of its two update buttons is clicked.

            Which button fired is read from ``dash.callback_context``:

            - dataset button: checks that every miss-prediction dataframe holds at least 8 rows
              for the selected class (skipped when the selection is 'All'); if not, only the
              clustering error alert is updated. Otherwise both titles, the elbow figure,
              the score text and the cluster figure are refreshed.
            - params button (number of clusters / log method): only the score text and the
              cluster figure are refreshed.

            Any other trigger, or a click count that is not positive, raises ``PreventUpdate``.

            Returns:
                An 8-tuple matching the declared ``Output`` order; positions left untouched
                carry ``dash.no_update``.
            '''
            dataset_button_id = 'button-misspred-dataset-update-cls'
            params_button_id = 'button-logloss-update-cls'

            callback_ctx = dash.callback_context
            trigger = callback_ctx.triggered[0]
            trigger_id = trigger['prop_id'].split('.')[0]
            trigger_value = trigger['value']

            # dropdown values look like 'class <label>'; strip the prefix to get the bare label
            dataset_state = callback_ctx.states['select-misspred-dataset-cls.value']
            if 'class' in dataset_state:
                specific_dataset = dataset_state.replace('class ', '')
            else:
                specific_dataset = dataset_state
            cluster_err_alert = style_configs.no_error_alert()

            # only react to a real click on one of the two known buttons
            if trigger_id not in (dataset_button_id, params_button_id) or not (trigger_value > 0):
                raise PreventUpdate

            def _run_clustering():
                # single place for the clustering call shared by both button branches
                return fig_plot_logloss_clusters_cls(self.data_loader,
                                                     num_cluster=int(selected_cluster),
                                                     log_func=style_configs.LOG_METHOD_DICT[selected_method],
                                                     specific_dataset=specific_dataset)

            # click action on num_cluster and log_method selection
            if trigger_id == params_button_id:
                params_outputs = _run_clustering()
                cluster_fig = params_outputs[1]
                score_text = _display_score(params_outputs[2], self.model_names)
                return (dash.no_update, '', cluster_err_alert, dash.no_update, dash.no_update,
                        dash.no_update, score_text, cluster_fig)

            # click action on dataset selection
            dataset_label = selected_dataset.capitalize()
            title_aft_misspred_dataset = f'Loss Cluster Analysis for {dataset_label} Miss Predictions'
            title_aft_params = f'Log-Loss Clustering via KMean on {dataset_label} Miss Predictions'

            # pre-requisite: each model needs enough data-points of the chosen class for auto-clustering
            misspred_frames = IntLossClusterer(self.data_loader).extract_misspredictions()
            if specific_dataset != 'All':
                insufficient = any(len(frame[frame['yPred-label'] == specific_dataset]) < 8
                                   for frame in misspred_frames)
                if insufficient:
                    cluster_err_alert = style_configs.activate_cluster_error_alert(specific_dataset)
                    return (dash.no_update, dash.no_update, cluster_err_alert, dash.no_update,
                            dash.no_update, dash.no_update, dash.no_update, dash.no_update)

            dataset_outputs = _run_clustering()
            cluster_fig = dataset_outputs[1]
            elbow_fig = dataset_outputs[3]
            score_text = _display_score(dataset_outputs[2], self.model_names)
            return ('', dash.no_update, cluster_err_alert, title_aft_misspred_dataset, elbow_fig,
                    title_aft_params, score_text, cluster_fig)

        @app.callback(
            Output('alert-to-reset-loss-cluster-cls', 'children'),
            Output('table-title-features-loss-cluster', 'children'),
            Output('show-feat-table-loss-cluster', 'children'),
            Output('table-title-probs-loss-cluster', 'children'),
            Output('show-prob-table-loss-cluster', 'children'),
            Input('fig-loss-cluster-cls', 'relayoutData'),
            Input('fig-loss-cluster-cls', 'restyleData'),
            State('select-misspred-dataset-cls', 'value'),
            State('select-num-cluster-cls', 'value'),
            State('select-log-method-cls', 'value'))
        def display_table_based_on_selected_range_cls(relayout_data, restyle_data, selected_dataset, selected_cluster, selected_method):
            '''
            Callbacks functionalities specific to response from fig-obj to data-table [ classification ]

            Arguments:
                relayout_data: ``relayoutData`` emitted by the loss-cluster figure.
                restyle_data: ``restyleData`` emitted by the same figure, e.g.
                    ``[{'visible': ['legendonly']}, [1]]`` after a click on a legend entry.
                selected_dataset: current value of the miss-prediction dataset dropdown.
                selected_cluster: current value of the number-of-clusters dropdown.
                selected_method: current value of the log-method dropdown.

            Returns:
                A 5-tuple ``(alert, features title, features table, probabilities title,
                probabilities table)``. All five are ``None`` when the figure was reset.

            Raises:
                PreventUpdate: when there is no relayout data yet, or when the relayout event
                    is neither a reset nor an active-trace selection.
            '''
            if relayout_data is None:
                raise PreventUpdate

            # classify the event first so that the clustering is not recomputed
            # for events whose outputs never depend on it
            if is_reset(relayout_data):
                return None, None, None, None, None

            if not is_active_trace(relayout_data):
                # any other relayout event carries nothing to tabulate; without this guard
                # the return values below would be unbound
                raise PreventUpdate

            specific_dataset = selected_dataset.replace('class ', '') if 'class' in selected_dataset else selected_dataset
            outputs_callback_fig_action = fig_plot_logloss_clusters_cls(self.data_loader,
                                                                        num_cluster=int(selected_cluster),
                                                                        log_func=style_configs.LOG_METHOD_DICT[selected_method],
                                                                        specific_dataset=specific_dataset)
            dfs_viz, df_features = outputs_callback_fig_action[0], outputs_callback_fig_action[6]

            try:
                df_features = df_features.round(2)  # limit long decimals on feature values
                dfs_viz[0] = dfs_viz[0].round(4)  # standardize prob values to 4 decimals
                if self.is_bimodal:
                    dfs_viz[1] = dfs_viz[1].round(4)
            except TypeError:
                # rounding is purely cosmetic: keep the un-rounded frames if it is not possible
                pass

            df_features = insert_index_col(df_features)
            dfs_viz = [insert_index_col(df) for df in dfs_viz]

            # drop the model hidden through the legend, if any
            models = self.model_names
            if restyle_data is not None and detected_legend_filtration(restyle_data):  # [{'visible': ['legendonly']}, [1]]
                model_to_exclude_from_view = self.model_names[restyle_data[1][0]]
                models = [model for model in self.model_names if model != model_to_exclude_from_view]

            default_title = style_configs.DEFAULT_TITLE_STYLE
            title_table_features_cls = html.H6('Feature Values :', style=default_title, className='title__table-misspred-cls')
            title_table_probs_cls = html.H6('Probabilities Overview :', style=default_title, className='title__table-misspred-cls')

            default_header = style_configs.default_header_style()
            alert_obj_cls = style_configs.activate_alert()

            # dfs_viz adjusted to the correct df according to the filtered model following click action on legend
            dfs_viz_adjusted = get_adjusted_dfs_based_on_legend_filtration(dfs_viz, models)
            df_final_features, df_final_probs = convert_cluster_relayout_data_to_df_cls(relayout_data,
                                                                                        dfs_viz_adjusted,
                                                                                        df_features,
                                                                                        models)

            data_relayout_features_cls = df_final_features.to_dict('records')
            # 'records' spelled out in full: abbreviated / misspelled orients are rejected by recent pandas
            data_relayout_prob_cls = df_final_probs.to_dict('records')
            table_obj_features_cls = table_with_relayout_datapoints(data_relayout_features_cls,
                                                                    df_final_features.columns,
                                                                    default_header,
                                                                    'csv')
            table_obj_probs_cls = table_with_relayout_datapoints(data_relayout_prob_cls,
                                                                 df_final_probs.columns,
                                                                 default_header,
                                                                 'csv')
            return alert_obj_cls, title_table_features_cls, table_obj_features_cls, title_table_probs_cls, table_obj_probs_cls