Creating Analysis Class

Creating Analysis Class

We’ll establish labels and category ordering for our visualizations, then integrate these specifications into our dataframes.

# Human-readable labels for plot axes, legends and titles, keyed by column name.
# Keys are grouped by the dataframe the column comes from.
base_labels = {
    # df_orders
    'year': 'Year',
    'purchase_year': 'Year',
    'purchase_month': 'Month',
    'purchase_season': 'Season',
    'purchase_weekday': 'Day of Week',
    'purchase_day_type': 'Day Type',
    'purchase_time_of_day': 'Time of Day',
    'purchase_hour': 'Hour',
    'order_status': 'Order Status',
    'is_delayed': 'Delivery Delay Status',
    'is_canceled': 'Order Cancellation Status',
    'is_delivered': 'Delivery Status',
    'delivery_time_days_cat': 'Delivery Time Category',
    'delivery_issue_reason': 'Delivery Issue Reason',
    'is_purchase': 'Purchase Status',
    # df_orders: columns derived from df_payments
    'order_has_installment': 'Installment Status',
    'order_total_payment_cat': 'Order Payment Category',
    'order_payment_types': 'Order Payment Types',
    # df_orders: columns derived from df_items and df_products
    'order_is_free_shipping': 'Free Shipping Status',
    'order_general_product_categories': 'General Product Categories',
    'order_product_categories': 'Product Categories',
    'order_total_weight_cat': 'Order Weight Category',
    'order_total_volume_cat': 'Order Volume Category',
    # df_orders: columns derived from df_reviews
    'order_avg_reviews_score': 'Order Review Score',
    'order_review_sentiment': 'Order Review Sentiment',
    # df_sales
    'sale_is_customer_first_purchase': 'First-Time Purchase',
    'sale_is_customer_first_purchase_month': 'First Purchase Month',
    # df_customers
    'customer_state': 'Customer State',
    'customer_city': 'Customer City',
    'customer_top_purchase_weekdays': 'Top Purchase Weekdays',
    'customer_payment_types': 'Payment Methods',
    'customer_top_product_categories': 'Top Product Categories',
    'customer_top_general_product_categories': 'Top General Product Categories',
    'activity_segment': 'Activity Segment',
    'value_segment': 'Value Segment',
    'purchase_freq_segment': 'Purchase Frequency Segment',
    'repeat_segment': 'Repeat Segment',
    'loyalty_segment': 'Loyalty Segment',
    'risk_segment': 'Risk Segment',
    'weekday_segment': 'Weekday Segment',
    'installment_segment': 'Installment Segment',
    'products_cnt_segment': 'Products Count Segment',
    'weight_segment': 'Weight Segment',
    # df_payments
    'has_installments': 'Installment Status',
    'payment_type': 'Payment Type',
    # df_products
    'general_product_category': 'General Product Category',
    'product_category': 'Product Category',
    # df_reviews
    'review_score': 'Review Score',
    'season_review': 'Season',
    'review_day_type': 'Day Type',
    'review_creation_weekday': 'Day of Week',
    # df_sellers
    'seller_state': 'Seller State',
    'seller_city': 'Seller City',
    # shared by all tables
    'day_of_month': 'Day of Month',
}
# Display order of category values for each categorical column, keyed by
# column name. Numeric-looking categories (hours, scores, days) are stored as
# strings.
base_category_orders = dict(
    is_purchase=['Purchase', 'Not Purchase'],
    # purchase date parts
    purchase_year=['2017', '2018'],
    purchase_season=['Spring', 'Summer', 'Autumn', 'Winter'],
    purchase_month=[
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    ],
    purchase_weekday=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    purchase_day_type=['Weekday', 'Weekend'],
    purchase_time_of_day=['Morning', 'Afternoon', 'Evening', 'Night'],
    purchase_hour=list(map(str, range(24))),
    # review date parts and scores
    season_review=['Spring', 'Summer', 'Autumn', 'Winter'],
    review_day_type=['Weekday', 'Weekend'],
    review_creation_weekday=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    order_avg_reviews_score=list(map(str, range(1, 6))),
    review_score=list(map(str, range(1, 6))),
    order_review_sentiment=['Positive', 'Neutral', 'Negative'],
    # order and delivery statuses
    is_delayed=['Delayed', 'Not Delayed'],
    is_delivered=['Delivered', 'Not Delivered'],
    order_status=['Delivered', 'Shipped', 'Processing', 'Unavailable', 'Canceled', 'Invoiced', 'Approved', 'Created'],
    delivery_issue_reason=['Service Issue', 'Customer Issue', 'No Issues'],
    delivery_time_days_cat=['Fast', 'Medium', 'Long'],
    # order size / payment buckets
    order_total_payment_cat=['Cheap', 'Medium', 'Expensive'],
    order_total_weight_cat=['Light', 'Medium', 'Heavy'],
    order_total_volume_cat=['Small', 'Medium', 'Large'],
    order_has_installment=['Has Installments', 'No Installments'],
    has_installments=['Has Installments', 'No Installments'],
    order_is_free_shipping=['Free Shipping', 'Paid Shipping'],
    # Days of the month run from 1 to 31 (the previous range(31) produced
    # '0'..'30', i.e. a non-existent day 0 and no slot for day 31).
    day_of_month=list(map(str, range(1, 32))),
    # segments
    value_segment=['Low', 'Medium', 'High'],
    activity_segment=['Core', 'Potential Core', 'Short-Lived Repeat', 'One Time', 'Never Converted'],
    purchase_freq_segment=['Weekly', 'Monthly', 'Quarterly', 'Semiannual', 'Annual'],
    repeat_segment=['Fast Repeat', 'Medium Repeat', 'Slow Repeat'],
    loyalty_segment=['Critic', 'Neutral', 'Promoter'],
    risk_segment=['Reliable', 'Risky'],
    weekday_segment=['Weekday', 'Weekend'],
    installment_segment=['Full Pay', 'Installment'],
    products_cnt_segment=['Single Product', 'Multi Product', 'Bulk Buyer'],
    weight_segment=['Light', 'Medium', 'Heavy'],
)
# Register the shared labels and category orderings on the plotting accessor
# of every dataframe used in the analysis.
for df in (
    df_orders, df_sales, df_items, df_customers,
    df_products, df_sellers, df_reviews, df_payments,
):
    df.viz.update_plotly_settings(labels=base_labels, category_orders=base_category_orders)

We’ll develop a dedicated class for generating standardized visualizations.

class PlotBuilder:
    """Class-level plot configuration plus classmethods that build standardized figures.

    Nothing is stored per instance: every setting below is a class attribute
    and every method is a classmethod, so the configuration is shared globally.
    Use ``configure(...)`` to set the attributes and ``reset_configure()`` to
    restore the defaults.
    """
    # Dataframe that is plotted when a plot method is called without `data_frame`.
    df = None
    # Datetime column: grouping key for time aggregation and default x for line plots.
    time_column = None
    # Display label for `time_column`.
    time_column_label = None
    # Column holding the metric being analysed.
    metric = None
    metric_label = None # for axis label
    metric_label_for_distribution = None # if metric_label is aggregated; used for box/histogram and metric_info
    # Default aggregation function for the aggregated plot types and metric_info/metric_top.
    agg_func = None
    # Default time frequency alias (e.g. 'ME', 'W', 'D') for resampled plots and titles.
    freq = None
    # Optional base text for automatic titles; takes precedence over `metric_label`.
    title_base = None
    # Default `norm_by` forwarded to bar_groupby / pie_bar.
    norm_by = None
    # Sequence of column names; integer values passed as x/y/color/cat1/cat2 index into it.
    cur_dim = None
    # Mapping column name -> display label, used for titles and dimension validation.
    dimensions = base_labels
    # Default category sort order for the dimension axis (applied while building the title).
    axis_sort_order = None
    # Default `text_auto` forwarded to bar_groupby / pie_bar when truthy.
    text_auto = None
    # Extra plotly settings; its 'category_orders' entry is read by cat_compare.
    plotly_kwargs = {}
    # NOTE(review): not read in the visible part of the class — presumably consumed by _update_fig; confirm.
    update_fig = {}
    # When True, to_slide() does nothing (figure export is disabled).
    block_save_fig_for_slides = True
    # Path prefix and image format used by to_slide() to build the output file name.
    slide_path = 'for_slides/svg/'
    slide_img_fmt = 'svg'

    @classmethod
    def reset_configure(cls) -> None:
        """Restore the plot configuration attributes to their default values.

        The slide-export settings (``block_save_fig_for_slides``, ``slide_path``
        and ``slide_img_fmt``) are left untouched.
        """
        # Attributes whose default is simply "not set".
        unset_by_default = (
            'df', 'time_column', 'time_column_label',
            'metric', 'metric_label', 'metric_label_for_distribution',
            'agg_func', 'freq', 'title_base', 'norm_by', 'cur_dim',
            'axis_sort_order', 'text_auto',
        )
        for attr_name in unset_by_default:
            setattr(cls, attr_name, None)

        cls.dimensions = base_labels
        # Fresh dicts on every reset so earlier in-place changes cannot leak through.
        cls.plotly_kwargs = {}
        cls.update_fig = {}
        
    @classmethod
    def metric_info(cls, freq=None, agg_func=None, **kwargs):
        """
        Display distribution information and statistics for the metric column.

        Parameters:
        -----------
        freq : str, optional
            The time frequency for aggregation (e.g., 'ME' for month, 'W' for week, 'D' for day).
            If not provided, the analysis is performed on raw, non-aggregated data.
            Aliases other than 'ME', 'W' and 'D' are accepted as well; they appear
            verbatim in the generated column name and title.
        agg_func : str or function, optional
            The aggregation function to apply when freq is specified (e.g., 'mean', 'sum', 'count').
            If not provided, uses the class's default agg_func.
        **kwargs : dict
            Additional arguments passed to the underlying ``explore.info`` call.
            ``labels`` and ``title`` are filled in automatically unless supplied.

        Returns:
        --------
        Whatever ``explore.info`` returns for the selected series.

        Notes:
        ------
        - When freq is specified, each data point is the aggregated metric value
          for one time period (grouped on ``cls.time_column``).
        - Without freq, the raw ``cls.df[cls.metric]`` series is analysed.
        """
        metric_label_for_title = cls.get_metric_label_for_title_for_metric_info()
        # The distribution label takes precedence over the plain metric label.
        if cls.metric_label_for_distribution:
            axis_label = cls.metric_label_for_distribution
        else:
            axis_label = cls.metric_label

        if freq:
            period_map = {'ME': 'Month', 'W': 'Week', 'D': 'Day'}
            # Fall back to the raw alias for frequencies without a friendly
            # name, so that e.g. freq='h' no longer fails with a KeyError.
            period_label = period_map.get(freq, freq)

            if not agg_func:
                agg_func = cls.agg_func
            agg_metric = f'{agg_func}_{cls.metric}_per_{str(period_label).lower()}'

            # Aggregate data by specified frequency
            df_metric_per_period = (
                cls.df.groupby(pd.Grouper(key=cls.time_column, freq=freq), observed=False)[cls.metric]
                .agg(agg_func)
                .to_frame(agg_metric)
            )
            target = df_metric_per_period[agg_metric]
            label_key = agg_metric

            # Enhance title with aggregation info
            metric_label_for_title += f' per {period_label}'
        else:
            target = cls.df[cls.metric]
            label_key = cls.metric

        # Defaults apply only when the caller did not pass the key at all.
        kwargs.setdefault('labels', {label_key: axis_label})
        kwargs.setdefault('title', f'Distribution of {metric_label_for_title}')

        return target.explore.info(**kwargs)
    
    @classmethod
    def get_metric_label_for_title_for_metric_info(cls):
        """
        Return the metric label to use in ``metric_info`` titles.

        ``metric_label_for_distribution`` is preferred when set, otherwise
        ``metric_label`` is used. If the chosen label contains a comma, only
        the text before the first comma is returned.
        """
        chosen = cls.metric_label_for_distribution or cls.metric_label
        if ',' in chosen:
            return chosen.split(',')[0]
        return chosen
    
    @classmethod
    def metric_top(cls, id_column='order_id', n=10, freq=None, agg_func=None) -> pd.DataFrame:
        """
        Return the top n entries by metric value, with optional temporal aggregation.

        Parameters:
        -----------
        id_column : str, optional (default='order_id')
            Column used as the index (identifier) of the result in the
            non-aggregated case. Ignored when freq is given.
        n : int, optional (default=10)
            Number of top entries to return.
        freq : str, optional
            The time frequency for aggregation (e.g., 'ME' for month, 'W' for week, 'D' for day).
            If not provided, the raw, non-aggregated rows are ranked.
        agg_func : str or function, optional
            The aggregation function to apply when freq is specified (e.g., 'mean', 'sum', 'count').
            If not provided, uses the class's default agg_func.

        Returns:
        --------
        pd.DataFrame
            One-column frame with the metric, sorted in descending order and
            limited to n rows. The index is ``id_column`` without aggregation,
            or the time period (grouped on ``cls.time_column``) with aggregation.
        """
        if not freq:
            # Non-aggregated case: rank individual records.
            return cls.df.set_index(id_column)[cls.metric].sort_values(ascending=False).head(n).to_frame()

        if not agg_func:
            agg_func = cls.agg_func

        # Aggregated case: rank time periods by their aggregated metric value.
        df_agg = (
            cls.df.groupby(pd.Grouper(key=cls.time_column, freq=freq), observed=False)[cls.metric]
            .agg(agg_func)
        )
        return df_agg.sort_values(ascending=False).head(n).to_frame()

    @classmethod
    def get_dim(cls, print_by_chunk=True) -> Union[None, list]:
        """
        Return or print the names of the configured dimensions.

        With ``print_by_chunk=True`` the names are printed as a Python list
        literal, with a line break after every fifth name, so the output can be
        copied straight into a variable assignment; nothing is returned.
        With ``print_by_chunk=False`` the names are returned as a list.
        """
        names = list(cls.dimensions.keys())
        if not print_by_chunk:
            return names

        last_pos = len(names) - 1
        pieces = ["["]
        for pos, name in enumerate(names):
            pieces.append(f"'{name}'")
            if pos < last_pos:
                pieces.append(", ")
            # Break the line after every fifth name.
            if (pos + 1) % 5 == 0:
                pieces.append("\n")
        pieces.append("]")
        print("".join(pieces))

    @classmethod
    def check_cur_dim(cls, kwargs) -> None:
        """
        Resolve integer dimension shortcuts in ``kwargs`` (modified in place).

        For each of ``x``, ``y``, ``color``, ``cat1`` and ``cat2`` that is
        present as an integer, the value is replaced by ``cls.cur_dim[value]``.
        A bold "By <label>" heading line and the resolved column name are
        printed for every replaced key.
        """
        for key in ('x', 'y', 'color', 'cat1', 'cat2'):
            if key not in kwargs or not isinstance(kwargs[key], int):
                continue
            kwargs[key] = cls.cur_dim[kwargs[key]]
            heading = '**By ' + cls.dimensions[kwargs[key]] + '**'
            print(heading)
            print(key + ': ', kwargs[key])
    @classmethod
    def configure(cls, **kwargs) -> None:
        """Reset the configuration, then set each given keyword as a class attribute.

        Raises:
            AttributeError: if a keyword does not name an existing attribute of
                the class. Keywords processed before the invalid one stay set.
        """
        cls.reset_configure()
        for name, value in kwargs.items():
            if not hasattr(cls, name):
                raise AttributeError(f"Invalid config parameter: {name}")
            setattr(cls, name, value)
            
    @classmethod
    def to_slide(cls, fig: go.Figure, title_postfix: str = None):
        """Export ``fig`` as an image for slides, named after its title.

        Does nothing while ``cls.block_save_fig_for_slides`` is True. Otherwise
        the figure is written to
        ``{cls.slide_path}{title}{title_postfix}.{cls.slide_img_fmt}``.

        Parameters:
        -----------
        fig : go.Figure
            Figure to export.
        title_postfix : str, optional
            Text appended to the figure title in the file name. Non-string
            values (e.g. ``True`` passed through from a plot method's
            ``to_slide`` argument) are ignored.

        Raises:
        -------
        ValueError
            If the figure has no title and no string postfix was given, so no
            meaningful file name can be built.
        """
        if cls.block_save_fig_for_slides:
            return
        # A figure without a title has title text None; treat it as empty so
        # that appending the postfix cannot fail and the file is never
        # silently named "None".
        title = fig.layout.title.text or ''
        if isinstance(title_postfix, str):
            title += title_postfix
        if not title:
            raise ValueError('Cannot save figure for slides: figure has no title and no title_postfix was given')
        fig.write_image(f"{cls.slide_path}{title}.{cls.slide_img_fmt}")
        
    @classmethod
    def line(cls, **kwargs) -> go.Figure:
        """Build a line chart.

        Keyword arguments are completed with the class defaults, labels and
        (if no title is given) an automatic title, then forwarded to
        ``viz.line`` of ``data_frame`` when passed, otherwise of ``cls.df``.
        A truthy ``to_slide`` keyword additionally exports the figure through
        ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'line')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'line')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'line')
        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.line(**plot_args)
        fig = cls._update_fig(fig, plot_args, 'line')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def bar_groupby(cls, **kwargs) -> go.Figure:
        """Build a bar chart of the metric grouped by a dimension.

        Keyword arguments are completed with the class defaults, labels and
        (if no title is given) an automatic title, then forwarded to
        ``viz.bar`` of ``data_frame`` when passed, otherwise of ``cls.df``.
        A truthy ``to_slide`` keyword additionally exports the figure through
        ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'bar_groupby')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'bar_groupby')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'bar_groupby')
        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.bar(**plot_args)
        fig = cls._update_fig(fig, plot_args, 'bar_groupby')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def line_resample(cls, **kwargs) -> go.Figure:
        """Build a line chart of the metric resampled over time.

        Keyword arguments are completed with the 'line_resample' defaults,
        labels and (if no title is given) an automatic title, then forwarded to
        ``viz.line`` of ``data_frame`` when passed, otherwise of ``cls.df``.
        A truthy ``to_slide`` keyword additionally exports the figure through
        ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'line_resample')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'line_resample')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'line_resample')
        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.line(**plot_args)
        # Post-processing uses the plain 'line' graph type.
        fig = cls._update_fig(fig, plot_args, 'line')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def area_resample(cls, **kwargs) -> go.Figure:
        """Build an area chart of the metric resampled over time.

        Settings, labels and the automatic title are prepared exactly as for
        ``line_resample``; the figure itself is drawn with ``viz.area`` of
        ``data_frame`` when passed, otherwise of ``cls.df``. A truthy
        ``to_slide`` keyword additionally exports the figure through
        ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'line_resample')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'line_resample')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'line_resample')
        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.area(**plot_args)
        fig = cls._update_fig(fig, plot_args, 'area')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def heatmap(cls, **kwargs) -> go.Figure:
        """Build a heatmap of the metric over two dimensions.

        Keyword arguments are completed with the class defaults, labels and
        (if no title is given) an automatic title, then forwarded to
        ``viz.heatmap`` of ``data_frame`` when passed, otherwise of ``cls.df``.
        A truthy ``to_slide`` keyword additionally exports the figure through
        ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'heatmap')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'heatmap')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'heatmap')
        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.heatmap(**plot_args)
        fig = cls._update_fig(fig, plot_args, 'heatmap')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def pie_bar(cls, **kwargs) -> go.Figure:
        """Build a pie_bar chart of the metric by a dimension.

        Keyword arguments are completed with the class defaults, labels and
        (if no title is given) an automatic title, then forwarded to
        ``viz.pie_bar`` of ``data_frame`` when passed, otherwise of ``cls.df``.
        A truthy ``to_slide`` keyword additionally exports the figure through
        ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'pie_bar')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'pie_bar')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'pie_bar')
        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.pie_bar(**plot_args)
        fig = cls._update_fig(fig, plot_args, 'pie_bar')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def box(cls, **kwargs) -> go.Figure:
        """Build box plots of the metric.

        Keyword arguments are completed with the class defaults, labels and
        (if no title is given) an automatic title, then forwarded to
        ``viz.box`` of ``data_frame`` when passed, otherwise of ``cls.df``.
        A truthy ``to_slide`` keyword additionally exports the figure through
        ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'box')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'box')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'box')
        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.box(**plot_args)
        fig = cls._update_fig(fig, plot_args, 'box')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def histogram(cls, **kwargs) -> go.Figure:
        """Build a histogram of the metric.

        Keyword arguments are completed with the class defaults, labels and
        (if no title is given) an automatic title, then forwarded to
        ``viz.histogram`` of ``data_frame`` when passed, otherwise of
        ``cls.df``. A truthy ``to_slide`` keyword additionally exports the
        figure through ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'histogram')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'histogram')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'histogram')

        if plot_args.get('data_frame') is not None:
            # Explicit dataframe: used as is and not forwarded as an argument.
            source = plot_args.pop('data_frame')
        elif plot_args.get('color') == 'order_avg_reviews_score':
            # The review score is numeric; colour by a string category instead,
            # working on a copy so the configured dataframe stays unchanged.
            source = cls.df.copy()
            source['order_avg_reviews_score'] = cls.df['order_avg_reviews_score'].astype(str).astype('category')
        else:
            source = cls.df

        fig = source.viz.histogram(**plot_args)
        fig = cls._update_fig(fig, plot_args, 'histogram')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def cat_compare(cls, **kwargs) -> go.Figure:
        """Build the comparison plot for two categorical columns.

        Unless ``category_orders`` is passed, both ``cat1`` and ``cat2`` are
        ordered 'descending', overridden by any ``category_orders`` found in
        ``cls.plotly_kwargs``. The arguments are forwarded to
        ``viz.cat_compare`` of ``data_frame`` when passed, otherwise of
        ``cls.df``. No labels, title or figure post-processing are applied
        here. A truthy ``to_slide`` keyword exports the figure through
        ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'cat_compare')

        if 'category_orders' not in plot_args:
            default_orders = {plot_args.get('cat1'): 'descending', plot_args.get('cat2'): 'descending'}
            plot_args['category_orders'] = default_orders
            if cls.plotly_kwargs and 'category_orders' in cls.plotly_kwargs:
                default_orders.update(cls.plotly_kwargs['category_orders'])

        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.cat_compare(**plot_args)
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig
    
    @classmethod
    def period_change(cls, **kwargs) -> go.Figure:
        """Build a period-over-period change plot of the metric.

        Keyword arguments are completed with the class defaults, labels and
        (if no title is given) an automatic title, then forwarded to
        ``viz.period_change`` of ``data_frame`` when passed, otherwise of
        ``cls.df``. A truthy ``to_slide`` keyword additionally exports the
        figure through ``to_slide``.
        """
        slide_flag = kwargs.pop('to_slide', False)
        plot_args = cls._prepare_common_settings(kwargs, 'period_change')
        plot_args['labels'] = cls._prepare_labels(plot_args, 'period_change')
        if not plot_args.get('title'):
            plot_args['title'] = cls._prepare_title(plot_args, 'period_change')
        # NOTE(review): the common settings are applied a second time here;
        # this looks redundant — confirm before removing.
        plot_args = cls._prepare_common_settings(plot_args, 'period_change')
        # An explicitly passed dataframe replaces the configured one and must
        # not be forwarded as a plotting argument.
        if plot_args.get('data_frame') is None:
            source = cls.df
        else:
            source = plot_args.pop('data_frame')
        fig = source.viz.period_change(**plot_args)
        fig = cls._update_fig(fig, plot_args, 'period_change')
        if slide_flag:
            cls.to_slide(fig, slide_flag)
        return fig

    @classmethod
    def _prepare_labels(cls, kwargs: dict, graph_type: str) -> dict:
        """Assemble the ``labels`` mapping for a figure of ``graph_type``.

        Automatic labels come from the class configuration (metric, time column
        and, for heatmaps, the x/y dimensions and the colour scale). Labels
        passed by the caller in ``kwargs['labels']`` are applied last and
        therefore win.

        Raises:
            ValueError: for a heatmap whose ``x`` (unless it is a time grouper)
                or ``y`` is not a key of ``cls.dimensions``.
        """
        result = {}

        # Metric label: plain label for aggregated charts, distribution label
        # for box/histogram; other graph types get none.
        if graph_type in ('line', 'bar_groupby', 'line_resample', 'pie_bar'):
            metric_text = cls.metric_label
        elif graph_type in ('box', 'histogram'):
            metric_text = cls.metric_label_for_distribution
        else:
            metric_text = None
        if cls.metric and metric_text:
            result[cls.metric] = metric_text

        # Time column label
        time_labelled = graph_type in ('line', 'line_resample', 'box', 'period_change')
        if time_labelled and cls.time_column and cls.time_column_label:
            result[cls.time_column] = cls.time_column_label

        # Heatmap axes are labelled by role ('x', 'y', 'color'), not by column name.
        if graph_type == 'heatmap':
            x_value = kwargs['x']
            if isinstance(x_value, pd.core.resample.TimeGrouper):
                result['x'] = cls.time_column_label
            elif x_value in cls.dimensions:
                result['x'] = cls.dimensions[x_value]
            else:
                raise ValueError(f"{kwargs['x']} not in cls.dimensions")

            y_value = kwargs['y']
            if y_value not in cls.dimensions:
                raise ValueError(f"{kwargs['y']} not in cls.dimensions")
            result['y'] = cls.dimensions[y_value]
            result['color'] = cls.metric_label

        # Caller-supplied labels override the automatic ones.
        if 'labels' in kwargs:
            result.update(kwargs['labels'])
        return result

    @classmethod
    def _prepare_title(cls, kwargs: dict, graph_type: str) -> str:
        """Build the automatic figure title for ``graph_type``.

        The title is composed of a base part (``title_base`` or the metric
        label, cut at the first comma), followed by the dimension, colour,
        time-frequency and quantile parts that apply to the graph type.

        Side effect: for 'bar_groupby', 'pie_bar' and non-time-series 'box'
        plots, a default sort order for the dimension axis is written into
        ``kwargs['category_orders']`` when ``cls.axis_sort_order`` is set and
        the caller did not specify an order for that dimension.

        Parameters:
        -----------
        kwargs : dict
            Plot arguments (read for x/y/color/mode/freq/quantiles/period;
            ``category_orders`` may be modified in place).
        graph_type : str
            Internal graph type name, e.g. 'line', 'bar_groupby', 'heatmap'.

        Returns:
        --------
        str
            The generated title.

        Raises:
        -------
        ValueError
            If the dimension axis cannot be determined, a dimension or colour
            column is missing from ``cls.dimensions``, a required label is not
            configured, or no frequency is available for a time-based title.
        """
        is_time_series_box = graph_type == 'box' and kwargs.get('mode') == 'time_series'
        # These graphs have no dimension axis in the title: the colour (if any)
        # is introduced with "by" and the time period follows.
        titled_without_axis = graph_type in ('line', 'line_resample', 'histogram') or is_time_series_box

        # Determine axis_dimension (the non-metric axis) and its default sort order
        axis_dimension = None
        if graph_type in ('bar_groupby', 'pie_bar') or (graph_type == 'box' and not is_time_series_box):
            if kwargs.get('y') == cls.metric:
                axis_dimension = kwargs['x']
            elif kwargs.get('x') == cls.metric:
                axis_dimension = kwargs['y']
            else:
                raise ValueError('Can not define axis_dimension')

            # Box plots are only sorted when an aggregation function is configured.
            apply_sort = (
                (graph_type != 'box' or cls.agg_func)
                and cls.axis_sort_order
                and axis_dimension != 'purchase_season'
                and axis_dimension not in kwargs.get('category_orders', {})
            )
            if apply_sort:
                # For bottom direction change sorting direction
                if kwargs.get('trim_top_n_direction') == 'bottom':
                    sort_order = 'ascending'
                else:
                    sort_order = cls.axis_sort_order
                if graph_type == 'box':
                    sort_order = f'{cls.agg_func} {sort_order}'
                kwargs.setdefault('category_orders', {})[axis_dimension] = sort_order

            if axis_dimension not in cls.dimensions:
                raise ValueError(f'{axis_dimension} not in cls.dimensions')

        # Base part of title.
        # If a metric label contains ',', only the text before it is used.
        if graph_type in ('box', 'histogram'):
            distribution_label = cls.metric_label_for_distribution or cls.metric_label
            if distribution_label is None:
                raise ValueError(
                    'For auto create title, metric_label_for_distribution or metric_label must be define'
                )
            prefix = 'Boxplots of' if graph_type == 'box' else 'Distribution of'
            title = f"{prefix} {distribution_label.split(',')[0]}"
        elif graph_type != 'period_change':
            if cls.title_base:
                title = cls.title_base
            elif cls.metric_label:
                title = cls.metric_label.split(',')[0]
            else:
                raise ValueError('For auto create title, title_base or metric_label must be define')

        # Dimension and color part of title
        if graph_type in ('line', 'bar_groupby', 'line_resample', 'pie_bar', 'box', 'histogram'):
            if not titled_without_axis:
                title += f' by {cls.dimensions[axis_dimension]}'
            if 'color' in kwargs:
                if kwargs['color'] not in cls.dimensions:
                    raise ValueError(f"{kwargs['color']} not in cls.dimensions")
                joiner = 'by' if titled_without_axis else 'and'
                title += f" {joiner} {cls.dimensions[kwargs['color']]}"

        # Datetime part of title
        if graph_type in ('line', 'line_resample') or is_time_series_box:
            resample_freq_for_title = kwargs['freq'] if 'freq' in kwargs else cls.freq
            if not resample_freq_for_title:
                raise ValueError('freq must be define')
            freq_map = {'h': 'Hour', 'D': 'Day', 'W': 'Week', 'ME': 'Month', 'M': 'Month'}
            # Aliases without a friendly name are shown verbatim instead of
            # failing with a KeyError.
            period_label = freq_map.get(resample_freq_for_title, resample_freq_for_title)
            joiner = 'and' if 'color' in kwargs else 'by'
            title += f' {joiner} {period_label}'

        # Part for specific graph_type
        if graph_type == 'heatmap':
            # x may be a time grouper (accepted by _prepare_labels as well); it
            # is not a key of cls.dimensions, so use the time column label.
            if isinstance(kwargs['x'], pd.core.resample.TimeGrouper):
                x_label = cls.time_column_label
            else:
                x_label = cls.dimensions[kwargs['x']]
            title += f" by {x_label}"
            title += f" and {cls.dimensions[kwargs['y']]}"
        elif graph_type in ('histogram', 'box'):
            if 'lower_quantile' in kwargs or 'upper_quantile' in kwargs:
                quantile_for_title = ' ('
                if kwargs.get('mode') == 'dual_box_trim':
                    quantile_for_title += 'Right: '
                if 'lower_quantile' in kwargs:
                    quantile_for_title += f"from {kwargs['lower_quantile']} "
                if 'upper_quantile' in kwargs:
                    quantile_for_title += f"to {kwargs['upper_quantile']} "
                quantile_for_title += 'Quantile)'
                title += quantile_for_title

        if graph_type == 'period_change':
            title_map = {
                'mom': 'Monthly Change in {metric}',
                'wow': 'Weekly Change in {metric}',
                'dod': 'Daily Change in {metric}',
                'yoy': 'Yearly Change in {metric}'
            }
            period = kwargs.get('period', 'mom')
            if not cls.metric_label:
                raise ValueError('metric_label must be define')
            title = title_map[period].format(metric=cls.metric_label.split(',')[0])
        return title

    @classmethod
    def _prepare_common_settings(cls, kwargs: dict, graph_type: str) -> dict:
        """Preparation of general settings for graphs"""
        cls.check_cur_dim(kwargs)
        if graph_type not in ['heatmap', 'cat_compare']:
            kwargs.setdefault('hover_data', {}).update({cls.metric: ':.3f'})
        if graph_type in ['cat_compare']:
            if 'cat1' in kwargs and 'cat2' not in kwargs:
                kwargs['cat2'] = cls.metric
            elif 'cat2' in kwargs and 'cat1' not in kwargs:
                kwargs['cat1'] = cls.metric

        # top_n settings
        if graph_type in ['line', 'line_resample']:
            if (kwargs.get('color') in ['customer_state', 'customer_city', 'order_product_categories', 'order_general_product_categories', 'seller_state', 'seller_city']
                and 'trim_top_n_color' not in kwargs):
                kwargs['trim_top_n_color'] = 5

        if graph_type in ['bar_groupby', 'box']:
            if kwargs.get('y') in ['customer_state', 'customer_city', 'order_product_categories', 'order_general_product_categories',
                                   'product_category', 'general_product_category', 'seller_state', 'seller_city']:
                if 'trim_top_n_y' not in kwargs:
                    kwargs['trim_top_n_y'] = 15
                if 'height' not in kwargs:
                    kwargs['height'] = 500

        # aggregation
        if graph_type in ['bar_groupby', 'line_resample', 'pie_bar', 'heatmap']:
            kwargs.setdefault('agg_func', cls.agg_func)
            if graph_type != 'heatmap':
                kwargs.setdefault('agg_column', cls.metric)
        # normalization
        if graph_type in ['bar_groupby', 'pie_bar']:
            if cls.text_auto:
                kwargs.setdefault('text_auto', cls.text_auto)
            kwargs.setdefault('norm_by', cls.norm_by)
        # time freq
        if graph_type in ['line_resample']:
            kwargs.setdefault('freq', cls.freq)

        # settings for specific graph_type
        if graph_type in ['line', 'line_resample']:
            kwargs.setdefault('x', cls.time_column)
            kwargs.setdefault('y', cls.metric)

        elif graph_type in ['bar_groupby']:
            if 'x' not in kwargs and 'y' not in kwargs:
                raise ValueError('x or y must be define')
            if 'y' not in kwargs:
                kwargs.setdefault('y', cls.metric)
            elif 'x' not in kwargs:
                kwargs.setdefault('x', cls.metric)

        elif graph_type in ['heatmap']:
            kwargs.setdefault('do_pivot', True)
            kwargs.setdefault('z', cls.metric)
            kwargs['width'] = 1100
        elif graph_type in ['pie_bar']:
            kwargs.setdefault('hole', 0.5)
            if 'x' not in kwargs and 'y' not in kwargs:
                raise ValueError('x or y must be define')
            if 'y' not in kwargs:
                kwargs.setdefault('y', cls.metric)
            elif 'x' not in kwargs:
                kwargs.setdefault('x', cls.metric)
        elif graph_type in ['box']:
            if kwargs.get('mode') == 'time_series':
                kwargs.setdefault('x', cls.time_column)
                kwargs.setdefault('y', cls.metric)
            else:
                kwargs['show_dual'] = True
                kwargs['upper_quantile'] = 0.95

                if 'x' not in kwargs and 'y' not in kwargs:
                    raise ValueError('x or y must be define')
                if 'y' not in kwargs:
                    kwargs.setdefault('y', cls.metric)
                elif 'x' not in kwargs:
                    kwargs.setdefault('x', cls.metric)
        elif graph_type in ['histogram']:
            if 'x' in kwargs and isinstance(kwargs['x'], str) or 'color' in kwargs:
                kwargs.setdefault('show_hist', False)
                kwargs.setdefault('show_kde', True)
                kwargs.setdefault('mode', 'dual_box_trim')
                kwargs.setdefault('show_legend_title', True)
            else:
                kwargs.setdefault('mode', 'dual_hist_trim')
            kwargs.setdefault('x', cls.metric)
            kwargs.setdefault('upper_quantile', 0.95)
        elif graph_type in ['period_change']:
            kwargs.setdefault('metric_col', cls.metric)
            kwargs.setdefault('date_col', cls.time_column)
            kwargs.setdefault('agg_func', cls.agg_func)
            if cls.plotly_kwargs:
                kwargs.update(cls.plotly_kwargs)
        return kwargs
    
    @classmethod
    def _update_fig(cls, fig: go.Figure, kwargs: dict, graph_type: str) -> dict:
        update_fig = {}
        for param in cls.update_fig:
            if hasattr(fig.layout, param):
                update_fig[param] = cls.update_fig[param]
        fig.update_layout(**update_fig)
        return fig

# Short alias for the PlotBuilder class.
pb = PlotBuilder