In [None]:
# Shared setup notebook; presumably defines `pb` and `df_sales` used below — TODO confirm.
%run ../../_pre_run.ipynb

# Sales Analysis

## Number of Sales

In [None]:
pb.configure(
    df=df_sales,
    time_column='order_purchase_dt',
    time_column_label='Date',
    metric='order_id',
    metric_label='Share of Sales',
    metric_label_for_distribution='Number of Sales',
    agg_func='nunique',
    norm_by='all',
    axis_sort_order='descending',
    text_auto='.1%',
    update_fig=dict(xaxis=dict(tickformat='.0%')),
)

In [None]:
print(f'Total number of sales: {df_sales["order_id"].nunique()}')

Let’s look at the statistics and distribution of the metric.

In [None]:
# Daily aggregation (freq='D'): the observations below are per-day order counts.
pb.metric_info(freq='D')

**Key Observations:**  

- 75% of days had ≤215 orders  
- 5% had ≤45 orders  
- 5% had ≥293 orders  
- Several days exceeded 400 orders  

Let’s look by different dimensions.

**By Season**

Because the 2018 data does not cover every month, we also split each season by year.

In [None]:
pb.bar_groupby(
    x='purchase_season',
    color='purchase_year',
)

**Key Observations:**  

- Lowest sales in summer (both years)  
- Highest sales in autumn (2018)  

**By Time of Day**

In [None]:
# Share of all orders per time-of-day bucket (norm_by='all' in the configure cell).
pb.bar_groupby(y='purchase_time_of_day')

**Key Observations:**  

- Sales by time of day:  
  - Evening: 36% (peak)  
  - Night: 9% (lowest)  
  - Morning: 23%  
  - Afternoon: 32%  

**By Day of Week**

In [None]:
# Share of all orders per weekday.
pb.bar_groupby(y='purchase_weekday')

**Key Observations:**  

- Saturday: 11% (lowest)  
- Monday: 16% (highest)  

**By Weekday vs Weekend**

In [None]:
# Share of all orders on weekdays vs weekends.
pb.bar_groupby(y='purchase_day_type')

**Key Observations:**  

- 77% of orders were placed on weekdays  

**By Day of the Week and Hour of the Day**

In [None]:
# xaxis_tickformat=None presumably clears the '.0%' tick format set via
# update_fig in pb.configure, so hours are not rendered as percentages — confirm.
fig = pb.heatmap(
    x='purchase_hour',
    y='purchase_weekday',
    labels=dict(color='Number of Sales'),
    title='Number of Sales by Day of the Week and Hour of the Day',
).update_layout(xaxis_dtick=1, xaxis_tickformat=None)
pb.to_slide(fig)
fig.show()

**Key Observations:**  

- 1AM-8AM had lowest sales across all weekdays  

**By Review Score**

In [None]:
# Share of all orders per (average) order review score.
pb.bar_groupby(y='order_avg_reviews_score')

**Key Observations:**  

- Review score distribution:  
  - 5 stars: 59%  
  - 2 stars: 3% (lowest)  
  - More 1-star than 2/3-star orders  

**By Whether the Order is Delayed or Not**

In [None]:
# Share of all orders by delivery-delay flag.
pb.bar_groupby(y='is_delayed')

**Key Observations:**  

- 92% of orders had no delivery delay  

**By Payment Category**

In [None]:
# Share of all orders per total-payment category.
pb.bar_groupby(y='order_total_payment_cat')

**Key Observations:**  

- 63% of orders were medium-priced  

**By Order Weight Category**

In [None]:
# Share of all orders per total-weight category.
pb.bar_groupby(y='order_total_weight_cat')

**Key Observations:**  

- Order weight distribution:  
  - Medium: 46%  
  - Light: 40%  

**By Delivery Time Category**

In [None]:
# Share of all orders per delivery-time category.
pb.bar_groupby(y='delivery_time_days_cat')

**Key Observations:**  

- 59% of orders had medium delivery time  

**By Presence of Installment Payments**

In [None]:
# Share of all orders with vs without installment payments.
pb.bar_groupby(y='order_has_installment')

**Key Observations:**  

- 51% of orders used installments  

**By Payment Type**

In [None]:
# Share of all orders per payment type.
pb.bar_groupby(y='order_payment_types')

**Key Observations:**  

- Payment methods:  
  - Credit card: 75%  
  - Boleto: 20%  

**By Product Category**

In [None]:
pb.bar_groupby(
    y='order_product_categories',
    text_auto=False,
)

**Key Observations:**  

- Top 3 product categories:  
  1. Bed Bath Table: 9%  
  2. Health Beauty: 9%  
  3. Sports Leisure: 8%  

**By Generalized Product Category**

In [None]:
pb.bar_groupby(
    y='order_general_product_categories',
    text_auto=False,
)

**Key Observations:**  

- Top 3 generalized categories:  
  1. Electronics: 27%  
  2. Furniture: 18%  
  3. Home & Garden: 14%  

**By Top Customer States**

In [None]:
pb.bar_groupby(
    y='customer_state',
    text_auto=False,
)

**Key Observations:**  

- Sales by state:  
  - São Paulo: 42%  
  - Rio de Janeiro: 13%  
  - Minas Gerais: 12%  
  - Others: ≤6%  

**By Top Customer Cities**

In [None]:
pb.bar_groupby(
    y='customer_city',
    text_auto=False,
)

**Key Observations:**  

- Sales by city:  
  - São Paulo: 16%  
  - Rio de Janeiro: 7%  
  - Others: ≤3%  

## Sum of Sales

In [None]:
pb.configure(
    df=df_sales,
    time_column='order_purchase_dt',
    metric='total_payment',
    metric_label='Sum of Sales, R$',
    agg_func='sum',
    title_base='Sum and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig=dict(xaxis2=dict(title_text='Number of Sales')),
)

In [None]:
print(f'Total Sales Amount: {df_sales["total_payment"].sum():,.2f}, R$')

Let’s look at the statistics and distribution of the metric.

In [None]:
# Daily aggregation (freq='D'): the observations below are per-day revenue totals.
pb.metric_info(freq='D')

**Key Observations:**  

- 75% of days had sales ≤33K R$  
- 5% of days had ≤6.7K R$  
- 5% of days had ≥49K R$  
- Several days exceeded 70K R$  

Let’s look by different dimensions.

**By Season**

Because the 2018 data does not cover every month, we also split each season by year.

In [None]:
pb.bar_groupby(
    x='purchase_season',
    color='purchase_year',
    title='Sum of Sales by Season and Year',
)

**Key Observations:**  

- Lowest sales revenue in summer (both years)  
- Highest revenue in autumn (2018)  

**By Time of Day**

In [None]:
pb.bar_groupby(
    y='purchase_time_of_day',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Highest sales volume and revenue in evenings  
- Lowest at night  

**By Day of Week**

In [None]:
pb.bar_groupby(
    y='purchase_weekday',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Monday has highest sales volume/revenue  
- Saturday has lowest 

**By Weekday vs Weekend**

In [None]:
pb.bar_groupby(
    y='purchase_day_type',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Weekday sales/revenue significantly higher than weekends  


**By Day of the Week and Hour of the Day**

In [None]:
fig = pb.heatmap(
    x='purchase_hour',
    y='purchase_weekday',
    text_auto='.3s',
    labels=dict(color='Amount, R$'),
    title='Sales Amount by Day of the Week and Hour of the Day',
).update_layout(xaxis_dtick=1)
pb.to_slide(fig)
fig.show()

**Key Observations:**  

- 1AM-8AM has lowest revenue across all weekdays  

**By Whether the Order is Delayed or Not**

In [None]:
pb.bar_groupby(
    y='is_delayed',
    show_count=True,
)

**Key Observations:**  

- Non-delayed orders have significantly higher sales/revenue  

**By Order Weight Category**

In [None]:
pb.bar_groupby(
    y='order_total_weight_cat',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Medium-weight orders generate more revenue than heavy/light  
- Light orders have higher quantity share but lower revenue share  
- Heavy orders are more expensive  

**By Presence of Installment Payments**

In [None]:
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Installment orders generate significantly more revenue despite similar order counts  
- Installment enables more expensive purchases  

**By Top Customer States**

In [None]:
pb.bar_groupby(
    y='customer_state',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- São Paulo state dominates sales volume/revenue  
- Rio de Janeiro and Minas Gerais rank 2nd/3rd  

**By Top Customer Cities**

In [None]:
pb.bar_groupby(
    y='customer_city',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- São Paulo city leads in sales volume/revenue  
- Rio de Janeiro ranks second  

**By Review Score**

In [None]:
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- 5-star reviews have highest sales/revenue  
- 2-star reviews have lowest  
- 1-star reviews exceed 2/3-star in volume/revenue  

**By Payment Type**

Since a single order can have multiple payments, we will measure transaction volume based on payment count.

In [None]:
pb.bar_groupby(
    y='order_payment_types',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Credit card leads payment methods (volume/revenue)  
- Boleto ranks second  

**By Product Category**

For the product-category split we cannot use the payment amount, so the sum is calculated from the product price plus the freight value.

The count will be determined by the number of items.

In [None]:
pb.bar_groupby(
    y='order_general_product_categories',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Electronics leads categories (volume/revenue)  
- Furniture ranks second  
- Furniture shows a smaller gap between its share of quantity and its share of revenue  

## Average Order Value

In [None]:
pb.configure(
    df=df_sales,
    metric='total_payment',
    metric_label='Average Order Value, R$',
    metric_label_for_distribution='Order Value, R$',
    agg_func='mean',
    title_base='Average Order Value and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig=dict(xaxis2=dict(title_text='Number of Sales')),
)

Top Orders

In [None]:
# Top orders for the configured metric, `total_payment` (set in pb.configure above).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
pb.metric_info(
    labels={'total_payment': 'Order Value, R$'},
    title='Distribution of Order Value',
    upper_quantile=0.95,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders ≤177 R$  
- 5% ≤33 R$  
- 5% ≥445 R$  
- Many outliers >1000 R$  

Let’s look by different dimensions.

**By Season**

Because the 2018 data does not cover every month, we also split each season by year.

In [None]:
pb.bar_groupby(
    x='purchase_season',
    color='purchase_year',
    title='Average Order Value by Season and Year',
)

**Key Observations:**  

- Summer 2017 had higher order values  
- Other seasons slightly higher in 2018  

**By Day of the Week and Hour of the Day**

In [None]:
fig = pb.heatmap(
    x='purchase_hour',
    y='purchase_weekday',
    text_auto='.1f',
    labels=dict(color='AOV, R$'),
    title='Average Order Value by Day of the Week and Hour of the Day',
).update_layout(xaxis_dtick=1)
pb.to_slide(fig)
fig.show()

**Key Observations:**  

- Nighttime doesn't always have lowest average order value  
- Some weekday nights show value peaks  

**By Whether the Order is Delayed or Not**

In [None]:
pb.histogram(
    color='is_delayed',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=True,
    show_kde=False,
    nbins=30,
).show()
pb.bar_groupby(
    y='is_delayed',
    show_count=True,
).show()

**Key Observations:**  

- Non-delayed orders have lower average values  

**By Order Weight Category**

In [None]:
pb.histogram(
    color='order_total_weight_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=True,
    show_kde=False,
    nbins=30,
).show()
pb.bar_groupby(
    y='order_total_weight_cat',
    show_count=True,
).show()

**Key Observations:**  

- Heavier orders have higher average values  

**By Presence of Installment Payments**

In [None]:
pb.histogram(
    color='order_has_installment',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=True,
    show_kde=False,
    nbins=30,
).show()
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Installment orders have much higher average values  

**By Top Customer States**

In [None]:
pb.box(
    y='customer_state',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_state',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- São Paulo has most orders but lowest average value among top states  
- Pará has highest average order value  

**By Top Customer Cities**

In [None]:
pb.box(
    y='customer_city',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_city',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Rio de Janeiro combines high volume with high average value  
- Salvador has highest average order value among top cities  

**By Review Score**

In [None]:
pb.histogram(
    color='order_avg_reviews_score',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
    nbins=30,
).show()
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- 1-star reviews have highest order values  
- 2-star reviews rank second  
- Expensive orders receive more low ratings  

## Reviews Score

In [None]:
pb.configure(
    df=df_sales,
    metric='order_avg_reviews_score',
    metric_label='Average Order Reviews Score',
    metric_label_for_distribution='Order Reviews Score',
    agg_func='mean',
    title_base='Average Order Reviews Score and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig=dict(xaxis2=dict(title_text='Number of Sales')),
)

Let’s look at the statistics and distribution of the metric.

In [None]:
pb.metric_info(
    labels={'order_avg_reviews_score': 'Order Reviews Score'},
    title='Distribution of Order Reviews Score',
    xaxis_type='category',
)

**Key Observations:**  

- 59% of orders have 5-star reviews  

Let’s look by different dimensions.

**By Season**

In [None]:
pb.bar_groupby(
    x='purchase_season',
    color='purchase_year',
    title='Average Order Reviews Score by Season and Year',
)

**Key Observations:**  

- Winter 2018 had slightly higher ratings  
- Other seasons slightly higher in 2017 

**By Day of the Week and Hour of the Day**

In [None]:
pb.heatmap(
    x='purchase_hour',
    y='purchase_weekday',
    text_auto='.1f',
    title='Average Order Reviews Score by Day of the Week and Hour of the Day',
    labels={'color': 'Score'},
).update_layout(xaxis_dtick=1)

**Key Observations:**  

- Nighttime shows rating extremes (especially Thursdays)  

**By Delivery Delay Status**

In [None]:
# NOTE(review): the cat_compare result is neither shown nor assigned here —
# confirm that pb.cat_compare renders its own output.
pb.cat_compare(
    cat2='is_delayed',
    visible_graphs=[2],
)
pb.bar_groupby(
    y='is_delayed',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Non-delayed orders have significantly higher ratings  
- Higher 5-star share for on-time deliveries  
- "Unknown" delivery status orders mostly get 1-star  

**By Delivery Time Category**

In [None]:
# NOTE(review): the cat_compare result is neither shown nor assigned here —
# confirm that pb.cat_compare renders its own output.
pb.cat_compare(
    cat2='delivery_time_days_cat',
    visible_graphs=[2],
)
pb.bar_groupby(
    y='delivery_time_days_cat',
    show_count=True,
).show()

**Key Observations:**  

- Faster deliveries get better ratings  

**By Customer State**

In [None]:
# NOTE(review): the cat_compare result is neither shown nor assigned here —
# confirm that pb.cat_compare renders its own output.
pb.cat_compare(
    cat2='customer_state',
    visible_graphs=[2],
    trim_top_n_cat2=7,
)
fig = pb.bar_groupby(
    y='customer_state',
    show_count=True,
).update_layout(xaxis_domain=[0, 0.4], xaxis2_domain=[0.6, 1])
pb.to_slide(fig)
fig.show()

**Key Observations:**  

- Maranhão has lowest average rating among top states  
- Rio de Janeiro and Bahia have highest 1-star share 

**By Customer City**

In [None]:
# NOTE(review): the cat_compare result is neither shown nor assigned here —
# confirm that pb.cat_compare renders its own output.
pb.cat_compare(
    cat2='customer_city',
    visible_graphs=[2],
    trim_top_n_cat2=7,
)
pb.bar_groupby(
    y='customer_city',
    show_count=True,
).update_layout(xaxis_domain=[0, 0.4], xaxis2_domain=[0.6, 1]).show()

**Key Observations:**  

- Rio de Janeiro and Porto Alegre have notable 1-star concentrations  


## Order Weight

In [None]:
pb.configure(
    df=df_sales,
    metric='total_weight_kg',
    metric_label='Average Weight of Order, kg',
    metric_label_for_distribution='Weight of Order, kg',
    title_base='Average Weight of Order and Number of Sales',
    agg_func='mean',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig=dict(xaxis2=dict(title_text='Number of Sales')),
)

Top Orders

In [None]:
# Top orders for the configured metric, `total_weight_kg` (set in pb.configure above).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
pb.metric_info(
    labels={'total_weight_kg': 'Weight of Order, kg'},
    title='Distribution of Weight of Order',
    upper_quantile=0.99,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders ≤2kg  
- 5% ≤150g  
- 5% ≥10kg  

Let’s look by different dimensions.

**By Season**

Because the 2018 data does not cover every month, we also split each season by year.

In [None]:
pb.bar_groupby(
    x='purchase_season',
    color='purchase_year',
    title='Average Weight of Order by Season and Year',
)

**Key Observations:**  

- 2017 had heavier orders across all seasons  

**By Time of Day**

In [None]:
pb.histogram(
    color='purchase_time_of_day',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='purchase_time_of_day',
    show_count=True,
).show()

**Key Observations:**  

- Afternoons have heaviest orders  
- Nights have lightest  

**By Whether the Order is Delayed or Not**

In [None]:
pb.histogram(
    color='is_delayed',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='is_delayed',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Delayed orders are heavier  

**By Presence of Installment Payments**

In [None]:
pb.histogram(
    color='order_has_installment',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
# NOTE(review): unlike sibling cells, this last chart has no .show() and is
# displayed only as the cell's final expression.
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Installment orders are heavier  

**By Top Customer States**

In [None]:
pb.box(
    y='customer_state',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_state',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Mato Grosso has heaviest average orders among top states  

**By Top Customer Cities**

In [None]:
pb.box(
    y='customer_city',
    upper_quantile=0.95,
    show_dual=True,
).show()
# NOTE(review): unlike sibling cells, this last chart has no .show() and is
# displayed only as the cell's final expression.
pb.bar_groupby(
    y='customer_city',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- Santos and Rio de Janeiro have heaviest average orders  

**By Review Score**

In [None]:
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- 1-star reviews have significantly heavier orders  
- 2-star reviews rank second  
- Heavy orders receive lower ratings  

## Number of Products per Order

In [None]:
pb.configure(
    df=df_sales,
    metric='products_cnt',
    metric_label='Average Number of Products in Order',
    metric_label_for_distribution='Number of Products in Order',
    title_base='Number of Products in Order and Number of Sales',
    agg_func='mean',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig=dict(xaxis2=dict(title_text='Number of Sales')),
)

Top Orders

In [None]:
# Top orders for the configured metric, `products_cnt` (set in pb.configure above).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
pb.metric_info(
    labels={'products_cnt': 'Number of Products in Order'},
    title='Distribution of Number of Products in Order',
)

**Key Observations:**  

- 90% of orders contain a single product  
- Two anomalies had 20-21 products  

Let’s look by different dimensions.

**By Whether the Order is Delayed or Not**

In [None]:
pb.bar_groupby(
    y='is_delayed',
    show_count=True,
).show()

**Key Observations:**  

- Non-delayed orders have slightly more products  

**By Review Score**

In [None]:
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
    to_slide=True,
)

**Key Observations:**  

- 1/2-star reviews have more products per order  

## Product Price per Order

In [None]:
pb.configure(
    df=df_sales,
    metric='avg_products_price',
    metric_label='Average Product Price in Order, R$',
    metric_label_for_distribution='Product Price in Order, R$',
    agg_func='mean',
    title_base='Average Product Price in Order and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig=dict(xaxis2=dict(title_text='Number of Sales')),
)

Top Orders

In [None]:
# Top orders for the configured metric, `avg_products_price` (set in pb.configure above).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
pb.metric_info(
    labels={'avg_products_price': 'Average Product Price in Order, R$'},
    title='Distribution of Average Product Price in Order',
    upper_quantile=0.99,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders have average product price ≤140 R$  
- 5% have ≥363 R$  

Let’s look by different dimensions.

**By Season**

Because the 2018 data does not cover every month, we also split each season by year.

In [None]:
pb.bar_groupby(
    x='purchase_season',
    color='purchase_year',
    title='Average Product Price in Order by Season and Year',
)

**Key Observations:**  

- Summer/autumn 2017 had higher product prices  
- Winter 2018 was higher  

**By Time of Day**

In [None]:
pb.histogram(
    color='purchase_time_of_day',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='purchase_time_of_day',
    show_count=True,
).show()

**Key Observations:**  

- Nighttime has lower product prices  

**By Whether the Order is Delayed or Not**

In [None]:
pb.histogram(
    color='is_delayed',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='is_delayed',
    show_count=True,
).show()

**Key Observations:**  

- Delayed orders have higher product prices  

**By Presence of Installment Payments**

In [None]:
pb.histogram(
    color='order_has_installment',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Installment orders have significantly higher product prices  

**By Top Customer States**

In [None]:
pb.box(
    y='customer_state',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_state',
    show_count=True,
).show()

**Key Observations:**  

- Pará has highest average product price among top states  
- São Paulo has lowest  

**By Top Customer Cities**

In [None]:
pb.box(
    y='customer_city',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_city',
    show_count=True,
).show()

**Key Observations:**  

- Brasília, Rio de Janeiro and Salvador have highest product prices among top cities.  

## Number of Sellers per Order

In [None]:
pb.configure(
    df=df_sales,
    metric='sellers_cnt',
    metric_label='Average Number of Sellers in Order',
    agg_func='mean',
    axis_sort_order='descending',
)

Top Orders

In [None]:
# Top orders for the configured metric, `sellers_cnt` (set in pb.configure above).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
pb.metric_info(
    labels={'sellers_cnt': 'Number of Sellers in Order'},
    title='Distribution of Number of Sellers in Order',
    xaxis_type='category',
)

**Key Observations:**  

- 99% of orders have single seller  

## Number of Categories per Order

In [None]:
pb.configure(
    df=df_sales,
    metric='product_categories_cnt',
    metric_label='Average Number of Categories in Order',
    agg_func='mean',
    axis_sort_order='descending',
)

Top Orders

In [None]:
# Top orders for the configured metric, `product_categories_cnt` (set in pb.configure above).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
pb.metric_info(
    labels={'product_categories_cnt': 'Number of Categories in Order'},
    title='Distribution of Number of Categories in Order',
    xaxis_type='category',
)

**Key Observations:**  

- 99% of orders have single category  

In [None]:
# Shared teardown notebook run after the analysis; its contents are not visible here — TODO confirm what it does.
%run ../../_post_run.ipynb