In [None]:
# Execute the shared pre-run notebook before anything else.
# NOTE(review): presumably this is what defines `pb`, `df_sales` and `px` used below — confirm.
%run ../../_pre_run.ipynb

# Delivery Analysis

## Delivery Cost

In [None]:
# Configure the `pb` helper for the freight-cost analysis: mean of
# `total_freight_value`, with `order_purchase_dt` as the time column.
pb.configure(
    df=df_sales,
    time_column='order_purchase_dt',
    metric='total_freight_value',
    metric_label='Average Freight Value per Order, R$',
    metric_label_for_distribution='Freight Value per Order, R$',
    agg_func='mean',
    axis_sort_order='descending',
    text_auto='.3s',
)

In [None]:
# Overall mean freight value across all rows of df_sales, formatted to 2 decimals.
print(f'Average Freight Value per Order: {df_sales.total_freight_value.mean():.2f} R$')

Top Orders.

In [None]:
# Top orders by the currently configured metric (freight value per order).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
# Summary statistics and distribution of the freight value
# (called with upper_quantile=0.95 and the 'dual_hist_trim' histogram mode).
pb.metric_info(
    upper_quantile=0.95,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders have shipping costs ≤24 R$  
- Top 5% have shipping costs ≥54.7 R$  
- Several extreme outliers exist with very high shipping costs 

In [None]:
# Same top view with freq='D'.
# NOTE(review): presumably this aggregates the metric by day — confirm against the `pb` helper.
pb.metric_top(freq='D')

Let’s look by different dimensions.

**By Whether the Order is Delayed or Not**

In [None]:
# Distribution of the configured metric split by `is_delayed`,
# followed by the metric aggregated per `is_delayed` group.
pb.histogram(
    color='is_delayed',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
    nbins=30,
).show()
pb.bar_groupby(
    y='is_delayed',
    show_count=True,
).show()

**Key Observations:**  

- Delayed orders have higher shipping costs than non-delayed  

**By Order Weight Category**

In [None]:
# Distribution of the configured metric split by `order_total_weight_cat`,
# followed by the metric aggregated per weight category.
pb.histogram(
    color='order_total_weight_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=True,
    show_kde=False,
    nbins=30,
).show()
pb.bar_groupby(
    y='order_total_weight_cat',
).show()

**Key Observations:**  

- Heavier orders have higher shipping costs (expected pattern)  

**By Presence of Installment Payments**

In [None]:
# Distribution of the configured metric split by `order_has_installment`,
# followed by the per-group aggregate (also sent to the slide deck via to_slide=True).
pb.histogram(
    color='order_has_installment',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=True,
    show_kde=False,
    nbins=30,
).show()
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Installment orders have higher shipping costs  

**By Top Customer States**

In [None]:
# Box plots of the configured metric per `customer_state`.
pb.box(
    y='customer_state',
    upper_quantile=0.95,
    show_dual=True,
).show()
# Per-state aggregate with counts; the second x-axis is relabelled before
# the figure is sent to the slide deck and displayed.
fig = pb.bar_groupby(
    y='customer_state',
    show_count=True,
).update_layout(
    xaxis2_title_text='Number of Sales',
)
pb.to_slide(fig)
fig.show()

**Key Observations:**  

- Among top states by sales volume:  
  - São Paulo has lowest average shipping costs  
  - Maranhão has highest  

**By Top Customer Cities**

In [None]:
# Box plots of the configured metric per `customer_city`.
pb.box(
    y='customer_city',
    upper_quantile=0.95,
    show_dual=True,
).show()
# Per-city aggregate with counts; the second x-axis is relabelled.
# `.show()` is called explicitly, like every other plotting cell in this
# notebook, so the figure renders even when the cell is not relying on the
# implicit last-expression display.
pb.bar_groupby(
    y='customer_city',
    show_count=True,
).update_layout(
    xaxis2_title_text='Number of Sales',
).show()

**Key Observations:**  

- Among top cities by sales volume, highest average shipping costs in:  
  1. Salvador  
  2. Porto Alegre  
  3. Brasília  

**By Review Score**

In [None]:
# Distribution of the configured metric split by `order_avg_reviews_score`.
pb.histogram(
    color='order_avg_reviews_score',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
    nbins=30,
).show()
# Per-score aggregate with counts; the second x-axis is relabelled before
# the figure is sent to the slide deck and displayed.
fig = pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
).update_layout(
    xaxis2_title_text='Number of Sales',
)
pb.to_slide(fig)
fig.show()

**Key Observations:**  

- Higher shipping costs correlate with lower order ratings  


## Distance Between Customer and Seller

In [None]:
# Configure the `pb` helper for the seller-customer distance analysis:
# mean of `avg_distance_km`, with `order_purchase_dt` as the time column.
pb.configure(
    df=df_sales,
    time_column='order_purchase_dt',
    metric='avg_distance_km',
    metric_label='Average Distance, km',
    metric_label_for_distribution='Average Distance, km',
    agg_func='mean',
    axis_sort_order='descending',
    text_auto='.1f',
)

In [None]:
# Overall mean of `avg_distance_km` across all rows of df_sales, formatted to 2 decimals.
print(f'Average Distance: {df_sales.avg_distance_km.mean():.2f} km')

Let’s look at the statistics and distribution of the metric.

In [None]:
# Summary statistics and distribution of the seller-customer distance
# (called with upper_quantile=0.95 and the 'dual_hist_trim' histogram mode).
pb.metric_info(
    upper_quantile=0.95,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders have seller-buyer distance ≤800km  
- 5% of orders have seller-buyer distance ≤16.5km  
- 5% of orders have seller-buyer distance ≥2,000km  
- Several extreme outliers (>4,000km)  

Let’s look by different dimensions.

**By Whether the Order is Delayed or Not**

In [None]:
# Distribution of the configured metric split by `is_delayed`,
# followed by the per-group aggregate (also sent to the slide deck via to_slide=True).
pb.histogram(
    color='is_delayed',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
    nbins=30,
).show()
pb.bar_groupby(
    y='is_delayed',
    to_slide=True,
).show()

**Key Observations:**  

- Delayed orders have greater average seller-buyer distance  


**By Presence of Installment Payments**

In [None]:
# Distribution of the configured metric split by `order_has_installment`.
pb.histogram(
    color='order_has_installment',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=True,
    show_kde=False,
    nbins=30,
).show()
# Per-group aggregate with counts, also sent to the slide deck.
# `.show()` is called explicitly, matching the same chart in the freight
# section, instead of relying on the implicit last-expression display.
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Installment orders have greater average seller-buyer distance  


## Delivery Time

### Proportion of Each Stage in Delivery Time

Let's look at what percentage of the total delivery time each stage occupies. 

Orders with anomalous timestamps (stages recorded out of chronological order, or missing dates) are excluded; there are only a few of them, so this does not significantly affect the result.

In [None]:
# Keep only the four stage timestamps, and only the orders where they are in
# chronological order (purchase <= approved <= carrier <= customer).
# Comparisons against missing timestamps evaluate to False, and the final
# dropna() removes any row that still has a missing value.
tmp_df_sales = (
    df_sales
    .loc[:, [
        'order_purchase_dt',
        'order_approved_dt',
        'order_delivered_carrier_dt',
        'order_delivered_customer_dt',
    ]]
    .loc[lambda d: (
        (d['order_approved_dt'] >= d['order_purchase_dt'])
        & (d['order_delivered_carrier_dt'] >= d['order_approved_dt'])
        & (d['order_delivered_customer_dt'] >= d['order_delivered_carrier_dt'])
        & (d['order_delivered_customer_dt'] >= d['order_purchase_dt'])
    )]
    .dropna()
)

In [None]:
# Total purchase-to-delivery duration in seconds: the denominator for every stage share.
tmp_df_sales['from_purchase_to_customer'] = (
    tmp_df_sales['order_delivered_customer_dt'] - tmp_df_sales['order_purchase_dt']
).dt.total_seconds()

# Each stage's duration as a percentage of the total duration, rounded to 2 decimals.
# Tuples are (new column name, stage start column, stage end column).
for stage_label, stage_start, stage_end in [
    ('From Purchase to Approved', 'order_purchase_dt', 'order_approved_dt'),
    ('From Approved to Carrier', 'order_approved_dt', 'order_delivered_carrier_dt'),
    ('From Carrier to Customer', 'order_delivered_carrier_dt', 'order_delivered_customer_dt'),
]:
    stage_seconds = (tmp_df_sales[stage_end] - tmp_df_sales[stage_start]).dt.total_seconds()
    tmp_df_sales[stage_label] = (
        stage_seconds * 100 / tmp_df_sales['from_purchase_to_customer']
    ).round(2)

In [None]:
# Reshape to long format: one row per (purchase date, stage) holding that
# stage's share of the total delivery time.
# NOTE: this rebinds `tmp_df_sales`, so the cell cannot be re-run without
# re-running the cells that build the wide frame first.
tmp_df_sales = (
    tmp_df_sales
    .rename(columns={'order_purchase_dt': 'Date'})
    .melt(
        id_vars='Date',
        value_vars=[
            'From Purchase to Approved',
            'From Approved to Carrier',
            'From Carrier to Customer',
        ],
        var_name='Stage',
        value_name='Percent of All Delivery Time',
    )
)

Let's look at what percentage of the total delivery time each stage occupies on average.

In [None]:
# Mean share of total delivery time per stage, largest first.
sorted_means = (
    tmp_df_sales
    .groupby('Stage')['Percent of All Delivery Time']
    .mean()
    .sort_values(ascending=False)
)

In [None]:
# Donut chart of the mean share of total delivery time taken by each stage.

# (x, y, text) for the manually placed slice labels.
# NOTE(review): the coordinates look hand-tuned for the current slice sizes —
# re-check them if the underlying data changes.
annotations_data = [
    (0.6, -0.1, 'Carrier > Customer'),
    (-0.05, 0.8, 'Approved > Carrier'),
    (0.45, 1.08, 'Purchase > Approved'),
]
fig = px.pie(
    names=sorted_means.index,
    values=sorted_means.values,
    title='Average Delivery Time Distribution by Stage',
    labels={'names': 'Delivery Stage', 'values': 'Percentage of Total Time'},
    category_orders={'names': ['From Carrier to Customer', 'From Approved to Carrier']},
    hole=0.4,
)
# Percentages are drawn inside the slices with one decimal place.
fig.update_traces(
    textinfo='percent',
    textposition='inside',
    texttemplate='%{percent:.1%}',
    hovertemplate='%{label}: %{percent:.1%}',
)
# The legend is hidden; the manual annotations label the slices instead.
fig.update_layout(
    showlegend=False,
    width=500,
    height=400,
    margin={'t': 60},
    title_y=0.97,
)
for x, y, text in annotations_data:
    fig.add_annotation(x=x, y=y, text=text, showarrow=False, font={'size': 12})
pb.to_slide(fig)
fig.show()

**Key Observations:**  

- Delivery time distribution:  
  - Payment approval: 4%  
  - Carrier handoff: 25.5%  
  - Carrier delivery: 70.5%  

Let's look at the distribution.

In [None]:
# Box plot of each stage's share of total delivery time (one box per stage).
tmp_df_sales.viz.box(
    x='Percent of All Delivery Time',
    y='Stage',
    title='Percent of All Delivery Time by Stage',
)

**Key Observations:**  

- Carrier delivery consumes most of total delivery time  
- Significant differences between stages (non-overlapping IQRs)  


### Total Delivery Time

In [None]:
# Configure the `pb` helper for the total delivery time analysis:
# mean of `delivery_time_days`; the second x-axis is titled 'Number of Sales'.
pb.configure(
    df=df_sales,
    metric='delivery_time_days',
    metric_label='Average Order Delivery Time, days',
    metric_label_for_distribution='Order Delivery Time, days',
    agg_func='mean',
    title_base='Average Order Delivery Time and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig={'xaxis2': {'title_text': 'Number of Sales'}},
)

Top Orders

In [None]:
# Top orders by the currently configured metric (total delivery time in days).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
# Summary statistics and distribution of the total delivery time
# (called with upper_quantile=0.99; `labels` is keyed by the metric column).
pb.metric_info(
    labels={'delivery_time_days': 'Order Delivery Time, days'},
    title='Distribution of Order Delivery Time',
    upper_quantile=0.99,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- Median delivery time: ~10 days  
- 75% of orders are delivered within ≤16 days  
- Top 5% take ≥30 days  

Let’s look by different dimensions.

**By Day of Week**

In [None]:
# Distribution of the configured metric split by `purchase_weekday`,
# followed by the metric aggregated per weekday.
pb.histogram(
    color='purchase_weekday',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='purchase_weekday',
    show_count=True,
).show()

**Key Observations:**  

- Friday/Saturday orders have slightly longer delivery times  

**By Payment Category**

In [None]:
# Distribution of the configured metric split by `order_total_payment_cat`,
# followed by the per-category aggregate (also sent to the slide deck).
pb.histogram(
    color='order_total_payment_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_total_payment_cat',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- More expensive orders take longer to deliver  

**By Order Weight Category**

In [None]:
# Distribution of the configured metric split by `order_total_weight_cat`,
# followed by the per-category aggregate (also sent to the slide deck).
pb.histogram(
    color='order_total_weight_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_total_weight_cat',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Heavy orders take longer to deliver than light/medium  

**By Review Score**

In [None]:
# Distribution of the configured metric split by `order_avg_reviews_score`,
# followed by the per-score aggregate (also sent to the slide deck).
pb.histogram(
    color='order_avg_reviews_score',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- 1-star rated orders have noticeably longer delivery times  


**By Top Customer States**

In [None]:
# Box plots of the configured metric per `customer_state`,
# followed by the per-state aggregate (also sent to the slide deck).
pb.box(
    y='customer_state',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_state',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Among top states by sales volume, top 3 states with longest delivery times:  
  1. Pará  
  2. Maranhão  
  3. Ceará 

**By Top Customer Cities**

In [None]:
# Box plots of the configured metric per `customer_city`,
# followed by the per-city aggregate (also sent to the slide deck).
pb.box(
    y='customer_city',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_city',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Among top cities by sales volume, top 3 cities with longest delivery times:  
  1. Salvador  
  2. Porto Alegre  
  3. Rio de Janeiro   

### Delivery Delay

In [None]:
# Configure the `pb` helper for the delivery delay analysis:
# mean of `delivery_delay_days`; the second x-axis is titled 'Number of Sales'.
pb.configure(
    df=df_sales,
    metric='delivery_delay_days',
    metric_label='Average Delivery Delay, days',
    metric_label_for_distribution='Delivery Delay, days',
    agg_func='mean',
    title_base='Average Delivery Delay and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig={'xaxis2': {'title_text': 'Number of Sales'}},
)

Top Orders

In [None]:
# Top orders by the currently configured metric (delivery delay in days).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
# Summary statistics and distribution of the delivery delay, called with
# lower_quantile=0.01 and upper_quantile=0.99.
# `labels` must be keyed by the configured metric column (`delivery_delay_days`),
# as in the other metric_info calls in this notebook; keying it by
# `delivery_time_days` would leave the delay axis without its display label.
pb.metric_info(
    labels=dict(delivery_delay_days='Delivery Delay, days'),
    title='Distribution of Delivery Delay',
    lower_quantile=0.01,
    upper_quantile=0.99,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders deliver ≥6 days early  
- ~5% are ≥4 days late 

**By Review Score**

In [None]:
# Distribution of the configured metric split by `order_avg_reviews_score`,
# followed by the per-score aggregate (also sent to the slide deck).
pb.histogram(
    color='order_avg_reviews_score',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Higher rated orders deliver earlier than estimated  

**By Top Customer States**

In [None]:
# Box plots of the configured metric per `customer_state`,
# followed by the per-state aggregate (also sent to the slide deck).
pb.box(
    y='customer_state',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_state',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Among top states by sales volume, top 3 states for early delivery:  
  1. Mato Grosso  
  2. Pará  
  3. Rio Grande do Sul  

### From Purchase to Approved Time

In [None]:
# Configure the `pb` helper for the purchase-to-approval analysis:
# mean of `from_purchase_to_approved_hours`; the second x-axis is titled 'Number of Sales'.
pb.configure(
    df=df_sales,
    metric='from_purchase_to_approved_hours',
    metric_label='Average Order Processing Time, hour',
    metric_label_for_distribution='Order Processing Time, hour',
    agg_func='mean',
    title_base='Average Order Processing Time and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig={'xaxis2': {'title_text': 'Number of Sales'}},
)

Top Orders

In [None]:
# Top orders by the currently configured metric (purchase-to-approval time in hours).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
# Summary statistics and distribution of the purchase-to-approval time
# (called with upper_quantile=0.99; `labels` is keyed by the metric column).
pb.metric_info(
    labels={'from_purchase_to_approved_hours': 'Order Processing Time, hour'},
    title='Distribution of Order Processing Time',
    upper_quantile=0.99,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders take ≤14 hours to process  
- Top 5% take ≥48 hours  

Let’s look by different dimensions.

**By Day of Week**

In [None]:
# Distribution of the configured metric split by `purchase_weekday`,
# followed by the per-weekday aggregate (also sent to the slide deck).
pb.histogram(
    color='purchase_weekday',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='purchase_weekday',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Friday/Saturday orders process slowest  
- Wednesday orders process fastest  

**By Time of Day**

In [None]:
# Distribution of the configured metric split by `purchase_time_of_day`,
# followed by the per-group aggregate (also sent to the slide deck).
pb.histogram(
    color='purchase_time_of_day',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='purchase_time_of_day',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Nighttime orders take longer to process  

**By Whether the Order is Delayed**

In [None]:
# Distribution of the configured metric split by `is_delayed`,
# followed by the metric aggregated per `is_delayed` group.
pb.histogram(
    color='is_delayed',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='is_delayed',
    show_count=True,
).show()

**Key Observations:**  

- Non-delayed orders process faster (expected pattern)

**By Weekday vs Weekend**

In [None]:
# Distribution of the configured metric split by `purchase_day_type`,
# followed by the per-group aggregate (also sent to the slide deck).
pb.histogram(
    color='purchase_day_type',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='purchase_day_type',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Weekday orders process significantly faster than weekends  

**By Payment Category**

In [None]:
# Distribution of the configured metric split by `order_total_payment_cat`,
# followed by the metric aggregated per payment category.
pb.histogram(
    color='order_total_payment_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_total_payment_cat',
    show_count=True,
).show()

**Key Observations:**  

- Cheap/expensive orders process faster than mid-priced  

**By Order Weight Category**

In [None]:
# Distribution of the configured metric split by `order_total_weight_cat`,
# followed by the metric aggregated per weight category.
pb.histogram(
    color='order_total_weight_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_total_weight_cat',
    show_count=True,
).show()

**Key Observations:**  

- Heavy orders take longer to process  

**By Presence of Installment Payments**

In [None]:
# Distribution of the configured metric split by `order_has_installment`,
# followed by the per-group aggregate (also sent to the slide deck).
pb.histogram(
    color='order_has_installment',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Installment orders process much faster  

**By Review Score**

In [None]:
# Distribution of the configured metric split by `order_avg_reviews_score`,
# followed by the metric aggregated per review score.
pb.histogram(
    color='order_avg_reviews_score',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
).show()

**Key Observations:**  

- 1/2-star rated orders took longer to process  

### From Approval to Carrier Time

In [None]:
# Configure the `pb` helper for the approval-to-carrier analysis:
# mean of `from_approved_to_carrier_days`; the second x-axis is titled 'Number of Sales'.
pb.configure(
    df=df_sales,
    metric='from_approved_to_carrier_days',
    metric_label='Average Order Approval to Carrier Time, days',
    metric_label_for_distribution='Order Approval to Carrier Time, days',
    agg_func='mean',
    title_base='Average Order Approval to Carrier Time and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig={'xaxis2': {'title_text': 'Number of Sales'}},
)

Top Orders

In [None]:
# Top orders by the currently configured metric (approval-to-carrier time in days).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
# Summary statistics and distribution of the approval-to-carrier time
# (called with upper_quantile=0.99; `labels` is keyed by the metric column).
pb.metric_info(
    labels={'from_approved_to_carrier_days': 'Order Approval to Carrier Time, days'},
    title='Distribution of Order Approval to Carrier Time',
    upper_quantile=0.99,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders transfer to carrier within ≤3.5 days  
- Top 5% take ≥8 days  

Let’s look by different dimensions.

**By Day of Week**

In [None]:
# Distribution of the configured metric split by `purchase_weekday`,
# followed by the per-weekday aggregate (also sent to the slide deck).
pb.histogram(
    color='purchase_weekday',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='purchase_weekday',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Friday/Saturday orders take longest to transfer to carrier 

**By Time of Day**

In [None]:
# Distribution of the configured metric split by `purchase_time_of_day`,
# followed by the metric aggregated per time-of-day group.
pb.histogram(
    color='purchase_time_of_day',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='purchase_time_of_day',
    show_count=True,
).show()

**Key Observations:**  

- Morning orders transfer fastest to carrier   

**By Payment Category**

In [None]:
# Distribution of the configured metric split by `order_total_payment_cat`,
# followed by the per-category aggregate (also sent to the slide deck).
pb.histogram(
    color='order_total_payment_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_total_payment_cat',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Expensive orders take longer to transfer to carrier  

**By Order Weight Category**

In [None]:
# Distribution of the configured metric split by `order_total_weight_cat`,
# followed by the per-category aggregate (also sent to the slide deck).
pb.histogram(
    color='order_total_weight_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_total_weight_cat',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Heavy orders take longer to transfer to carrier     

**By Presence of Installment Payments**

In [None]:
# Distribution of the configured metric split by `order_has_installment`,
# followed by the metric aggregated per installment group.
pb.histogram(
    color='order_has_installment',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
).show()

**Key Observations:**  

- Installment orders take slightly longer to transfer to carrier 

**By Review Score**

In [None]:
# Distribution of the configured metric split by `order_avg_reviews_score`,
# followed by the metric aggregated per review score.
pb.histogram(
    color='order_avg_reviews_score',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
).show()

**Key Observations:**  

- Faster carrier transfer correlates with higher ratings  

### Carrier Delivery Time

In [None]:
# Configure the `pb` helper for the carrier-to-customer analysis:
# mean of `from_carrier_to_customer_days`; the second x-axis is titled 'Number of Sales'.
pb.configure(
    df=df_sales,
    metric='from_carrier_to_customer_days',
    metric_label='Average Delivery Time from Carrier, days',
    metric_label_for_distribution='Delivery Time from Carrier, days',
    agg_func='mean',
    title_base='Average Delivery Time from Carrier and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig={'xaxis2': {'title_text': 'Number of Sales'}},
)

Top Orders

In [None]:
# Top orders by the currently configured metric (carrier-to-customer time in days).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
# Summary statistics and distribution of the carrier-to-customer time
# (called with upper_quantile=0.99; `labels` is keyed by the metric column).
pb.metric_info(
    labels={'from_carrier_to_customer_days': 'Delivery Time from Carrier, days'},
    title='Distribution of Delivery Time from Carrier',
    upper_quantile=0.99,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- Median carrier delivery time: ~7 days  
- 25% take ≥12 days  
- 5% take ≥24 days  

Let’s look by different dimensions.

**By Payment Category**

In [None]:
# Distribution of the configured metric split by `order_total_payment_cat`,
# followed by the per-category aggregate (also sent to the slide deck).
pb.histogram(
    color='order_total_payment_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_total_payment_cat',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Cheap items deliver fastest via carrier  

**By Order Weight Category**

In [None]:
# Distribution of the configured metric split by `order_total_weight_cat`,
# followed by the metric aggregated per weight category.
pb.histogram(
    color='order_total_weight_cat',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_total_weight_cat',
    show_count=True,
).show()

**Key Observations:**  

- Light items deliver slightly faster via carrier   

**By Presence of Installment Payments**

In [None]:
# Distribution of the configured metric split by `order_has_installment`,
# followed by the metric aggregated per installment group.
pb.histogram(
    color='order_has_installment',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_has_installment',
    show_count=True,
).show()

**Key Observations:**  

- Installment orders take slightly longer via carrier  

**By Review Score**

In [None]:
# Distribution of the configured metric split by `order_avg_reviews_score`,
# followed by the metric aggregated per review score.
pb.histogram(
    color='order_avg_reviews_score',
    upper_quantile=0.95,
    mode='dual_box_trim',
    show_box=True,
    show_hist=False,
    show_kde=True,
).show()
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
).show()

**Key Observations:**  

- Longer carrier delivery times correlate with lower ratings  

**By Top Customer States**

In [None]:
# Box plots of the configured metric per `customer_state`,
# followed by the per-state aggregate (also sent to the slide deck).
pb.box(
    y='customer_state',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_state',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Among top states by sales volume, top 3 states with longest carrier delivery:  
  1. Pará  
  2. Maranhão  
  3. Ceará  

**By Top Customer Cities**

In [None]:
# Box plots of the configured metric per `customer_city`,
# followed by the per-city aggregate (also sent to the slide deck).
pb.box(
    y='customer_city',
    upper_quantile=0.95,
    show_dual=True,
).show()
pb.bar_groupby(
    y='customer_city',
    show_count=True,
    to_slide=True,
).show()

**Key Observations:**  

- Among top cities by sales volume, top 3 cities with longest carrier delivery:  
  1. Salvador  
  2. Porto Alegre  
  3. Rio de Janeiro  

### Carrier Handoff Delay

In [None]:
# Configure the `pb` helper for the carrier delay analysis:
# mean of `avg_carrier_delivery_delay_days`; the second x-axis is titled 'Number of Sales'.
# NOTE(review): the section heading says "Carrier Handoff Delay" while the labels
# say "Carrier Delivery Delay" — confirm which wording is intended.
pb.configure(
    df=df_sales,
    metric='avg_carrier_delivery_delay_days',
    metric_label='Average Carrier Delivery Delay, days',
    metric_label_for_distribution='Carrier Delivery Delay, days',
    agg_func='mean',
    title_base='Average Carrier Delivery Delay and Number of Sales',
    axis_sort_order='descending',
    text_auto='.3s',
    update_fig={'xaxis2': {'title_text': 'Number of Sales'}},
)

Top Orders

In [None]:
# Top orders by the currently configured metric (carrier delay in days).
pb.metric_top()

Let’s look at the statistics and distribution of the metric.

In [None]:
# Summary statistics and distribution of the carrier delay
# (called with lower_quantile=0.01 and upper_quantile=0.99).
pb.metric_info(
    lower_quantile=0.01,
    upper_quantile=0.99,
    hist_mode='dual_hist_trim',
)

**Key Observations:**  

- 75% of orders transfer to carrier ≥1.6 days early  
- Extreme early transfers due to data anomalies  
- 5% are ≥0.79 days late  
- 1% are ≥7 days late  

Let’s look by different dimensions.

**By Review Score**

In [None]:
# Configured metric aggregated per `order_avg_reviews_score`, with counts enabled.
pb.bar_groupby(
    y='order_avg_reviews_score',
    show_count=True,
).show()

**Key Observations:**  

- Earlier carrier transfer correlates with higher ratings  

In [None]:
# Execute the shared post-run notebook last.
# NOTE(review): its effect is not visible from this notebook — presumably finalization/export; confirm.
%run ../../_post_run.ipynb