Luokat: Kaikki - categories - crm - frequency

jonka Lijing Liang 2 vuotta sitten

118

CRM_Full_Feature_Workflow

The document outlines a comprehensive workflow for analyzing user interactions on a home page, specifically focusing on click events. It uses SQL queries to extract and aggregate data on user clicks within different page sections such as main categories, flash sales, and official mall segments.

CRM_Full_Feature_Workflow

CRM_Full_Feature_Workflow

Incentive

dev_clv_calculation_daily.crm_in_app_voucher_usage_feature_daily_snapshot
'/projects/clv_calculation_daily/hdfs/dev/lijing_test/clv_feature_breakdown_analysis/user_feature_snapshot_from_in_app_voucher_usage'
murah_lebay (only ID)

select distinct '{country}' as grass_region , cast(itemid as bigint) as item_id , grass_date FROM idecbi_bd.ec_shopee_bd_cmp_murah_lebay_rpt__product_label_1d where grass_date <= date'{grass_date}') , user_base_6 as ( select distinct buyer_id as userid ,'murah_lebay' as feature ,case when b.item_id is not null then 'Yes' else 'No' end as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o join murah_lebay b on o.item_id = b.item_id and to_date(cast(create_datetime as timestamp)) = b.grass_date where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}'

idecbi_bd.ec_shopee_bd_cmp_murah_lebay_rpt__product_label_1d

sarp

select distinct buyer_id as userid ,'sarp' as feature --including both PV and FSV , 'Yes' as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o join sarp b on o.item_id = b.item_id and o.shop_id = b.shop_id and to_date(cast(create_datetime as timestamp)) >= start_date and to_date(cast(create_datetime as timestamp)) <= end_date where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}'

Filter SARP

select distinct '{country}' as grass_region, cast(start_date as date) as start_date, cast(end_date as date) as end_date, cast(shop_id as string) shop_id, item_id from idecbi_bd.ec_shopee_bd_cmp_nug_exclusive_price_item_list a where status != 'Not Uploaded' and ingestion_timestamp = (SELECT MAX(ingestion_timestamp) col1 FROM idecbi_bd.ec_shopee_bd_cmp_nug_exclusive_price_item_list)

idecbi_bd.ec_shopee_bd_cmp_nug_exclusive_price_item_list

referral

select distinct buyer_id as userid ,'referral' as feature --including both PV and FSV , case when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) = 0 then '0' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0 and 0.1 then '0.0 - 0.1' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.1 and 0.2 then '0.1 - 0.2' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.2 and 0.3 then '0.2 - 0.3' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.3 and 0.4 then '0.3 - 0.4' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.4 and 0.5 then '0.4 - 0.5' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.5 and 0.6 then '0.5 - 0.6' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.6 and 0.7 then '0.6 - 0.7' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.7 and 0.8 then '0.7 - 0.8' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.8 and 0.9 then '0.8 - 0.9' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.9 and 1 then '0.9 - 1.0' end as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' group by 1,2

low_price

select distinct buyer_id as userid, 'low_price' as feature --including both PV and FSV , case when low_price_order_cnt / total_order_cnt = 0 then '0' when low_price_order_cnt / total_order_cnt between 0 and 0.1 then '0.0 - 0.1' when low_price_order_cnt / total_order_cnt between 0.1 and 0.2 then '0.1 - 0.2' when low_price_order_cnt / total_order_cnt between 0.2 and 0.3 then '0.2 - 0.3' when low_price_order_cnt / total_order_cnt between 0.3 and 0.4 then '0.3 - 0.4' when low_price_order_cnt / total_order_cnt between 0.4 and 0.5 then '0.4 - 0.5' when low_price_order_cnt / total_order_cnt between 0.5 and 0.6 then '0.5 - 0.6' when low_price_order_cnt / total_order_cnt between 0.6 and 0.7 then '0.6 - 0.7' when low_price_order_cnt / total_order_cnt between 0.7 and 0.8 then '0.7 - 0.8' when low_price_order_cnt / total_order_cnt between 0.8 and 0.9 then '0.8 - 0.9' when low_price_order_cnt / total_order_cnt between 0.9 and 1 then '0.9 - 1.0' end as feature_value from (select distinct buyer_id ,count(distinct case when b.item_id is not null then order_id end) as low_price_order_cnt ,count(distinct order_id) as total_order_cnt from mp_order.dwd_order_item_all_ent_df__reg_s0_live o left join low_price b on o.item_id = b.item_id and to_date(cast(create_datetime as timestamp)) >= b.start_date and to_date(cast(create_datetime as timestamp)) <= b.end_date where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' group by 1)

with low_price as ( select distinct grass_region , item_id , date(start_date) as start_date , date(end_date) as end_date , 'low_price' as is_low_price from regmkt_general.in_app_lowprice_sku_daily_for_anlys_id)

regmkt_general.in_app_lowprice_sku_daily_for_anlys_id

Net_checkout_with_voucher_l90

SELECT distinct userid , feature , case when feature_value = 0 then '0' when feature_value > 0 and feature_value <= 0.1 then '0.0 - 0.1' when feature_value > 0.1 and feature_value <= 0.2 then '0.1 - 0.2' when feature_value > 0.2 and feature_value <= 0.3 then '0.2 - 0.3' when feature_value > 0.3 and feature_value <= 0.4 then '0.3 - 0.4' when feature_value > 0.4 and feature_value <= 0.5 then '0.4 - 0.5' when feature_value > 0.5 and feature_value <= 0.6 then '0.5 - 0.6' when feature_value > 0.6 and feature_value <= 0.7 then '0.6 - 0.7' when feature_value > 0.7 and feature_value <= 0.8 then '0.7 - 0.8' when feature_value > 0.8 and feature_value <= 0.9 then '0.8 - 0.9' when feature_value > 0.9 and feature_value <= 1.0 then '0.9 - 1.0' end as feature_value FROM (select buyer_id as userid , 'Net_checkout_with_voucher_l90' as feature , cast(voucher_used_net__base_checkout_cnt_90d as double) / cast(net__base_checkout_cnt_90d as double) as feature_value FROM mp_order.dws_buyer_net_gmv_nd__reg_s0_live WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' AND tz_type = 'local')

mp_order.dws_buyer_net_gmv_nd__reg_s0_live

na ( denominator is 0)

nuz_exclusive_deals (only ID)

select distinct buyer_id as userid, 'exclusive_deals' as feature --including both PV and FSV ,case when b.item_id is not null then 'YES' else 'NO' end as feature_value from user_base a join ed_item b on a.item_id = b.item_id and a.shop_id = b.shop_id -- and to_date(cast(create_datetime as timestamp)) >= b.update_date where item_type = 'exclusive deal'

nuz_free_gift

Filter users' usage

select distinct buyer_id as userid, 'free_gift' as feature ,case when b.item_id is not null then 'YES' else 'NO' end as feature_value from user_base a join ed_item b on a.item_id = b.item_id and a.shop_id = b.shop_id --and to_date(cast(create_datetime as timestamp)) >= b.update_date where item_type = 'free gift'

Item Base (Filter Free Gift / Exclusive Deals Items)

ed_item = spark.sql(""" with dp_items as ( select 'ID' as country, item_id, iss_item_id as itemid, model_id as modelid from digitalpurchase.shopee_ls_item_id_db__item_tab__reg_daily_s0_live ) select * from ( select distinct item.region as grass_region ,case when item.item_type = 1 then 'free gift' when item.item_type = 2 then 'exclusive deal' end as item_type ,case when dp_items.item_id is not null then 'DP' else 'MP' end as item_platform ,cast(item.shop_id as string) as shop_id ,case when dp_items.item_id is not null then dp_items.item_id else item.item_id end as item_id ,item.custom_name as item_cname from marketplace.shopee_welcome_package_db__welcome_package_item_tab__reg_daily_s0_live item left join dp_items on item.region = dp_items.country and dp_items.itemid =item.item_id where date(from_unixtime(case when item.region in ('ID','TH','VN') then item.create_time - 3600 when item.region in ('BR') then item.create_time - (11*3600) when item.region in ('MX') then item.create_time - (14*3600) else item.create_time end)) < current_date() --and item.item_status = 1 -- item.item_status = 1, this sku is active ) where item_platform = 'MP' and grass_region = '{country}' """.format(country = country)).cache().createOrReplaceTempView('ed_item')

User Base (Filter First Checkouts)

user_base = spark.sql(""" select distinct a.buyer_id , a.grass_region ,from_unixtime( case when a.grass_region in ('ID','TH','VN') then buyer_first_purchase_timestamp_td - 3600 when a.grass_region in ('BR') then buyer_first_purchase_timestamp_td - (11*3600) when a.grass_region in ('MX') then buyer_first_purchase_timestamp_td - (14*3600) else buyer_first_purchase_timestamp_td end) as buyer_first_purchase_time , buyer_first_purchase_base_checkout_id_list_td[0].base_checkout_id as base_checkout_id , b.order_id , b.item_id , b.shop_id from mp_order.dws_buyer_gmv_td__reg_s0_live a join mp_order.dwd_order_item_all_ent_df__reg_s0_live b on a.buyer_id = b.buyer_id and cast(buyer_first_purchase_base_checkout_id_list_td[0].base_checkout_id as string) = cast(b.base_checkout_id as string) and a.grass_region = b.grass_region where a.grass_region='{country}' and a.tz_type = 'local' and a.grass_date = date'{grass_date}' """.format(country=country, grass_date=grass_date)).cache().createOrReplaceTempView('user_base')

digitalpurchase.shopee_ls_item_id_db__item_tab__reg_daily_s0_live

marketplace.shopee_welcome_package_db__welcome_package_item_tab__reg_daily_s0_live

nuz_voucher_fsv

select distinct buyer_id as userid, 'nuz_voucher_fsv' as feature --including both PV and FSV ,case when b.promotionid is not null then 'Yes' else 'No' end as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o join (select cast(trim(voucherid) as bigint) as promotionid from regmkt_general.shopee_regional_mkt_team__nuz_voucher_tbl -- week no is Monday and voucher at least will valid for 1 day in the week where cast(week_no as date) <= date'{grass_date}' ----Look at all the promotion id available before the snapshot date and grass_region = '{country}' ) b on o.fsv_promotion_id = b.promotionid where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}'

nuz_vouche_pv

select distinct buyer_id as userid, 'nuz_vouche_pv' as feature --including both PV and FSV ,case when b.promotionid is not null then 'Yes' else 'No' end as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o join (select cast(trim(voucherid) as bigint) as promotionid from regmkt_general.shopee_regional_mkt_team__nuz_voucher_tbl -- week no is Monday and voucher at least will valid for 1 day in the week where cast(week_no as date) <= date'{grass_date}' ----Look at all the promotion id available before the snapshot date and grass_region = '{country}' ) b on o.pv_promotion_id = b.promotionid where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}'

regmkt_general.shopee_regional_mkt_team__nuz_voucher_tbl

Engagement

Active Frequency

with monthly_dau as ( SELECT distinct user_id as userid ,grass_date as active_date from traffic.shopee_traffic_dws_platform_active_user_1d__reg_s1_live where grass_date between date'{grass_date}' - interval '30' day and date'{grass_date}' - interval '1' day and tz_type = 'local' and grass_region = '{country}' ) , user_base_21 as (select userid , feature , active_freq as feature_value from (select 'active_frequency' as feature , userid , count(distinct active_date) as active_freq from monthly_dau group by 1,2) )

0 - 30

A1 / A7 / A14 / A30 / A60

select distinct user_id as userid , 'AX' as feature , 'YES' as feature_value from traffic.shopee_traffic_dws_platform_active_user_1d__reg_s1_live where grass_region = '{country}' and grass_date between date'{grass_date}' - interval 'X' day and date'{grass_date}' - interval '1' day and tz_type = 'local'

traffic.shopee_traffic_dws_platform_active_user_1d__reg_s1_live

Acquisition

register_channel

select distinct user_id as userid , 'register_channel' as feature , case when register_channel is not null then register_channel else 'unknown' end as feature_value from mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'

last_install_channel

select distinct user_id as userid , 'last_install_channel' as feature , case when last_install_channel is not null then last_install_channel else 'unknown' end as feature_value from mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'

last_install_source

select distinct user_id as userid , 'last_install_source' as feature , case when last_install_source is not null then last_install_source else 'unknown' end as feature_value from mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'

Refer to results

In-app feature

Partition

feature

dev_clv_crm.crm_in_app_traffic_feature_daily_snapshot
all_game_play

select distinct userid, count(game_event_id) as feature_value --click-freq from game.game_mart_dwd_play_di where grass_date between date'{grass_date}' - interval '30' day and date'{grass_date}' and tz_type = 'local' and grass_region = '{country}' and game_event_id is not null group by 1

search_bar

select distinct userid , sum(click_freq) as feature_value from home_clicks where target_type = 'search_bar' group by 1

shopee_live

select distinct userid ,sum(click_freq) as feature_value from home_clicks where page_section = 'shopee_live' AND target_type IN ('live_banner', 'see_more_link','see_more_card') group by 1

digital_products

select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'digital_product' AND target_type IN ('digital_banner', 'digital_product','see_more_link') group by 1

daily_discover

select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'daily_discover' AND target_type IN ('item','similar_button', 'collection', 'cluster', 'tab', 'see_more_button') group by 1

top_product

select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'top_product' AND target_type IN ('cluster','see_more','see_more_card') group by 1

categories

select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'main_category' AND target_type IN ('see_more_link', 'category') group by 1

shopee_mall

select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'official_mall_new' AND target_type IN ( 'shop', 'banner', 'recommended_brand','see_more_card', 'see_more_link','info_button','shopee_mall_button') group by 1

flash_sale

select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'flash_sale' AND target_type IN ('item', 'see_all_link') group by 1

home_banner

select distinct userid , sum(click_freq) as feature_value from home_clicks where target_type = 'home_circle' group by 1

pop_up

select distinct userid , sum(click_freq) as feature_value from home_clicks where target_type = 'pop_up_banner' group by 1

Frequency Segments

30+

16-30

6-15

5

4

3

2

1

Frequency from 0 - ++

Main Query to get home_page clicks
WITH home_clicks as ( select distinct userid , page_section[0] as page_section , target_type , count(distinct event_id) as click_freq from traffic.shopee_traffic_dwd_click_hi__reg_s1_live where grass_region = '{country}' and utc_date = date'{grass_date}' and page_type = 'home' and operation = 'click' and userid > 0 group by 1,2,3 ) select distinct * from home_clicks a

Table: dev_clv_calculation_daily.crm_feature_home_clicks_temp

Payment

other_payment_methods

select distinct userid ,'other_payment_methods' as feature ,case when others = 0 then '0' when others > 0 and others <= 0.1 then '0.0 - 0.1' when others > 0.1 and others <= 0.2 then '0.1 - 0.2' when others > 0.2 and others <= 0.3 then '0.2 - 0.3' when others > 0.3 and others <= 0.4 then '0.3 - 0.4' when others > 0.4 and others <= 0.5 then '0.4 - 0.5' when others > 0.5 and others <= 0.6 then '0.5 - 0.6' when others > 0.6 and others <= 0.7 then '0.6 - 0.7' when others > 0.7 and others <= 0.8 then '0.7 - 0.8' when others > 0.8 and others <= 0.9 then '0.8 - 0.9' when others > 0.9 and others <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_pay_later' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_pay_later ,cast (cast (count(case when payment_method = 'Shopee_Wallet' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_wallet ,cast (cast (count(case when payment_method = 'COD' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as cod ,cast (cast (count(case when payment_method = 'Others' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as others ,count(checkoutid) as total_checkout from payment group by 1 )

shopee_credit

,user_base_13 as ( select distinct userid ,'shopee_credit' as feature ,case when others = 0 then '0' when shopee_credit > 0 and shopee_credit <= 0.1 then '0.0 - 0.1' when shopee_credit > 0.1 and shopee_credit <= 0.2 then '0.1 - 0.2' when shopee_credit > 0.2 and shopee_credit <= 0.3 then '0.2 - 0.3' when shopee_credit > 0.3 and shopee_credit <= 0.4 then '0.3 - 0.4' when shopee_credit > 0.4 and shopee_credit <= 0.5 then '0.4 - 0.5' when shopee_credit > 0.5 and shopee_credit <= 0.6 then '0.5 - 0.6' when shopee_credit > 0.6 and shopee_credit <= 0.7 then '0.6 - 0.7' when shopee_credit > 0.7 and shopee_credit <= 0.8 then '0.7 - 0.8' when shopee_credit > 0.8 and shopee_credit <= 0.9 then '0.8 - 0.9' when shopee_credit > 0.9 and shopee_credit <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_credit' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_credit from payment group by 1 ) )

credit_card

,user_base_14 as ( select distinct userid ,'credit_card' as feature ,case when others = 0 then '0' when credit_card > 0 and credit_card <= 0.1 then '0.0 - 0.1' when credit_card > 0.1 and credit_card <= 0.2 then '0.1 - 0.2' when credit_card > 0.2 and credit_card <= 0.3 then '0.2 - 0.3' when credit_card > 0.3 and credit_card <= 0.4 then '0.3 - 0.4' when credit_card > 0.4 and credit_card <= 0.5 then '0.4 - 0.5' when credit_card > 0.5 and credit_card <= 0.6 then '0.5 - 0.6' when credit_card > 0.6 and credit_card <= 0.7 then '0.6 - 0.7' when credit_card > 0.7 and credit_card <= 0.8 then '0.7 - 0.8' when credit_card > 0.8 and credit_card <= 0.9 then '0.8 - 0.9' when credit_card > 0.9 and credit_card <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Credit_card' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as credit_card from payment group by 1 ) )

bank_transfer

,user_base_15 as ( select distinct userid ,'bank_transfer' as feature ,case when others = 0 then '0' when bank_transfer > 0 and bank_transfer <= 0.1 then '0.0 - 0.1' when bank_transfer > 0.1 and bank_transfer <= 0.2 then '0.1 - 0.2' when bank_transfer > 0.2 and bank_transfer <= 0.3 then '0.2 - 0.3' when bank_transfer > 0.3 and bank_transfer <= 0.4 then '0.3 - 0.4' when bank_transfer > 0.4 and bank_transfer <= 0.5 then '0.4 - 0.5' when bank_transfer > 0.5 and bank_transfer <= 0.6 then '0.5 - 0.6' when bank_transfer > 0.6 and bank_transfer <= 0.7 then '0.6 - 0.7' when bank_transfer > 0.7 and bank_transfer <= 0.8 then '0.7 - 0.8' when bank_transfer > 0.8 and bank_transfer <= 0.9 then '0.8 - 0.9' when bank_transfer > 0.9 and bank_transfer <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Bank_transfer' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as bank_transfer from payment group by 1 ) )

shopee_wallet

select distinct userid ,'shopee_wallet' as feature ,case when shopee_wallet = 0 then '0' when shopee_wallet > 0 and shopee_wallet <= 0.1 then '0.0 - 0.1' when shopee_wallet > 0.1 and shopee_wallet <= 0.2 then '0.1 - 0.2' when shopee_wallet > 0.2 and shopee_wallet <= 0.3 then '0.2 - 0.3' when shopee_wallet > 0.3 and shopee_wallet <= 0.4 then '0.3 - 0.4' when shopee_wallet > 0.4 and shopee_wallet <= 0.5 then '0.4 - 0.5' when shopee_wallet > 0.5 and shopee_wallet <= 0.6 then '0.5 - 0.6' when shopee_wallet > 0.6 and shopee_wallet <= 0.7 then '0.6 - 0.7' when shopee_wallet > 0.7 and shopee_wallet <= 0.8 then '0.7 - 0.8' when shopee_wallet > 0.8 and shopee_wallet <= 0.9 then '0.8 - 0.9' when shopee_wallet > 0.9 and shopee_wallet <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_pay_later' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_pay_later ,cast (cast (count(case when payment_method = 'Shopee_Wallet' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_wallet ,cast (cast (count(case when payment_method = 'COD' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as cod ,cast (cast (count(case when payment_method = 'Others' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as others ,count(checkoutid) as total_checkout from payment group by 1 )

shopee_pay_later

select distinct userid ,'shopee_pay_later' as feature ,case when shopee_pay_later = 0 then '0' when shopee_pay_later > 0 and shopee_pay_later <= 0.1 then '0.0 - 0.1' when shopee_pay_later > 0.1 and shopee_pay_later <= 0.2 then '0.1 - 0.2' when shopee_pay_later > 0.2 and shopee_pay_later <= 0.3 then '0.2 - 0.3' when shopee_pay_later > 0.3 and shopee_pay_later <= 0.4 then '0.3 - 0.4' when shopee_pay_later > 0.4 and shopee_pay_later <= 0.5 then '0.4 - 0.5' when shopee_pay_later > 0.5 and shopee_pay_later <= 0.6 then '0.5 - 0.6' when shopee_pay_later > 0.6 and shopee_pay_later <= 0.7 then '0.6 - 0.7' when shopee_pay_later > 0.7 and shopee_pay_later <= 0.8 then '0.7 - 0.8' when shopee_pay_later > 0.8 and shopee_pay_later <= 0.9 then '0.8 - 0.9' when shopee_pay_later > 0.9 and shopee_pay_later <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_pay_later' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_pay_later ,cast (cast (count(case when payment_method = 'Shopee_Wallet' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_wallet ,cast (cast (count(case when payment_method = 'COD' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as cod ,cast (cast (count(case when payment_method = 'Others' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as others ,count(checkoutid) as total_checkout from payment group by 1 )

COD

select distinct userid ,'cod' as feature ,case when cod = 0 then '0' when cod > 0 and cod <= 0.1 then '0.0 - 0.1' when cod > 0.1 and cod <= 0.2 then '0.1 - 0.2' when cod > 0.2 and cod <= 0.3 then '0.2 - 0.3' when cod > 0.3 and cod <= 0.4 then '0.3 - 0.4' when cod > 0.4 and cod <= 0.5 then '0.4 - 0.5' when cod > 0.5 and cod <= 0.6 then '0.5 - 0.6' when cod > 0.6 and cod <= 0.7 then '0.6 - 0.7' when cod > 0.7 and cod <= 0.8 then '0.7 - 0.8' when cod > 0.8 and cod <= 0.9 then '0.8 - 0.9' when cod > 0.9 and cod <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_pay_later' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_pay_later ,cast (cast (count(case when payment_method = 'Shopee_Wallet' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_wallet ,cast (cast (count(case when payment_method = 'COD' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as cod ,cast (cast (count(case when payment_method = 'Others' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as others ,count(checkoutid) as total_checkout from payment group by 1

na (denominator is 0)

0.9 - 1.0

0.8 - 0.9

0.7 - 0.8

0.6 - 0.7

0.5 - 0.6

0.4 - 0.5

0.3 - 0.4

0.2 - 0.3

0.1 - 0.2

0.0 - 0.1

0

Main Query
, payment as ( select distinct to_date(cast(create_datetime as TIMESTAMP)) as grass_date ,buyer_id ,cast(base_checkout_id as string) as checkoutid ,case when payment_method = 'PAY_COD' then 'COD' when payment_method = 'PAY_SHOPEE_WALLET_V2' or payment_method = 'PAY_SHOPEE_WALLET' or payment_method = 'PAY_SELLER_WALLET' then 'Shopee_wallet' when payment_method = 'PAY_SHOPEE_CREDIT' then 'Shopee_credit' when payment_method = 'PAY_BANK_TRANSFER' then 'Bank_transfer' when payment_method like '%_CC%' then 'Credit_card' when payment_method = 'PAY_CYBERSOURCE' and payment_be_channel_id not in (120630 ,130630 ,140630 ,40000 ,20000 ,70000 ,80000 ,30000 ,50000 ,10000 ,20200 ,50200 ,10200) then 'Credit_card' else 'Others' end as payment_method from mp_order.dwd_order_item_all_ent_df__reg_s0_live where grass_region = '{country}' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' and tz_type = 'local' )

Consumption

dev_clv_calculation_daily.crm_in_app_purchase_feature_daily_snapshot
'/projects/clv_calculation_daily/hdfs/dev/lijing_test/clv_feature_breakdown_analysis/user_feature_snapshot_from_in_app_purchase_behaviour'
Shopee_food_buyer

select o.buyer_id as userid , 'Shopee_food_buyer' as feature , case when s.store_id is not null then 'Yes' else 'No' end as feature_value from shopeefood.fact_shopeefood_order_id_db__order_da o left join ( select store_id , store_name from shopeefood.fact_shopeefood_merchant_id_db__store_da where date(dt) = date'{grass_date}' and store_name not like '%[TEST]%') s on o.store_id = s.store_id where date(from_unixtime(o.create_time/1000-3600)) < date'{grass_date}' -- order date and order_status = 800 -- order is completed and date(dt) = date'{grass_date}'

shopeefood.fact_shopeefood_merchant_id_db__store_da

shopeefood.fact_shopeefood_order_id_db__order_da

Mall_Buyer

select distinct buyer_id as userid , 'Mall_Buyer' as feature , 'Yes' as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live where grass_region = '{country}' and grass_date <= date'{grass_date}' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' and is_official_shop = 1 and tz_type='local'

mp_order.dwd_order_item_all_ent_df__reg_s0_live

mp_category

select userid ,feature ,case when b.feature_value is null then 'Others' else b.feature_value end feature_value from ( select distinct buyer_id as userid ,'mp_category' as feature , cast(split(most_purchased_global_be_cats, '"')[1] as string) as feature_value from mp_order.dws_buyer_purchase_category_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local' ) a left join ( select distinct feature_value ----get top 15 categories from ( select distinct feature ,feature_value ,rank() OVER (PARTITION BY feature ORDER BY user_cnt DESC) AS rn from ( select 'mp_category' as feature ,cast(split(most_purchased_global_be_cats,'"')[1] as string) as feature_value ,count(distinct buyer_id) as user_cnt from mp_order.dws_buyer_purchase_category_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local' group by 1, 2 ) ) where rn <= 15 ) b on a.feature_value = b.feature_value

mp_order.dws_buyer_purchase_category_td__reg_s0_live

Women Clothes

Women Bags

Tickets, Vouchers & Services

Sports & Outdoors

Muslim Fashion

Motorcycles

Mom & Baby

Mobile & Gadgets

Men Clothes

Home & Living

Health

Food & Beverages

Fashion Accessories

Beauty

Baby & Kids Fashion

MP_Purchase_Level (Halted)

select distinct buyer_id as userid , 'MP_Purchase_Level' as feature , case when completed_order_cnt_td = 0 then 'Non-mature' when completed_order_cnt_td = 1 then 'Mature-light' when completed_order_cnt_td between 2 and 4 then 'Mature-normal' when completed_order_cnt_td between 5 and 14 then 'Mature-heavy' when completed_order_cnt_td between 15 and 29 then 'Whale' when completed_order_cnt_td >= 30 then 'Super-whale' end as feature_value from mp_order.dws_buyer_gmv_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'

mp_order.dws_buyer_gmv_td__reg_s0_live

na

Super-whale

Whale

Mature-heavy

Mature-normal

Mature-light

Non-mature

sensitivity_cod_level

SELECT distinct user_id as userid , 'sensitivity_cod_label' as feature , case when sensitivity_cod_label = 1 then 'YES' when sensitivity_cod_label = 0 then 'NO' end as feature_value FROM mpi_users.userdm_public_shopee_user_a180_tags WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' -- earliest date = 2022-04-07

Categroies

No (0)

Yes (1)

Mum_Membership

SELECT distinct user_id as userid , 'Mum_Membership' as feature , 'YES' as feature_value from mp_user.dim_user_ext__reg_s0_live where grass_date = date'{grass_date}' and grass_region = '{country}' and array_contains(club_tags, 'mums club')

DP_Buyer

SELECT distinct buyer_id as userid , 'DP_User' as feature , 'YES' as feature_value FROM digitalpurchase.order_mart_dws_dp_buyer_gmv_td WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' AND tz_type = 'local'

digitalpurchase.order_mart_dws_dp_buyer_gmv_td

Mart_Buyer

select distinct buyer_id as userid , 'Mart_Buyer' as feature , 'YES' as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live where grass_region = '{country}' and grass_date <= date'{grass_date}' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' and is_supermarket_shop = 1 and tz_type='local'

Subtopic

(newly added) Count Order_id

shopee_rewards_tier (Halted)

SELECT distinct user_id as userid , 'shopee_rewards_tier' as feature , loyalty_tier_name as feature_value FROM mp_user.dim_user_ext__reg_s0_live WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' and loyalty_tier_name is not null

mp_user.dim_user_ext__reg_s0_live

Gold

Silver

Classic

Platinum

Demographic Features

partition

grass_region

grass_date

dev_clv_calculation_daily.crm_user_feature_daily_snapshot
HDFS
/projects/clv_calculation_daily/hdfs/dev/lijing_test/clv_feature_breakdown_analysis/user_feature_snapshot_from_diverse_sources
mobile_device_price_range

select distinct user_id as userid , 'mobile_device_price_range' as feature , case when last_used_property_price_sgd/10000 between 0 and 99 then '0-100' when last_used_property_price_sgd/10000 between 100 and 199 then '100-200' when last_used_property_price_sgd/10000 between 200 and 299 then '200-300' when last_used_property_price_sgd/10000 between 300 and 399 then '300-400' when last_used_property_price_sgd/10000 between 400 and 499 then '400 above' else 'Others' end as feature_value from mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live where tz_type = 'local' and grass_region = '{country}' and grass_date = date'{grass_date}'

mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live

400+

300-400

200-300

100-200

0-100

device_brand

SELECT distinct user_id as userid , 'device_brand' as feature , case when last_activity_device_brand_td like '%vivo%' then 'vivo' when last_activity_device_brand_td like '%xiaomi%' then 'xiaomi' when last_activity_device_brand_td like '%samsung%' then 'samsung' when last_activity_device_brand_td like '%apple%' then 'apple' when last_activity_device_brand_td like '%realme%' then 'realme' when last_activity_device_brand_td like '%infinix%' then 'infinix' when last_activity_device_brand_td like '%asus%' then 'asus' when last_activity_device_brand_td like '%oppo%' then 'oppo' else 'others' end as feature_value from mp_user.ads_activeness_user_activity_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'

others

oppo

asus

infinix

realme

apple

samsung

vivo

os_platform

SELECT distinct user_id as userid , 'os_platform' as feature , case when last_activity_platform_td is not null then last_activity_platform_td when last_activity_platform_td is null then 'unknown' end as feature_value from mp_user.ads_activeness_user_activity_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'

mp_user.ads_activeness_user_activity_td__reg_s0_live

pc_web

android_web_lite

ios_web_lite

andorid_web

ios_web

android

ios

has_kids

SELECT distinct user_id as userid ,'has_kids' as feature , case when haskid_predict_label = 1 then 'YES' when haskid_predict_label = 0 then 'NO' end as feature_value from mpi_users.userdm_public_shopee_user_a180_tags where grass_region = '{country}' and grass_date = date'{grass_date}' and haskid_predict_label >= 0

Table

No

Yes

language

SELECT distinct user_id as userid , 'language' as feature , case when app_language in ('id') then 'id' when app_language in ('en','en-my','sg') then 'en' else 'Others' end as feature_value FROM mp_user.dim_user__reg_s0_live WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' and tz_type = 'local'

EN

ID

age_group

SELECT distinct user_id as userid , 'age_group' as feature , case when age_predict_label = 1 then '18-24' when age_predict_label = 2 then '25-34' when age_predict_label = 3 then '35-44' when age_predict_label = 4 then '45+' end as feature_value from mpi_users.userdm_public_shopee_user_a180_tags where grass_region = '{country}' and grass_date = date'{grass_date}' and age_predict_label is not null

45+

25-44

25-34

18-24

income level

select distinct user_id as userid , 'income_level' as feature , income_lvl_predict_label as feature_value from mpi_users.userdm_public_shopee_user_a180_tags where grass_region = '{country}' and grass_date = date'{grass_date}' and income_lvl_predict_label is not null

mpi_users.userdm_public_shopee_user_a180_tags

(4) 1000+ (million idr per month)

(3) 500-1000

(2) 280-500

(1) 150-280

(0) 0-150

gender

SELECT distinct user_id as userid ,'gender' as feature , case when gender = 1 then 'Male' when gender = 2 then 'Female' when gender = 3 then 'Male' when gender = 4 then 'Female' end as feature_value FROM mp_user.dim_user__reg_s0_live WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' AND tz_type = 'local' AND gender is not null

Male (Male + Predicted Male)

Female (Female + Predicted Female)

city_tier & address_city
Query

SELECT distinct user_id as userid ,'address_city' as feature , default_delivery_address_city as feature_value FROM mp_user.dim_user__reg_s0_live a WHERE a.grass_region = '{country}' AND a.grass_date = date'{grass_date}' and tz_type = 'local' and default_delivery_address_city is not null

SELECT distinct user_id as userid ,'city_tier' as feature , case when b.city_tier is not null then b.city_tier else 'Others' end as feature_value FROM mp_user.dim_user__reg_s0_live a left join idecbi_mkt.ec_shopee_mkt_growth_tactical_dim__city_tiering b on a.default_delivery_address_city = b.city WHERE a.grass_region = '{country}' AND a.grass_date = date'{grass_date}' and tz_type = 'local' and default_delivery_address_city is not null

Tables

idecbi_mkt.ec_shopee_mkt_growth_tactical_dim__city_tiering

mp_user.dim_user__reg_s0_live

Categories

unknown

Others

City Tier 4 - Undeveloped Cities

City Tier 3 - Underdeveloped Cities

City Tier 2 - Developing Cities

City Tier 1 - Developed Cities