by Lijing Liang 2 years ago
126
More like this
select distinct '{country}' as grass_region , cast(itemid as bigint) as item_id , grass_date FROM idecbi_bd.ec_shopee_bd_cmp_murah_lebay_rpt__product_label_1d where grass_date <= date'{grass_date}') , user_base_6 as ( select distinct buyer_id as userid ,'murah_lebay' as feature ,case when b.item_id is not null then 'Yes' else 'No' end as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o join murah_lebay b on o.item_id = b.item_id and to_date(cast(create_datetime as timestamp)) = b.grass_date where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}'
idecbi_bd.ec_shopee_bd_cmp_murah_lebay_rpt__product_label_1d
select distinct buyer_id as userid ,'sarp' as feature --including both PV and FSV , 'Yes' as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o join sarp b on o.item_id = b.item_id and o.shop_id = b.shop_id and to_date(cast(create_datetime as timestamp)) >= start_date and to_date(cast(create_datetime as timestamp)) <= end_date where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}'
Filter SARP
select distinct '{country}' as grass_region, cast(start_date as date) as start_date, cast(end_date as date) as end_date, cast(shop_id as string) shop_id, item_id from idecbi_bd.ec_shopee_bd_cmp_nug_exclusive_price_item_list a where status != 'Not Uploaded' and ingestion_timestamp = (SELECT MAX(ingestion_timestamp) col1 FROM idecbi_bd.ec_shopee_bd_cmp_nug_exclusive_price_item_list)
idecbi_bd.ec_shopee_bd_cmp_nug_exclusive_price_item_list
select distinct buyer_id as userid ,'referral' as feature --including both PV and FSV , case when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) = 0 then '0' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0 and 0.1 then '0.0 - 0.1' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.1 and 0.2 then '0.1 - 0.2' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.2 and 0.3 then '0.2 - 0.3' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.3 and 0.4 then '0.3 - 0.4' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.4 and 0.5 then '0.4 - 0.5' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.5 and 0.6 then '0.5 - 0.6' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.6 and 0.7 then '0.6 - 0.7' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.7 and 0.8 then '0.7 - 0.8' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.8 and 0.9 then '0.8 - 0.9' when count(distinct case when pv_promotion_id=1030523 or fsv_promotion_id=1030523 then order_id end)/count(distinct order_id) between 0.9 and 1 then '0.9 - 1.0' end as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' group by 1,2
select distinct buyer_id as userid, 'low_price' as feature --including both PV and FSV , case when low_price_order_cnt / total_order_cnt = 0 then '0' when low_price_order_cnt / total_order_cnt between 0 and 0.1 then '0.0 - 0.1' when low_price_order_cnt / total_order_cnt between 0.1 and 0.2 then '0.1 - 0.2' when low_price_order_cnt / total_order_cnt between 0.2 and 0.3 then '0.2 - 0.3' when low_price_order_cnt / total_order_cnt between 0.3 and 0.4 then '0.3 - 0.4' when low_price_order_cnt / total_order_cnt between 0.4 and 0.5 then '0.4 - 0.5' when low_price_order_cnt / total_order_cnt between 0.5 and 0.6 then '0.5 - 0.6' when low_price_order_cnt / total_order_cnt between 0.6 and 0.7 then '0.6 - 0.7' when low_price_order_cnt / total_order_cnt between 0.7 and 0.8 then '0.7 - 0.8' when low_price_order_cnt / total_order_cnt between 0.8 and 0.9 then '0.8 - 0.9' when low_price_order_cnt / total_order_cnt between 0.9 and 1 then '0.9 - 1.0' end as feature_value from (select distinct buyer_id ,count(distinct case when b.item_id is not null then order_id end) as low_price_order_cnt ,count(distinct order_id) as total_order_cnt from mp_order.dwd_order_item_all_ent_df__reg_s0_live o left join low_price b on o.item_id = b.item_id and to_date(cast(create_datetime as timestamp)) >= b.start_date and to_date(cast(create_datetime as timestamp)) <= b.end_date where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' group by 1)
with low_price as ( select distinct grass_region , item_id , date(start_date) as start_date , date(end_date) as end_date , 'low_price' as is_low_price from regmkt_general.in_app_lowprice_sku_daily_for_anlys_id)
regmkt_general.in_app_lowprice_sku_daily_for_anlys_id
SELECT distinct userid , feature , case when feature_value = 0 then '0' when feature_value > 0 and feature_value <= 0.1 then '0.0 - 0.1' when feature_value > 0.1 and feature_value <= 0.2 then '0.1 - 0.2' when feature_value > 0.2 and feature_value <= 0.3 then '0.2 - 0.3' when feature_value > 0.3 and feature_value <= 0.4 then '0.3 - 0.4' when feature_value > 0.4 and feature_value <= 0.5 then '0.4 - 0.5' when feature_value > 0.5 and feature_value <= 0.6 then '0.5 - 0.6' when feature_value > 0.6 and feature_value <= 0.7 then '0.6 - 0.7' when feature_value > 0.7 and feature_value <= 0.8 then '0.7 - 0.8' when feature_value > 0.8 and feature_value <= 0.9 then '0.8 - 0.9' when feature_value > 0.9 and feature_value <= 1.0 then '0.9 - 1.0' end as feature_value FROM (select buyer_id as userid , 'Net_checkout_with_voucher_l90' as feature , cast(voucher_used_net__base_checkout_cnt_90d as double) / cast(net__base_checkout_cnt_90d as double) as feature_value FROM mp_order.dws_buyer_net_gmv_nd__reg_s0_live WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' AND tz_type = 'local')
mp_order.dws_buyer_net_gmv_nd__reg_s0_live
na ( denominator is 0)
select distinct buyer_id as userid, 'exclusive_deals' as feature --including both PV and FSV ,case when b.item_id is not null then 'YES' else 'NO' end as feature_value from user_base a join ed_item b on a.item_id = b.item_id and a.shop_id = b.shop_id -- and to_date(cast(create_datetime as timestamp)) >= b.update_date where item_type = 'exclusive deal'
Filter users' usage
select distinct buyer_id as userid, 'free_gift' as feature ,case when b.item_id is not null then 'YES' else 'NO' end as feature_value from user_base a join ed_item b on a.item_id = b.item_id and a.shop_id = b.shop_id --and to_date(cast(create_datetime as timestamp)) >= b.update_date where item_type = 'free gift'
Item Base (Filter Free Gift / Exclusive Deals Items)
ed_item = spark.sql(""" with dp_items as ( select 'ID' as country, item_id, iss_item_id as itemid, model_id as modelid from digitalpurchase.shopee_ls_item_id_db__item_tab__reg_daily_s0_live ) select * from ( select distinct item.region as grass_region ,case when item.item_type = 1 then 'free gift' when item.item_type = 2 then 'exclusive deal' end as item_type ,case when dp_items.item_id is not null then 'DP' else 'MP' end as item_platform ,cast(item.shop_id as string) as shop_id ,case when dp_items.item_id is not null then dp_items.item_id else item.item_id end as item_id ,item.custom_name as item_cname from marketplace.shopee_welcome_package_db__welcome_package_item_tab__reg_daily_s0_live item left join dp_items on item.region = dp_items.country and dp_items.itemid =item.item_id where date(from_unixtime(case when item.region in ('ID','TH','VN') then item.create_time - 3600 when item.region in ('BR') then item.create_time - (11*3600) when item.region in ('MX') then item.create_time - (14*3600) else item.create_time end)) < current_date() --and item.item_status = 1 -- item.item_status = 1, this sku is active ) where item_platform = 'MP' and grass_region = '{country}' """.format(country = country)).cache().createOrReplaceTempView('ed_item')
User Base (Filter First Checkouts)
user_base = spark.sql(""" select distinct a.buyer_id , a.grass_region ,from_unixtime( case when a.grass_region in ('ID','TH','VN') then buyer_first_purchase_timestamp_td - 3600 when a.grass_region in ('BR') then buyer_first_purchase_timestamp_td - (11*3600) when a.grass_region in ('MX') then buyer_first_purchase_timestamp_td - (14*3600) else buyer_first_purchase_timestamp_td end) as buyer_first_purchase_time , buyer_first_purchase_base_checkout_id_list_td[0].base_checkout_id as base_checkout_id , b.order_id , b.item_id , b.shop_id from mp_order.dws_buyer_gmv_td__reg_s0_live a join mp_order.dwd_order_item_all_ent_df__reg_s0_live b on a.buyer_id = b.buyer_id and cast(buyer_first_purchase_base_checkout_id_list_td[0].base_checkout_id as string) = cast(b.base_checkout_id as string) and a.grass_region = b.grass_region where a.grass_region='{country}' and a.tz_type = 'local' and a.grass_date = date'{grass_date}' """.format(country=country, grass_date=grass_date)).cache().createOrReplaceTempView('user_base')
digitalpurchase.shopee_ls_item_id_db__item_tab__reg_daily_s0_live
marketplace.shopee_welcome_package_db__welcome_package_item_tab__reg_daily_s0_live
select distinct buyer_id as userid, 'nuz_voucher_fsv' as feature --including both PV and FSV ,case when b.promotionid is not null then 'Yes' else 'No' end as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o join (select cast(trim(voucherid) as bigint) as promotionid from regmkt_general.shopee_regional_mkt_team__nuz_voucher_tbl -- week no is Monday and voucher at least will valid for 1 day in the week where cast(week_no as date) <= date'{grass_date}' ----Look at all the promotion id available before the snapshot date and grass_region = '{country}' ) b on o.fsv_promotion_id = b.promotionid where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}'
select distinct buyer_id as userid, 'nuz_vouche_pv' as feature --including both PV and FSV ,case when b.promotionid is not null then 'Yes' else 'No' end as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live o join (select cast(trim(voucherid) as bigint) as promotionid from regmkt_general.shopee_regional_mkt_team__nuz_voucher_tbl -- week no is Monday and voucher at least will valid for 1 day in the week where cast(week_no as date) <= date'{grass_date}' ----Look at all the promotion id available before the snapshot date and grass_region = '{country}' ) b on o.pv_promotion_id = b.promotionid where o.grass_region = '{country}' and tz_type = 'local' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}'
regmkt_general.shopee_regional_mkt_team__nuz_voucher_tbl
with monthly_dau as ( SELECT distinct user_id as userid ,grass_date as active_date from traffic.shopee_traffic_dws_platform_active_user_1d__reg_s1_live where grass_date between date'{grass_date}' - interval '30' day and date'{grass_date}' - interval '1' day and tz_type = 'local' and grass_region = '{country}' ) , user_base_21 as (select userid , feature , active_freq as feature_value from (select 'active_frequency' as feature , userid , count(distinct active_date) as active_freq from monthly_dau group by 1,2) )
0 - 30
select distinct user_id as userid , 'AX' as feature , 'YES' as feature_value from traffic.shopee_traffic_dws_platform_active_user_1d__reg_s1_live where grass_region = '{country}' and grass_date between date'{grass_date}' - interval 'X' day and date'{grass_date}' - interval '1' day and tz_type = 'local'
traffic.shopee_traffic_dws_platform_active_user_1d__reg_s1_live
select distinct user_id as userid , 'register_channel' as feature , case when register_channel is not null then register_channel else 'unknown' end as feature_value from mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'
select distinct user_id as userid , 'last_install_channel' as feature , case when last_install_channel is not null then last_install_channel else 'unknown' end as feature_value from mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'
select distinct user_id as userid , 'last_install_source' as feature , case when last_install_source is not null then last_install_source else 'unknown' end as feature_value from mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'
Refer to results
feature
select distinct userid, count(game_event_id) as feature_value --click-freq from game.game_mart_dwd_play_di where grass_date between date'{grass_date}' - interval '30' day and date'{grass_date}' and tz_type = 'local' and grass_region = '{country}' and game_event_id is not null group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where target_type = 'search_bar' group by 1
select distinct userid ,sum(click_freq) as feature_value from home_clicks where page_section = 'shopee_live' AND target_type IN ('live_banner', 'see_more_link','see_more_card') group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'digital_product' AND target_type IN ('digital_banner', 'digital_product','see_more_link') group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'daily_discover' AND target_type IN ('item','similar_button', 'collection', 'cluster', 'tab', 'see_more_button') group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'top_product' AND target_type IN ('cluster','see_more','see_more_card') group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'main_category' AND target_type IN ('see_more_link', 'category') group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'official_mall_new' AND target_type IN ( 'shop', 'banner', 'recommended_brand','see_more_card', 'see_more_link','info_button','shopee_mall_button') group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where page_section = 'flash_sale' AND target_type IN ('item', 'see_all_link') group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where target_type = 'home_circle' group by 1
select distinct userid , sum(click_freq) as feature_value from home_clicks where target_type = 'pop_up_banner' group by 1
Frequency Segments
30+
16-30
6-15
5
4
3
2
1
Frequency from 0 - ++
Table: dev_clv_calculation_daily.crm_feature_home_clicks_temp
select distinct userid ,'other_payment_methods' as feature ,case when others = 0 then '0' when others > 0 and others <= 0.1 then '0.0 - 0.1' when others > 0.1 and others <= 0.2 then '0.1 - 0.2' when others > 0.2 and others <= 0.3 then '0.2 - 0.3' when others > 0.3 and others <= 0.4 then '0.3 - 0.4' when others > 0.4 and others <= 0.5 then '0.4 - 0.5' when others > 0.5 and others <= 0.6 then '0.5 - 0.6' when others > 0.6 and others <= 0.7 then '0.6 - 0.7' when others > 0.7 and others <= 0.8 then '0.7 - 0.8' when others > 0.8 and others <= 0.9 then '0.8 - 0.9' when others > 0.9 and others <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_pay_later' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_pay_later ,cast (cast (count(case when payment_method = 'Shopee_Wallet' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_wallet ,cast (cast (count(case when payment_method = 'COD' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as cod ,cast (cast (count(case when payment_method = 'Others' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as others ,count(checkoutid) as total_checkout from payment group by 1 )
,user_base_13 as ( select distinct userid ,'shopee_credit' as feature ,case when others = 0 then '0' when shopee_credit > 0 and shopee_credit <= 0.1 then '0.0 - 0.1' when shopee_credit > 0.1 and shopee_credit <= 0.2 then '0.1 - 0.2' when shopee_credit > 0.2 and shopee_credit <= 0.3 then '0.2 - 0.3' when shopee_credit > 0.3 and shopee_credit <= 0.4 then '0.3 - 0.4' when shopee_credit > 0.4 and shopee_credit <= 0.5 then '0.4 - 0.5' when shopee_credit > 0.5 and shopee_credit <= 0.6 then '0.5 - 0.6' when shopee_credit > 0.6 and shopee_credit <= 0.7 then '0.6 - 0.7' when shopee_credit > 0.7 and shopee_credit <= 0.8 then '0.7 - 0.8' when shopee_credit > 0.8 and shopee_credit <= 0.9 then '0.8 - 0.9' when shopee_credit > 0.9 and shopee_credit <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_credit' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_credit from payment group by 1 ) )
,user_base_14 as ( select distinct userid ,'credit_card' as feature ,case when others = 0 then '0' when credit_card > 0 and credit_card <= 0.1 then '0.0 - 0.1' when credit_card > 0.1 and credit_card <= 0.2 then '0.1 - 0.2' when credit_card > 0.2 and credit_card <= 0.3 then '0.2 - 0.3' when credit_card > 0.3 and credit_card <= 0.4 then '0.3 - 0.4' when credit_card > 0.4 and credit_card <= 0.5 then '0.4 - 0.5' when credit_card > 0.5 and credit_card <= 0.6 then '0.5 - 0.6' when credit_card > 0.6 and credit_card <= 0.7 then '0.6 - 0.7' when credit_card > 0.7 and credit_card <= 0.8 then '0.7 - 0.8' when credit_card > 0.8 and credit_card <= 0.9 then '0.8 - 0.9' when credit_card > 0.9 and credit_card <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Credit_card' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as credit_card from payment group by 1 ) )
,user_base_15 as ( select distinct userid ,'bank_transfer' as feature ,case when others = 0 then '0' when bank_transfer > 0 and bank_transfer <= 0.1 then '0.0 - 0.1' when bank_transfer > 0.1 and bank_transfer <= 0.2 then '0.1 - 0.2' when bank_transfer > 0.2 and bank_transfer <= 0.3 then '0.2 - 0.3' when bank_transfer > 0.3 and bank_transfer <= 0.4 then '0.3 - 0.4' when bank_transfer > 0.4 and bank_transfer <= 0.5 then '0.4 - 0.5' when bank_transfer > 0.5 and bank_transfer <= 0.6 then '0.5 - 0.6' when bank_transfer > 0.6 and bank_transfer <= 0.7 then '0.6 - 0.7' when bank_transfer > 0.7 and bank_transfer <= 0.8 then '0.7 - 0.8' when bank_transfer > 0.8 and bank_transfer <= 0.9 then '0.8 - 0.9' when bank_transfer > 0.9 and bank_transfer <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Bank_transfer' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as bank_transfer from payment group by 1 ) )
select distinct userid ,'shopee_wallet' as feature ,case when shopee_wallet = 0 then '0' when shopee_wallet > 0 and shopee_wallet <= 0.1 then '0.0 - 0.1' when shopee_wallet > 0.1 and shopee_wallet <= 0.2 then '0.1 - 0.2' when shopee_wallet > 0.2 and shopee_wallet <= 0.3 then '0.2 - 0.3' when shopee_wallet > 0.3 and shopee_wallet <= 0.4 then '0.3 - 0.4' when shopee_wallet > 0.4 and shopee_wallet <= 0.5 then '0.4 - 0.5' when shopee_wallet > 0.5 and shopee_wallet <= 0.6 then '0.5 - 0.6' when shopee_wallet > 0.6 and shopee_wallet <= 0.7 then '0.6 - 0.7' when shopee_wallet > 0.7 and shopee_wallet <= 0.8 then '0.7 - 0.8' when shopee_wallet > 0.8 and shopee_wallet <= 0.9 then '0.8 - 0.9' when shopee_wallet > 0.9 and shopee_wallet <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_pay_later' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_pay_later ,cast (cast (count(case when payment_method = 'Shopee_Wallet' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_wallet ,cast (cast (count(case when payment_method = 'COD' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as cod ,cast (cast (count(case when payment_method = 'Others' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as others ,count(checkoutid) as total_checkout from payment group by 1 )
select distinct userid ,'shopee_pay_later' as feature ,case when shopee_pay_later = 0 then '0' when shopee_pay_later > 0 and shopee_pay_later <= 0.1 then '0.0 - 0.1' when shopee_pay_later > 0.1 and shopee_pay_later <= 0.2 then '0.1 - 0.2' when shopee_pay_later > 0.2 and shopee_pay_later <= 0.3 then '0.2 - 0.3' when shopee_pay_later > 0.3 and shopee_pay_later <= 0.4 then '0.3 - 0.4' when shopee_pay_later > 0.4 and shopee_pay_later <= 0.5 then '0.4 - 0.5' when shopee_pay_later > 0.5 and shopee_pay_later <= 0.6 then '0.5 - 0.6' when shopee_pay_later > 0.6 and shopee_pay_later <= 0.7 then '0.6 - 0.7' when shopee_pay_later > 0.7 and shopee_pay_later <= 0.8 then '0.7 - 0.8' when shopee_pay_later > 0.8 and shopee_pay_later <= 0.9 then '0.8 - 0.9' when shopee_pay_later > 0.9 and shopee_pay_later <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_pay_later' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_pay_later ,cast (cast (count(case when payment_method = 'Shopee_Wallet' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_wallet ,cast (cast (count(case when payment_method = 'COD' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as cod ,cast (cast (count(case when payment_method = 'Others' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as others ,count(checkoutid) as total_checkout from payment group by 1 )
select distinct userid ,'cod' as feature ,case when cod = 0 then '0' when cod > 0 and cod <= 0.1 then '0.0 - 0.1' when cod > 0.1 and cod <= 0.2 then '0.1 - 0.2' when cod > 0.2 and cod <= 0.3 then '0.2 - 0.3' when cod > 0.3 and cod <= 0.4 then '0.3 - 0.4' when cod > 0.4 and cod <= 0.5 then '0.4 - 0.5' when cod > 0.5 and cod <= 0.6 then '0.5 - 0.6' when cod > 0.6 and cod <= 0.7 then '0.6 - 0.7' when cod > 0.7 and cod <= 0.8 then '0.7 - 0.8' when cod > 0.8 and cod <= 0.9 then '0.8 - 0.9' when cod > 0.9 and cod <= 1.0 then '0.9 - 1.0' end as feature_value from ( select distinct buyer_id as userid ,cast (cast (count(case when payment_method = 'Shopee_pay_later' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_pay_later ,cast (cast (count(case when payment_method = 'Shopee_Wallet' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as shopee_wallet ,cast (cast (count(case when payment_method = 'COD' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as cod ,cast (cast (count(case when payment_method = 'Others' then checkoutid else null end) as double) / cast (count(checkoutid) as double) as decimal(10, 2)) as others ,count(checkoutid) as total_checkout from payment group by 1
na (denominator is 0)
0.9 - 1.0
0.8 - 0.9
0.7 - 0.8
0.6 - 0.7
0.5 - 0.6
0.4 - 0.5
0.3 - 0.4
0.2 - 0.3
0.1 - 0.2
0.0 - 0.1
0
select o.buyer_id as userid , 'Shopee_food_buyer' as feature , case when s.store_id is not null then 'Yes' else 'No' end as feature_value from shopeefood.fact_shopeefood_order_id_db__order_da o left join ( select store_id , store_name from shopeefood.fact_shopeefood_merchant_id_db__store_da where date(dt) = date'{grass_date}' and store_name not like '%[TEST]%') s on o.store_id = s.store_id where date(from_unixtime(o.create_time/1000-3600)) < date'{grass_date}' -- order date and order_status = 800 -- order is completed and date(dt) = date'{grass_date}'
shopeefood.fact_shopeefood_merchant_id_db__store_da
shopeefood.fact_shopeefood_order_id_db__order_da
select distinct buyer_id as userid , 'Mall_Buyer' as feature , 'Yes' as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live where grass_region = '{country}' and grass_date <= date'{grass_date}' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' and is_official_shop = 1 and tz_type='local'
mp_order.dwd_order_item_all_ent_df__reg_s0_live
select userid ,feature ,case when b.feature_value is null then 'Others' else b.feature_value end feature_value from ( select distinct buyer_id as userid ,'mp_category' as feature , cast(split(most_purchased_global_be_cats, '"')[1] as string) as feature_value from mp_order.dws_buyer_purchase_category_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local' ) a left join ( select distinct feature_value ----get top 15 categories from ( select distinct feature ,feature_value ,rank() OVER (PARTITION BY feature ORDER BY user_cnt DESC) AS rn from ( select 'mp_category' as feature ,cast(split(most_purchased_global_be_cats,'"')[1] as string) as feature_value ,count(distinct buyer_id) as user_cnt from mp_order.dws_buyer_purchase_category_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local' group by 1, 2 ) ) where rn <= 15 ) b on a.feature_value = b.feature_value
mp_order.dws_buyer_purchase_category_td__reg_s0_live
Women Clothes
Women Bags
Tickets, Vouchers & Services
Sports & Outdoors
Muslim Fashion
Motorcycles
Mom & Baby
Mobile & Gadgets
Men Clothes
Home & Living
Health
Food & Beverages
Fashion Accessories
Beauty
Baby & Kids Fashion
select distinct buyer_id as userid , 'MP_Purchase_Level' as feature , case when completed_order_cnt_td = 0 then 'Non-mature' when completed_order_cnt_td = 1 then 'Mature-light' when completed_order_cnt_td between 2 and 4 then 'Mature-normal' when completed_order_cnt_td between 5 and 14 then 'Mature-heavy' when completed_order_cnt_td between 15 and 29 then 'Whale' when completed_order_cnt_td >= 30 then 'Super-whale' end as feature_value from mp_order.dws_buyer_gmv_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'
mp_order.dws_buyer_gmv_td__reg_s0_live
na
Super-whale
Whale
Mature-heavy
Mature-normal
Mature-light
Non-mature
SELECT distinct user_id as userid , 'sensitivity_cod_label' as feature , case when sensitivity_cod_label = 1 then 'YES' when sensitivity_cod_label = 0 then 'NO' end as feature_value FROM mpi_users.userdm_public_shopee_user_a180_tags WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' -- earliest date = 2022-04-07
No (0)
Yes (1)
SELECT distinct user_id as userid , 'Mum_Membership' as feature , 'YES' as feature_value from mp_user.dim_user_ext__reg_s0_live where grass_date = date'{grass_date}' and grass_region = '{country}' and array_contains(club_tags, 'mums club')
SELECT distinct buyer_id as userid , 'DP_User' as feature , 'YES' as feature_value FROM digitalpurchase.order_mart_dws_dp_buyer_gmv_td WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' AND tz_type = 'local'
digitalpurchase.order_mart_dws_dp_buyer_gmv_td
select distinct buyer_id as userid , 'Mart_Buyer' as feature , 'YES' as feature_value from mp_order.dwd_order_item_all_ent_df__reg_s0_live where grass_region = '{country}' and grass_date <= date'{grass_date}' and to_date(cast(create_datetime as timestamp)) < date'{grass_date}' and is_supermarket_shop = 1 and tz_type='local'
Subtopic
(newly added) Count Order_id
SELECT distinct user_id as userid , 'shopee_rewards_tier' as feature , loyalty_tier_name as feature_value FROM mp_user.dim_user_ext__reg_s0_live WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' and loyalty_tier_name is not null
mp_user.dim_user_ext__reg_s0_live
Gold
Silver
Classic
Platinum
grass_region
grass_date
select distinct user_id as userid , 'mobile_device_price_range' as feature , case when last_used_property_price_sgd/10000 between 0 and 99 then '0-100' when last_used_property_price_sgd/10000 between 100 and 199 then '100-200' when last_used_property_price_sgd/10000 between 200 and 299 then '200-300' when last_used_property_price_sgd/10000 between 300 and 399 then '300-400' when last_used_property_price_sgd/10000 between 400 and 499 then '400 above' else 'Others' end as feature_value from mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live where tz_type = 'local' and grass_region = '{country}' and grass_date = date'{grass_date}'
mp_omclv.dwd_user_cat_register_install_gmv_event_df__reg_live
400+
300-400
200-300
100-200
0-100
SELECT distinct user_id as userid , 'device_brand' as feature , case when last_activity_device_brand_td like '%vivo%' then 'vivo' when last_activity_device_brand_td like '%xiaomi%' then 'xiaomi' when last_activity_device_brand_td like '%samsung%' then 'samsung' when last_activity_device_brand_td like '%apple%' then 'apple' when last_activity_device_brand_td like '%realme%' then 'realme' when last_activity_device_brand_td like '%infinix%' then 'infinix' when last_activity_device_brand_td like '%asus%' then 'asus' when last_activity_device_brand_td like '%oppo%' then 'oppo' else 'others' end as feature_value from mp_user.ads_activeness_user_activity_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'
others
oppo
asus
infinix
realme
apple
samsung
vivo
SELECT distinct user_id as userid , 'os_platform' as feature , case when last_activity_platform_td is not null then last_activity_platform_td when last_activity_platform_td is null then 'unknown' end as feature_value from mp_user.ads_activeness_user_activity_td__reg_s0_live where grass_region = '{country}' and grass_date = date'{grass_date}' and tz_type = 'local'
mp_user.ads_activeness_user_activity_td__reg_s0_live
pc_web
android_web_lite
ios_web_lite
andorid_web
ios_web
android
ios
SELECT distinct user_id as userid ,'has_kids' as feature , case when haskid_predict_label = 1 then 'YES' when haskid_predict_label = 0 then 'NO' end as feature_value from mpi_users.userdm_public_shopee_user_a180_tags where grass_region = '{country}' and grass_date = date'{grass_date}' and haskid_predict_label >= 0
No
Yes
SELECT distinct user_id as userid , 'language' as feature , case when app_language in ('id') then 'id' when app_language in ('en','en-my','sg') then 'en' else 'Others' end as feature_value FROM mp_user.dim_user__reg_s0_live WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' and tz_type = 'local'
EN
ID
SELECT distinct user_id as userid , 'age_group' as feature , case when age_predict_label = 1 then '18-24' when age_predict_label = 2 then '25-34' when age_predict_label = 3 then '35-44' when age_predict_label = 4 then '45+' end as feature_value from mpi_users.userdm_public_shopee_user_a180_tags where grass_region = '{country}' and grass_date = date'{grass_date}' and age_predict_label is not null
45+
25-44
25-34
18-24
select distinct user_id as userid , 'income_level' as feature , income_lvl_predict_label as feature_value from mpi_users.userdm_public_shopee_user_a180_tags where grass_region = '{country}' and grass_date = date'{grass_date}' and income_lvl_predict_label is not null
mpi_users.userdm_public_shopee_user_a180_tags
(4) 1000+ (million idr per month)
(3) 500-1000
(2) 280-500
(1) 150-280
(0) 0-150
SELECT distinct user_id as userid ,'gender' as feature , case when gender = 1 then 'Male' when gender = 2 then 'Female' when gender = 3 then 'Male' when gender = 4 then 'Female' end as feature_value FROM mp_user.dim_user__reg_s0_live WHERE grass_region = '{country}' AND grass_date = date'{grass_date}' AND tz_type = 'local' AND gender is not null
Male (Male + Predicted Male)
Female (Female + Predicted Female)
SELECT distinct user_id as userid ,'address_city' as feature , default_delivery_address_city as feature_value FROM mp_user.dim_user__reg_s0_live a WHERE a.grass_region = '{country}' AND a.grass_date = date'{grass_date}' and tz_type = 'local' and default_delivery_address_city is not null
SELECT distinct user_id as userid ,'city_tier' as feature , case when b.city_tier is not null then b.city_tier else 'Others' end as feature_value FROM mp_user.dim_user__reg_s0_live a left join idecbi_mkt.ec_shopee_mkt_growth_tactical_dim__city_tiering b on a.default_delivery_address_city = b.city WHERE a.grass_region = '{country}' AND a.grass_date = date'{grass_date}' and tz_type = 'local' and default_delivery_address_city is not null
idecbi_mkt.ec_shopee_mkt_growth_tactical_dim__city_tiering
mp_user.dim_user__reg_s0_live
unknown
Others
City Tier 4 - Undeveloped Cities
City Tier 3 - Underdeveloped Cities
City Tier 2 - Developing Cities
City Tier 1 - Developed Cities