 • #分析|数据科学类
• #工作信息
• #求职
• #找工就业
• #facebook

# 面试‌‌‌‌‌‌‍‍‌‍‍‌‌‍‌‍‌‍‌‌‍‌‌‌‌‌‌‌‍‌‌‌前的SQL Window Function 总结复习篇 nanokk
2727
4

1. Calculate Revenue: 100*(m1-m0)/m0

2. Running Total

3. Duplicate data

4. Top N

5. Repeat Customer Behaviour

1.Calculate Revenue: 100*(m1-m0)/m0

WITH

monthly_revenue as (

SELECT

date_trunc(‘month’,datetime)::date as month,

state,

sum(amount) as revenue

FROM orders

GROUP BY 1,2

)

,prev_month_revenue as (

SELECT *,

lag(revenue) over (partition by state order by month) as prev_month_revenue

FROM monthly_revenue

)

SELECT *,

round(100.0*(revenue-prev_month_revenue)/prev_month_revenue,1) as revenue_growth

FROM prev_month_revenue

ORDER BY 2,1

2. Running Total:

WITH

monthly_revenue as (

SELECT

date_trunc(‘month’,datetime)::date as month,

sum(amount) as revenue

FROM orders

GROUP BY 1

)

SELECT *,

sum(revenue) over (order by month rows between unbounded preceding and current row) as running_total

FROM monthly_revenue

ORDER BY 1

Some other related Window Function：

SELECT

*

,sum(amount) over () as amount_total

,sum(amount) over (order by order_id rows between unbounded preceding and current row) as running_sum

,sum(amount) over (partition by customer_id order by datetime rows between unbounded preceding and current row) as running_sum_by_customer

,avg(amount) over (order by datetime rows between 5 preceding and current row) as trailing_avg

FROM orders

ORDER BY 1

3.duplicate data

SELECT *

FROM (

SELECT *,

row_number() over (partition by order_id order by datetime desc)

FROM orders

)

WHERE row_number=1

4. Top N rows in every group

WITH

orders_ranked as (

SELECT

date_trunc(‘month’,datetime)::date as month,

*,

row_number() over (partition by date_trunc(‘month’,datetime) order by amount desc, datetime)

FROM orders_cleaned

)

SELECT *

FROM orders_ranked

WHERE row_number<=2

ORDER BY 1

5. Repeat purchase behavior

What is the repeat purchase rate and the typical difference between the first order and the second order amount? That would be expressed as:

WITH

customer_orders as (

SELECT *,

row_number() over (partition by customer_id order by datetime) as customer_order_n,

lag(amount) over (partition by customer_id order by datetime) as prev_order_amount

FROM orders_cleaned

)

SELECT

round(100.0*sum(case when customer_order_n=2 then 1 end)/count(distinct customer_id),1) as repeat_purchases,

avg(case when customer_order_n=2 then 1.0*amount/prev_order_amount end) as revenue_expansion

FROM customer_orders
4条回复