SQL CTE (Common Table Expression) 高级用法与最佳实践_MsSqlserver

cte (common table expression) 详解

基础概念

定义

cte（common table expression，公用表表达式）是sql中的"命名临时结果集"，通过 with 关键字定义，仅在当前查询中生效。

核心作用：

简化复杂查询：将复杂逻辑分解为多个步骤
提高可读性：使sql语句更易理解和维护
复用子查询结果：避免重复计算相同的子查询

本质特性

非物理存储：不是物理表，不存储在磁盘上
临时性：查询执行过程中生成的虚拟结果集
作用域限制：仅在定义它的查询语句中有效
自动销毁：查询结束后自动清理

cte 主要特点

临时结果集：只在查询执行期间存在
可引用性：可以在主查询中多次引用
可读性强：比嵌套子查询更易理解
递归支持：支持递归查询（mysql 8.0+）

基本语法结构

with cte_name [(column_list)] as (
    -- cte定义查询
    select ...
)
-- 主查询
select ... from cte_name ...;

cte类型详解

非递归cte（普通cte）

特点

使用 with 关键字
单一子查询，不引用自身
一次性执行，结果供主查询使用

基础示例

-- 示例1：计算订单统计信息
with order_stats as (
    select 
        avg(amount) as avg_amount,
        max(amount) as max_amount,
        count(*) as total_orders
    from orders
    where order_date >= '2024-01-01'
)
select 
    o.order_id,
    o.amount,
    os.avg_amount,
    case 
        when o.amount > os.avg_amount then '高于平均'
        else '低于平均'
    end as amount_category
from orders o
cross join order_stats os
where o.order_date >= '2024-01-01';

多个cte示例

-- 示例2：多个cte协同工作
with 
high_value_customers as (
    select customer_id, sum(amount) as total_spent
    from orders
    group by customer_id
    having sum(amount) > 10000
),
recent_orders as (
    select customer_id, count(*) as recent_order_count
    from orders
    where order_date >= current_date - interval '30 days'
    group by customer_id
)
select 
    c.customer_name,
    hvc.total_spent,
    coalesce(ro.recent_order_count, 0) as recent_orders
from customers c
join high_value_customers hvc on c.customer_id = hvc.customer_id
left join recent_orders ro on c.customer_id = ro.customer_id
order by hvc.total_spent desc;

递归cte

核心结构

递归cte必须包含两个部分：

锚点成员（anchor member）：递归的起始点，非递归查询
递归成员（recursive member）：引用cte自身的查询

执行逻辑

执行锚点成员，获得初始结果集
递归成员使用当前结果集查询新数据
将新结果添加到结果集中
重复步骤2-3，直到递归成员返回空结果
返回完整的结果集

基础递归示例

-- 示例1：生成数字序列
with recursive number_series as (
    -- 锚点成员：起始值
    select 1 as n
    union all
    -- 递归成员：递增逻辑
    select n + 1
    from number_series
    where n < 10  -- 终止条件
)
select * from number_series;

树形结构查询示例

-- 示例2：组织架构查询（查找某员工及其所有下属）
with recursive employee_hierarchy as (
    -- 锚点成员：指定的管理者
    select 
        employee_id,
        employee_name,
        manager_id,
        0 as level,
        cast(employee_name as varchar(1000)) as path
    from employees
    where employee_id = 1001  -- 起始员工id
    union all
    -- 递归成员：查找下属
    select 
        e.employee_id,
        e.employee_name,
        e.manager_id,
        eh.level + 1,
        cast(eh.path || ' -> ' || e.employee_name as varchar(1000))
    from employees e
    join employee_hierarchy eh on e.manager_id = eh.employee_id
    where eh.level < 5  -- 防止无限递归
)
select 
    employee_id,
    employee_name,
    level,
    path as hierarchy_path
from employee_hierarchy
order by level, employee_name;

向上追溯示例

-- 示例3：向上追溯管理链
with recursive management_chain as (
    -- 锚点成员：指定员工
    select 
        employee_id,
        employee_name,
        manager_id,
        0 as level_up
    from employees
    where employee_id = 2001  -- 起始员工
    union all
    -- 递归成员：查找上级管理者
    select 
        e.employee_id,
        e.employee_name,
        e.manager_id,
        mc.level_up + 1
    from employees e
    join management_chain mc on e.employee_id = mc.manager_id
)
select * from management_chain order by level_up;

语法与执行机制

postgresql cte执行机制

物化控制

postgresql提供了对cte物化的精确控制：

-- 强制物化（默认行为）
with cte_name as materialized (
    select expensive_calculation() from large_table
)
select * from cte_name 
union all 
select * from cte_name;  -- 复用已计算的结果
-- 禁止物化（内联优化）
with cte_name as not materialized (
    select * from small_table where condition
)
select * from cte_name where additional_condition;

执行计划分析

-- 查看cte执行计划
explain (analyze, buffers) 
with sales_summary as (
    select 
        product_id,
        sum(quantity) as total_quantity,
        sum(amount) as total_amount
    from sales
    where sale_date >= '2024-01-01'
    group by product_id
)
select 
    p.product_name,
    ss.total_quantity,
    ss.total_amount
from products p
join sales_summary ss on p.product_id = ss.product_id;

递归cte的终止机制

自动终止条件

递归成员返回空结果集
达到系统递归深度限制
满足用户定义的终止条件

防止无限递归的策略

-- 策略1：使用计数器限制递归深度
with recursive limited_recursion as (
    select id, parent_id, name, 0 as depth
    from categories
    where parent_id is null
    union all
    select c.id, c.parent_id, c.name, lr.depth + 1
    from categories c
    join limited_recursion lr on c.parent_id = lr.id
    where lr.depth < 10  -- 限制最大深度
)
select * from limited_recursion;
-- 策略2：使用路径检测避免循环
with recursive path_tracking as (
    select 
        id, 
        parent_id, 
        name,
        array[id] as path
    from categories
    where parent_id is null
    union all
    select 
        c.id, 
        c.parent_id, 
        c.name,
        pt.path || c.id
    from categories c
    join path_tracking pt on c.parent_id = pt.id
    where not (c.id = any(pt.path))  -- 避免循环
)
select * from path_tracking;

性能考虑与优化

cte vs 子查询性能对比

何时使用cte

-- ✅ 推荐：需要多次引用相同结果时
with expensive_calc as (
    select 
        customer_id,
        complex_calculation(data) as result
    from large_table
    where complex_condition
)
select c1.customer_id, c1.result, c2.result
from expensive_calc c1
join expensive_calc c2 on c1.customer_id = c2.customer_id + 1;
-- ❌ 不推荐：简单的一次性查询
select * from (
    select * from small_table where simple_condition
) subquery;

性能优化技巧

1. 合理使用索引

-- 确保递归cte中的连接字段有索引
create index idx_categories_parent_id on categories(parent_id);
create index idx_employees_manager_id on employees(manager_id);
-- 在递归查询中使用索引友好的条件
with recursive category_tree as (
    select id, parent_id, name, 0 as level
    from categories
    where id = 1  -- 使用主键，利用主键索引
    union all
    select c.id, c.parent_id, c.name, ct.level + 1
    from categories c
    join category_tree ct on c.parent_id = ct.id  -- 利用外键索引
    where ct.level < 5
)
select * from category_tree;

2. 控制递归深度

-- 设置合理的递归深度限制
set max_stack_depth = '2mb';  -- postgresql
-- 或在查询中使用where条件限制深度

3. 优化数据类型和字段选择

-- ✅ 只选择必要的字段
with recursive slim_hierarchy as (
    select id, parent_id, level  -- 只选择必要字段
    from categories
    where parent_id is null
    union all
    select c.id, c.parent_id, sh.level + 1
    from categories c
    join slim_hierarchy sh on c.parent_id = sh.id
    where sh.level < 10
)
select sh.id, sh.level, c.name  -- 在最后再join获取详细信息
from slim_hierarchy sh
join categories c on sh.id = c.id;

内存使用优化

-- 大数据量递归查询的分批处理
with recursive batch_process as (
    select id, parent_id, name, 0 as level, 0 as batch_num
    from categories
    where parent_id is null
    union all
    select c.id, c.parent_id, c.name, bp.level + 1, 
           case when bp.level % 1000 = 0 then bp.batch_num + 1 
                else bp.batch_num end
    from categories c
    join batch_process bp on c.parent_id = bp.id
    where bp.level < 10000 and bp.batch_num < 10
)
select * from batch_process;

跨数据库支持

主流数据库cte支持对比

数据库	非递归cte	递归cte	关键差异	版本要求
postgresql	✅	✅ (`with recursive`)	标准实现，支持物化控制	8.4+
mysql	✅	✅ (`with recursive`)	8.0后支持，语法与postgresql一致	8.0+
sql server	✅	✅ (`with`)	递归不需要recursive关键字	2005+
oracle	✅	✅ (`with`)	支持子查询因子化	9i+
sqlite	✅	✅ (`with recursive`)	轻量实现	3.8.3+

数据库特定语法示例

sql server

-- sql server递归cte（无需recursive关键字）
with employee_cte as (
    -- 锚点成员
    select employee_id, manager_id, employee_name, 0 as level
    from employees
    where manager_id is null
    union all
    -- 递归成员
    select e.employee_id, e.manager_id, e.employee_name, ec.level + 1
    from employees e
    inner join employee_cte ec on e.manager_id = ec.employee_id
)
select * from employee_cte
option (maxrecursion 100);  -- sql server特有的递归限制语法

oracle

-- oracle的cte（子查询因子化）
with 
sales_data as (
    select product_id, sum(amount) as total_sales
    from sales
    where sale_date >= date '2024-01-01'
    group by product_id
),
product_info as (
    select product_id, product_name, category_id
    from products
)
select pi.product_name, sd.total_sales
from product_info pi
join sales_data sd on pi.product_id = sd.product_id
order by sd.total_sales desc;

mysql 8.0+

-- mysql递归cte
with recursive fibonacci as (
    select 0 as n, 0 as fib_n, 1 as fib_n_plus_1
    union all
    select n + 1, fib_n_plus_1, fib_n + fib_n_plus_1
    from fibonacci
    where n < 20
)
select n, fib_n from fibonacci;

兼容性处理策略

旧版本mysql替代方案

-- mysql 5.x 使用临时表替代cte
-- 替代普通cte
create temporary table temp_order_stats as
select avg(amount) as avg_amount from orders;
select o.*, t.avg_amount
from orders o
cross join temp_order_stats t
where o.amount > t.avg_amount;
drop temporary table temp_order_stats;
-- 替代递归cte（使用存储过程）
delimiter //
create procedure getemployeehierarchy(in root_id int)
begin
    create temporary table temp_hierarchy (
        employee_id int,
        level int
    );
    insert into temp_hierarchy values (root_id, 0);
    set @level = 0;
    while row_count() > 0 and @level < 10 do
        insert into temp_hierarchy
        select e.employee_id, @level + 1
        from employees e
        join temp_hierarchy th on e.manager_id = th.employee_id
        where th.level = @level;
        set @level = @level + 1;
    end while;
    select * from temp_hierarchy;
    drop temporary table temp_hierarchy;
end //
delimiter ;

实际应用场景

1. 数据分析与报表

销售漏斗分析

with sales_funnel as (
    select 
        'leads' as stage,
        count(*) as count,
        1 as stage_order
    from leads
    where created_date >= '2024-01-01'
    union all
    select 
        'qualified leads' as stage,
        count(*) as count,
        2 as stage_order
    from leads
    where status = 'qualified' and created_date >= '2024-01-01'
    union all
    select 
        'opportunities' as stage,
        count(*) as count,
        3 as stage_order
    from opportunities
    where created_date >= '2024-01-01'
    union all
    select 
        'closed won' as stage,
        count(*) as count,
        4 as stage_order
    from opportunities
    where status = 'won' and created_date >= '2024-01-01'
),
funnel_with_conversion as (
    select 
        stage,
        count,
        stage_order,
        lag(count) over (order by stage_order) as previous_count,
        case 
            when lag(count) over (order by stage_order) > 0 
            then round(count::decimal / lag(count) over (order by stage_order) * 100, 2)
            else 100.0
        end as conversion_rate
    from sales_funnel
)
select 
    stage,
    count,
    conversion_rate || '%' as conversion_rate
from funnel_with_conversion
order by stage_order;

同期群分析（cohort analysis）

with customer_cohorts as (
    select 
        customer_id,
        date_trunc('month', min(order_date)) as cohort_month
    from orders
    group by customer_id
),
customer_activities as (
    select 
        cc.cohort_month,
        date_trunc('month', o.order_date) as activity_month,
        count(distinct o.customer_id) as active_customers
    from customer_cohorts cc
    join orders o on cc.customer_id = o.customer_id
    group by cc.cohort_month, date_trunc('month', o.order_date)
),
cohort_table as (
    select 
        cohort_month,
        activity_month,
        active_customers,
        extract(epoch from (activity_month - cohort_month)) / (30 * 24 * 60 * 60) as month_number
    from customer_activities
)
select 
    cohort_month,
    month_number,
    active_customers,
    first_value(active_customers) over (
        partition by cohort_month 
        order by month_number
    ) as cohort_size,
    round(
        active_customers::decimal / 
        first_value(active_customers) over (
            partition by cohort_month 
            order by month_number
        ) * 100, 2
    ) as retention_rate
from cohort_table
order by cohort_month, month_number;

2. 层级数据处理

权限系统递归查询

-- 查询用户的所有有效权限（包括继承的权限）
with recursive user_permissions as (
    -- 直接权限
    select 
        up.user_id,
        up.permission_id,
        p.permission_name,
        'direct' as permission_source,
        0 as inheritance_level
    from user_permissions up
    join permissions p on up.permission_id = p.permission_id
    where up.user_id = :user_id
    union all
    -- 角色继承的权限
    select 
        ur.user_id,
        rp.permission_id,
        p.permission_name,
        'role:' || r.role_name as permission_source,
        1 as inheritance_level
    from user_roles ur
    join roles r on ur.role_id = r.role_id
    join role_permissions rp on r.role_id = rp.role_id
    join permissions p on rp.permission_id = p.permission_id
    where ur.user_id = :user_id
    union all
    -- 角色层级继承的权限
    select 
        up.user_id,
        rp.permission_id,
        p.permission_name,
        'inherited_role:' || pr.role_name as permission_source,
        up.inheritance_level + 1
    from user_permissions up
    join user_roles ur on up.user_id = ur.user_id
    join role_hierarchy rh on ur.role_id = rh.child_role_id
    join roles pr on rh.parent_role_id = pr.role_id
    join role_permissions rp on pr.role_id = rp.role_id
    join permissions p on rp.permission_id = p.permission_id
    where up.inheritance_level < 3  -- 限制继承深度
)
select distinct 
    permission_id,
    permission_name,
    min(inheritance_level) as min_inheritance_level,
    string_agg(distinct permission_source, ', ') as sources
from user_permissions
group by permission_id, permission_name
order by min_inheritance_level, permission_name;

分类目录管理

-- 移动分类及其所有子分类到新的父分类下
with recursive category_subtree as (
    -- 要移动的分类及其子分类
    select id, parent_id, name, 0 as level
    from categories
    where id = :category_to_move
    union all
    select c.id, c.parent_id, c.name, cs.level + 1
    from categories c
    join category_subtree cs on c.parent_id = cs.id
),
update_plan as (
    select 
        cs.id,
        case 
            when cs.level = 0 then :new_parent_id
            else cs.parent_id
        end as new_parent_id
    from category_subtree cs
)
update categories 
set parent_id = up.new_parent_id,
    updated_at = current_timestamp
from update_plan up
where categories.id = up.id;

3. 时间序列数据处理

生成时间序列并填充缺失数据

with recursive date_series as (
    select date '2024-01-01' as date_val
    union all
    select date_val + interval '1 day'
    from date_series
    where date_val < date '2024-12-31'
),
daily_sales as (
    select 
        date(order_date) as sale_date,
        sum(amount) as daily_amount,
        count(*) as daily_orders
    from orders
    where order_date >= '2024-01-01' 
      and order_date < '2025-01-01'
    group by date(order_date)
)
select 
    ds.date_val,
    coalesce(dsales.daily_amount, 0) as amount,
    coalesce(dsales.daily_orders, 0) as orders,
    -- 计算7天移动平均
    avg(coalesce(dsales.daily_amount, 0)) over (
        order by ds.date_val 
        rows between 6 preceding and current row
    ) as moving_avg_7_days
from date_series ds
left join daily_sales dsales on ds.date_val = dsales.sale_date
order by ds.date_val;

会话分析

-- 分析用户会话，定义30分钟无活动为会话结束
with recursive user_sessions as (
    select 
        user_id,
        event_time,
        event_type,
        row_number() over (partition by user_id order by event_time) as rn,
        event_time as session_start,
        1 as session_id
    from user_events
    where user_id = :user_id
      and event_time >= :start_date
    union all
    select 
        ue.user_id,
        ue.event_time,
        ue.event_type,
        us.rn + 1,
        case 
            when ue.event_time - us.event_time > interval '30 minutes'
            then ue.event_time
            else us.session_start
        end,
        case 
            when ue.event_time - us.event_time > interval '30 minutes'
            then us.session_id + 1
            else us.session_id
        end
    from user_events ue
    join user_sessions us on ue.user_id = us.user_id 
                          and ue.event_time > us.event_time
    where ue.user_id = :user_id
      and ue.event_time >= :start_date
      and us.rn = (select max(rn) from user_sessions where user_id = us.user_id)
)
select 
    session_id,
    session_start,
    max(event_time) as session_end,
    count(*) as event_count,
    max(event_time) - session_start as session_duration
from user_sessions
group by session_id, session_start
order by session_start;

最佳实践

1. 命名规范

-- ✅ 推荐：使用描述性的cte名称
with 
high_value_customers as (...),
recent_orders as (...),
product_performance as (...)
-- ❌ 避免：使用模糊的名称
with 
cte1 as (...),
temp as (...),
data as (...)

2. 结构化组织

-- ✅ 推荐：按逻辑顺序组织多个cte
with 
-- 基础数据提取
raw_sales_data as (
    select customer_id, product_id, amount, sale_date
    from sales
    where sale_date >= '2024-01-01'
),
-- 数据聚合
customer_totals as (
    select customer_id, sum(amount) as total_spent
    from raw_sales_data
    group by customer_id
),
-- 分类标记
customer_segments as (
    select 
        customer_id,
        total_spent,
        case 
            when total_spent > 10000 then 'vip'
            when total_spent > 5000 then 'premium'
            else 'standard'
        end as segment
    from customer_totals
)
-- 最终查询
select 
    c.customer_name,
    cs.total_spent,
    cs.segment
from customers c
join customer_segments cs on c.customer_id = cs.customer_id
order by cs.total_spent desc;

3. 递归cte最佳实践

始终包含终止条件

-- ✅ 推荐：明确的终止条件
with recursive hierarchy as (
    select id, parent_id, name, 0 as level
    from categories
    where parent_id is null
    union all
    select c.id, c.parent_id, c.name, h.level + 1
    from categories c
    join hierarchy h on c.parent_id = h.id
    where h.level < 10  -- 明确的深度限制
      and c.parent_id is not null  -- 防止null值问题
)
select * from hierarchy;

循环检测

-- ✅ 推荐：检测和防止循环引用
with recursive safe_hierarchy as (
    select 
        id, 
        parent_id, 
        name, 
        0 as level,
        array[id] as path
    from categories
    where parent_id is null
    union all
    select 
        c.id, 
        c.parent_id, 
        c.name, 
        sh.level + 1,
        sh.path || c.id
    from categories c
    join safe_hierarchy sh on c.parent_id = sh.id
    where sh.level < 20
      and not (c.id = any(sh.path))  -- 防止循环
)
select id, name, level, array_to_string(path, ' -> ') as path
from safe_hierarchy;

4. 性能优化最佳实践

合理使用索引

-- 为递归查询创建合适的索引
create index idx_categories_parent_id on categories(parent_id);
create index idx_categories_id_parent_id on categories(id, parent_id);
-- 复合索引用于复杂递归查询
create index idx_employees_manager_dept on employees(manager_id, department_id);

限制结果集大小

-- ✅ 推荐：在cte中尽早过滤数据
with filtered_orders as (
    select customer_id, amount, order_date
    from orders
    where order_date >= '2024-01-01'  -- 尽早过滤
      and status = 'completed'
      and amount > 0
),
customer_stats as (
    select 
        customer_id,
        count(*) as order_count,
        sum(amount) as total_amount
    from filtered_orders  -- 使用已过滤的数据
    group by customer_id
)
select * from customer_stats
where order_count >= 5;  -- 进一步过滤

常见陷阱与注意事项

1. 递归cte陷阱

无限递归

-- ❌ 危险：可能导致无限递归
with recursive dangerous_recursion as (
    select 1 as n
    union all
    select n + 1 from dangerous_recursion  -- 没有终止条件！
)
select * from dangerous_recursion;
-- ✅ 安全：包含终止条件
with recursive safe_recursion as (
    select 1 as n
    union all
    select n + 1 from safe_recursion where n < 100
)
select * from safe_recursion;

循环引用数据

-- 处理可能存在循环引用的数据
-- 假设categories表中存在循环引用：a -> b -> c -> a
-- ❌ 问题：可能导致无限递归
with recursive bad_hierarchy as (
    select id, parent_id, name from categories where id = 1
    union all
    select c.id, c.parent_id, c.name
    from categories c
    join bad_hierarchy bh on c.parent_id = bh.id
)
select * from bad_hierarchy;
-- ✅ 解决：使用路径跟踪防止循环
with recursive good_hierarchy as (
    select id, parent_id, name, array[id] as path
    from categories where id = 1
    union all
    select c.id, c.parent_id, c.name, gh.path || c.id
    from categories c
    join good_hierarchy gh on c.parent_id = gh.id
    where not (c.id = any(gh.path))
)
select id, parent_id, name from good_hierarchy;

2. 性能陷阱

过度使用cte

-- ❌ 过度使用：简单查询不需要cte
with simple_cte as (
    select * from users where status = 'active'
)
select * from simple_cte where age > 18;
-- ✅ 直接查询更简单高效
select * from users 
where status = 'active' and age > 18;

大数据量递归

-- ❌ 问题：大数据量递归可能导致内存溢出
with recursive large_hierarchy as (
    select id, parent_id, name from large_table where parent_id is null
    union all
    select lt.id, lt.parent_id, lt.name
    from large_table lt
    join large_hierarchy lh on lt.parent_id = lh.id
)
select * from large_hierarchy;
-- ✅ 解决：分批处理或限制深度
with recursive controlled_hierarchy as (
    select id, parent_id, name, 0 as level from large_table where parent_id is null
    union all
    select lt.id, lt.parent_id, lt.name, ch.level + 1
    from large_table lt
    join controlled_hierarchy ch on lt.parent_id = ch.id
    where ch.level < 5  -- 限制深度
)
select * from controlled_hierarchy;

3. 数据类型陷阱

union all类型不匹配

-- ❌ 问题：数据类型不匹配
with recursive type_mismatch as (
    select 1 as id, 'root' as name  -- name是varchar
    union all
    select id + 1, id + 1 from type_mismatch where id < 5  -- name变成了integer
)
select * from type_mismatch;
-- ✅ 解决：确保类型一致
with recursive type_consistent as (
    select 1 as id, 'root' as name
    union all
    select id + 1, cast(id + 1 as varchar) from type_consistent where id < 5
)
select * from type_consistent;

4. null值处理

-- ✅ 正确处理null值
with recursive null_safe_hierarchy as (
    select id, parent_id, name, 0 as level
    from categories
    where parent_id is null  -- 明确处理null
    union all
    select c.id, c.parent_id, c.name, nsh.level + 1
    from categories c
    join null_safe_hierarchy nsh on c.parent_id = nsh.id
    where c.parent_id is not null  -- 防止null值问题
      and nsh.level < 10
)
select * from null_safe_hierarchy;

高级用法

1. cte与窗口函数结合

-- 计算每个产品的销售趋势
with monthly_sales as (
    select 
        product_id,
        date_trunc('month', order_date) as month,
        sum(amount) as monthly_amount
    from orders
    where order_date >= '2024-01-01'
    group by product_id, date_trunc('month', order_date)
),
sales_with_trends as (
    select 
        product_id,
        month,
        monthly_amount,
        lag(monthly_amount) over (partition by product_id order by month) as prev_month_amount,
        avg(monthly_amount) over (
            partition by product_id 
            order by month 
            rows between 2 preceding and current row
        ) as moving_avg_3_months
    from monthly_sales
)
select 
    p.product_name,
    swt.month,
    swt.monthly_amount,
    swt.moving_avg_3_months,
    case 
        when swt.prev_month_amount is null then 'n/a'
        when swt.monthly_amount > swt.prev_month_amount then 'increasing'
        when swt.monthly_amount < swt.prev_month_amount then 'decreasing'
        else 'stable'
    end as trend
from sales_with_trends swt
join products p on swt.product_id = p.product_id
order by p.product_name, swt.month;

2. 递归cte生成复杂序列

生成斐波那契数列

with recursive fibonacci as (
    select 
        1 as n,
        0::bigint as fib_current,
        1::bigint as fib_next
    union all
    select 
        n + 1,
        fib_next,
        fib_current + fib_next
    from fibonacci
    where n < 50 and fib_next < 9223372036854775807  -- 防止溢出
)
select n, fib_current as fibonacci_number
from fibonacci;

生成工作日序列

with recursive business_days as (
    select date '2024-01-01' as business_date
    where extract(dow from date '2024-01-01') between 1 and 5
    union all
    select 
        case 
            when extract(dow from business_date + 1) = 6 then business_date + 3  -- 跳过周末
            when extract(dow from business_date + 1) = 0 then business_date + 2
            else business_date + 1
        end
    from business_days
    where business_date < date '2024-12-31'
),
business_days_with_holidays as (
    select bd.business_date
    from business_days bd
    left join holidays h on bd.business_date = h.holiday_date
    where h.holiday_date is null  -- 排除节假日
)
select business_date from business_days_with_holidays order by business_date;

3. cte用于数据清洗和转换

-- 复杂的数据清洗流程
with 
-- 第一步：基础数据清洗
cleaned_raw_data as (
    select 
        customer_id,
        trim(upper(customer_name)) as customer_name,
        case 
            when email ~* '^[a-za-z0-9._%+-]+@[a-za-z0-9.-]+\.[a-za-z]{2,}$' 
            then lower(email)
            else null
        end as email,
        case 
            when phone ~ '^\d{10,15}$' then phone
            else regexp_replace(phone, '[^\d]', '', 'g')
        end as phone
    from raw_customer_data
    where customer_name is not null
),
-- 第二步：去重处理
deduplicated_data as (
    select distinct on (customer_name, email)
        customer_id,
        customer_name,
        email,
        phone,
        row_number() over (partition by customer_name, email order by customer_id) as rn
    from cleaned_raw_data
    where email is not null
),
-- 第三步：数据验证
validated_data as (
    select 
        customer_id,
        customer_name,
        email,
        phone,
        case 
            when length(customer_name) < 2 then 'invalid name'
            when email is null then 'invalid email'
            when length(phone) < 10 then 'invalid phone'
            else 'valid'
        end as validation_status
    from deduplicated_data
    where rn = 1
)
-- 最终结果
select 
    customer_id,
    customer_name,
    email,
    phone,
    validation_status
from validated_data
where validation_status = 'valid';

4. 递归cte处理图结构

查找图中的所有路径

-- 在有向图中查找从起点到终点的所有路径
with recursive all_paths as (
    -- 起始节点
    select 
        start_node,
        end_node,
        array[start_node, end_node] as path,
        1 as path_length
    from graph_edges
    where start_node = :start_point
    union all
    -- 扩展路径
    select 
        ap.start_node,
        ge.end_node,
        ap.path || ge.end_node,
        ap.path_length + 1
    from all_paths ap
    join graph_edges ge on ap.end_node = ge.start_node
    where not (ge.end_node = any(ap.path))  -- 避免循环
      and ap.path_length < 10  -- 限制路径长度
)
select 
    start_node,
    end_node,
    path,
    path_length
from all_paths
where end_node = :end_point  -- 过滤到目标节点的路径
order by path_length, path;