DataBaser.Net: CASEWHEN순서

case when 문잘을 다음과 같이 쓴다.

case 
	when <= 10 then '1 ~ 10'
	when <= 20 then '11 ~ 20'
	else '21 ~
end

그냥 이렇게 써도 된다. 퍼포먼스 문제가 거의 없다.
만약 하드웨어가 꾸지고, 조금이라도 성능을 올려보고자 한다면 데이터의 분포를 고려해야 한다.
예를 들어,

drop table #temp
create table #temp(num int)
go

insert #temp 
select top 10000000 1
from master..spt_values a
    cross join master..spt_values b
    cross join master..spt_values c
    cross join master..spt_values d
    cross join master..spt_values e
go
insert #temp 
select top 5000000 2
from master..spt_values a
    cross join master..spt_values b
    cross join master..spt_values c
    cross join master..spt_values d
    cross join master..spt_values e
go
insert #temp 
select top 1000000 3
from master..spt_values a
    cross join master..spt_values b
    cross join master..spt_values c
    cross join master..spt_values d
    cross join master..spt_values e
go

select
    num
,   count(*) cnt
from #temp
group by
    num
order by 1

select 결과

num         cnt
----------- -----------
1           10000000
2           5000000
3           1000000

분포를 고려해서 조건을 재배치 하면 약간의 성능 향상을 기대할 수 있다.
아래는 조건을 변경했을 때의 100회씩 실행했다.

--drop table #rs
create table #rs(
    grp char(1)
,   cnt int
)

--drop table #stat
create table #stat(method int, exec_time int)


set nocount on
set statistics io off
set statistics time off

--방법1
declare 
    @i int = 1
,   @begin_dt datetime
,   @end_dt datetime

truncate table #rs;
while(@i <= 100)
begin 
    set @begin_dt = getdate()

    insert #rs
    select
        case 
            when num = 1 then 'a'
            when num = 2 then 'b'
            else 'c'
        end
    ,   count(*) cnt
    from #temp
    group by
        case 
            when num = 1 then 'a'
            when num = 2 then 'b'
            else 'c'
        end   
    set @end_dt = getdate()

    insert #stat
    select 1, datediff(ms, @begin_dt, @end_dt)

    set @i += 1
end
go    


--방법2
declare 
    @i int = 1
,   @begin_dt datetime
,   @end_dt datetime

truncate table #rs;
while(@i <= 100)
begin 
    set @begin_dt = getdate()

    insert #rs
    select
        case 
            when num = 3 then 'c'
            when num = 2 then 'b'
            else 'a'
        end
    ,   count(*) cnt
    from #temp
    group by
        case 
            when num = 3 then 'c'
            when num = 2 then 'b'
            else 'a'
        end   
    
    set @end_dt = getdate()

    insert #stat
    select 2, datediff(ms, @begin_dt, @end_dt)

    set @i += 1
end
go

측정을 해보면..

select
    method 방법
,   avg(exec_time) 평균
,   stdev(exec_time) 표준편차
from #stat
group by
    method

방법2가 조금 더 실행시간이 긴 것을 볼 수 있다.

방법          평균          표준편차
----------- ----------- ----------------------
1           507         143.08132144153
2           521         123.3129123143

num=1에 해당되는 행은 1천만 건이고, num=3은 1백만 건이다. 그러므로 아래와 같이 하면 첫 번째 when num = 1 then 'a'에 대부분 걸리게 된다.

        case 
            when num = 1 then 'a' --------(1)
            when num = 2 then 'b'
            else 'c'              --------(3)
        end

하지만, 아래와 같이하면 num=1에 해당되는 1천만 건의 조건 평가가 else까지 내려가야 한다. 즉, if를 몇 번 태우느냐의 차이다.

        case 
            when num = 3 then 'c'
            when num = 2 then 'b'
            else 'a'
        end

Retrieved from http://www.databaser.net/moniwiki/wiki.php/CASEWHEN순서
last modified 2018-04-13 23:12:53