99人参与 • 2024-11-25 • 大数据
-- Two-stage aggregation that spreads a skewed GROUP BY key: row count per label.
-- Stage 1 groups by (rd, label), where rd = round(rand(), 2) is a random salt,
-- so the rows of a single hot label are split across many groups.
-- Stage 2 adds the partial counts back together per label.
-- The output alias is backtick-quoted because ALL is a reserved keyword in Hive.
select
    label,
    sum(cnt) as `all`
from (
    select
        rd,
        label,
        count(1) as cnt
    from (
        select
            label,
            round(rand(), 2) as rd
        from tmp1
    ) as salted
    group by rd, label
) as partial_counts
group by label;
-- Two-stage aggregation that spreads a skewed GROUP BY key: row count per source
-- for day = '2022-01-01'.
-- The salt is kept as its own integer column instead of being concatenated into
-- the key and split off again:
--   * an integer salt bounds the number of stage-1 groups per source (a raw
--     rand() * 100 double is practically unique per row, so nothing would be
--     pre-aggregated);
--   * a source value that itself contains '_' is no longer truncated when the
--     original key is recovered.
-- NOTE(review): assumes rand() returns values in [0, 1), giving salts 0..99.
select
    source,
    sum(cnt) as cnt
from (
    select
        source,
        salt,
        count(1) as cnt
    from (
        select
            source,
            cast(rand() * 100 as int) as salt
        from test_table
        where day = '2022-01-01'
    ) salted
    group by source, salt
) partial_counts
group by source;
-- Skew-resistant join + aggregation: sum of tmp1.value * tmp2.value per label.
-- The tmp1 side gets a random integer salt rd; the tmp2 side is replicated once
-- per possible salt value via LATERAL VIEW explode, so joining on (rd, label)
-- spreads a hot label across several join keys without losing matches.
-- Fixes relative to the previous version:
--   * the outer query sums cnt (the column the subquery actually exposes)
--     instead of the non-existent value column;
--   * the salt is an integer bucket 0..9 that matches the exploded list exactly;
--     round(rand(), 1) could also yield 1.0, which had no partner row and was
--     silently dropped by the inner join;
--   * the output alias is backtick-quoted because ALL is a reserved keyword.
-- NOTE(review): assumes rand() returns values in [0, 1).
select
    label,
    sum(cnt) as `all`
from (
    select
        rd,
        label,
        sum(value) as cnt
    from (
        select
            salted.rd as rd,
            salted.label as label,
            salted.value * replicated.value as value
        from (
            select
                id,
                cast(floor(rand() * 10) as int) as rd,
                label,
                value
            from tmp1
        ) as salted
        join (
            select
                id,
                cast(salt as int) as rd,
                label,
                value
            from tmp2
            lateral view explode(split('0,1,2,3,4,5,6,7,8,9', ',')) mytable as salt
        ) as replicated
            on salted.rd = replicated.rd
            and salted.label = replicated.label
    ) as joined
    group by rd, label
) as partial_sums
group by label;
-- Skew-resistant join: count of non-null imei per (source, source_name) for
-- day = '2022-01-01'.
-- t1 prefixes each source with a random salt 0..9; t2 is replicated once per
-- salt value so every salted key still finds its match. The inner query
-- aggregates per salted key, the outer query merges the salts back per source.
-- Fixes relative to the previous version:
--   * LATERAL VIEW now precedes WHERE (Hive requires it directly after the
--     table in the FROM clause);
--   * new_source / source_name / imei are qualified, since both join sides
--     expose a new_source column;
--   * the exploded list is 0..9, matching cast(rand() * 10 as int); the extra
--     '10' replica could never be matched.
-- NOTE(review): t2 reads test_table_1, the same table as t1 — this looks like
-- it should be a separate dimension table; confirm the intended table name.
select
    source,
    source_name,
    sum(cnt) as cnt
from (
    select
        t1.source,
        t1.new_source,
        nvl(t2.source_name, '未知') as source_name,
        count(t1.imei) as cnt
    from (
        select
            imei,
            source,
            concat(cast(rand() * 10 as int), '_', source) as new_source
        from test_table_1
        where day = '2022-01-01'
    ) t1
    inner join (
        select
            source_name,
            concat(prefix, '_', source) as new_source
        from test_table_1
        lateral view explode(split('0,1,2,3,4,5,6,7,8,9', ',')) b as prefix
        where day = '2022-01-01'
    ) t2
        on t1.new_source = t2.new_source
    group by
        t1.source,
        t1.new_source,
        nvl(t2.source_name, '未知')
) tta
group by source, source_name;
-- Before optimization: plain left join of the fact table to the label lookup.
-- Rows whose lable_id has no lookup entry get the label_name '未知'.
create table test.tmp_table_test_all as
select
    t1.imei,
    t1.lable_id,
    nvl(t2.label_name, '未知') as label_name
from tmp_table_1 t1
left join (
    select lable_id, label_name
    from tmp_table_2
    where day = '2024-01-01'
) t2
    on t1.lable_id = t2.lable_id
where t1.day = '2024-01-01';

-- After optimization: split the join by key frequency.
--   * tmp_table_test_1 estimates the 100 most frequent lable_id values from a
--     5% sample of the day's data (sampling keeps the extra scan cheap).
--   * Branch 1 handles every row whose lable_id is NOT in that hot set with the
--     ordinary left join.
--   * Branch 2 handles the hot rows by joining against a lookup that is limited
--     to the (at most 100) hot keys.
-- Fixes relative to the previous version:
--   * TABLESAMPLE is placed before the table alias and the sample is restricted
--     to the same day as the main query;
--   * lable_id / label_name references are qualified (both join sides expose
--     lable_id);
--   * the hot-key lookup now projects label_name and filters tmp_table_2 by day
--     inside the joined subquery, so hot keys without a lookup entry are kept
--     with '未知' instead of being dropped.
-- NOTE(review): both branches must see the same hot-key set; if the engine
-- re-evaluates the CTE per reference and the sample is not repeatable, consider
-- materializing tmp_table_test_1 into a table first.
create table test.tmp_table_test_all_new as
with tmp_table_test_1 as (
    select
        lable_id,
        count(1) as cnt
    from tmp_table_1 tablesample(5 percent) t1
    where day = '2024-01-01'
    group by lable_id
    order by cnt desc
    limit 100
)
select
    t1.imei,
    t1.lable_id,
    nvl(t3.label_name, '未知') as label_name
from tmp_table_1 t1
left join tmp_table_test_1 t2
    on t1.lable_id = t2.lable_id
left join (
    select lable_id, label_name
    from tmp_table_2
    where day = '2024-01-01'
) t3
    on t1.lable_id = t3.lable_id
where t1.day = '2024-01-01'
    and t2.lable_id is null
union all
select
    t1.imei,
    t1.lable_id,
    nvl(t3.label_name, '未知') as label_name
from tmp_table_1 t1
inner join (
    select
        hot.lable_id,
        dim.label_name
    from tmp_table_test_1 hot
    left join (
        select lable_id, label_name
        from tmp_table_2
        where day = '2024-01-01'
    ) dim
        on hot.lable_id = dim.lable_id
) t3
    on t1.lable_id = t3.lable_id
where t1.day = '2024-01-01';
版权声明:本文内容由互联网用户贡献,该文观点仅代表作者本人。本站仅提供信息存储服务,不拥有所有权,不承担相关法律责任。 如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 2386932994@qq.com 举报,一经查实将立刻删除。