Combining multiple hive tables into one
INSERT OVERWRITE TABLE FINAL_TBL
SELECT main_union.id, main_union.output_string
FROM (
select id, concat_ws("|",col1,col2,col3) output_string from table1
UNION ALL
select id, concat_ws("|",col1,col2,col3) output_string from table2
INSERT OVERWRITE TABLE FINAL_TBL
SELECT main_union.id, main_union.output_string
FROM (
select id, concat_ws("|",col1,col2,col3) output_string from table1
UNION ALL
select id, concat_ws("|",col1,col2,col3) output_string from table2
UNION ALL
select id, concat_ws("|",col1,col2,col3) output_string from table3
UNION ALL
select id, concat_ws("|",col1,col2,col3) output_string from table4
) main_union
CLUSTER BY main_union.id;
Solution in hive instead of exists clause
for e.g
Employee = individual works at institute, it has many departments.
joe works at institute1 - dept1
joe works at institute2 - dept3
jason works at institute1 - dept1
jason works at institute1 - dept2
UNION ALL
select id, concat_ws("|",col1,col2,col3) output_string from table4
) main_union
CLUSTER BY main_union.id;
Solution in hive instead of exists clause
for e.g
Employee = individual works at institute, it has many departments.
joe works at institute1 - dept1
joe works at institute2 - dept3
jason works at institute1 - dept1
jason works at institute1 - dept2
Query:
Select individual from individual ind join ind_related ir on ind.id = ir.id
join institute ins on ir.relatedkey = inst.instituteid
left semi join
dept d on d.deptid=ir.relatedkey
No comments:
Post a Comment