Table of contents
smaller size
Acid properties (insert update delete)
Columnar storage for fast retrieval
Tables used
select * from orc_table; -- stored as ORC format table
select * from stage_pwb_table; -- stored as text format table
Create ORC table
create table orc_table (
pid INT,
pname STRING,
drug STRING,
gender STRING,
tot_amt INT,
country STRING
) row format delimited fields terminated by ',' stored as orc;
Insert data form stage_pwb_table
table
insert into table orc_table select * from stage_pwb_table;
stage_pwb_table
create table stage_pwb_table (
pid INT,
pname STRING,
drug STRING,
gender STRING,
tot_amt INT,
country STRING
) row format delimited fields terminated by ',' stored as textfile;
-- load data to the stage table
load data local inpath '/home/navin/Documents/data1.txt'
into table stage_pwb_table;
Time difference
select count(*) from orc_table where country='ind';
select count(*) from stage_pwb_table where country='ind';
here, I have used a very small data set. So that couldn't able to see the big difference
Reference:
https://www.youtube.com/watch?v=Mw5GynxCWoQ&t=5s
ย