04 Hive Class 10 Nov 2019

Date:-10-Nov-2019
-------------------
Today's Agenda
-----------------------
00)Hive partitions overview
0)insert static partitions
1)Dynamic partitions
2)Subpartitions
3)Bucketing
4)Partitions with bucketing
5)Bucketing logic for INT
6)Bucketing logic for Date
7) Bucketing logic for String
Hive partitions overview

--------------------------
-- Staging table over the comma-delimited transaction file. All ten columns,
-- including country, are ordinary (non-partition) columns here.
create table txns_source_000 (
    txnno    INT,
    txndate  STRING,
    custno   INT,
    amount   DOUBLE,
    category STRING,
    product  STRING,
    city     STRING,
    state    STRING,
    spendby  STRING,
    country  string
)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/user/cloudera/source_000';

-- Load the local text file into the staging table.
load data local inpath '/home/cloudera/hive_directory/000000_0.txt'
into table txns_source_000;
0)insert static partitions

--------------------------------
-- Static partition insert: the partition value is fixed in the PARTITION
-- clause, so country is filtered on but not selected.
insert into table txnrecords_part_02 partition (country = 'IND')
select
    txnno,
    txndate,
    custno,
    amount,
    category,
    product,
    city,
    state,
    spendby
from txnrecords_part_01
where country = 'IND';
1)Dynamic partitions
=====================
Dynamic partitioning takes the partition value from the last column of the select list in the insert statement.
-- Dynamic partition insert: Hive fills partition(country) from the LAST
-- column of the select list, so country must come after spendby. With
-- spendby last, its values would be used as the country partition values.
insert into table txnrecords_part_02 partition (country)
select
    txnno,
    txndate,
    custno,
    amount,
    category,
    product,
    city,
    state,
    spendby,
    country
from txnrecords_part_01;
Note ;-
----------
last column of the select query in the insert statement will become partition.
class assignment
-----------
create a directory in hdfs --- /user/cloudera/zeyo_dynamic_dir
Create a partitioned table on top of that directory -- zeyo_dyn_table
Insert the data into partitioned table with partition specified dynamically
-- Table partitioned by country; country is declared only in PARTITIONED BY,
-- not in the regular column list.
create table zeyo_dyn_table (
    txnno    INT,
    txndate  STRING,
    custno   INT,
    amount   DOUBLE,
    category STRING,
    product  STRING,
    city     STRING,
    state    STRING,
    spendby  STRING
)
partitioned by (country string)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/user/cloudera/zeyo_dynamic_dir/';
-- Dynamic partition insert: the partition column (country) must be the last
-- column of the select list, after spendby.
insert into table zeyo_dyn_table partition (country)
select
    txnno,
    txndate,
    custno,
    amount,
    category,
    product,
    city,
    state,
    spendby,
    country
from txns_source_000;
got below error

----------------
FAILED: SemanticException [Error 10096]: Dynamic partition strict mode requires at
least one static partition column. To turn this off set
hive.exec.dynamic.partition.mode=nonstrict
-- Strict mode rejects an insert in which every partition column is dynamic.
-- The mode must be 'nonstrict' (as the error message says), not 'true'.
-- hive.exec.dynamic.partition is already true by default on Hive 0.9.0+;
-- it is set explicitly here so the notes also work on older versions.
hive> set hive.exec.dynamic.partition=true;
hive> set hive.exec.dynamic.partition.mode=nonstrict;
-- Table partitioned by country, stored under its own HDFS directory.
create table zeyo_dyn_table_country (
    txnno    INT,
    txndate  STRING,
    custno   INT,
    amount   DOUBLE,
    category STRING,
    product  STRING,
    city     STRING,
    state    STRING,
    spendby  STRING
)
partitioned by (country string)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/user/cloudera/zeyo_dynamic_dir_country/';
-- Dynamic partition insert: country is the last selected column, so it
-- supplies the value for partition(country).
insert into table zeyo_dyn_table_country partition (country)
select
    txnno,
    txndate,
    custno,
    amount,
    category,
    product,
    city,
    state,
    spendby,
    country
from txns_source_000;
Note:-
3 country partition directories (one per distinct country value) are created under the path /user/cloudera/zeyo_dynamic_dir_country/
2)Sub-partitions
---------------
Class_assignment
-------------------
create a directory in hdfs ---

/user/cloudera/zeyo_dynamic_dir_country/Dynamic_sub/
Create a partitioned table on top of that directory --

zeyo_dyn_table_country_dynamic_sub
-- Two-level partitioned table: spendby is the outer partition and country
-- the inner one, following the order given in PARTITIONED BY.
create table zeyo_dyn_table_country_dynamic_sub (
    txnno    INT,
    txndate  STRING,
    custno   INT,
    amount   DOUBLE,
    category STRING,
    product  STRING,
    city     STRING,
    state    STRING
)
partitioned by (spendby STRING, country string)
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/user/cloudera/zeyo_dynamic_dir_country/Dynamic_sub/';
-- Dynamic insert into both partition levels: the last two selected columns
-- (spendby, country) map in order onto partition(spendby, country).
insert into table zeyo_dyn_table_country_dynamic_sub partition (spendby, country)
select
    txnno,
    txndate,
    custno,
    amount,
    category,
    product,
    city,
    state,
    spendby,
    country
from txns_source_000;
Note :- for each spendby value a spendby=<value> directory is created in the hdfs
directory, and inside each of them 3 country-wise subdirectories (country=<value>) are created
3)Bucketing (Important in interviews)(Creating a file )

--------------------------------------
>>Partitioning is creating directories
>>Bucketing is creating a file
>>If cardinality (many distinct values, like the first 3 columns in the txns data) is
high in the data, it is better to go for bucketing
>>If cardinality (few distinct values, like the spendby and country columns in the txns text file) is low, then
go for partitioning.
>>>
>>no of buckets equals to no of reducers

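>> it works on modulo: bucket number = hash(bucketing column value) mod (number of buckets); for an INT column the hash is the value itself.
All rows with remainder 0 go into the first bucket file, all rows with remainder 1 into the second bucket file, etc.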
-- Bucketed table: rows are distributed into 10 bucket files by txnno.
create table txns_bucket (
    txnno   INT,
    txndate STRING,
    custno  INT
)
clustered by (txnno) into 10 buckets
row format delimited
    fields terminated by ','
    lines terminated by '\n'
stored as textfile
location '/user/cloudera/txsn_bucket';

-- Must be set BEFORE the insert: on Hive 1.x and earlier the insert only
-- honours the table's bucket definition when this is true. (Hive 2.0+
-- reportedly always enforces bucketing, making this a no-op there.)
set hive.enforce.bucketing = true;

insert into table txns_bucket
select
    txnno,
    txndate,
    custno
from txns_source_000;

04 Hive Class 10 Nov 2019

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

04 Hive Class 10 Nov 2019

Uploaded by

Copyright:

Available Formats

Date:-10-Nov-2019

Hive partitions overview

load data local inpath '/home/cloudera/hive_directory/000000_0.txt' into table

0)insert static partitiones

Dynamic spliting will take last column in the insert statement .

insert into table txnrecords_part_02 partition(country ) select txnno , txndate ,

create table zeyo_dyn_table(txnno INT, txndate STRING, custno INT, amount

got below error

create table zeyo_dyn_table_country(txnno INT, txndate STRING, custno INT, amount

insert into table zeyo_dyn_table_country partition(country ) select txnno , txndate

create a directory in hdfs ---

Create a partitioned table on top of that directory --

create table zeyo_dyn_table_country_dynamic_sub(txnno INT, txndate STRING, custno

insert into table zeyo_dyn_table_country_dynamic_sub partition(spendby,country )

3)Bucketing (Important in interwies)(Creating a file )

>>no of buckets equals to no of reducers

insert into table txns_bucket select txnno , txndate , custno from

set hive.enforce.bucketing = true;

You might also like