Professional Documents
Culture Documents
Hive Command
Hive Command
-----------------------------
-- Create the working database once; IF NOT EXISTS makes the script re-runnable.
CREATE DATABASE IF NOT EXISTS deepak;
-- student1: managed table, plain-text rows; fields split on Hive's default
-- delimiter Ctrl-A ('\001'), records split on newline.
CREATE TABLE IF NOT EXISTS student1 ( name string, id int , course string, year int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- student2: same schema as student1 but reads tab-separated input.
CREATE TABLE IF NOT EXISTS student2 ( name string, id int , course string, year int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- student3: EXTERNAL, so DROP TABLE removes only metadata and leaves the
-- underlying HDFS files in place.
CREATE EXTERNAL TABLE IF NOT EXISTS student3 ( name string, id int , course string,
year int )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- student4: TEMPORARY, visible only to the current session and dropped when
-- the session ends.
CREATE TEMPORARY TABLE IF NOT EXISTS student4 ( name string, id int , course
string, year int )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- Seed rows for the employee table. The original text used word-processor
-- curly quotes (‘1’) around the third value; those are not valid HiveQL
-- string-literal delimiters and are replaced with straight single quotes.
INSERT INTO TABLE employee VALUES (1, 'mca', '1'), (2, 'mca', '1'), (3, 'mca',
'2'), (4, 'mca', '2');
-- `set` with no property name prints every Hive/Hadoop variable in effect.
set ;
-- Show the current database in the CLI prompt and column headers in results.
set hive.cli.print.current.db=true;
set hive.cli.print.header=true;
-- Seed rows for the student table: (name, id, course, year).
INSERT INTO TABLE student VALUES ('arun', 1, 'mca', 1), ('anil', 2, 'mca', 1),
('sudheer', 3, 'mca', 2), ('santosh', 4, 'mca', 2);
sample data:
--------------------
venkat#math,phy,chem#math:40,phy:23,chem:60#kp,500004,pune#0,75.6
kumar#math,phy,chem#math:50,phy:20,chem:40#mundwa,500005,pune#1,78
rahul#math,phy,chem#math:30,phy:22,chem:30#hinjewadi,500003,pune#2,hi:hello
anil#math,phy,chem#math:13,phy:30,chem:60#tingre nagar,500002,pune#3,1:rank
arun#math,phy,chem#math:10,phy:20,chem:50#aundh,500001,pune#4,true
-- Mixed static + dynamic partition insert: course is fixed to 'it' (static)
-- while each row's year value selects the year sub-partition (dynamic).
-- The dynamic partition column (year) must come last in the SELECT list.
INSERT INTO TABLE student_partition PARTITION(course = 'it', year) SELECT name, id,
year FROM student WHERE course = 'it';
--------------------------------------------------------------
HOW TO WRITE BUCKET QUERIES IN HIVE
--------------------------------------------------------------
venkat 3
raj 2
appu 4
anil 8
anvith 9
sony 1
nani 6
rani 7
rajesh 10
lg 5
-- users1: rows are hash-distributed on id into 4 bucket files.
CREATE TABLE IF NOT EXISTS users1 ( name string, id int) CLUSTERED BY (id) INTO 4
BUCKETS;
-- users2: 5 buckets, and each bucket file is additionally sorted by id DESC.
CREATE TABLE IF NOT EXISTS users2 ( name string, id int) CLUSTERED BY (id) SORTED
BY (id DESC) INTO 5 BUCKETS;
-- Make INSERTs honour the declared bucket count (required on older Hive;
-- always on from Hive 2.x).
set hive.enforce.bucketing=true;
or
-- Multi-insert: scan the source table once and populate both bucketed tables.
FROM users
INSERT OVERWRITE TABLE users1 SELECT *
INSERT OVERWRITE TABLE users2 SELECT *;
--------------------------------------------------------------
HOW THE LOGIC OF BUCKET WORKS
--------------------------------------------------------------
x = hash_code(bucket_col) % no_of_buckets
1 % 5 => 1
2 % 5 => 2
3 % 5 => 3
4 % 5 => 4
5 % 5 => 0
6 % 5 => 1
7 % 5 => 2
8 % 5 => 3
9 % 5 => 4
10 % 5 => 0
1 % 4 => 1
2 % 4 => 2
3 % 4 => 3
4 % 4 => 0
5 % 4 => 1
6 % 4 => 2
7 % 4 => 3
8 % 4 => 0
9 % 4 => 1
10 % 4 => 2
1 % 3 => 1
2 % 3 => 2
3 % 3 => 0
4 % 3 => 1
5 % 3 => 2
6 % 3 => 0
7 % 3 => 1
8 % 3 => 2
9 % 3 => 0
10 % 3 => 1
1 % 2 => 1
2 % 2 => 0
3 % 2 => 1
4 % 2 => 0
5 % 2 => 1
6 % 2 => 0
7 % 2 => 1
8 % 2 => 0
9 % 2 => 1
10 % 2 => 0
Buckets:
----------------------------------------
1. no. of buckets is fixed
2. we can't change the bucket number later
3. we will use only for static data
Partitions:
----------------------------------------
1. no. of partition columns are fixed
2. we can't change the partition columns later
3. we can use for static / dynamic data also
4. static data (static partition) / dynamic data (dynamic partition)
--------------------------------------------------------------
HOW TO WRITE JOIN QUERIES IN HIVE
--------------------------------------------------------------
-- products(name, id, price): tab-separated text, one product per line.
CREATE TABLE IF NOT EXISTS products ( name string, id int , price int )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- sales(name, year, percentage): joined to products on the name column.
CREATE TABLE IF NOT EXISTS sales ( name string, year int , percentage double )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
--------------------------------------------------------------
REDUCE-SIDE-JOIN QUERIES IN HIVE
--------------------------------------------------------------
--------------------------------------------------------------
MAP-SIDE-JOIN QUERIES IN HIVE
--------------------------------------------------------------
-- MAPJOIN(t) hints Hive to load table t into memory on every mapper so the
-- join completes map-side with no reduce phase. For outer joins the
-- row-preserving side cannot be the in-memory table (see the Note below).
-- LEFT OUTER: hinting the right-hand table (sales) is allowed.
SELECT /*+ MAPJOIN(sales) */ products.* , sales.* from products LEFT OUTER JOIN
sales ON products.name = sales.name;
-- RIGHT OUTER: hinting the left-hand table (products) is allowed.
SELECT /*+ MAPJOIN(products) */ products.* , sales.* from products RIGHT OUTER JOIN
sales ON products.name = sales.name;
-- RIGHT OUTER hinting the right table: NOT allowed (counter-example).
SELECT /*+ MAPJOIN(sales) */ products.* , sales.* from products RIGHT OUTER JOIN
sales ON products.name = sales.name;
-- FULL OUTER: neither side may be map-joined (both are counter-examples).
SELECT /*+ MAPJOIN(products) */ products.* , sales.* from products FULL OUTER JOIN
sales ON products.name = sales.name;
SELECT /*+ MAPJOIN(sales) */ products.* , sales.* from products FULL OUTER JOIN
sales ON products.name = sales.name;
Note:
------------
inner join => mapjoin(left table) & mapjoin(right table) are allowed
left outer join => mapjoin(left table) not allowed & mapjoin(right table) allowed
right outer join => mapjoin(left table) allowed & mapjoin(right table) not allowed
full outer join => mapjoin(left table) & mapjoin(right table) are not allowed
--------------------------------------------------------------
SEMI-JOIN QUERIES IN HIVE
--------------------------------------------------------------
SELECT products.* from products LEFT SEMI JOIN sales ON products.name = sales.name;
hadoop 1 1000
hive 2 500
pig 3 750
hbase 4 600
--------------------------------------------------------------
HOW TO WRITE CUSTOM UDF QUERIES IN HIVE
--------------------------------------------------------------
SHOW FUNCTIONS;
--------------------------------------------------------------
WORDCOUNT SOLUTION IN HIVE USING HIVE FUNCTIONS
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS docs(line string);
--------------------------------------------------------------
LIST JARS;
--------------------------------------------------------------
--------------------------------------------------------------
--------------------------------------------------------------
HOW TO WRITE SERDE QUERIES IN HIVE
--------------------------------------------------------------
What is a SerDe?
=> SerDe is a short name for "Serializer and Deserializer."
=> Hive uses SerDe (and FileFormat) to read and write table rows.
=> HDFS files --> InputFileFormat --> <key, value> --> Deserializer --> Row
object
=> Row object --> Serializer --> <key, value> --> OutputFileFormat --> HDFS
files
--------------------------------------------------------------
--------------------------------------------------------------
--------------------------------------------------------------
HOW TO WRITE CUSTOM SERDE QUERIES IN HIVE
--------------------------------------------------------------
--------------------------------------------------------------
--------------------------------------------------------------
Regex Serde for apache log
--------------------------------------------------------------
-- Parse Apache "combined" access-log lines into columns with the contrib
-- RegexSerDe: each parenthesised group in input.regex maps, in order, to one
-- column (host, identity, user, time, request, status, size, referer, agent).
-- Lines that do not match the pattern deserialize as all-NULL rows.
CREATE TABLE apache_log (
host STRING,
identity STRING,
user STRING,
time STRING,
request STRING,
status STRING,
size STRING,
referer STRING,
agent STRING
)
-- NOTE(review): the contrib RegexSerDe also requires output.format.string for
-- serialization; the newer built-in org.apache.hadoop.hive.serde2.RegexSerDe
-- does not — confirm which one the target cluster ships.
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|
[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?",
"output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"
)
STORED AS TEXTFILE;
--------------------------------------------------------------
DIFFERENT WAYS TO STORE DATA USING HIVE QUERIES
--------------------------------------------------------------
or
--------------------------------------------------------------
or
--------------------------------------------------------------
or
-- student_rc: stored as RCFile (record-columnar) via explicit input/output
-- format classes; equivalent to the shorter `STORED AS RCFILE`.
CREATE TABLE IF NOT EXISTS student_rc
( name string, id int , course string, year int )
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat';
--------------------------------------------------------------
or
--------------------------------------------------------------
or
--------------------------------------------------------------
or
--------------------------------------------------------------
-- Single scan of STUDENT fanned out into one table per storage format so the
-- on-disk layouts can be compared (text, sequence, RC, ORC, Avro, Parquet).
FROM STUDENT
INSERT OVERWRITE TABLE student_text SELECT *
INSERT OVERWRITE TABLE student_seq SELECT *
INSERT OVERWRITE TABLE student_rc SELECT *
INSERT OVERWRITE TABLE student_orc SELECT *
INSERT OVERWRITE TABLE student_avro SELECT *
INSERT OVERWRITE TABLE student_parquet SELECT *
;
--------------------------------------------------------------
HOW TO WRITE HIVE QUERIES USING TRANSFORM
--------------------------------------------------------------
SELECT TRANSFORM(name, id, year) USING '/bin/cat' AS (name, id, year) FROM student;
-- Stream each row through `grep cse` so only rows whose tab-separated text
-- contains the substring 'cse' survive. The original line carried stray
-- non-comment text after the semicolon (".//grep serching the speciffic
-- word"), which is not valid HiveQL; it is folded into this comment.
SELECT TRANSFORM(name, id, course) USING '/bin/grep cse' AS (name, id, course) FROM
student;
--------------------------------------------------------------
HOW TO WRITE ALTER QUERIES IN HIVE
--------------------------------------------------------------
-- Throw-away table demonstrating basic column DDL.
CREATE TABLE IF NOT EXISTS test ( a string, b int , c int );
-- NOTE(review): the ALTER statements below act on `mytest`, which the
-- transcripts show as already having columns (x string, y double) — it is
-- not the `test` table created above; confirm it exists before running.
-- ADD COLUMNS appends the new columns after the existing ones.
ALTER TABLE mytest ADD COLUMNS (c1 INT COMMENT 'c1 column', c2 STRING COMMENT 'c2
column');
hive> describe mytest;
x string
y double
c1 int c1 column
c2 string c2 column
-- CHANGE renames c1 -> c11 and retypes it to STRING (metadata-only change).
ALTER TABLE mytest CHANGE c1 c11 STRING COMMENT 'new c11 column';
hive> describe mytest;
x string
y double
c11 string new c11 column
c2 string c2 column
-- AFTER moves the (renamed) column to the position directly after x.
ALTER TABLE mytest CHANGE c2 c21 STRING COMMENT 'new c21 column' after x;
hive> describe mytest;
x string
c21 string new c21 column
y double
c11 string new c11 column
-- FIRST moves the column to the front of the schema.
ALTER TABLE mytest CHANGE c21 c2 STRING COMMENT 'new c2 column' FIRST;
hive> describe mytest;
c2 string new c2 column
x string
y double
c11 string new c11 column
or
--------------------------------------------------------------
DIFFERENT WAYS TO RUN HIVE QUERIES
--------------------------------------------------------------
Running HCatalog
--------------------------------------------------------------
# Start the HCatalog server, then open the HCatalog command-line client.
# NOTE(review): hcat_server.sh normally takes a start/stop argument — confirm.
$HIVE_HOME/hcatalog/sbin/hcat_server.sh
$HIVE_HOME/hcatalog/bin/hcat
Running WebHCat
--------------------------------------------------------------
# Start / stop the WebHCat (Templeton) REST server; default port 50111.
$HIVE_HOME/hcatalog/sbin/webhcat_server.sh start
$HIVE_HOME/hcatalog/sbin/webhcat_server.sh stop
# REST endpoints: the API root, and a DDL describe of table `student` in the
# default database as user hadoop (URL shown wrapped across two lines).
http://localhost:50111/templeton/v1
http://localhost:50111/templeton/v1/ddl/database/default/table/student?
user.name=hadoop
SHOW ROLES;
--------------------------------------------------------------
ACID functionality in HIVE
--------------------------------------------------------------
Add the below properties in "hive-site.xml" file & restart hive server
<!-- ACID (transactional table) support: all of the properties below must be
     set together in hive-site.xml, then the Hive server restarted. -->
<property>
<name>hive.in.test</name>
<value>false</value>
</property>
<!-- The transaction lock manager requires concurrency support. -->
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>
<!-- ACID tables must be bucketed; enforce bucketing on insert. -->
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<!-- Run the compaction initiator thread on this instance. -->
<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>
<!-- Allow fully dynamic partition inserts (no static partition needed). -->
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<!-- DbTxnManager supplies the transactions and locks ACID DML requires. -->
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<!-- Number of background worker threads performing compactions. -->
<property>
<name>hive.compactor.worker.threads</name>
<value>2</value>
</property>
-- ACID UPDATE: valid only on a transactional table (ORC, bucketed, with
-- 'transactional'='true') and with the ACID properties above configured.
UPDATE student_acid
SET year = 3, course = 'mech'
WHERE id = 4 ;
==================================================================
-- Load a local file into a partitioned table with both partition columns
-- resolved dynamically from the data (requires nonstrict dynamic-partition
-- mode; course and year must be the trailing columns of each input row).
-- NOTE(review): LOAD DATA with unspecified partition values needs Hive 3.0+
-- — confirm the target version supports it.
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student1.txt' INTO TABLE
student_partition PARTITION(course, year);
==================================================================
use deepak;
==================================================================
==================================================================