Hive Commands

HOW TO CREATE DATABASE
-----------------------------
CREATE DATABASE IF NOT EXISTS deepak;
CREATE DATABASE IF NOT EXISTS deepak

COMMENT 'My database name is deepak'
LOCATION '/hive/deepak'
WITH DBPROPERTIES ('key1' = 'value1', 'key2' = 'value2');
DESCRIBE DATABASE deepak;
DESCRIBE DATABASE EXTENDED deepak;
HOW TO DROP DATABASE

-----------------------------
DROP DATABASE IF EXISTS deepak;
DROP DATABASE IF EXISTS deepak CASCADE;
DROP DATABASE IF EXISTS deepak RESTRICT;
HOW TO USE DATABASE

-----------------------------
USE deepak;
HOW TO ALTER DATABASE

-----------------------------
ALTER DATABASE deepak SET DBPROPERTIES ('key1' = 'value11', 'key3' = 'value3');
ALTER DATABASE deepak SET OWNER USER hadoop;
ALTER DATABASE deepak SET OWNER ROLE hadoop;
HOW TO CREATE TABLE

-----------------------------
CREATE TABLE IF NOT EXISTS student1 ( name string, id int , course string, year int
);
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
)
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
CREATE EXTERNAL TABLE IF NOT EXISTS student3 ( name string, id int , course string,
year int )
STORED AS TEXTFILE;
CREATE TEMPORARY TABLE IF NOT EXISTS student4 ( name string, id int , course
string, year int )
STORED AS TEXTFILE;
HOW TO KNOW INFORMATION ABOUT TABLE

-----------------------------
DESCRIBE tbl_name;
DESCRIBE EXTENDED tbl_name;
DESCRIBE FORMATTED tbl_name;
LOAD DATA LOCAL INPATH '/home/deepak/work/hive_inputs/student.txt' OVERWRITE INTO

TABLE student1;

TABLE student2;
HOW TO LOAD DATA INTO TABLE

-----------------------------
TABLE student2;
LOAD DATA LOCAL INPATH '/home/deepak/work/hive_inputs/student.txt' INTO TABLE

student2;
hadoop fs -mkdir -p /home/deepak/work/hive_inputs
hadoop fs -put /home/deepak/work/hive_inputs/student.txt

/home/deepak/work/hive_inputs/student.txt
LOAD DATA INPATH '/home/deepak/work/hive_inputs/student.txt' OVERWRITE INTO TABLE

student2;

/home/deepak/work/hive_inputs/student.txt
LOAD DATA INPATH '/home/deepak/work/hive_inputs/student.txt' INTO TABLE student2;

HOW TO USE EXTERNAL TABLE
-----------------------------
hadoop fs -mkdir /hive/externaldata

/hive/externaldata/student.txt
CREATE EXTERNAL TABLE IF NOT EXISTS studentext1

( name string, id int , course string, year int )
STORED AS TEXTFILE;
SELECT * FROM studentext1;
DESCRIBE FORMATTED studentext1;
CREATE EXTERNAL TABLE IF NOT EXISTS studentext2

STORED AS TEXTFILE
LOCATION '/hive/externaldata';
SELECT * FROM studentext2;
DESCRIBE FORMATTED studentext2;
CREATE TABLE IF NOT EXISTS student

(
name string comment 'student name',
id int comment 'student id',
course string comment 'student course',
year int comment 'student year'
)
COMMENT 'My table name is student'
STORED AS TEXTFILE
LOCATION '/hive/deepak/student'
TBLPROPERTIES ('key1'='value1', 'key2'='value2')
;

TABLE student;
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student.txt' OVERWRITE INTO

TABLE student;
INSERT INTO TABLE employee VALUES (1, 'mca', '1'), (2, 'mca', '1'), (3, 'mca',
'2'), (4, 'mca', '2');
set ;
set hive.cli.print.current.db=true;
set hive.cli.print.header=true;
INSERT INTO TABLE student VALUES ('arun', 1, 'mca', 1), ('anil', 2, 'mca', 1),
('sudheer', 3, 'mca', 2), ('santosh', 4, 'mca', 2);
UPDATE student SET year = 3, name = 'san' WHERE id = 4 ;
HOW TO VERIFY SCHEMA IN METASTORE DATABASE

------------------------------------------
USE hive_mysql_db;
SELECT * FROM DBS;
SELECT * FROM TBLS;
SELECT * FROM DBS join TBLS on DBS.DB_ID = TBLS.DB_ID;

SELECT DBS.NAME, TBLS.TBL_NAME FROM DBS join TBLS on DBS.DB_ID = TBLS.DB_ID;
COMPLEX DATA TYPES

------------------------------------------
array_type : ARRAY < data_type >
map_type : MAP < primitive_type, data_type >
struct_type : STRUCT < col_name : data_type [COMMENT col_comment], ...>
union_type : UNIONTYPE < data_type, data_type, ... >
CREATE TABLE IF NOT EXISTS studentdata

(
name string comment 'student name',
subjects array<string> comment 'student subjects',
marks map<string,int> comment 'student marks',
address struct< loc : string comment 'student location', pincode : int comment
'student pincode', city : string comment 'student city'>,
details uniontype<double, int, array<string>, struct<a:int,b:string>, boolean>
comment 'student details'
)
COMMENT 'My table name is studentdata'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '#'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE
LOCATION '/hive/deepak/studentdata'
TBLPROPERTIES ('key1'='value1', 'key2'='value2')
;
sample data:
--------------------
venkat#math,phy,chem#math:40,phy:23,chem:60#kp,500004,pune#0,75.6
kumar#math,phy,chem#math:50,phy:20,chem:40#mundwa,500005,pune#1,78
rahul#math,phy,chem#math:30,phy:22,chem:30#hinjewadi,500003,pune#2,hi:hello
anil#math,phy,chem#math:13,phy:30,chem:60#tingre nagar,500002,pune#3,1:rank
arun#math,phy,chem#math:10,phy:20,chem:50#aundh,500001,pune#4,true
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/studentdata.txt' OVERWRITE

INTO TABLE studentdata;
SELECT * FROM studentdata;
SELECT name, subjects[0], marks['math'], address.pincode, details FROM

studentdata;
INSERT OVERWRITE LOCAL DIRECTORY '/home/deepak/work/hive_inputs/studentdata'

INSERT OVERWRITE LOCAL DIRECTORY '/home/deepak/work/hive_inputs/studentdata_1'

SELECT name, subjects[0], marks['math'], address.pincode, details FROM
studentdata;
INSERT OVERWRITE LOCAL DIRECTORY '/home/deepak/work/hive_inputs/studentdata_2'

FIELDS TERMINATED BY '#'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':'
HOW TO CREATE PARTITION TABLE

------------------------------------------
CREATE TABLE IF NOT EXISTS student_partition ( name string , id int )
PARTITIONED BY ( course string , year int)
STORED AS TEXTFILE
;
HOW TO WRITE HIVE STATIC PARTITION QUERIES

--------------------------------------------------------------
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student_cse_1.txt' OVERWRITE
INTO TABLE student_partition PARTITION(course = 'cse', year = 1);
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student_cse_2.txt' OVERWRITE

INTO TABLE student_partition PARTITION(course = 'cse', year = 2);
HOW TO WRITE PARTITION TABLE QUERIES

--------------------------------------------------------------
SHOW PARTITIONS student_partition;
SELECT * FROM student_partition;
SELECT * FROM student_partition WHERE year = 1;

SELECT * FROM student_partition WHERE course = 'cse';
SELECT * FROM student_partition WHERE course = 'mech' and year = 1;

SELECT * FROM student_partition WHERE course = 'mech' or year = 1;
HOW TO WRITE HIVE DYNAMIC PARTITION QUERIES

--------------------------------------------------------------
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE student_partition PARTITION(course, year) select * from

student;
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student1.txt' INTO TABLE

student;
INSERT INTO TABLE student_partition PARTITION(course = 'it', year) SELECT name, id,
year FROM student WHERE course = 'it';
INSERT INTO TABLE student_partition PARTITION(course = 'ece', year = 3) SELECT

name, id FROM student WHERE course = 'it' AND year = 3;
INSERT OVERWRITE TABLE student_partition PARTITION(course = 'ece', year = 3) SELECT

name, id FROM student WHERE course = 'it' AND year = 3;
INSERT INTO TABLE student_partition PARTITION(course = 'mca', year = 1) VALUES

('arun', 1), ('anil', 2), ('sudheer', 3), ('santosh', 4);
BELOW HIVE QUERIES ARE NOT ALLOWED

--------------------------------------------------------------
Note:- Dynamic partition cannot be the parent of a static partition '3'

TABLE student_partition PARTITION(course = 'cse', year);

TABLE student_partition PARTITION(course, year = 1);

TABLE student_partition PARTITION(course, year);
INSERT INTO TABLE student_partition PARTITION(course , year = 3) SELECT name, id,

course FROM student WHERE year = 3;
DIFFERENT WAYS TO EXECUTE HIVE QUERIES

--------------------------------------------------------------
hive -e 'USE deepak; SELECT * FROM student;'
hive -S -e 'USE deepak; SELECT * FROM student;'
hadoop@hadoop:~$ hive -f '/home/deepak/work/hive_inputs/partition.hql';

hive> source /home/deepak/work/hive_inputs/partition.hql;
hadoop@hadoop:~$ hadoop fs -ls /

hive> dfs -ls / ;
hadoop@hadoop:~$ ls -l /home
hive> ! ls -l /home;
--------------------------------------------------------------
HOW TO WRITE BUCKET QUERIES IN HIVE
--------------------------------------------------------------
CREATE EXTERNAL TABLE IF NOT EXISTS users ( name string, id int)

STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/users.txt' OVERWRITE INTO

TABLE users;
SELECT * FROM users;
venkat 3
raj 2
appu 4
anil 8
anvith 9
sony 1
nani 6
rani 7
rajesh 10
lg 5
CREATE TABLE IF NOT EXISTS users1 ( name string, id int) CLUSTERED BY (id) INTO 4
BUCKETS;
CREATE TABLE IF NOT EXISTS users2 ( name string, id int) CLUSTERED BY (id) SORTED
BY (id DESC) INTO 5 BUCKETS;
set hive.enforce.bucketing=true;
INSERT OVERWRITE TABLE users1 SELECT * FROM users;

INSERT OVERWRITE TABLE users2 SELECT * FROM users;
or
FROM users
INSERT OVERWRITE TABLE users1 SELECT *
INSERT OVERWRITE TABLE users2 SELECT *;
--------------------------------------------------------------
HOW THE LOGIC OF BUCKET WORKS
--------------------------------------------------------------
x = hash_code(bucket_col) % no_of_buckets
bucket m out of n => if ( x + 1 == m) then print ( bucket_col )

bucket m out of n => if ( x == m - 1) then print ( bucket_col )
1 % 5 => 1
2 % 5 => 2
3 % 5 => 3
4 % 5 => 4
5 % 5 => 0
6 % 5 => 1
7 % 5 => 2
8 % 5 => 3
9 % 5 => 4
10 % 5 => 0
part_0 => 5,10 => bucket 1 out of 5

1 % 4 => 1
2 % 4 => 2
3 % 4 => 3
4 % 4 => 0
5 % 4 => 1
6 % 4 => 2
7 % 4 => 3
8 % 4 => 0
9 % 4 => 1
10 % 4 => 2

part_1 => 1,5,9 => bucket 2 out of 4
1 % 3 => 1
2 % 3 => 2
3 % 3 => 0
4 % 3 => 1
5 % 3 => 2
6 % 3 => 0
7 % 3 => 1
8 % 3 => 2
9 % 3 => 0
10 % 3 => 1

part_1 => 1,4,7,10 => bucket 2 out of 3
1 % 2 => 1
2 % 2 => 0
3 % 2 => 1
4 % 2 => 0
5 % 2 => 1
6 % 2 => 0
7 % 2 => 1
8 % 2 => 0
9 % 2 => 1
10 % 2 => 0
part_0 => 2,4,6,8,10 => bucket 1 out of 2

part_1 => 1,3,5,7,9 => bucket 2 out of 2
SELECT * FROM users1 TABLESAMPLE ( bucket 1 out of 4 on id );
anil 8
appu 4

anvith 9
nani 6
venkat 3

anil 8
appu 4
rajesh 10
nani 6
raj 2
SELECT * FROM users1 TABLESAMPLE ( bucket 1 out of 4 on rand() );
Buckets:
----------------------------------------
1. The number of buckets is fixed.
2. The number of buckets cannot be changed later.
3. Buckets are used only for static data.
Partitions:
----------------------------------------
1. The number of partition columns is fixed.
2. The partition columns cannot be changed later.
3. Partitions can be used for both static and dynamic data.
4. Static data uses static partitions; dynamic data uses dynamic partitions.
Hive => MapReduce

----------------------------------------
Partition => MultipleOutputs
Bucket => HashPartitioner
Note: A limitation of `HashPartitioner` is that the number of partitions is fixed.
This problem can be solved by using `MultipleOutputs`.
--------------------------------------------------------------
HOW TO WRITE JOIN QUERIES IN HIVE
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS products ( name string, id int , price int )
STORED AS TEXTFILE;
CREATE TABLE IF NOT EXISTS sales ( name string, year int , percentage double )
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/products.txt' OVERWRITE INTO

TABLE products;
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/sales.txt' OVERWRITE INTO

TABLE sales;
select * from products;

hadoop 1 1000
hive 2 500
pig 3 750
hbase 4 600
spark 5 1500
select * from sales;

hadoop 2010 10.0
hadoop 2011 40.5
hadoop 2012 60.0
hive 2012 40.5
hive 2013 20.0
pig 2014 30.0
pig 2010 10.0
hbase 2014 70.5
mongodb 2013 50.5
--------------------------------------------------------------
REDUCE-SIDE-JOIN QUERIES IN HIVE
--------------------------------------------------------------
HOW TO WRITE INNER JOIN QUERY

--------------------------------------------------------------
SELECT products.* , sales.* from products JOIN sales ON products.name = sales.name;
hadoop 1 1000 hadoop 2010 10.0

hive 2 500 hive 2012 40.5
hive 2 500 hive 2013 20.0
pig 3 750 pig 2014 30.0
pig 3 750 pig 2010 10.0
hbase 4 600 hbase 2014 70.5
HOW TO WRITE LEFT OUTER JOIN QUERY

--------------------------------------------------------------
SELECT products.* , sales.* from products LEFT OUTER JOIN sales ON products.name =
sales.name;

hive 2 500 hive 2012 40.5
hive 2 500 hive 2013 20.0
pig 3 750 pig 2014 30.0
pig 3 750 pig 2010 10.0
hbase 4 600 hbase 2014 70.5
spark 5 1500 NULL NULL NULL
HOW TO WRITE RIGHT OUTER JOIN QUERY

--------------------------------------------------------------
SELECT products.* , sales.* from products RIGHT OUTER JOIN sales ON products.name =
sales.name;

hive 2 500 hive 2012 40.5
hive 2 500 hive 2013 20.0
pig 3 750 pig 2014 30.0
pig 3 750 pig 2010 10.0
hbase 4 600 hbase 2014 70.5
NULL NULL NULL mongodb 2013 50.5
HOW TO WRITE FULL OUTER JOIN QUERY

--------------------------------------------------------------
SELECT products.* , sales.* from products FULL OUTER JOIN sales ON products.name =
sales.name;

hbase 4 600 hbase 2014 70.5
hive 2 500 hive 2013 20.0
hive 2 500 hive 2012 40.5
NULL NULL NULL mongodb 2013 50.5
pig 3 750 pig 2010 10.0
pig 3 750 pig 2014 30.0
spark 5 1500 NULL NULL NULL
--------------------------------------------------------------
MAP-SIDE-JOIN QUERIES IN HIVE
--------------------------------------------------------------
WHICH MAP-SIDE-JOIN QUERIES ARE ALLOWED ?

--------------------------------------------------------------
SELECT /*+ MAPJOIN(sales) */ products.* , sales.* from products JOIN sales ON
products.name = sales.name;
SELECT /*+ MAPJOIN(products) */ products.* , sales.* from products JOIN sales ON

products.name = sales.name;
SELECT /*+ MAPJOIN(sales) */ products.* , sales.* from products LEFT OUTER JOIN
sales ON products.name = sales.name;
SELECT /*+ MAPJOIN(products) */ products.* , sales.* from products RIGHT OUTER JOIN
WHICH MAP-SIDE-JOIN QUERIES ARE NOT ALLOWED ?

--------------------------------------------------------------
SELECT /*+ MAPJOIN(products) */ products.* , sales.* from products LEFT OUTER JOIN
SELECT /*+ MAPJOIN(sales) */ products.* , sales.* from products RIGHT OUTER JOIN
SELECT /*+ MAPJOIN(products) */ products.* , sales.* from products FULL OUTER JOIN
SELECT /*+ MAPJOIN(sales) */ products.* , sales.* from products FULL OUTER JOIN
Note:
------------
inner join => mapjoin(left table) & mapjoin(right table) are allowed
left outer join => mapjoin(left table) not allowed & mapjoin(right table) allowed
right outer join => mapjoin(left table) allowed & mapjoin(right table) not allowed
full outer join => mapjoin(left table) & mapjoin(right table) are not allowed
--------------------------------------------------------------
SEMI-JOIN QUERIES IN HIVE
--------------------------------------------------------------
SELECT products.* from products LEFT SEMI JOIN sales ON products.name = sales.name;
hadoop 1 1000
hive 2 500
pig 3 750
hbase 4 600
--------------------------------------------------------------
HOW TO WRITE CUSTOM UDF QUERIES IN HIVE
--------------------------------------------------------------
SHOW FUNCTIONS;
DESCRIBE FUNCTION split;
DESCRIBE FUNCTION EXTENDED split;
--------------------------------------------------------------
WORDCOUNT SOLUTION IN HIVE USING HIVE FUNCTIONS
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS docs(line string);
LOAD DATA LOCAL INPATH '/home/deepak/work/hive_inputs/demoinput' OVERWRITE INTO

TABLE docs;
SELECT * FROM docs;
CREATE TABLE word_counts AS

SELECT word, count(1) as cnt FROM (SELECT explode(split(line,' ')) as word FROM
docs) w
GROUP BY word ORDER BY word;
SELECT * FROM word_counts;
--------------------------------------------------------------
ADD JAR /home/deepak/work/hive_inputs/hive-examples.jar;
LIST JARS;
DELETE JAR /home/deepak/work/hive_inputs/hive-examples.jar;
--------------------------------------------------------------
CREATE TEMPORARY FUNCTION md5sum AS

'com.deepak.deepak.hadoop.training.hive.udf.MD5SumUDF';
SELECT name, md5sum(name) FROM student;
DROP TEMPORARY FUNCTION IF EXISTS md5sum;
--------------------------------------------------------------
hadoop fs -put /home/deepak/work/hive_inputs/hive-examples.jar /hive/hive-

examples.jar
CREATE FUNCTION deepak.double_my_data AS

'com.deepak.deepak.hadoop.training.hive.udf.DoubleMyDataUDF' USING JAR
'hdfs:///hive/hive-examples.jar';
SELECT name, deepak.double_my_data(name, ":") FROM student;
DROP FUNCTION IF EXISTS deepak.double_my_data;
--------------------------------------------------------------
HOW TO WRITE SERDE QUERIES IN HIVE
--------------------------------------------------------------
What is a SerDe?
=> SerDe is a short name for "Serializer and Deserializer."
=> Hive uses SerDe (and FileFormat) to read and write table rows.
=> HDFS files --> InputFileFormat --> <key, value> --> Deserializer --> Row
object
=> Row object --> Serializer --> <key, value> --> OutputFileFormat --> HDFS
files
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_regex_serde_new

ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES ("input.regex" = "([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)")
;
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student.txt'

OVERWRITE INTO TABLE student_regex_serde_new;
SELECT * FROM student_regex_serde_new WHERE year = 1;
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_regex_serde_old

( name string, id string , course string, year string )
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES ("input.regex" = "([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)")
;
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student.txt'

OVERWRITE INTO TABLE student_regex_serde_old;
SELECT * FROM student_regex_serde_old WHERE year = 1;
--------------------------------------------------------------
HOW TO WRITE CUSTOM SERDE QUERIES IN HIVE
--------------------------------------------------------------
ADD JAR /home/deepak/work/hive_inputs/hive-examples.jar;
DROP TABLE student_multiply_serde;
CREATE TABLE IF NOT EXISTS student_multiply_serde

ROW FORMAT SERDE 'com.deepak.deepak.hadoop.training.hive.SerDe.ColumnMultiplySerDe'
WITH SERDEPROPERTIES ("number.multiply" = "3", "text.multiply" = "2")
;
DESCRIBE FORMATTED student_multiply_serde;
INSERT OVERWRITE TABLE student_multiply_serde SELECT * FROM student WHERE year = 1;
SELECT * FROM student WHERE year = 1;
SELECT * FROM student_multiply_serde WHERE year = 1;
--------------------------------------------------------------
DROP TABLE student_map_serde;
CREATE TABLE IF NOT EXISTS student_map_serde

( name string, id string , course string, year string )
ROW FORMAT SERDE 'com.deepak.deepak.hadoop.training.hive.SerDe.ColumnarMapSerDe'
;
INSERT OVERWRITE TABLE student_map_serde SELECT * FROM student WHERE year = 1;
SELECT * FROM student_map_serde WHERE year = 1;
--------------------------------------------------------------
Regex Serde for apache log
--------------------------------------------------------------
CREATE TABLE apache_log (
host STRING,
identity STRING,
user STRING,
time STRING,
request STRING,
status STRING,
size STRING,
referer STRING,
agent STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|
[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?",
"output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"
)
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/apache_clf.txt' OVERWRITE INTO

TABLE apache_log;
--------------------------------------------------------------
DIFFERENT WAYS TO STORE DATA USING HIVE QUERIES
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_text

( name string, id int , course string, year int ) STORED AS TEXTFILE;
or
CREATE TABLE IF NOT EXISTS student_text

STORED AS
INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat';
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_seq

( name string, id int , course string, year int ) STORED AS SEQUENCEFILE;
or
CREATE TABLE IF NOT EXISTS student_seq

STORED AS
INPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat';
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_rc

( name string, id int , course string, year int ) STORED AS RCFILE;
or
CREATE TABLE IF NOT EXISTS student_rc
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat';
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_orc

( name string, id int , course string, year int ) STORED AS ORC;
or
CREATE TABLE IF NOT EXISTS student_orc

ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat';
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_avro

( name string, id int , course string, year int ) STORED AS AVRO;
or
CREATE TABLE IF NOT EXISTS student_avro

ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_parquet

( name string, id int , course string, year int ) STORED AS PARQUET;
or
CREATE TABLE IF NOT EXISTS student_parquet

ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';
--------------------------------------------------------------
FROM STUDENT
INSERT OVERWRITE TABLE student_text SELECT *
INSERT OVERWRITE TABLE student_seq SELECT *
INSERT OVERWRITE TABLE student_rc SELECT *
INSERT OVERWRITE TABLE student_orc SELECT *
INSERT OVERWRITE TABLE student_avro SELECT *
INSERT OVERWRITE TABLE student_parquet SELECT *
;
--------------------------------------------------------------
HOW TO WRITE HIVE QUERIES USING TRANSFORM
--------------------------------------------------------------
SELECT TRANSFORM(name, id, year) USING '/bin/cat' AS (name, id, year) FROM student;
SELECT TRANSFORM(name, id, course) USING '/bin/grep cse' AS (name, id, course) FROM
student; -- grep searches for the specific word
SELECT TRANSFORM(name, id, year, course) USING 'python

/home/deepak/work/hive_inputs/student.py' AS (name, id, year, course) FROM student;
--------------------------------------------------------------
HOW TO WRITE ALTER QUERIES IN HIVE
--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS test ( a string, b int , c int );
CREATE TABLE IF NOT EXISTS test1 LIKE test;
ALTER TABLE test1 RENAME TO mytest;

hive> describe mytest;
a string
b int
c int
ALTER TABLE mytest REPLACE COLUMNS ( x string, y double, a string, b int , c

float);
x string
y double
a string
b int
c float
ALTER TABLE mytest REPLACE COLUMNS (x string, y double);

x string
y double
ALTER TABLE mytest ADD COLUMNS (c1 INT COMMENT 'c1 column', c2 STRING COMMENT 'c2
column');
x string
y double
c1 int c1 column
c2 string c2 column
ALTER TABLE mytest CHANGE c1 c1 STRING COMMENT 'new c1 column';

x string
y double
c1 string new c1 column
c2 string c2 column
ALTER TABLE mytest CHANGE c1 c11 STRING COMMENT 'new c11 column';
x string
y double
c2 string c2 column
ALTER TABLE mytest CHANGE c2 c21 STRING COMMENT 'new c21 column' after x;
x string
y double
ALTER TABLE mytest CHANGE c21 c2 STRING COMMENT 'new c2 column' FIRST;
x string
y double
ALTER TABLE mytest SET TBLPROPERTIES ('comment' = 'add comment');
ALTER TABLE mytest SET TBLPROPERTIES ('comment' = 'new comment');
ALTER TABLE mytest SET TBLPROPERTIES ('key1' = 'value1', 'key2' = 'value2');
ALTER TABLE mytest SET SERDEPROPERTIES ('field.delim' = ',');
ALTER TABLE mytest SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' WITH

SERDEPROPERTIES ('field.delim' = ':');
ALTER TABLE mytest ADD PARTITION (partCol = 'value1') location 'loc1';

ALTER TABLE mytest ADD PARTITION (partCol = 'value2') location 'loc2';
or
ALTER TABLE mytest ADD

PARTITION (partCol = 'value1') location 'loc1'
PARTITION (partCol = 'value2') location 'loc2';
--------------------------------------------------------------
DIFFERENT WAYS TO RUN HIVE QUERIES
--------------------------------------------------------------
Running Hive CLI

--------------------------------------------------------------
$HIVE_HOME/bin/hive
// Difference between Beeline and the Hive CLI: Beeline is used on a cluster and connects over JDBC
Running HiveServer2 and Beeline

--------------------------------------------------------------
$HIVE_HOME/bin/hiveserver2
$HIVE_HOME/bin/beeline -u jdbc:hive2://localhost:10000/default -n deepak -d
org.apache.hive.jdbc.HiveDriver
For more information, see:

https://cwiki.apache.org/confluence/display/Hive/HiveServer2+Clients
Running HCatalog
--------------------------------------------------------------
$HIVE_HOME/hcatalog/sbin/hcat_server.sh
$HIVE_HOME/hcatalog/bin/hcat
Running WebHCat
--------------------------------------------------------------
$HIVE_HOME/hcatalog/sbin/webhcat_server.sh start
$HIVE_HOME/hcatalog/sbin/webhcat_server.sh stop
http://localhost:50111/templeton/v1
http://localhost:50111/templeton/v1/ddl/database/default/table/student?
user.name=hadoop
Role Management Commands // authorization

--------------------------------------------------------------
CREATE ROLE deepak;
DROP ROLE deepak;
SHOW ROLES;
SHOW CURRENT ROLES;
--------------------------------------------------------------
ACID functionality in HIVE
--------------------------------------------------------------
Add the following properties to the "hive-site.xml" file and restart the Hive server:
<property>
<name>hive.in.test</name>
<value>false</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>2</value>
</property>
CREATE HIVE TABLE WITH CLUSTERED BY, ORC, TBLPROPERTIES

--------------------------------------------------------------
CREATE TABLE IF NOT EXISTS student_acid
( name string, id int, course string, year int )
CLUSTERED BY (name) INTO 4 BUCKETS
STORED AS ORC
LOCATION '/hive/deepak/student_acid'
TBLPROPERTIES ('transactional' = 'true');
INSERT INTO TABLE student_acid VALUES

('arun', 1, 'mca', 1),
('anil', 2, 'mca', 1),
('sudheer', 3, 'mca', 2),
('santosh', 4, 'mca', 2)
;
UPDATE student_acid
SET year = 3, course = 'mech'
WHERE id = 4 ;
DELETE FROM student_acid WHERE name = 'anil';
==================================================================
LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student1.txt' INTO TABLE
student_partition PARTITION(course, year);
CREATE TABLE IF NOT EXISTS student_partition_test ( name string , id int )

PARTITIONED BY ( course string , year int)
STORED AS TEXTFILE
LOCATION '/hive/deepak/student_partition_test'
;
ALTER TABLE student_partition_test ADD PARTITION (course = 'cse' , year = 1)

location '/hive/deepak/student_partition_test/course=cse/year=1';
==================================================================
use deepak;
add jar /home/deepak/work/deepak-pdf-utils.jar;
drop table student_pdf;
CREATE TABLE IF NOT EXISTS student_pdf

ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ("field.delim" = " ")
STORED AS
INPUTFORMAT 'com.deepak.deepak.hadoop.training.pdf.mapred.deepakPdfInputFormat'
OUTPUTFORMAT
'com.deepak.deepak.hadoop.training.pdf.mapred.deepakHivePdfOutputFormat'
;
LOAD DATA LOCAL INPATH '/home/deepak/work/input/student.pdf' OVERWRITE INTO TABLE

student_pdf;
select * from student_pdf;
INSERT OVERWRITE LOCAL DIRECTORY '/home/deepak/work/output/hive-op'

FIELDS TERMINATED BY ','
STORED AS
INPUTFORMAT 'com.deepak.deepak.hadoop.training.pdf.mapred.deepakPdfInputFormat'
OUTPUTFORMAT
'com.deepak.deepak.hadoop.training.pdf.mapred.deepakHivePdfOutputFormat'
SELECT * FROM student_pdf;
INSERT OVERWRITE LOCAL DIRECTORY '/home/deepak/work/output/hive-op-1'

FIELDS TERMINATED BY ':'
SELECT * FROM student_pdf;
==================================================================
hadoop jar /home/deepak/work/deepak-pdf-utils.jar

com.deepak.deepak.hadoop.training.pdf.utils.deepakPdfWordCountJob
file:///home/deepak/work/input/student.pdf file:///home/deepak/work/output/mr-op
hadoop jar /home/deepak/work/deepak-pdf-utils.jar

com.deepak.deepak.hadoop.training.pdf.utils.deepakPdfWordCountJob
-Dmapred.pdfoutputformat.separator=*** file:///home/deepak/work/input/student.pdf
file:///home/deepak/work/output/mr-op-1
==================================================================

Hive Cammand

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Hive Cammand

Uploaded by

Copyright:

Available Formats

HOW TO CREATE DATABASE

CREATE DATABASE IF NOT EXISTS deepak

DESCRIBE DATABASE deepak;

DESCRIBE DATABASE EXTENDED deepak;

HOW TO DROP DATABASE

DROP DATABASE IF EXISTS deepak CASCADE;

DROP DATABASE IF EXISTS deepak RESTRICT;

HOW TO USE DATABASE

HOW TO ALTER DATABASE

ALTER DATABASE deepak SET OWNER USER hadoop;

ALTER DATABASE deepak SET OWNER ROLE hadoop;

HOW TO CREATE TABLE

HOW TO KNOW INFORMATION ABOUT TABLE

DESCRIBE EXTENDED tbl_name;

DESCRIBE FORMATTED tbl_name;

LOAD DATA LOCAL INPATH '/home/deepak/work/hive_inputs/student.txt' OVERWRITE INTO

LOAD DATA LOCAL INPATH '/home/deepak/work/hive_inputs/student.txt' OVERWRITE INTO

HOW TO LOAD DATA INTO TABLE

LOAD DATA LOCAL INPATH '/home/deepak/work/hive_inputs/student.txt' INTO TABLE

hadoop fs -mkdir -p /home/deepak/work/hive_inputs

hadoop fs -put /home/deepak/work/hive_inputs/student.txt

LOAD DATA INPATH '/home/deepak/work/hive_inputs/student.txt' OVERWRITE INTO TABLE

hadoop fs -put /home/deepak/work/hive_inputs/student.txt

LOAD DATA INPATH '/home/deepak/work/hive_inputs/student.txt' INTO TABLE student2;

hadoop fs -put /home/deepak/work/hive_inputs/student.txt

CREATE EXTERNAL TABLE IF NOT EXISTS studentext1

SELECT * FROM studentext1;

DESCRIBE FORMATTED studentext1;

CREATE EXTERNAL TABLE IF NOT EXISTS studentext2

SELECT * FROM studentext2;

DESCRIBE FORMATTED studentext2;

CREATE TABLE IF NOT EXISTS student

LOAD DATA LOCAL INPATH '/home/deepak/work/hive_inputs/student.txt' OVERWRITE INTO

LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student.txt' OVERWRITE INTO

UPDATE student SET year = 3, name = 'san' WHERE id = 4 ;

HOW TO VERIFY SCHEMA IN METASTORE DATABASE

SELECT * FROM DBS join TBLS on DBS.DB_ID = TBLS.DB_ID;

COMPLEX DATA TYPES

CREATE TABLE IF NOT EXISTS studentdata

LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/studentdata.txt' OVERWRITE

SELECT * FROM studentdata;

SELECT name, subjects[0], marks['math'], address.pincode, details FROM

INSERT OVERWRITE LOCAL DIRECTORY '/home/deepak/work/hive_inputs/studentdata'

INSERT OVERWRITE LOCAL DIRECTORY '/home/deepak/work/hive_inputs/studentdata_1'

INSERT OVERWRITE LOCAL DIRECTORY '/home/deepak/work/hive_inputs/studentdata_2'

HOW TO CREATE PARTITION TABLE

HOW TO WRITE HIVE STATIC PARTITION QUERIES

LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student_cse_2.txt' OVERWRITE

HOW TO WRITE PARTITION TABLE QUERIES

SELECT * FROM student_partition;

SELECT * FROM student_partition WHERE year = 1;

SELECT * FROM student_partition WHERE course = 'mech' and year = 1;

HOW TO WRITE HIVE DYNAMIC PARTITION QUERIES

INSERT OVERWRITE TABLE student_partition PARTITION(course, year) select * from

LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student1.txt' INTO TABLE

INSERT INTO TABLE student_partition PARTITION(course = 'ece', year = 3) SELECT

INSERT OVERWRITE TABLE student_partition PARTITION(course = 'ece', year = 3) SELECT

INSERT INTO TABLE student_partition PARTITION(course = 'mca', year = 1) VALUES

BELOW HIVE QUERIES ARE NOT ALLOWED

LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student.txt' OVERWRITE INTO

LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student.txt' OVERWRITE INTO

LOAD DATA LOCAL INPATH '${env:HOME}/work/hive_inputs/student.txt' OVERWRITE INTO

INSERT INTO TABLE student_partition PARTITION(course , year = 3) SELECT name, id,

DIFFERENT WAYS TO EXECUTE HIVE QUERIES

SELECT /*+ MAPJOIN(products) */ products.* , sales.* from products JOIN sales ON