You are on page 1 of 3

HIVE

Logging in Hive
conf/hive-log4j.properties –
bin/hive --hiveconf
hive.root.logger=DEBUG,console
hive.root.logger=WARN,DRFA
hive.log.dir=/tmp/${user.name}
hive.log.file=hive.log
log4j.threshhold=WARN

UDF & UDTF:


Show functions
describe function extended coalesce
describe function extended regexp
unix_timestamp, round, floor

UDTF – user defined table functions;


Array (1,2,3) => [1,2,3]
SELECT name, sub FROM employees LATERAL VIEW explode(subordinates) subView AS sub;
John Doe Mary Smith
John Doe Todd Jones
Mary Smith Bill King

HIVE STREAMING
SELECT TRANSFORM (a, b) USING '/bin/sed s/4/10/' AS newA, newB FROM a;

CREATE TABLE A_ORC (


customerID int, name string, age int, address string
) STORED AS ORC tblproperties ("orc.compress" = "SNAPPY");

set hive.vectorized.execution.enabled = true;


set hive.vectorized.execution.reduce.enabled = true;

join_table:
table_reference JOIN table_factor [join_condition]
| table_reference {LEFT|RIGHT|FULL} [OUTER] JOIN table_reference join_condition
| table_reference LEFT SEMI JOIN table_reference join_condition
| table_reference CROSS JOIN table_reference [join_condition] (as of Hive 0.10)

SELECT a.key, a.val


FROM a LEFT SEMI JOIN b ON (a.key = b.key); -- equivalent to: WHERE a.key IN (SELECT key FROM b)

set hive.optimize.bucketmapjoin = true

Predicate Pushdown Rules


The logic can be summarized by these two rules:
During Join predicates cannot be pushed past Preserved Row tables.
After Join predicates cannot be pushed past Null Supplying tables.

Partition and Bucketing:


SELECT firstname, country, state, city FROM bucketed_user TABLESAMPLE(1 PERCENT);

CREATE TABLE bucketed_user(address STRING, city VARCHAR(64), state VARCHAR(64))
PARTITIONED BY (country VARCHAR(64))
CLUSTERED BY (state) SORTED BY (city) INTO 32 BUCKETS
STORED AS SEQUENCEFILE;

 Static Partitioning—Used when the values for partition columns


are known well in advance of loading the data into a Hive table
 Dynamic Partitioning—Used when the values for partition
columns are known only during loading of the data into a Hive
table

 SET hive.exec.dynamic.partition=true;
SET hive.exec.max.dynamic.partitions=2048;
SET hive.exec.max.dynamic.partitions.pernode=256;

 Now, let’s look at a complete example demonstrating how to


create and load data using dynamic partitioning.

 Change dynamic partition mode to non-strict:

 SET hive.exec.dynamic.partition.mode=nonstrict;

 CREATE TABLE patents (


citing_patent      INT,
cited_patent       INT,
assignee           STRING,
companyname        STRING,
publication_date   STRING)

 PARTITIONED BY (
year  INT,
month INT,
day   INT);

CREATE TABLE page_view(viewTime INT, userid BIGINT,


     page_url STRING, referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User')
 COMMENT 'This is the page view table'
 PARTITIONED BY(dt STRING, country STRING)
 CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
 ROW FORMAT DELIMITED
   FIELDS TERMINATED BY '\001'
   COLLECTION ITEMS TERMINATED BY '\002'
   MAP KEYS TERMINATED BY '\003'
 STORED AS SEQUENCEFILE;

CREATE VIEW onion_referrers(url COMMENT 'URL of Referring page')


  COMMENT 'Referrers to The Onion website'
  AS
  SELECT DISTINCT referrer_url
  FROM page_view
  WHERE page_url='http://www.theonion.com';

ALTER INDEX index_name ON table_name [PARTITION partition_spec] REBUILD;


CREATE INDEX x ON TABLE t(j)
AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
IN TABLE t_x;

SHOW CURRENT ROLES;


CREATE ROLE role_name;

REVOKE [ADMIN OPTION FOR] role_name [, role_name] ...


FROM principal_specification [, principal_specification] ... ;
 
principal_specification
  : USER user
  | ROLE role

You might also like