You are on page 1 of 7

Put CSV files onto HDFS

------------------------
# Create the target directories first: if they do not exist, -put would
# create a plain file named tab1/tab2 instead of copying the CSV into a
# directory, and the tables' LOCATION clauses point at directories.
$ hdfs dfs -mkdir -p /user/cloudera/sample_data/tab1 /user/cloudera/sample_data/tab2
$ hdfs dfs -put tab1.csv /user/cloudera/sample_data/tab1
$ hdfs dfs -put tab2.csv /user/cloudera/sample_data/tab2

Create tables
--------------------
DROP TABLE IF EXISTS tab1;
-- The EXTERNAL clause means the data is located outside the central location
-- for Impala data files and is preserved when the associated Impala table is
-- dropped.
-- We expect the data to already exist in the directory specified by the
-- LOCATION clause.
CREATE EXTERNAL TABLE tab1
(
    id INT,
    col_1 BOOLEAN,
    col_2 DOUBLE,
    col_3 TIMESTAMP
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/sample_data/tab1';

DROP TABLE IF EXISTS tab2;

-- tab2 is declared EXTERNAL, like tab1: its comma-delimited data files are
-- expected to be present already under the LOCATION directory.
CREATE EXTERNAL TABLE tab2 (
    id    INT,
    col_1 BOOLEAN,
    col_2 DOUBLE
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/sample_data/tab2';

DROP TABLE IF EXISTS tab3;

-- No EXTERNAL clause here, so Impala manages the data inside its central
-- data directory tree. The table starts out empty; rows are loaded after
-- it has been created instead of being read from pre-existing files.
CREATE TABLE tab3 (
    id    INT,
    col_1 BOOLEAN,
    col_2 DOUBLE,
    month INT,
    day   INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

Load data to an internal table


--------------------------------
-- 'filepath' is a placeholder: substitute the path of the data to load.
LOAD DATA INPATH 'filepath'
INTO TABLE tab3;
Describe table
----------------
Show examples on the terminal, for instance: DESCRIBE tab1;

Execute the script files


--------------------------
impala-shell -i localhost -f customer_table_script.sql

Example: Examining Contents of Tables


---------------------------------------
-- Dump both external tables in full.
SELECT *
FROM tab1;

SELECT *
FROM tab2;

-- Peek at no more than five rows of tab2.
SELECT *
FROM tab2
LIMIT 5;

Example: Aggregate and Join


-------------------------------
-- For each tab1.col_1 value, report the largest and smallest tab2.col_2
-- among the rows whose id appears in both tables.
-- col_1 exists in both tab1 and tab2, so it is qualified everywhere, and the
-- sort key is named instead of referenced by position.
SELECT
    tab1.col_1,
    MAX(tab2.col_2),
    MIN(tab2.col_2)
FROM tab2
INNER JOIN tab1 USING (id)
GROUP BY tab1.col_1
ORDER BY tab1.col_1
LIMIT 5;

Example: Subquery, Aggregate and Joins


----------------------------------------
-- Return every tab2 row whose col_2 equals the maximum tab2.col_2 of some
-- tab1.col_1 group (groups are built from tab1/tab2 rows matching on id).
-- Explicit joins keep each join condition next to the tables it relates;
-- col_1 is qualified because both tab1 and tab2 define a column of that name.
SELECT tab2.*
FROM tab2
INNER JOIN (
    SELECT
        tab1.col_1,
        MAX(tab2.col_2) AS max_col2
    FROM tab2
    INNER JOIN tab1
        ON tab1.id = tab2.id
    GROUP BY tab1.col_1
) subquery1
    ON subquery1.max_col2 = tab2.col_2;

Example: INSERT Query


---------------------------
-- Replace the contents of tab3 with the tab1 rows dated in 2012, storing the
-- month and day-of-month of col_3 in tab3's month and day columns.
INSERT OVERWRITE TABLE tab3
SELECT
    id,
    col_1,
    col_2,
    MONTH(col_3),
    DAYOFMONTH(col_3)
FROM tab1
WHERE YEAR(col_3) = 2012;

Cross Join example [Supported only starting 1.2.2]


----------------------------------------------------
-- Two small tables with identical layouts, used by the join examples.
CREATE TABLE heroes (
    name   STRING,
    era    STRING,
    planet STRING
);

CREATE TABLE villains (
    name   STRING,
    era    STRING,
    planet STRING
);

INSERT INTO heroes VALUES
    ('Tesla', '20th century', 'Earth'),
    ('Pythagoras', 'Antiquity', 'Earth'),
    ('Zopzar', 'Far Future', 'Mars');

INSERT INTO villains VALUES
    ('Caligula', 'Antiquity', 'Earth'),
    ('John Dillinger', '20th century', 'Earth'),
    ('Xibulor', 'Far Future', 'Venus');

-- Pair each hero with the villains from the same era and the same planet.
-- The join predicates live in ON so the join condition is stated with the
-- join itself rather than as a WHERE filter.
SELECT concat(heroes.name, ' vs. ', villains.name) AS battle
FROM heroes
INNER JOIN villains
    ON heroes.era = villains.era
    AND heroes.planet = villains.planet;

Cartesian product not possible in Impala 1.1.


------------------------------------------------
-- Deliberate counter-example: a JOIN with no join condition at all, i.e. a
-- Cartesian product of heroes and villains. Per the heading above, Impala 1.1
-- cannot run this.
select concat(heroes.name,' vs. ',villains.name) as battle from heroes
join villains;

Cartesian product available in Impala 1.2.2 with the CROSS JOIN syntax.
-------------------------------------------------------------------------
-- Every hero matched against every villain.
SELECT concat(heroes.name, ' vs. ', villains.name) AS battle
FROM heroes
CROSS JOIN villains;

Another example of Cross Product


-----------------------------------

-- Two single-column tables: x holds quarter numbers, y holds years.
CREATE TABLE x_axis (x INT);
CREATE TABLE y_axis (y INT);

INSERT INTO x_axis VALUES (1), (2), (3), (4);
INSERT INTO y_axis VALUES (2010), (2011), (2012), (2013), (2014);

-- Every year paired with every quarter.
SELECT
    y AS year,
    x AS quarter
FROM x_axis
CROSS JOIN y_axis;

-- The same pairing, restricted to quarters 1 and 3.
SELECT
    y AS year,
    x AS quarter
FROM x_axis
CROSS JOIN y_axis
WHERE x IN (1, 3);

Creating views
---------------
-- myView exposes the tab2 rows whose col_2 equals the maximum tab2.col_2 of
-- some tab1.col_1 group (groups are built from tab1/tab2 rows matching on id).
-- Explicit joins replace the comma joins, and col_1 is qualified because both
-- tab1 and tab2 define a column of that name.
CREATE VIEW myView AS
SELECT tab2.*
FROM tab2
INNER JOIN (
    SELECT
        tab1.col_1,
        MAX(tab2.col_2) AS max_col2
    FROM tab2
    INNER JOIN tab1
        ON tab1.id = tab2.id
    GROUP BY tab1.col_1
) subquery1
    ON subquery1.max_col2 = tab2.col_2;

SELECT *
FROM myView;

Alter views
---------------
-- Redefine myView so that it returns every row and column of tab2.
ALTER VIEW myView AS
SELECT *
FROM tab2;

SELECT *
FROM myView;

Alter views to rename them


---------------------------
-- Give the view a new name; afterwards it is queried as myRenamedView.
ALTER VIEW myView
RENAME TO myRenamedView;

SELECT *
FROM myRenamedView;

Logical Operators
-------------------

select true and true;


+---------------+
| true and true |
+---------------+
| true |
+---------------+

select true and false;


+----------------+
| true and false |
+----------------+
| false |
+----------------+

select false and false;


+-----------------+
| false and false |
+-----------------+
| false |
+-----------------+

select true and null;


+---------------+
| true and null |
+---------------+
| NULL |
+---------------+

select (10 > 2) and (6 != 9);


+-----------------------+
| (10 > 2) and (6 != 9) |
+-----------------------+
| true |
+-----------------------+
These examples demonstrate the OR operator:

select true or true;


+--------------+
| true or true |
+--------------+
| true |
+--------------+

select true or false;


+---------------+
| true or false |
+---------------+
| true |
+---------------+

select false or false;


+----------------+
| false or false |
+----------------+
| false |
+----------------+

select true or null;


+--------------+
| true or null |
+--------------+
| true |
+--------------+

select null or true;


+--------------+
| null or true |
+--------------+
| true |
+--------------+

select false or null;


+---------------+
| false or null |
+---------------+
| NULL |
+---------------+

select (1 = 1) or ('hello' = 'world');


+--------------------------------+
| (1 = 1) or ('hello' = 'world') |
+--------------------------------+
| true |
+--------------------------------+

select (2 + 2 != 4) or (-1 > 0);


+--------------------------+
| (2 + 2 != 4) or (-1 > 0) |
+--------------------------+
| false |
+--------------------------+

Using the REGEXP operator


--------------------------
-- Heroes whose name begins with the letter T.
SELECT *
FROM heroes
WHERE name REGEXP '^T.*';

Impala Type Conversion Functions


----------------------------------
Impala supports the following type conversion functions:

cast(expr as type)

-- Fails: the integer 10 is passed to concat() without a conversion.
SELECT concat('Here are the first ', 10, ' results.');

-- Succeeds: the number is converted to a string first.
SELECT concat('Here are the first ', CAST(10 AS STRING), ' results.');

Impala Date/Time functions


----------------------------

select from_unixtime(1392394861,"yyyy-MM-dd HH:mm:ss.SSSS");


+-------------------------------------------------------+
| from_unixtime(1392394861, 'yyyy-mm-dd hh:mm:ss.ssss') |
+-------------------------------------------------------+
| 2014-02-14 16:21:01.0000 |
+-------------------------------------------------------+

select from_unixtime(1392394861,"yyyy-MM-dd");
+-----------------------------------------+
| from_unixtime(1392394861, 'yyyy-mm-dd') |
+-----------------------------------------+
| 2014-02-14 |
+-----------------------------------------+

select from_unixtime(1392394861,"HH:mm:ss.SSSS");
+--------------------------------------------+
| from_unixtime(1392394861, 'hh:mm:ss.ssss') |
+--------------------------------------------+
| 16:21:01.0000 |
+--------------------------------------------+

select from_unixtime(1392394861,"HH:mm:ss");
+---------------------------------------+
| from_unixtime(1392394861, 'hh:mm:ss') |
+---------------------------------------+
| 16:21:01 |
+---------------------------------------+

-- Format the moment three days from now using the 'yyyy/MM/dd HH:mm' pattern.
SELECT from_unixtime(
    unix_timestamp(now() + INTERVAL 3 DAYS),
    'yyyy/MM/dd HH:mm'
);

Regular expressions in Impala


------------------------------

select regexp_replace('aaabbbaaa','b+','xyz');
+------------------------------------------+
| regexp_replace('aaabbbaaa', 'b+', 'xyz') |
+------------------------------------------+
| aaaxyzaaa |
+------------------------------------------+

select regexp_replace('123-456-789','[^[:digit:]]','');
+---------------------------------------------------+
| regexp_replace('123-456-789', '[^[:digit:]]', '') |
+---------------------------------------------------+
| 123456789 |
+---------------------------------------------------+

Switch back and forth between Hive and Impala


------------------------------------------------
Run the INVALIDATE METADATA statement in Impala when tables created or changed
through Hive are not visible, i.e. when the metadata has not been refreshed
automatically.

You might also like