Hive Retail Sales Analysis — Lab Notes
Create Database
------------------
-- Idempotent: re-running this script must not fail if the database exists.
create database if not exists retail;
B. Select Database
------------------
-- Make retail the session's current database for all statements below.
use retail;
F. Counting no of records
-------------------------
-- Total number of records loaded into sales_data.
SELECT COUNT(*) FROM sales_data;
H. 10 customers
--------------------
-- Top 10 customers by total spend.
-- ORDER BY is required: LIMIT without it returns an arbitrary 10 rows.
select custname,
       sum(price) as total_spend
from sales_data
group by custname
order by total_spend desc
limit 10;
-- Allow every partition column to be resolved dynamically (no static partition spec required).
set hive.exec.dynamic.partition.mode=nonstrict;
-- Enable dynamic-partition inserts.
set hive.exec.dynamic.partition=true;
-- Honor CLUSTERED BY bucketing when inserting.
set hive.enforce.bucketing=true;
-- Vectorized (batch-at-a-time) execution for the map side...
set hive.vectorized.execution.enabled = true;
-- ...and for the reduce side.
set hive.vectorized.execution.reduce.enabled = true;
-- Dynamic-partition insert: the LAST select column (city) feeds PARTITION(city).
-- Note: the original text had `txn.State` garbled across a line break; fixed here.
from sales_data txn
insert overwrite table sales_by_city partition(city)
select txn.transid,
       txn.transdate,
       txn.product,
       txn.price,
       txn.paymenttype,
       txn.custname,
       txn.state,
       txn.country,
       txn.city;
-- Same session settings as the block before the sales_by_city insert;
-- redundant when run in the same session, but harmless.
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.enforce.bucketing=true;
==========================
Find sales totals broken down by age group
==========================
-- Inspect out2's schema before defining the next stage's target table.
describe out2;

-- Aggregate target: level (age-group label — presumably; confirm upstream) and total amount.
create table if not exists out3 (
    level  string,
    amount double
)
row format delimited
fields terminated by ',';
==============
simple join
==============
-- Name -> email lookup table used by the simple-join example.
create table if not exists mailid (
    name  string,
    email string
)
row format delimited
fields terminated by ',';
===============================================
Custom Mapper Code to manipulate unix timestamp
===============================================
-- Raw ratings input, one tab-separated record per line.
-- unixtime is kept as STRING so the TRANSFORM mapper below can parse it.
create table if not exists u_data (
    userid   int,
    movieid  int,
    rating   int,
    unixtime string
)
row format delimited
fields terminated by '\t'
stored as textfile;
****Create weekday_mapper.py:
#!/usr/bin/env python
"""Hive TRANSFORM mapper: replace the unix-timestamp column with the ISO weekday.

Fixes vs. the original: Python-2-only `print` statement, the garbled line
wrap that split `weekday = ...` mid-assignment, and no __main__ guard.
"""
import sys
import datetime


def transform(line):
    """Map one 'userid<TAB>movieid<TAB>rating<TAB>unixtime' record.

    Returns the record with unixtime replaced by the ISO weekday
    (1 = Monday .. 7 = Sunday) of the local date for that timestamp.
    Raises ValueError on records that do not have exactly four fields.
    """
    userid, movieid, rating, unixtime = line.strip().split('\t')
    weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
    return '\t'.join([userid, movieid, rating, str(weekday)])


def main():
    # Hive streams rows on stdin; emit one transformed row per input row.
    for line in sys.stdin:
        print(transform(line))


if __name__ == '__main__':
    main()
-- Output of the weekday mapper: unixtime replaced by ISO weekday (1=Mon .. 7=Sun).
create table if not exists u_data_new (
    userid  int,
    movieid int,
    rating  int,
    weekday int
)
row format delimited
fields terminated by '\t';
****The following command uses the TRANSFORM clause to invoke the mapper
script.
===========
UDF
===========
import java.util.Date;
import java.text.DateFormat;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF: converts a unix timestamp in seconds (passed as text) to a
 * locale-formatted date/time string.
 *
 * NOTE(review): org.apache.hadoop.hive.ql.exec.UDF is deprecated in newer
 * Hive releases in favor of GenericUDF; kept for compatibility with the
 * original interface.
 */
public class UnixtimeToDate extends UDF {

    /**
     * @param text unix timestamp in seconds, as text; may be null
     * @return formatted date/time, or null when input is null or not a number
     */
    public Text evaluate(Text text) {
        if (text == null) {
            return null;
        }
        try {
            long timestamp = Long.parseLong(text.toString().trim());
            return new Text(toDate(timestamp));
        } catch (NumberFormatException e) {
            // One malformed row must not fail the whole query: emit SQL NULL.
            return null;
        }
    }

    /** Formats a seconds-precision unix timestamp with the default locale format. */
    private String toDate(long timestamp) {
        // Input is in seconds; java.util.Date expects milliseconds.
        Date date = new Date(timestamp * 1000);
        // A fresh formatter per call sidesteps DateFormat's lack of thread safety.
        return DateFormat.getInstance().format(date);
    }
}
****one,1386023259550
****two,1389523259550
****three,1389523259550
****four,1389523259550
****OK
****four 3/28/02 8:12 PM
****one 4/30/91 1:59 PM
****two 3/28/02 8:12 PM
****three 3/28/02 8:12 PM