
Hadoop and Big Data Lab 2016

/*
* Java Program to Implement Singly Linked List
*/

import java.util.Scanner;

/* Class Node */
class Node
{
protected int data;
protected Node link;

/* Constructor */
public Node()
{
link = null;
data = 0;
}
/* Constructor */
public Node(int d,Node n)
{
data = d;
link = n;
}
/* Function to set link to next Node */
public void setLink(Node n)
{
link = n;
}
/* Function to set data to current Node */
public void setData(int d)
{
data = d;
}
/* Function to get link to next node */
public Node getLink()
{
return link;
}
/* Function to get data from current Node */
public int getData()
{
return data;
}
}

/* Class linkedList */
class linkedList
{
protected Node start;
protected Node end ;
public int size ;

/* Constructor */
public linkedList()
{
start = null;
end = null;
size = 0;
}
/* Function to check if list is empty */
public boolean isEmpty()
{
return start == null;
}
/* Function to get size of list */
public int getSize()
{
return size;
}
/* Function to insert an element at begining */
public void insertAtStart(int val)
{
Node nptr = new Node(val, null);
size++ ;
if(start == null)
{
start = nptr;
end = start;
}
else
{
nptr.setLink(start);
start = nptr;
}
}
/* Function to insert an element at end */
public void insertAtEnd(int val)
{
Node nptr = new Node(val,null);
size++ ;
if(start == null)

{
start = nptr;
end = start;
}
else
{
end.setLink(nptr);
end = nptr;
}
}
/* Function to insert an element at position */
public void insertAtPos(int val , int pos)
{
Node nptr = new Node(val, null);
Node ptr = start;
pos = pos - 1 ;
for (int i = 1; i < size; i++)
{
if (i == pos)
{
Node tmp = ptr.getLink() ;
ptr.setLink(nptr);
nptr.setLink(tmp);
break;
}
ptr = ptr.getLink();
}
size++ ;
}
/* Function to delete an element at position */
public void deleteAtPos(int pos)
{
if (pos == 1)
{
start = start.getLink();
size--;
return ;
}
if (pos == size)
{
Node s = start;
Node t = start;
while (s != end)
{
t = s;
s = s.getLink();

}
end = t;
end.setLink(null);
size --;
return;
}
Node ptr = start;
pos = pos - 1 ;
for (int i = 1; i < size - 1; i++)
{
if (i == pos)
{
Node tmp = ptr.getLink();
tmp = tmp.getLink();
ptr.setLink(tmp);
break;
}
ptr = ptr.getLink();
}
size-- ;
}
/* Function to display elements */
public void display()
{
System.out.print("\nSingly Linked List = ");
if (size == 0)
{
System.out.print("empty\n");
return;
}
if (start.getLink() == null)
{
System.out.println(start.getData() );
return;
}
Node ptr = start;
System.out.print(start.getData()+ "->");
ptr = start.getLink();
while (ptr.getLink() != null)
{
System.out.print(ptr.getData()+ "->");
ptr = ptr.getLink();
}
System.out.print(ptr.getData()+ "\n");
}
}


/* Class SinglyLinkedList */
public class SinglyLinkedList
{
public static void main(String[] args)
{
Scanner scan = new Scanner(System.in);
/* Creating object of class linkedList */
linkedList list = new linkedList();
System.out.println("Singly Linked List Test\n");
char ch;
/* Perform list operations */
do
{
System.out.println("\nSingly Linked List Operations\n");
System.out.println("1. insert at begining");
System.out.println("2. insert at end");
System.out.println("3. insert at position");
System.out.println("4. delete at position");
System.out.println("5. check empty");
System.out.println("6. get size");
int choice = scan.nextInt();
switch (choice)
{
case 1 :
System.out.println("Enter integer element to insert");
list.insertAtStart( scan.nextInt() );
break;
case 2 :
System.out.println("Enter integer element to insert");
list.insertAtEnd( scan.nextInt() );
break;
case 3 :
System.out.println("Enter integer element to insert");
int num = scan.nextInt() ;
System.out.println("Enter position");
int pos = scan.nextInt() ;
if (pos <= 1 || pos > list.getSize() )
System.out.println("Invalid position\n");
else
list.insertAtPos(num, pos);
break;
case 4 :
System.out.println("Enter position");
int p = scan.nextInt() ;
if (p < 1 || p > list.getSize() )

System.out.println("Invalid position\n");
else
list.deleteAtPos(p);
break;
case 5 :
System.out.println("Empty status = "+ list.isEmpty());
break;
case 6 :
System.out.println("Size = "+ list.getSize() +" \n");
break;
default :
System.out.println("Wrong Entry \n ");
break;
}
/* Display List */
list.display();
System.out.println("\nDo you want to continue (Type y or n) \n");
ch = scan.next().charAt(0);
} while (ch == 'Y'|| ch == 'y');
}
}
Linked Stack Test

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
5

Stack = 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek

4. check empty
5. size
1
Enter integer element to push
33

Stack = 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
24

Stack = 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
87

Stack = 87 24 33 5

Do you want to continue (Type y or n)

y

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
99

Stack = 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
1

Stack = 1 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
5
Size = 6


Stack = 1 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
3
Peek Element = 1

Stack = 1 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
2
Popped Element = 1

Stack = 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty

5. size
2
Popped Element = 99

Stack = 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
2
Popped Element = 87

Stack = 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
5
Size = 3

Stack = 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push

2. pop
3. peek
4. check empty
5. size
2
Popped Element = 24

Stack = 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
3
Peek Element = 33

Stack = 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
2
Popped Element = 33

Stack = 5

Do you want to continue (Type y or n)


Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
2
Popped Element = 5

Stack = Empty

Do you want to continue (Type y or n) N


/*
* Java Program to Implement Queue
*/

import java.util.*;

/* Class arrayQueue */
class arrayQueue
{
protected int Queue[] ;
protected int front, rear, size, len;

/* Constructor */
public arrayQueue(int n)
{
size = n;
len = 0;
Queue = new int[size];
front = -1;
rear = -1;
}
/* Function to check if queue is empty */
public boolean isEmpty()
{
return front == -1;
}
/* Function to check if queue is full */
public boolean isFull()
{

return front==0 && rear == size -1 ;
}
/* Function to get the size of the queue */
public int getSize()
{
return len ;
}
/* Function to check the front element of the queue */
public int peek()
{
if (isEmpty())
throw new NoSuchElementException("Underflow Exception");
return Queue[front];
}
/* Function to insert an element to the queue */
public void insert(int i)
{
if (rear == -1)
{
front = 0;
rear = 0;
Queue[rear] = i;
}
else if (rear + 1 >= size)
throw new IndexOutOfBoundsException("Overflow Exception");
else if ( rear + 1 < size)
Queue[++rear] = i;
len++ ;
}
/* Function to remove front element from the queue */
public int remove()
{
if (isEmpty())
throw new NoSuchElementException("Underflow Exception");
else
{
len-- ;
int ele = Queue[front];
if ( front == rear)
{
front = -1;
rear = -1;

}
else
front++;
return ele;
}
}
/* Function to display the status of the queue */
public void display()
{
System.out.print("\nQueue = ");
if (len == 0)
{
System.out.print("Empty\n");
return ;
}
for (int i = front; i <= rear; i++)
System.out.print(Queue[i]+" ");
System.out.println();
}
}

/* Class QueueImplement */
public class QueueImplement
{
public static void main(String[] args)
{
Scanner scan = new Scanner(System.in);

System.out.println("Array Queue Test\n");


System.out.println("Enter Size of Integer Queue ");
int n = scan.nextInt();
/* creating object of class arrayQueue */
arrayQueue q = new arrayQueue(n);
/* Perform Queue Operations */
char ch;
do{
System.out.println("\nQueue Operations");
System.out.println("1. insert");
System.out.println("2. remove");
System.out.println("3. peek");
System.out.println("4. check empty");
System.out.println("5. check full");

System.out.println("6. size");
int choice = scan.nextInt();
switch (choice)
{
case 1 :
System.out.println("Enter integer element to insert");
try
{
q.insert( scan.nextInt() );
}
catch(Exception e)
{
System.out.println("Error : " +e.getMessage());
}
break;
case 2 :
try
{
System.out.println("Removed Element = "+q.remove());
}
catch(Exception e)
{
System.out.println("Error : " +e.getMessage());
}
break;
case 3 :
try
{
System.out.println("Peek Element = "+q.peek());
}
catch(Exception e)
{
System.out.println("Error : "+e.getMessage());
}
break;
case 4 :
System.out.println("Empty status = "+q.isEmpty());
break;
case 5 :
System.out.println("Full status = "+q.isFull());
break;
case 6 :

System.out.println("Size = "+ q.getSize());
break;
default : System.out.println("Wrong Entry \n ");
break;
}
/* display Queue */
q.display();
System.out.println("\nDo you want to continue (Type y or n) \n");
ch = scan.next().charAt(0);

} while (ch == 'Y'|| ch == 'y');


}
}

Array Queue Test

Enter Size of Integer Queue


5

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
4
Empty status = true

Queue = Empty

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to insert
24

Queue = 24

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
1
Enter integer element to insert
6

Queue = 24 6

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
1
Enter integer element to insert
16

Queue = 24 6 16

Do you want to continue (Type y or n)

y

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
1
Enter integer element to insert
19

Queue = 24 6 16 19

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
1
Enter integer element to insert
32

Queue = 24 6 16 19 32

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full

6. size
1
Enter integer element to insert
14
Error : Overflow Exception

Queue = 24 6 16 19 32

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
5
Full status = true

Queue = 24 6 16 19 32

Do you want to continue (Type y or n)

y
Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
3
Peek Element = 24
Queue = 24 6 16 19 32

Do you want to continue (Type y or n)y

Queue Operations
1. insert

2. remove
3. peek
4. check empty
5. check full
6. size
2
Removed Element = 24

Queue = 6 16 19 32

Do you want to continue (Type y or n)

/*
* Java Program to Implement Stack
*/

import java.util.*;

/* Class arrayStack */
class arrayStack
{
protected int arr[];
protected int top, size, len;
/* Constructor for arrayStack */
public arrayStack(int n)
{
size = n;
len = 0;
arr = new int[size];
top = -1;
}
/* Function to check if stack is empty */
public boolean isEmpty()
{
return top == -1;
}
/* Function to check if stack is full */
public boolean isFull()
{
return top == size -1 ;

}
/* Function to get the size of the stack */
public int getSize()
{
return len ;
}
/* Function to check the top element of the stack */
public int peek()
{
if( isEmpty() )
throw new NoSuchElementException("Underflow Exception");
return arr[top];
}
/* Function to add an element to the stack */
public void push(int i)
{
if(top + 1 >= size)
throw new IndexOutOfBoundsException("Overflow Exception");
if(top + 1 < size )
arr[++top] = i;
len++ ;
}
/* Function to delete an element from the stack */
public int pop()
{
if( isEmpty() )
throw new NoSuchElementException("Underflow Exception");
len-- ;
return arr[top--];
}
/* Function to display the status of the stack */
public void display()
{
System.out.print("\nStack = ");
if (len == 0)
{
System.out.print("Empty\n");
return ;
}
for (int i = top; i >= 0; i--)
System.out.print(arr[i]+" ");
System.out.println();

}
}

/* Class StackImplement */
public class StackImplement
{
public static void main(String[] args)
{
Scanner scan = new Scanner(System.in);
System.out.println("Stack Test\n");
System.out.println("Enter Size of Integer Stack ");
int n = scan.nextInt();
/* Creating object of class arrayStack */
arrayStack stk = new arrayStack(n);
/* Perform Stack Operations */
char ch;
do{
System.out.println("\nStack Operations");
System.out.println("1. push");
System.out.println("2. pop");
System.out.println("3. peek");
System.out.println("4. check empty");
System.out.println("5. check full");
System.out.println("6. size");
int choice = scan.nextInt();
switch (choice)
{
case 1 :
System.out.println("Enter integer element to push");
try
{
stk.push( scan.nextInt() );
}
catch (Exception e)
{
System.out.println("Error : " + e.getMessage());
}
break;
case 2 :
try
{
System.out.println("Popped Element = " + stk.pop());

}
catch (Exception e)
{
System.out.println("Error : " + e.getMessage());
}
break;
case 3 :
try
{
System.out.println("Peek Element = " + stk.peek());
}
catch (Exception e)
{
System.out.println("Error : " + e.getMessage());
}
break;
case 4 :
System.out.println("Empty status = " + stk.isEmpty());
break;
case 5 :
System.out.println("Full status = " + stk.isFull());
break;
case 6 :
System.out.println("Size = " + stk.getSize());
break;
default :
System.out.println("Wrong Entry \n ");
break;
}
/* display stack */
stk.display();
System.out.println("\nDo you want to continue (Type y or n) \n");
ch = scan.next().charAt(0);

} while (ch == 'Y'|| ch == 'y');


}
}

Stack Test

Enter Size of Integer Stack


5


Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

4
Empty status = true

Stack = Empty

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
24

Stack = 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full

6. size

1
Enter integer element to push
6

Stack = 6 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
162

Stack = 162 6 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
19

Stack = 19 162 6 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
94

Stack = 94 19 162 6 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

5
Full status = true

Stack = 94 19 162 6 24

Do you want to continue (Type y or n)

Stack Operations

1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
32
Error : Overflow Exception

Stack = 94 19 162 6 24

Do you want to continue (Type y or n)

MAP Program

import java.util.HashMap;
import java.util.Map;

public class HashMapExample {

public static void main(String[] args) {


Map<String, Integer> vehicles = new HashMap<String, Integer>();

// Add some vehicles.


vehicles.put("BMW", 5);
vehicles.put("Mercedes", 3);
vehicles.put("Audi", 4);
vehicles.put("Ford", 10);

System.out.println("Total vehicles: " + vehicles.size());

// Iterate over all vehicles, using the keySet method.


for(String key: vehicles.keySet())
System.out.println(key + " - " + vehicles.get(key));
System.out.println();

String searchKey = "Audi";
if(vehicles.containsKey(searchKey))
System.out.println("Found total " + vehicles.get(searchKey) + " "
+ searchKey + " cars!\n");

// Clear all values.


vehicles.clear();

// Equals to zero.
System.out.println("After clear operation, size: " + vehicles.size());
}
}

Output:

Total vehicles: 4
Audi - 4
Ford - 10
BMW - 5
Mercedes - 3

Found total 4 Audi cars!

After clear operation, size: 0

SET Program

import java.util.HashSet;
public class HashSetExample {
public static void main(String args[]) {
// HashSet declaration
HashSet<String> hset =
new HashSet<String>();

// Adding elements to the HashSet


hset.add("Apple");
hset.add("Mango");
hset.add("Grapes");
hset.add("Orange");
hset.add("Fig");
//Addition of duplicate elements
hset.add("Apple");
hset.add("Mango");
//Addition of null values
hset.add(null);

hset.add(null);

//Displaying HashSet elements


System.out.println(hset);
}
}

Output:

[null, Mango, Grapes, Apple, Orange, Fig]

HADOOP INSTALLATION STEPS

$ sudo apt-get update

$ sudo apt-get install default-jdk

$ java -version

$ sudo apt-get install ssh

$ sudo apt-get install rsync

$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa

$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys

$ wget -c http://apache.mirrors.lucidnetworks.net/hadoop/common/hadoop-2.7.0/hadoop-2.7.0.tar.gz

$ sudo tar -zxvf hadoop-2.7.0.tar.gz

$ sudo mv hadoop-2.7.0 /usr/local/hadoop

$ update-alternatives --config java

$ sudo nano ~/.bashrc

#Hadoop Variables
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"

$ source ~/.bashrc

$ cd /usr/local/hadoop/etc/hadoop

$ sudo nano hadoop-env.sh

#The java implementation to use.

export JAVA_HOME="/usr/lib/jvm/java-7-openjdk-amd64"

$ sudo nano core-site.xml

<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>

$ sudo nano yarn-site.xml

<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>

$ sudo cp mapred-site.xml.template mapred-site.xml

$ sudo nano mapred-site.xml

<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>

$ sudo nano hdfs-site.xml

<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop/hadoop_data/hdfs/namenode</value>
</property>

<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop/hadoop_data/hdfs/datanode</value>
</property>
</configuration>

$ cd

$ mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode

$ mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode

$ sudo chown chaal:chaal -R /usr/local/hadoop

$ hdfs namenode -format

$ start-all.sh

$ jps
Jps
NodeManager
NameNode
ResourceManager
DataNode
SecondaryNameNode

NS Raju Institute of Technology Page 32


Hadoop and Big Data Lab 2016
Hadoop file commands take the following form:
hadoop fs -cmd <args>

Adding Files and Directories to HDFS

$ hadoop fs -mkdir /user/chuck

$ hadoop fs -put example.txt


$ hadoop fs -put example.txt /user/chuck

Retrieving Files from HDFS


$ hadoop fs -cat example.txt

Deleting Files from HDFS


$ hadoop fs -rm example.txt
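
The same operations can also be performed programmatically through the org.apache.hadoop.fs.FileSystem API (the same API the matrix-multiplication driver later in this manual uses). A minimal sketch, with an illustrative class name and the paths from the commands above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HdfsFileOps
{
public static void main(String[] args) throws Exception
{
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf); // uses fs.defaultFS from core-site.xml
// create a directory (like hadoop fs -mkdir)
fs.mkdirs(new Path("/user/chuck"));
// copy a local file into HDFS (like hadoop fs -put)
fs.copyFromLocalFile(new Path("example.txt"), new Path("/user/chuck/example.txt"));
// print a file to stdout (like hadoop fs -cat)
IOUtils.copyBytes(fs.open(new Path("/user/chuck/example.txt")), System.out, 4096, false);
// delete a file (like hadoop fs -rm)
fs.delete(new Path("/user/chuck/example.txt"), false);
fs.close();
}
}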


Word Count Map Reduce program


import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

public static class Map


extends Mapper<LongWritable, Text, Text, IntWritable>{

private final static IntWritable one = new IntWritable(1); // type of output value
private Text word = new Text(); // type of output key

public void map(LongWritable key, Text value, Context context


) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString()); // line to string token

while (itr.hasMoreTokens()) {
word.set(itr.nextToken()); // set word as each input keyword
context.write(word, one); // create a pair <keyword, 1>
}
}
}

public static class Reduce


extends Reducer<Text,IntWritable,Text,IntWritable> {

private IntWritable result = new IntWritable();

public void reduce(Text key, Iterable<IntWritable> values,


Context context
) throws IOException, InterruptedException {
int sum = 0; // initialize the sum for each keyword
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);

context.write(key, result); // create a pair <keyword, number of occurences>
}
}

// Driver program
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // get all args
if (otherArgs.length != 2) {
System.err.println("Usage: WordCount <in> <out>");
System.exit(2);
}

// create a job with name "wordcount"


Job job = new Job(conf, "wordcount");
job.setJarByClass(WordCount.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);

// use the Reducer as a Combiner as well (optional)

job.setCombinerClass(Reduce.class);

// set output key type


job.setOutputKeyClass(Text.class);
// set output value type
job.setOutputValueClass(IntWritable.class);
//set the HDFS path of the input data
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
// set the HDFS path for the output
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

//Wait till job completion


System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

Execution Process:

hadoop jar wordcount.jar /usr/local/hadoop/input /usr/local/hadoop/output
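
One possible way to compile the program and build wordcount.jar before running the command above (directory names illustrative, assuming the hadoop command is on the PATH):

$ mkdir wordcount_classes
$ javac -classpath `hadoop classpath` -d wordcount_classes WordCount.java
$ jar cvfe wordcount.jar WordCount -C wordcount_classes/ .

The e option records WordCount as the jar's entry point, so the hadoop jar command above does not need the main class name on the command line.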


Copy the sample input text file into this hdfs directory –

hdfs dfs -copyFromLocal /home/rk/Desktop/sample.txt /usr/local/hadoop/input

To view the results-

hdfs dfs -cat /usr/local/hadoop/output/part-r-00000


Write a Map Reduce program that mines weather data

/**
* Question:- To find Max and Min temperature from record set stored in
* text file. Schema of record set :- tab separated (\t) CA_25-Jan-2014
* 00:12:345 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 16 04:00:093
* -14 05:12:345 35.7 06:19:345 23.1 07:34:542 12.3 08:12:187 16
* 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 -22.3 13:12:187
* 16 14:00:093 -7 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3
* 20:12:187 16 22:00:093 -7
* Expected output:- Creates files for each city and store maximum & minimum temperature
for each day along with time.
*/
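
In other words, each input line starts with a city-and-date token such as CA_25-Jan-2014 and is followed by alternating tab-separated time and temperature readings for that day; the mapper below relies on this ordering (first token = date key, odd tokens = time, even tokens = temperature).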

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
* @author http://www.devinline.com
*/
public class CalculateMaxAndMinTemeratureWithTime {
public static String calOutputName = "California";
public static String nyOutputName = "Newyork";
public static String njOutputName = "Newjersy";
public static String ausOutputName = "Austin";
public static String bosOutputName = "Boston";
public static String balOutputName = "Baltimore";

public static class WhetherForcastMapper extends


Mapper<Object, Text, Text, Text> {

public void map(Object keyOffset, Text dayReport, Context con)


throws IOException, InterruptedException {
StringTokenizer strTokens = new StringTokenizer(

dayReport.toString(), "\t");
int counter = 0;
Float currnetTemp = null;
Float minTemp = Float.MAX_VALUE;
Float maxTemp = -Float.MAX_VALUE; // Float.MIN_VALUE is the smallest positive float, not the most negative
String date = null;
String currentTime = null;
String minTempANDTime = null;
String maxTempANDTime = null;

while (strTokens.hasMoreElements()) {
if (counter == 0) {
date = strTokens.nextToken();
} else {
if (counter % 2 == 1) {
currentTime = strTokens.nextToken();
} else {
currnetTemp =
Float.parseFloat(strTokens.nextToken());
if (minTemp > currnetTemp) {
minTemp = currnetTemp;
minTempANDTime = minTemp + "AND" +
currentTime;
}
if (maxTemp < currnetTemp) {
maxTemp = currnetTemp;
maxTempANDTime = maxTemp + "AND"
+ currentTime;
}
}
}
counter++;
}
// Write to context - MinTemp, MaxTemp and corresponding time
Text temp = new Text();
temp.set(maxTempANDTime);
Text dateText = new Text();
dateText.set(date);
try {
con.write(dateText, temp);
} catch (Exception e) {
e.printStackTrace();
}

temp.set(minTempANDTime);
dateText.set(date);

con.write(dateText, temp);

}
}

public static class WhetherForcastReducer extends


Reducer<Text, Text, Text, Text> {
MultipleOutputs<Text, Text> mos;

public void setup(Context context) {


mos = new MultipleOutputs<Text, Text>(context);
}

public void reduce(Text key, Iterable<Text> values, Context context)


throws IOException, InterruptedException {
int counter = 0;
String reducerInputStr[] = null;
String f1Time = "";
String f2Time = "";
String f1 = "", f2 = "";
Text result = new Text();
for (Text value : values) {

if (counter == 0) {
reducerInputStr = value.toString().split("AND");
f1 = reducerInputStr[0];
f1Time = reducerInputStr[1];
}

else {
reducerInputStr = value.toString().split("AND");
f2 = reducerInputStr[0];
f2Time = reducerInputStr[1];
}

counter = counter + 1;
}
if (Float.parseFloat(f1) > Float.parseFloat(f2)) {

result = new Text("Time: " + f2Time + " MinTemp: " + f2 + "\t"


+ "Time: " + f1Time + " MaxTemp: " + f1);
} else {

result = new Text("Time: " + f1Time + " MinTemp: " + f1 + "\t"


+ "Time: " + f2Time + " MaxTemp: " + f2);
}

String fileName = "";
if (key.toString().substring(0, 2).equals("CA")) {
fileName = calOutputName;
} else if (key.toString().substring(0, 2).equals("NY")) {
fileName = nyOutputName;
} else if (key.toString().substring(0, 2).equals("NJ")) {
fileName = njOutputName;
} else if (key.toString().substring(0, 3).equals("AUS")) {
fileName = ausOutputName;
} else if (key.toString().substring(0, 3).equals("BOS")) {
fileName = bosOutputName;
} else if (key.toString().substring(0, 3).equals("BAL")) {
fileName = balOutputName;
}
String strArr[] = key.toString().split("_");
key.set(strArr[1]);
mos.write(fileName, key, result);
}

@Override
public void cleanup(Context context) throws IOException,
InterruptedException {
mos.close();
}
}

public static void main(String[] args) throws IOException,


ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "Wheather Statistics of USA");
job.setJarByClass(CalculateMaxAndMinTemeratureWithTime.class);

job.setMapperClass(WhetherForcastMapper.class);
// job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(WhetherForcastReducer.class);

job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);

job.setOutputKeyClass(Text.class);// <hadoop,4>
job.setOutputValueClass(Text.class);

MultipleOutputs.addNamedOutput(job, calOutputName,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, nyOutputName,
TextOutputFormat.class, Text.class, Text.class);

MultipleOutputs.addNamedOutput(job, njOutputName,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, bosOutputName,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, ausOutputName,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, balOutputName,
TextOutputFormat.class, Text.class, Text.class);

// FileInputFormat.addInputPath(job, new Path(args[0]));


// FileOutputFormat.setOutputPath(job, new Path(args[1]));
Path pathInput = new Path(

"hdfs://192.168.213.133:54310/wheatherInputData/input_temp.txt");
Path pathOutputDir = new Path(

"hdfs://192.168.213.133:54310/user/hduser1/testfs/output_mapred5");
FileInputFormat.addInputPath(job, pathInput);
FileOutputFormat.setOutputPath(job, pathOutputDir);

try {
boolean success = job.waitForCompletion(true);
System.out.println("Job executed successfully!!");
System.exit(success ? 0 : 1);
} catch (Exception e) {
e.printStackTrace();
}
}
}

Output:


25-Jan-2014 Time: 12:34:542 MinTemp: -22.3 Time: 05:12:345 MaxTemp: 35.7


26-Jan-2014 Time: 04:00:093 MinTemp: -14.0 Time: 05:12:345 MaxTemp: 55.7
27-Jan-2014 Time: 02:34:542 MinTemp: -22.3 Time: 00:14:045 MaxTemp: 35.7
28-Jan-2014 Time: 11:19:345 MinTemp: -23.3 Time: 05:12:345 MaxTemp: 35.7
29-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 02:34:542 MaxTemp: 52.9
30-Jan-2014 Time: 15:12:345 MinTemp: -15.7 Time: 03:12:187 MaxTemp: 56.0
31-Jan-2014 Time: 22:00:093 MinTemp: -27.0 Time: 05:12:345 MaxTemp: 49.2
25-Jan-2014 Time: 12:34:542 MinTemp: -22.3 Time: 05:12:345 MaxTemp: 35.7
26-Jan-2014 Time: 22:00:093 MinTemp: -27.0 Time: 05:12:345 MaxTemp: 55.7
27-Jan-2014 Time: 02:34:542 MinTemp: -22.3 Time: 05:12:345 MaxTemp: 55.7
29-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 02:34:542 MaxTemp: 62.9
30-Jan-2014 Time: 22:00:093 MinTemp: -27.0 Time: 05:12:345 MaxTemp: 49.2
31-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 03:12:187 MaxTemp: 56.0
29-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 02:34:542 MaxTemp: 52.9
30-Jan-2014 Time: 15:12:345 MinTemp: -15.7 Time: 03:12:187 MaxTemp: 56.0
28-Jan-2014 Time: 11:19:345 MinTemp: -23.3 Time: 05:12:345 MaxTemp: 35.7
29-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 02:34:542 MaxTemp: 52.9
30-Jan-2014 Time: 15:12:345 MinTemp: -15.7 Time: 03:12:187 MaxTemp: 56.0


Implementation
In this implementation, for ease of understanding, I have hardcoded the matrix dimension as 5 x 5.
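
For reference, the mapper shown later expects each matrix element as one comma-separated line of the form matrixName,row,column,value; a small hypothetical fragment of such an input file could look like:

a,0,0,63
a,0,1,45
b,0,0,30
b,1,0,28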
package com.hadoopgeek.matrix;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class Matrix
{

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
{

if(args.length !=2)
{
System.err.println("Usage : Matrix <input path> <output path>");
System.exit(-1);
}

Configuration conf = new Configuration();

conf.set("dimension", "5"); // set the matrix dimension here
Job job = Job.getInstance(conf);

//conf.set("fs.defaultFS", "hdfs://quickstart.cloudera:8020"); // take this value from core-site.xml
FileSystem fs = FileSystem.get(conf);

job.setJarByClass(Matrix.class);

// Need to set these since the map output types differ from the reduce output types
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

job.setMapperClass(MatrixMapper.class);
job.setReducerClass(MatrixReducer.class);

job.setInputFormatClass(TextInputFormat.class);

job.setOutputFormatClass(TextOutputFormat.class);

Path input = new Path(args[0]);


Path output = new Path(args[1]);

// Set the dimension of matrix

if(!fs.exists(input))
{
System.err.println("Input file doesn't exist");
return;
}
if(fs.exists(output))
{
fs.delete(output, true);
System.err.println("Output file deleted");
}

fs.close();

FileInputFormat.addInputPath(job, input);

FileOutputFormat.setOutputPath(job, output);

job.waitForCompletion(true);

System.out.println("MR Job Completed !");

}
}

Map
In the map function, each input record from the dataset is organized into key-value pairs such that the reducer can do the entire computation of the corresponding output cell. The source code is given below.
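
For example, with the dimension fixed at 5, a hypothetical element a,1,2,10 would be emitted under the keys 1,0 1,1 1,2 1,3 1,4 (every cell of result row 1), while b,1,3,7 would be emitted under the keys 0,3 1,3 2,3 3,3 4,3 (every cell of result column 3).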

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MatrixMapper extends
Mapper<LongWritable, Text, Text, Text>
{
private int dim; // matrix dimension, read from the "dimension" property set in the driver

@Override
protected void setup(Context context)
{
dim = Integer.parseInt(context.getConfiguration().get("dimension", "5"));
}

@Override
protected void map
(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
// input format is ["a", 0, 0, 63]
String[] csv = value.toString().split(",");
String matrix = csv[0].trim();
int row = Integer.parseInt(csv[1].trim());
int col = Integer.parseInt(csv[2].trim());
if(matrix.contains("a"))
{
// an element of A is needed by every cell in its row of the result
for (int i = 0; i < dim; i++)
{
String akey = Integer.toString(row) + "," + Integer.toString(i);
context.write(new Text(akey), value);
}
}
if(matrix.contains("b"))
{
// an element of B is needed by every cell in its column of the result
for (int i = 0; i < dim; i++)
{
String akey = Integer.toString(i) + "," + Integer.toString(col);
context.write(new Text(akey), value);
}
}
}
}

Reducer
Input to the reducer is a key that corresponds to one output cell of the resultant matrix, together with all the values required to compute that cell. The source code of the reduce function is given below.
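
For example, the reduce call for key 1,3 receives the five elements of row 1 of a (indexed by their column) and the five elements of column 3 of b (indexed by their row), and the emitted value is a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] + a[4]*b[4], i.e. the dot product of row 1 of A with column 3 of B.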
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MatrixReducer extends
Reducer<Text, Text, Text, IntWritable> {

@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {

int[] a = new int[5];


int[] b = new int[5];
// b, 2, 0, 30
for (Text value : values) {
System.out.println(value);
String cell[] = value.toString().split(",");
if (cell[0].contains("a")) // take rows here
{
int col = Integer.parseInt(cell[2].trim());
a[col] = Integer.parseInt(cell[3].trim());
}
else if (cell[0].contains("b")) // take col here
{
int row = Integer.parseInt(cell[1].trim());
b[row] = Integer.parseInt(cell[3].trim());
}
}
int total = 0;
for (int i = 0; i < 5; i++) {
int val = a[i] * b[i];
total += val;
}
context.write(key, new IntWritable(total));
}
}


Output
The above MR job will generate output as shown below.
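
Each line is one cell of the 5 x 5 product matrix: the key is the row,column index of the cell and the value is the dot product computed by the reducer for that cell.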
0,0 11878
0,1 14044
0,2 16031
0,3 5964
0,4 15874
1,0 4081
1,1 6914
1,2 8282
1,3 7479
1,4 9647
2,0 6844
2,1 9880
2,2 10636
2,3 6973
2,4 8873
3,0 10512
3,1 12037
3,2 10587
3,3 2934
3,4 5274
4,0 11182
4,1 14591
4,2 10954
4,3 1660
4,4 9981


PIG Installation

$ cd Downloads/

- Unzip the tar file.


$ tar -xvf pig-0.11.1.tar.gz

- Create directory
$ sudo mkdir /usr/lib/pig

- Move pig-0.11.1 to pig


$ mv pig-0.11.1 /usr/lib/pig/

- Set the PIG_HOME path in bashrc file


To open bashrc file use this command
$ gedit ~/.bashrc

In bashrc file append the below 2 statements


export PIG_HOME=/usr/lib/pig/pig-0.11.1

export PATH=$PATH:$PIG_HOME/bin

- Restart your computer or reload the file with [ . ~/.bashrc ]


Now let’s test the installation
On the command prompt type
$ pig -h

It shows the help related to Pig and its various commands.

- Starting pig in local mode
$ pig -x local
grunt>

- Starting pig in mapreduce mode


$ pig -x mapreduce

or
$ pig

----loading and parsing data-----

A = load '/weatherPIG' using TextLoader as (data:chararray);


AF = foreach A generate TRIM(SUBSTRING(data, 6, 14)), TRIM(SUBSTRING(data, 46, 53)),
TRIM(SUBSTRING(data, 38, 45));
store AF into '/data6' using PigStorage(',');
S = load '/data6/part-m-00000' using PigStorage(',') as (date:chararray, min:double, max:double);

-------Hot Days------

X = filter S by max > 25;


dump X;

-------Cold Days------

X = filter S by min < 0;


dump X;

-------Hottest Day-----

/* puts S's data in H1's Tuple */

H1 = group S all;
I = foreach H1 generate MAX(S.max) as maximum;
X = filter S by max == I.maximum;

-------Coldest Day------

H2 = group S all;
J = foreach H2 generate MIN(S.min) as minimum;
X = filter S by min == J.minimum;

-----UDF-----
register PIGUdfCorrupt.jar;

A = load '/weatherPIG' using TextLoader as (data:chararray);


AF = foreach A generate TRIM(SUBSTRING(data, 6, 14)),
IfCorrupted(TRIM(SUBSTRING(data, 46, 53))), IfCorrupted(TRIM(SUBSTRING(data, 38,
45)));
store AF into '/data2' using PigStorage(',');
S = load '/data2/part-m-00000' using PigStorage(',') as (date:chararray, min:double, max:double);

------------------

A = load '/data1' as (a1:int, a2:int);


B = load '/data2' as (b1:int, b2:int);
X = UNION A, B;
dump X;
//onschema

----------------------------------

A = LOAD '/j1' as (a1:int, a2:int, a3:int);


B = LOAD '/j2' as (b1:int, b2:int);
X = JOIN A BY a1, B BY b1;
dump X;

------------------------------------

A = load '/student' as (name:chararray, age:int, gpa:float);


B = load '/studentRoll' as (name:chararray, rollno:int);

X = group A by name;
dump X;

X = cogroup A by name, B by name;


dump X;

register myudf.jar;
X = filter A by IsOfAge(age);


Hive Installation
Installing HIVE:

- Browse to the link: http://apache.claz.org/hive/stable/
- Click the apache-hive-0.13.0-bin.tar.gz
- Save and Extract it

Commands
user@ubuntu:~$ cd /usr/lib/
user@ubuntu:~$ sudo mkdir hive
user@ubuntu:~$ cd Downloads
user@ubuntu:~$ sudo mv apache-hive-0.13.0-bin /usr/lib/hive
Setting Hive environment variable:

Commands

user@ubuntu:~$ cd
user@ubuntu:~$ sudo gedit ~/.bashrc
Copy and paste the following lines at end of the file

# Set HIVE_HOME
export HIVE_HOME="/usr/lib/hive/apache-hive-0.13.0-bin"
PATH=$PATH:$HIVE_HOME/bin
export PATH
Setting HADOOP_PATH in HIVE config.sh

Commands

user@ubuntu:~$ cd /usr/lib/hive/apache-hive-0.13.0-bin/bin
user@ubuntu:~$ sudo gedit hive-config.sh
Go to the line where the following statements are written

# Allow alternate conf dir location.


HIVE_CONF_DIR="${HIVE_CONF_DIR:-$HIVE_HOME/conf}"
export HIVE_CONF_DIR=$HIVE_CONF_DIR
export HIVE_AUX_JARS_PATH=$HIVE_AUX_JARS_PATH
Below this write the following

export HADOOP_HOME=/usr/local/hadoop (use the path where Hadoop is installed)


Create Hive directories within HDFS


Command

user@ubuntu:~$ hadoop fs -mkdir -p /usr/hive/warehouse


Setting READ/WRITE permission for table

Command

user@ubuntu:~$ hadoop fs -chmod g+w /usr/hive/warehouse


HIVE launch

Command

user@ubuntu:~$ hive
The Hive shell prompt will appear:

Creating a database

Command

hive> create database mydb;

OUTPUT

OK
Time taken: 0.369 seconds
hive>

Configuring hive-site.xml:

Open with text-editor and change the following property

<property>
<name>hive.metastore.local</name>
<value>TRUE</value>
<description>controls whether to connect to a remote metastore server or open a new metastore server in Hive Client JVM</description>
</property>

<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://usr/lib/hive/apache-hive-0.13.0-bin/metastore_db?
createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>

<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>

<property>
<name>hive.metastore.warehouse.dir</name>
<value>/usr/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>

user@ubuntu:~$ sudo gedit sample.sql

create database sample;


use sample;

create table product(product int, productname string, price float) row format delimited fields terminated by ',';
describe product;
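
Data can then be loaded into this table the same way the timesheet table is loaded further below; a hypothetical example (HDFS path illustrative):

LOAD DATA INPATH '/product.csv' OVERWRITE INTO TABLE product;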

CREATE TABLE drivers (driverId INT, name STRING, ssn BIGINT, location STRING,
certified STRING, wageplan STRING);

insert overwrite table drivers


SELECT
regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,
regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) name,
regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) ssn,
regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) location,
regexp_extract(col_value, '^(?:([^,]*),?){5}', 1) certified,
regexp_extract(col_value, '^(?:([^,]*),?){6}', 1) wageplan

from temp_drivers;

Select * from drivers LIMIT 100;

CREATE TABLE temp_timesheet (col_value string);

CREATE TABLE timesheet (driverId INT, week INT, hours_logged INT , miles_logged INT);

LOAD DATA INPATH '/user/maria_dev/timesheet.csv' OVERWRITE INTO TABLE temp_timesheet;

insert overwrite table timesheet


SELECT
regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,
regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) week,

regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) hours_logged,
regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) miles_logged

from temp_timesheet;

SELECT driverId, sum(hours_logged), sum(miles_logged) FROM timesheet GROUP BY driverId;


SELECT d.driverId, d.name, t.total_hours, t.total_miles from drivers d


JOIN (SELECT driverId, sum(hours_logged) total_hours, sum(miles_logged) total_miles FROM
timesheet GROUP BY driverId ) t
ON (d.driverId = t.driverId);
