
Hadoop and Big Data Lab 2016

/*
* Java Program to Implement Singly Linked List
*/

import java.util.Scanner;

/* Class Node */
class Node
{
protected int data;
protected Node link;

/* Constructor */
public Node()
{
link = null;
data = 0;
}
/* Constructor */
public Node(int d,Node n)
{
data = d;
link = n;
}
/* Function to set link to next Node */
public void setLink(Node n)
{
link = n;
}
/* Function to set data to current Node */
public void setData(int d)
{
data = d;
}
/* Function to get link to next node */
public Node getLink()
{
return link;
}
/* Function to get data from current Node */
public int getData()
{
return data;
}
}

/* Class linkedList */
class linkedList
{
protected Node start;
protected Node end ;
public int size ;

/* Constructor */
public linkedList()
{
start = null;
end = null;
size = 0;
}
/* Function to check if list is empty */
public boolean isEmpty()
{
return start == null;
}
/* Function to get size of list */
public int getSize()
{
return size;
}
/* Function to insert an element at begining */
public void insertAtStart(int val)
{
Node nptr = new Node(val, null);
size++ ;
if(start == null)
{
start = nptr;
end = start;
}
else
{
nptr.setLink(start);
start = nptr;
}
}
/* Function to insert an element at end */
public void insertAtEnd(int val)
{
Node nptr = new Node(val,null);
size++ ;
if(start == null)

{
start = nptr;
end = start;
}
else
{
end.setLink(nptr);
end = nptr;
}
}
/* Function to insert an element at position */
public void insertAtPos(int val , int pos)
{
Node nptr = new Node(val, null);
Node ptr = start;
pos = pos - 1 ;
for (int i = 1; i < size; i++)
{
if (i == pos)
{
Node tmp = ptr.getLink() ;
ptr.setLink(nptr);
nptr.setLink(tmp);
break;
}
ptr = ptr.getLink();
}
size++ ;
}
/* Function to delete an element at position */
public void deleteAtPos(int pos)
{
if (pos == 1)
{
start = start.getLink();
size--;
return ;
}
if (pos == size)
{
Node s = start;
Node t = start;
while (s != end)
{
t = s;
s = s.getLink();

}
end = t;
end.setLink(null);
size --;
return;
}
Node ptr = start;
pos = pos - 1 ;
for (int i = 1; i < size - 1; i++)
{
if (i == pos)
{
Node tmp = ptr.getLink();
tmp = tmp.getLink();
ptr.setLink(tmp);
break;
}
ptr = ptr.getLink();
}
size-- ;
}
/* Function to display elements */
public void display()
{
System.out.print("\nSingly Linked List = ");
if (size == 0)
{
System.out.print("empty\n");
return;
}
if (start.getLink() == null)
{
System.out.println(start.getData() );
return;
}
Node ptr = start;
System.out.print(start.getData()+ "->");
ptr = start.getLink();
while (ptr.getLink() != null)
{
System.out.print(ptr.getData()+ "->");
ptr = ptr.getLink();
}
System.out.print(ptr.getData()+ "\n");
}
}


/* Class SinglyLinkedList */
public class SinglyLinkedList
{
public static void main(String[] args)
{
Scanner scan = new Scanner(System.in);
/* Creating object of class linkedList */
linkedList list = new linkedList();
System.out.println("Singly Linked List Test\n");
char ch;
/* Perform list operations */
do
{
System.out.println("\nSingly Linked List Operations\n");
System.out.println("1. insert at begining");
System.out.println("2. insert at end");
System.out.println("3. insert at position");
System.out.println("4. delete at position");
System.out.println("5. check empty");
System.out.println("6. get size");
int choice = scan.nextInt();
switch (choice)
{
case 1 :
System.out.println("Enter integer element to insert");
list.insertAtStart( scan.nextInt() );
break;
case 2 :
System.out.println("Enter integer element to insert");
list.insertAtEnd( scan.nextInt() );
break;
case 3 :
System.out.println("Enter integer element to insert");
int num = scan.nextInt() ;
System.out.println("Enter position");
int pos = scan.nextInt() ;
if (pos <= 1 || pos > list.getSize() )
System.out.println("Invalid position\n");
else
list.insertAtPos(num, pos);
break;
case 4 :
System.out.println("Enter position");
int p = scan.nextInt() ;
if (p < 1 || p > list.getSize() )

System.out.println("Invalid position\n");
else
list.deleteAtPos(p);
break;
case 5 :
System.out.println("Empty status = "+ list.isEmpty());
break;
case 6 :
System.out.println("Size = "+ list.getSize() +" \n");
break;
default :
System.out.println("Wrong Entry \n ");
break;
}
/* Display List */
list.display();
System.out.println("\nDo you want to continue (Type y or n) \n");
ch = scan.next().charAt(0);
} while (ch == 'Y'|| ch == 'y');
}
}
Linked Stack Test

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
5

Stack = 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek

4. check empty
5. size
1
Enter integer element to push
33

Stack = 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
24

Stack = 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
87

Stack = 87 24 33 5

Do you want to continue (Type y or n)

y

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
99

Stack = 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
1
Enter integer element to push
1

Stack = 1 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
5
Size = 6


Stack = 1 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
3
Peek Element = 1

Stack = 1 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
2
Popped Element = 1

Stack = 99 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty

5. size
2
Popped Element = 99

Stack = 87 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
2
Popped Element = 87

Stack = 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
5
Size = 3

Stack = 24 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push

2. pop
3. peek
4. check empty
5. size
2
Popped Element = 24

Stack = 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
3
Peek Element = 33

Stack = 33 5

Do you want to continue (Type y or n)

Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
2
Popped Element = 33

Stack = 5

Do you want to continue (Type y or n)


Linked Stack Operations


1. push
2. pop
3. peek
4. check empty
5. size
2
Popped Element = 5

Stack = Empty

Do you want to continue (Type y or n) N


/*
* Java Program to Implement Queue
*/

import java.util.*;

/* Class arrayQueue */
class arrayQueue
{
protected int Queue[] ;
protected int front, rear, size, len;

/* Constructor */
public arrayQueue(int n)
{
size = n;
len = 0;
Queue = new int[size];
front = -1;
rear = -1;
}
/* Function to check if queue is empty */
public boolean isEmpty()
{
return front == -1;
}
/* Function to check if queue is full */
public boolean isFull()
{

return front==0 && rear == size -1 ;
}
/* Function to get the size of the queue */
public int getSize()
{
return len ;
}
/* Function to check the front element of the queue */
public int peek()
{
if (isEmpty())
throw new NoSuchElementException("Underflow Exception");
return Queue[front];
}
/* Function to insert an element to the queue */
public void insert(int i)
{
if (rear == -1)
{
front = 0;
rear = 0;
Queue[rear] = i;
}
else if (rear + 1 >= size)
throw new IndexOutOfBoundsException("Overflow Exception");
else if ( rear + 1 < size)
Queue[++rear] = i;
len++ ;
}
/* Function to remove front element from the queue */
public int remove()
{
if (isEmpty())
throw new NoSuchElementException("Underflow Exception");
else
{
len-- ;
int ele = Queue[front];
if ( front == rear)
{
front = -1;
rear = -1;

}
else
front++;
return ele;
}
}
/* Function to display the status of the queue */
public void display()
{
System.out.print("\nQueue = ");
if (len == 0)
{
System.out.print("Empty\n");
return ;
}
for (int i = front; i <= rear; i++)
System.out.print(Queue[i]+" ");
System.out.println();
}
}

/* Class QueueImplement */
public class QueueImplement
{
public static void main(String[] args)
{
Scanner scan = new Scanner(System.in);

System.out.println("Array Queue Test\n");


System.out.println("Enter Size of Integer Queue ");
int n = scan.nextInt();
/* creating object of class arrayQueue */
arrayQueue q = new arrayQueue(n);
/* Perform Queue Operations */
char ch;
do{
System.out.println("\nQueue Operations");
System.out.println("1. insert");
System.out.println("2. remove");
System.out.println("3. peek");
System.out.println("4. check empty");
System.out.println("5. check full");

System.out.println("6. size");
int choice = scan.nextInt();
switch (choice)
{
case 1 :
System.out.println("Enter integer element to insert");
try
{
q.insert( scan.nextInt() );
}
catch(Exception e)
{
System.out.println("Error : " +e.getMessage());
}
break;
case 2 :
try
{
System.out.println("Removed Element = "+q.remove());
}
catch(Exception e)
{
System.out.println("Error : " +e.getMessage());
}
break;
case 3 :
try
{
System.out.println("Peek Element = "+q.peek());
}
catch(Exception e)
{
System.out.println("Error : "+e.getMessage());
}
break;
case 4 :
System.out.println("Empty status = "+q.isEmpty());
break;
case 5 :
System.out.println("Full status = "+q.isFull());
break;
case 6 :

System.out.println("Size = "+ q.getSize());
break;
default : System.out.println("Wrong Entry \n ");
break;
}
/* display Queue */
q.display();
System.out.println("\nDo you want to continue (Type y or n) \n");
ch = scan.next().charAt(0);

} while (ch == 'Y'|| ch == 'y');


}
}

Array Queue Test

Enter Size of Integer Queue


5

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
4
Empty status = true

Queue = Empty

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to insert
24

Queue = 24

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
1
Enter integer element to insert
6

Queue = 24 6

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
1
Enter integer element to insert
16

Queue = 24 6 16

Do you want to continue (Type y or n)

y

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
1
Enter integer element to insert
19

Queue = 24 6 16 19

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
1
Enter integer element to insert
32

Queue = 24 6 16 19 32

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full

6. size
1
Enter integer element to insert
14
Error : Overflow Exception

Queue = 24 6 16 19 32

Do you want to continue (Type y or n)

Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
5
Full status = true

Queue = 24 6 16 19 32

Do you want to continue (Type y or n)

y
Queue Operations
1. insert
2. remove
3. peek
4. check empty
5. check full
6. size
3
Peek Element = 24
Queue = 24 6 16 19 32

Do you want to continue (Type y or n)y

Queue Operations
1. insert

2. remove
3. peek
4. check empty
5. check full
6. size
2
Removed Element = 24

Queue = 6 16 19 32

Do you want to continue (Type y or n)

/*
* Java Program to Implement Stack
*/

import java.util.*;

/* Class arrayStack */
class arrayStack
{
protected int arr[];
protected int top, size, len;
/* Constructor for arrayStack */
public arrayStack(int n)
{
size = n;
len = 0;
arr = new int[size];
top = -1;
}
/* Function to check if stack is empty */
public boolean isEmpty()
{
return top == -1;
}
/* Function to check if stack is full */
public boolean isFull()
{
return top == size -1 ;

}
/* Function to get the size of the stack */
public int getSize()
{
return len ;
}
/* Function to check the top element of the stack */
public int peek()
{
if( isEmpty() )
throw new NoSuchElementException("Underflow Exception");
return arr[top];
}
/* Function to add an element to the stack */
public void push(int i)
{
if(top + 1 >= size)
throw new IndexOutOfBoundsException("Overflow Exception");
if(top + 1 < size )
arr[++top] = i;
len++ ;
}
/* Function to delete an element from the stack */
public int pop()
{
if( isEmpty() )
throw new NoSuchElementException("Underflow Exception");
len-- ;
return arr[top--];
}
/* Function to display the status of the stack */
public void display()
{
System.out.print("\nStack = ");
if (len == 0)
{
System.out.print("Empty\n");
return ;
}
for (int i = top; i >= 0; i--)
System.out.print(arr[i]+" ");
System.out.println();

}
}

/* Class StackImplement */
public class StackImplement
{
public static void main(String[] args)
{
Scanner scan = new Scanner(System.in);
System.out.println("Stack Test\n");
System.out.println("Enter Size of Integer Stack ");
int n = scan.nextInt();
/* Creating object of class arrayStack */
arrayStack stk = new arrayStack(n);
/* Perform Stack Operations */
char ch;
do{
System.out.println("\nStack Operations");
System.out.println("1. push");
System.out.println("2. pop");
System.out.println("3. peek");
System.out.println("4. check empty");
System.out.println("5. check full");
System.out.println("6. size");
int choice = scan.nextInt();
switch (choice)
{
case 1 :
System.out.println("Enter integer element to push");
try
{
stk.push( scan.nextInt() );
}
catch (Exception e)
{
System.out.println("Error : " + e.getMessage());
}
break;
case 2 :
try
{
System.out.println("Popped Element = " + stk.pop());

}
catch (Exception e)
{
System.out.println("Error : " + e.getMessage());
}
break;
case 3 :
try
{
System.out.println("Peek Element = " + stk.peek());
}
catch (Exception e)
{
System.out.println("Error : " + e.getMessage());
}
break;
case 4 :
System.out.println("Empty status = " + stk.isEmpty());
break;
case 5 :
System.out.println("Full status = " + stk.isFull());
break;
case 6 :
System.out.println("Size = " + stk.getSize());
break;
default :
System.out.println("Wrong Entry \n ");
break;
}
/* display stack */
stk.display();
System.out.println("\nDo you want to continue (Type y or n) \n");
ch = scan.next().charAt(0);

} while (ch == 'Y'|| ch == 'y');


}
}

Stack Test

Enter Size of Integer Stack


5


Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

4
Empty status = true

Stack = Empty

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
24

Stack = 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full

6. size

1
Enter integer element to push
6

Stack = 6 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
162

Stack = 162 6 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
19

Stack = 19 162 6 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
94

Stack = 94 19 162 6 24

Do you want to continue (Type y or n)

Stack Operations
1. push
2. pop
3. peek
4. check empty
5. check full
6. size

5
Full status = true

Stack = 94 19 162 6 24

Do you want to continue (Type y or n)

Stack Operations

1. push
2. pop
3. peek
4. check empty
5. check full
6. size

1
Enter integer element to push
32
Error : Overflow Exception

Stack = 94 19 162 6 24

Do you want to continue (Type y or n)

MAP Program

import java.util.HashMap;
import java.util.Map;

public class HashMapExample {

public static void main(String[] args) {


Map<String, Integer> vehicles = new HashMap<String, Integer>();

// Add some vehicles.


vehicles.put("BMW", 5);
vehicles.put("Mercedes", 3);
vehicles.put("Audi", 4);
vehicles.put("Ford", 10);

System.out.println("Total vehicles: " + vehicles.size());

// Iterate over all vehicles, using the keySet method.


for(String key: vehicles.keySet())
System.out.println(key + " - " + vehicles.get(key));
System.out.println();

String searchKey = "Audi";
if(vehicles.containsKey(searchKey))
System.out.println("Found total " + vehicles.get(searchKey) + " "
+ searchKey + " cars!\n");

// Clear all values.


vehicles.clear();

// Equals to zero.
System.out.println("After clear operation, size: " + vehicles.size());
}
}

Output:

Total vehicles: 4
Audi - 4
Ford - 10
BMW - 5
Mercedes - 3

Found total 4 Audi cars!

After clear operation, size: 0

SET Program

import java.util.HashSet;
public class HashSetExample {
public static void main(String args[]) {
// HashSet declaration
HashSet<String> hset =
new HashSet<String>();

// Adding elements to the HashSet


hset.add("Apple");
hset.add("Mango");
hset.add("Grapes");
hset.add("Orange");
hset.add("Fig");
//Addition of duplicate elements
hset.add("Apple");
hset.add("Mango");
//Addition of null values
hset.add(null);

hset.add(null);

//Displaying HashSet elements


System.out.println(hset);
}
}

Output:

[null, Mango, Grapes, Apple, Orange, Fig]

HADOOP INSTALLATION STEPS

$ sudo apt-get update

$ sudo apt-get install default-jdk

$ java -version

$ sudo apt-get install ssh

$ sudo apt-get install rsync

$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa

$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys

$ wget -c http://apache.mirrors.lucidnetworks.net/hadoop/common/hadoop-2.7.0/hadoop-2.7.0.tar.gz

$ sudo tar -zxvf hadoop-2.7.0.tar.gz

$ sudo mv hadoop-2.7.0 /usr/local/hadoop

$ update-alternatives --config java

$ sudo nano ~/.bashrc

#Hadoop Variables
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"

$ source ~/.bashrc

$ cd /usr/local/hadoop/etc/hadoop

$ sudo nano hadoop-env.sh

#The java implementation to use.

export JAVA_HOME="/usr/lib/jvm/java-7-openjdk-amd64"

$ sudo nano core-site.xml

<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>

$ sudo nano yarn-site.xml

<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>

$ sudo cp mapred-site.xml.template mapred-site.xml

$ sudo nano mapred-site.xml

<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>

$ sudo nano hdfs-site.xml

<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop/hadoop_data/hdfs/namenode</value>
</property>

<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop/hadoop_data/hdfs/datanode</value>
</property>
</configuration>

$ cd

$ mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode

$ mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode

$ sudo chown chaal:chaal -R /usr/local/hadoop

$ hdfs namenode -format

$ start-all.sh

$ jps
Jps
NodeManager
NameNode
ResourceManager
DataNode
SecondaryNameNode

NS Raju Institute of Technology Page 32


Hadoop and Big Data Lab 2016
Hadoop file commands take the following form:
hadoop fs -cmd <args>

Adding Files and Directories to HDFS

$ hadoop fs -mkdir /user/chuck

$ hadoop fs -put example.txt


$ hadoop fs -put example.txt /user/chuck

Retrieving Files from HDFS


$ hadoop fs -cat example.txt

Deleting Files from HDFS


$ hadoop fs -rm example.txt
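
The same operations can also be performed programmatically through the org.apache.hadoop.fs.FileSystem API (the same API the matrix-multiplication driver later in this manual uses). A minimal sketch, with an illustrative class name and the paths from the commands above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HdfsFileOps
{
public static void main(String[] args) throws Exception
{
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf); // uses fs.defaultFS from core-site.xml
// create a directory (like hadoop fs -mkdir)
fs.mkdirs(new Path("/user/chuck"));
// copy a local file into HDFS (like hadoop fs -put)
fs.copyFromLocalFile(new Path("example.txt"), new Path("/user/chuck/example.txt"));
// print a file to stdout (like hadoop fs -cat)
IOUtils.copyBytes(fs.open(new Path("/user/chuck/example.txt")), System.out, 4096, false);
// delete a file (like hadoop fs -rm)
fs.delete(new Path("/user/chuck/example.txt"), false);
fs.close();
}
}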


Word Count Map Reduce program


import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

public static class Map


extends Mapper<LongWritable, Text, Text, IntWritable>{

private final static IntWritable one = new IntWritable(1); // type of output value
private Text word = new Text(); // type of output key

public void map(LongWritable key, Text value, Context context


) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString()); // line to string token

while (itr.hasMoreTokens()) {
word.set(itr.nextToken()); // set word as each input keyword
context.write(word, one); // create a pair <keyword, 1>
}
}
}

public static class Reduce


extends Reducer<Text,IntWritable,Text,IntWritable> {

private IntWritable result = new IntWritable();

public void reduce(Text key, Iterable<IntWritable> values,


Context context
) throws IOException, InterruptedException {
int sum = 0; // initialize the sum for each keyword
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);

context.write(key, result); // create a pair <keyword, number of occurences>
}
}

// Driver program
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // get all args
if (otherArgs.length != 2) {
System.err.println("Usage: WordCount <in> <out>");
System.exit(2);
}

// create a job with name "wordcount"


Job job = new Job(conf, "wordcount");
job.setJarByClass(WordCount.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);

// use the Reducer as a Combiner as well (optional)

job.setCombinerClass(Reduce.class);

// set output key type


job.setOutputKeyClass(Text.class);
// set output value type
job.setOutputValueClass(IntWritable.class);
//set the HDFS path of the input data
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
// set the HDFS path for the output
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

//Wait till job completion


System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

Execution Process:

hadoop jar wordcount.jar /usr/local/hadoop/input /usr/local/hadoop/output
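
One possible way to compile the program and build wordcount.jar before running the command above (directory names illustrative, assuming the hadoop command is on the PATH):

$ mkdir wordcount_classes
$ javac -classpath `hadoop classpath` -d wordcount_classes WordCount.java
$ jar cvfe wordcount.jar WordCount -C wordcount_classes/ .

The e option records WordCount as the jar's entry point, so the hadoop jar command above does not need the main class name on the command line.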


Copy the sample input text file into this hdfs directory –

hdfs dfs -copyFromLocal /home/rk/Desktop/sample.txt /usr/local/hadoop/input

To view the results-

hdfs dfs -cat /usr/local/hadoop/output/part-r-00000


Write a Map Reduce program that mines weather data

/**
* Question:- To find Max and Min temperature from record set stored in
* text file. Schema of record set :- tab separated (\t) CA_25-Jan-2014
* 00:12:345 15.7 01:19:345 23.1 02:34:542 12.3 03:12:187 16 04:00:093
* -14 05:12:345 35.7 06:19:345 23.1 07:34:542 12.3 08:12:187 16
* 09:00:093 -7 10:12:345 15.7 11:19:345 23.1 12:34:542 -22.3 13:12:187
* 16 14:00:093 -7 15:12:345 15.7 16:19:345 23.1 19:34:542 12.3
* 20:12:187 16 22:00:093 -7
* Expected output:- Creates files for each city and store maximum & minimum temperature
for each day along with time.
*/
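
In other words, each input line starts with a city-and-date token such as CA_25-Jan-2014 and is followed by alternating tab-separated time and temperature readings for that day; the mapper below relies on this ordering (first token = date key, odd tokens = time, even tokens = temperature).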

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
* @author http://www.devinline.com
*/
public class CalculateMaxAndMinTemeratureWithTime {
public static String calOutputName = "California";
public static String nyOutputName = "Newyork";
public static String njOutputName = "Newjersy";
public static String ausOutputName = "Austin";
public static String bosOutputName = "Boston";
public static String balOutputName = "Baltimore";

public static class WhetherForcastMapper extends


Mapper<Object, Text, Text, Text> {

public void map(Object keyOffset, Text dayReport, Context con)


throws IOException, InterruptedException {
StringTokenizer strTokens = new StringTokenizer(

dayReport.toString(), "\t");
int counter = 0;
Float currnetTemp = null;
Float minTemp = Float.MAX_VALUE;
Float maxTemp = -Float.MAX_VALUE; // Float.MIN_VALUE is the smallest positive float, not the most negative
String date = null;
String currentTime = null;
String minTempANDTime = null;
String maxTempANDTime = null;

while (strTokens.hasMoreElements()) {
if (counter == 0) {
date = strTokens.nextToken();
} else {
if (counter % 2 == 1) {
currentTime = strTokens.nextToken();
} else {
currnetTemp =
Float.parseFloat(strTokens.nextToken());
if (minTemp > currnetTemp) {
minTemp = currnetTemp;
minTempANDTime = minTemp + "AND" +
currentTime;
}
if (maxTemp < currnetTemp) {
maxTemp = currnetTemp;
maxTempANDTime = maxTemp + "AND"
+ currentTime;
}
}
}
counter++;
}
// Write to context - MinTemp, MaxTemp and corresponding time
Text temp = new Text();
temp.set(maxTempANDTime);
Text dateText = new Text();
dateText.set(date);
try {
con.write(dateText, temp);
} catch (Exception e) {
e.printStackTrace();
}

temp.set(minTempANDTime);
dateText.set(date);

con.write(dateText, temp);

}
}

public static class WhetherForcastReducer extends


Reducer<Text, Text, Text, Text> {
MultipleOutputs<Text, Text> mos;

public void setup(Context context) {


mos = new MultipleOutputs<Text, Text>(context);
}

public void reduce(Text key, Iterable<Text> values, Context context)


throws IOException, InterruptedException {
int counter = 0;
String reducerInputStr[] = null;
String f1Time = "";
String f2Time = "";
String f1 = "", f2 = "";
Text result = new Text();
for (Text value : values) {

if (counter == 0) {
reducerInputStr = value.toString().split("AND");
f1 = reducerInputStr[0];
f1Time = reducerInputStr[1];
}

else {
reducerInputStr = value.toString().split("AND");
f2 = reducerInputStr[0];
f2Time = reducerInputStr[1];
}

counter = counter + 1;
}
if (Float.parseFloat(f1) > Float.parseFloat(f2)) {

result = new Text("Time: " + f2Time + " MinTemp: " + f2 + "\t"


+ "Time: " + f1Time + " MaxTemp: " + f1);
} else {

result = new Text("Time: " + f1Time + " MinTemp: " + f1 + "\t"


+ "Time: " + f2Time + " MaxTemp: " + f2);
}

String fileName = "";
if (key.toString().substring(0, 2).equals("CA")) {
fileName = calOutputName;
} else if (key.toString().substring(0, 2).equals("NY")) {
fileName = nyOutputName;
} else if (key.toString().substring(0, 2).equals("NJ")) {
fileName = njOutputName;
} else if (key.toString().substring(0, 3).equals("AUS")) {
fileName = ausOutputName;
} else if (key.toString().substring(0, 3).equals("BOS")) {
fileName = bosOutputName;
} else if (key.toString().substring(0, 3).equals("BAL")) {
fileName = balOutputName;
}
String strArr[] = key.toString().split("_");
key.set(strArr[1]);
mos.write(fileName, key, result);
}

@Override
public void cleanup(Context context) throws IOException,
InterruptedException {
mos.close();
}
}

public static void main(String[] args) throws IOException,


ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "Wheather Statistics of USA");
job.setJarByClass(CalculateMaxAndMinTemeratureWithTime.class);

job.setMapperClass(WhetherForcastMapper.class);
// job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(WhetherForcastReducer.class);

job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);

job.setOutputKeyClass(Text.class);// <hadoop,4>
job.setOutputValueClass(Text.class);

MultipleOutputs.addNamedOutput(job, calOutputName,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, nyOutputName,
TextOutputFormat.class, Text.class, Text.class);

MultipleOutputs.addNamedOutput(job, njOutputName,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, bosOutputName,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, ausOutputName,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(job, balOutputName,
TextOutputFormat.class, Text.class, Text.class);

// FileInputFormat.addInputPath(job, new Path(args[0]));


// FileOutputFormat.setOutputPath(job, new Path(args[1]));
Path pathInput = new Path(

"hdfs://192.168.213.133:54310/wheatherInputData/input_temp.txt");
Path pathOutputDir = new Path(

"hdfs://192.168.213.133:54310/user/hduser1/testfs/output_mapred5");
FileInputFormat.addInputPath(job, pathInput);
FileOutputFormat.setOutputPath(job, pathOutputDir);

try {
boolean success = job.waitForCompletion(true);
System.out.println("Job executed successfully!!");
System.exit(success ? 0 : 1);
} catch (Exception e) {
e.printStackTrace();
}
}
}

Output:


25-Jan-2014 Time: 12:34:542 MinTemp: -22.3 Time: 05:12:345 MaxTemp: 35.7


26-Jan-2014 Time: 04:00:093 MinTemp: -14.0 Time: 05:12:345 MaxTemp: 55.7
27-Jan-2014 Time: 02:34:542 MinTemp: -22.3 Time: 00:14:045 MaxTemp: 35.7
28-Jan-2014 Time: 11:19:345 MinTemp: -23.3 Time: 05:12:345 MaxTemp: 35.7
29-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 02:34:542 MaxTemp: 52.9
30-Jan-2014 Time: 15:12:345 MinTemp: -15.7 Time: 03:12:187 MaxTemp: 56.0
31-Jan-2014 Time: 22:00:093 MinTemp: -27.0 Time: 05:12:345 MaxTemp: 49.2
25-Jan-2014 Time: 12:34:542 MinTemp: -22.3 Time: 05:12:345 MaxTemp: 35.7
26-Jan-2014 Time: 22:00:093 MinTemp: -27.0 Time: 05:12:345 MaxTemp: 55.7
27-Jan-2014 Time: 02:34:542 MinTemp: -22.3 Time: 05:12:345 MaxTemp: 55.7
29-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 02:34:542 MaxTemp: 62.9
30-Jan-2014 Time: 22:00:093 MinTemp: -27.0 Time: 05:12:345 MaxTemp: 49.2
31-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 03:12:187 MaxTemp: 56.0
29-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 02:34:542 MaxTemp: 52.9
30-Jan-2014 Time: 15:12:345 MinTemp: -15.7 Time: 03:12:187 MaxTemp: 56.0
28-Jan-2014 Time: 11:19:345 MinTemp: -23.3 Time: 05:12:345 MaxTemp: 35.7
29-Jan-2014 Time: 14:00:093 MinTemp: -17.0 Time: 02:34:542 MaxTemp: 52.9
30-Jan-2014 Time: 15:12:345 MinTemp: -15.7 Time: 03:12:187 MaxTemp: 56.0


Implementation
In this implementation, for ease of understanding, I have hardcoded the matrix dimension as 5 x 5.
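
For reference, the mapper shown later expects each matrix element as one comma-separated line of the form matrixName,row,column,value; a small hypothetical fragment of such an input file could look like:

a,0,0,63
a,0,1,45
b,0,0,30
b,1,0,28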
package com.hadoopgeek.matrix;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class Matrix
{

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
{

if(args.length !=2)
{
System.err.println("Usage : Matrix <input path> <output path>");
System.exit(-1);
}

Configuration conf = new Configuration();

conf.set("dimension", "5"); // set the matrix dimension here
Job job = Job.getInstance(conf);

//conf.set("fs.defaultFS", "hdfs://quickstart.cloudera:8020"); // take this value from core-site.xml
FileSystem fs = FileSystem.get(conf);

job.setJarByClass(Matrix.class);

// Need to set these since the map output types differ from the reduce output types
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

job.setMapperClass(MatrixMapper.class);
job.setReducerClass(MatrixReducer.class);

job.setInputFormatClass(TextInputFormat.class);

job.setOutputFormatClass(TextOutputFormat.class);

Path input = new Path(args[0]);


Path output = new Path(args[1]);

// Set the dimension of matrix

if(!fs.exists(input))
{
System.err.println("Input file doesn't exist");
return;
}
if(fs.exists(output))
{
fs.delete(output, true);
System.err.println("Output file deleted");
}

fs.close();

FileInputFormat.addInputPath(job, input);

FileOutputFormat.setOutputPath(job, output);

job.waitForCompletion(true);

System.out.println("MR Job Completed !");

}
}

Map
In the map function, each input record from the dataset is organized into key-value pairs such that the reducer can do the entire computation of the corresponding output cell. The source code is given below.
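
For example, with the dimension fixed at 5, a hypothetical element a,1,2,10 would be emitted under the keys 1,0 1,1 1,2 1,3 1,4 (every cell of result row 1), while b,1,3,7 would be emitted under the keys 0,3 1,3 2,3 3,3 4,3 (every cell of result column 3).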

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MatrixMapper extends
Mapper<LongWritable, Text, Text, Text>
{
private int dim; // matrix dimension, read from the "dimension" property set in the driver

@Override
protected void setup(Context context)
{
dim = Integer.parseInt(context.getConfiguration().get("dimension", "5"));
}

@Override
protected void map
(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
// input format is ["a", 0, 0, 63]
String[] csv = value.toString().split(",");
String matrix = csv[0].trim();
int row = Integer.parseInt(csv[1].trim());
int col = Integer.parseInt(csv[2].trim());
if(matrix.contains("a"))
{
// an element of A is needed by every cell in its row of the result
for (int i = 0; i < dim; i++)
{
String akey = Integer.toString(row) + "," + Integer.toString(i);
context.write(new Text(akey), value);
}
}
if(matrix.contains("b"))
{
// an element of B is needed by every cell in its column of the result
for (int i = 0; i < dim; i++)
{
String akey = Integer.toString(i) + "," + Integer.toString(col);
context.write(new Text(akey), value);
}
}
}
}

Reducer
Input to the reducer is a key that corresponds to one output cell of the resultant matrix, together with all the values required to compute that cell. The source code of the reduce function is given below.
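
For example, the reduce call for key 1,3 receives the five elements of row 1 of a (indexed by their column) and the five elements of column 3 of b (indexed by their row), and the emitted value is a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3] + a[4]*b[4], i.e. the dot product of row 1 of A with column 3 of B.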
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MatrixReducer extends
Reducer<Text, Text, Text, IntWritable> {

@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {

int[] a = new int[5];


int[] b = new int[5];
// b, 2, 0, 30
for (Text value : values) {
System.out.println(value);
String cell[] = value.toString().split(",");
if (cell[0].contains("a")) // take rows here
{
int col = Integer.parseInt(cell[2].trim());
a[col] = Integer.parseInt(cell[3].trim());
}
else if (cell[0].contains("b")) // take col here
{
int row = Integer.parseInt(cell[1].trim());
b[row] = Integer.parseInt(cell[3].trim());
}
}
int total = 0;
for (int i = 0; i < 5; i++) {
int val = a[i] * b[i];
total += val;
}
context.write(key, new IntWritable(total));
}
}


Output
The above MR job will generate output as shown below.
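
Each line is one cell of the 5 x 5 product matrix: the key is the row,column index of the cell and the value is the dot product computed by the reducer for that cell.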
0,0 11878
0,1 14044
0,2 16031
0,3 5964
0,4 15874
1,0 4081
1,1 6914
1,2 8282
1,3 7479
1,4 9647
2,0 6844
2,1 9880
2,2 10636
2,3 6973
2,4 8873
3,0 10512
3,1 12037
3,2 10587
3,3 2934
3,4 5274
4,0 11182
4,1 14591
4,2 10954
4,3 1660
4,4 9981


PIG Installation

$ cd Downloads/

- Unzip the tar file.


$ tar -xvf pig-0.11.1.tar.gz

- Create directory
$ sudo mkdir /usr/lib/pig

- Move pig-0.11.1 to pig


$ mv pig-0.11.1 /usr/lib/pig/

- Set the PIG_HOME path in bashrc file


To open bashrc file use this command
$ gedit ~/.bashrc

In bashrc file append the below 2 statements


export PIG_HOME=/usr/lib/pig/pig-0.11.1

export PATH=$PATH:$PIG_HOME/bin

- Restart your computer or reload the file with [ . ~/.bashrc ]


Now let’s test the installation
On the command prompt type
$ pig -h

It shows the help related to Pig and its various commands.

- Starting pig in local mode
$ pig -x local
grunt>

- Starting pig in mapreduce mode


$ pig -x mapreduce

or
$ pig

----loading and parsing data-----

A = load '/weatherPIG' using TextLoader as (data:chararray);


AF = foreach A generate TRIM(SUBSTRING(data, 6, 14)), TRIM(SUBSTRING(data, 46, 53)),
TRIM(SUBSTRING(data, 38, 45));
store AF into '/data6' using PigStorage(',');
S = load '/data6/part-m-00000' using PigStorage(',') as (date:chararray, min:double, max:double);

-------Hot Days------

X = filter S by max > 25;


dump X;

-------Cold Days------

X = filter S by min < 0;


dump X;

-------Hottest Day-----

/* puts S's data in H1's Tuple */

H1 = group S all;
I = foreach H1 generate MAX(S.max) as maximum;
X = filter S by max == I.maximum;

-------Coldest Day------

H2 = group S all;
J = foreach H2 generate MIN(S.min) as minimum;
X = filter S by min == J.minimum;

-----UDF-----
register PIGUdfCorrupt.jar;

A = load '/weatherPIG' using TextLoader as (data:chararray);


AF = foreach A generate TRIM(SUBSTRING(data, 6, 14)),
IfCorrupted(TRIM(SUBSTRING(data, 46, 53))), IfCorrupted(TRIM(SUBSTRING(data, 38,
45)));
store AF into '/data2' using PigStorage(',');
S = load '/data2/part-m-00000' using PigStorage(',') as (date:chararray, min:double, max:double);

------------------

A = load '/data1' as (a1:int, a2:int);


B = load '/data2' as (b1:int, b2:int);
X = UNION A, B;
dump X;
//onschema

----------------------------------

A = LOAD '/j1' as (a1:int, a2:int, a3:int);


B = LOAD '/j2' as (b1:int, b2:int);
X = JOIN A BY a1, B BY b1;
dump X;

------------------------------------

A = load '/student' as (name:chararray, age:int, gpa:float);


B = load '/studentRoll' as (name:chararray, rollno:int);

X = group A by name;
dump X;

X = cogroup A by name, B by name;


dump X;

register myudf.jar;
X = filter A by IsOfAge(age);


Hive Installation
Installing HIVE:

- Browse to the link: http://apache.claz.org/hive/stable/
- Click the apache-hive-0.13.0-bin.tar.gz
- Save and Extract it

Commands
user@ubuntu:~$ cd /usr/lib/
user@ubuntu:~$ sudo mkdir hive
user@ubuntu:~$ cd Downloads
user@ubuntu:~$ sudo mv apache-hive-0.13.0-bin /usr/lib/hive
Setting Hive environment variable:

Commands

user@ubuntu:~$ cd
user@ubuntu:~$ sudo gedit ~/.bashrc
Copy and paste the following lines at end of the file

# Set HIVE_HOME
export HIVE_HOME="/usr/lib/hive/apache-hive-0.13.0-bin"
PATH=$PATH:$HIVE_HOME/bin
export PATH
Setting HADOOP_PATH in HIVE config.sh

Commands

user@ubuntu:~$ cd /usr/lib/hive/apache-hive-0.13.0-bin/bin
user@ubuntu:~$ sudo gedit hive-config.sh
Go to the line where the following statements are written

# Allow alternate conf dir location.


HIVE_CONF_DIR="${HIVE_CONF_DIR:-$HIVE_HOME/conf}"
export HIVE_CONF_DIR=$HIVE_CONF_DIR
export HIVE_AUX_JARS_PATH=$HIVE_AUX_JARS_PATH
Below this write the following

export HADOOP_HOME=/usr/local/hadoop (use the path where Hadoop is installed)


Create Hive directories within HDFS


Command

user@ubuntu:~$ hadoop fs -mkdir -p /usr/hive/warehouse


Setting READ/WRITE permission for table

Command

user@ubuntu:~$ hadoop fs -chmod g+w /usr/hive/warehouse


HIVE launch

Command

user@ubuntu:~$ hive
The Hive shell prompt will appear:

Creating a database

Command

hive> create database mydb;

OUTPUT

OK
Time taken: 0.369 seconds
hive>

Configuring hive-site.xml:

Open with text-editor and change the following property

<property>
<name>hive.metastore.local</name>
<value>TRUE</value>
<description>controls whether to connect to a remote metastore server or open a new metastore server in Hive Client JVM</description>
</property>

<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://usr/lib/hive/apache-hive-0.13.0-bin/metastore_db?
createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>

<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>

<property>
<name>hive.metastore.warehouse.dir</name>
<value>/usr/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>

user@ubuntu:~$ sudo gedit sample.sql

create database sample;


use sample;

create table product(product int, productname string, price float) row format delimited fields terminated by ',';
describe product;
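
Data can then be loaded into this table the same way the timesheet table is loaded further below; a hypothetical example (HDFS path illustrative):

LOAD DATA INPATH '/product.csv' OVERWRITE INTO TABLE product;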

CREATE TABLE drivers (driverId INT, name STRING, ssn BIGINT, location STRING,
certified STRING, wageplan STRING);

insert overwrite table drivers


SELECT
regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,
regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) name,
regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) ssn,
regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) location,
regexp_extract(col_value, '^(?:([^,]*),?){5}', 1) certified,
regexp_extract(col_value, '^(?:([^,]*),?){6}', 1) wageplan

from temp_drivers;

Select * from drivers LIMIT 100;

CREATE TABLE temp_timesheet (col_value string);

CREATE TABLE timesheet (driverId INT, week INT, hours_logged INT , miles_logged INT);

LOAD DATA INPATH '/user/maria_dev/timesheet.csv' OVERWRITE INTO TABLE temp_timesheet;

insert overwrite table timesheet


SELECT
regexp_extract(col_value, '^(?:([^,]*),?){1}', 1) driverId,
regexp_extract(col_value, '^(?:([^,]*),?){2}', 1) week,

regexp_extract(col_value, '^(?:([^,]*),?){3}', 1) hours_logged,
regexp_extract(col_value, '^(?:([^,]*),?){4}', 1) miles_logged

from temp_timesheet;

SELECT driverId, sum(hours_logged), sum(miles_logged) FROM timesheet GROUP BY driverId;


SELECT d.driverId, d.name, t.total_hours, t.total_miles from drivers d


JOIN (SELECT driverId, sum(hours_logged) total_hours, sum(miles_logged) total_miles FROM
timesheet GROUP BY driverId ) t
ON (d.driverId = t.driverId);
