
Open Ended Project

Implement and subscribe to an RSS news feed to get the latest news in India.
We will use the rome-1.0.jar and jdom-1.1.1.jar external libraries to parse the XML generated by the
RSS feed.
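
Before wiring ROME into Hadoop, it can help to confirm that the two libraries parse a feed on their own. The following standalone sketch (FeedCheck is a hypothetical helper, not part of the project) prints a feed's title and its entry titles for a URL passed on the command line, assuming rome-1.0.jar and jdom-1.1.1.jar are on the classpath:

import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.XmlReader;
import java.net.URL;

public class FeedCheck
{
    public static void main(String[] args) throws Exception
    {
        // args[0] is a feed URL, e.g. one of the URLs from input.txt below
        SyndFeed feed = new SyndFeedInput().build(new XmlReader(new URL(args[0])));
        System.out.println(feed.getTitle());
        for (Object entry : feed.getEntries())
        {
            System.out.println("  " + ((SyndEntry) entry).getTitle());
        }
    }
}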
Start all Hadoop services. Format the NameNode only on the first run, since formatting erases any existing HDFS data.
$ hdfs namenode -format
$ ./start-all.sh
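To confirm the daemons are up, jps should list processes such as NameNode, DataNode, ResourceManager, and NodeManager:
$ jps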
Create the Mapper, Reducer, and Controller classes
GetFeed.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.net.URL;
import java.util.List;

import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.XmlReader;

public class GetFeed
{
    // Each input line is a feed URL. The mapper fetches the feed and emits
    // one (feed title, entry title) pair per entry.
    public static class FeedMapper extends Mapper<LongWritable, Text, Text, Text>
    {
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException
        {
            try
            {
                URL url = new URL(value.toString().trim());
                SyndFeedInput input = new SyndFeedInput();
                SyndFeed feed = input.build(new XmlReader(url));
                String head = feed.getTitle();
                List entries = feed.getEntries();
                // Emit inside the loop so every entry is written, not just
                // the last one, and nothing is written after an exception.
                for (int j = 0; j < entries.size(); j++)
                {
                    String title = ((SyndEntry) entries.get(j)).getTitle();
                    context.write(new Text(head), new Text(title));
                }
            }
            catch (Exception e)
            {
                context.write(new Text("Exception"), new Text(value.toString()));
            }
        }
    }

    // The reducer passes every (feed title, entry title) pair through to the output.
    public static class FeedReducer extends Reducer<Text, Text, Text, Text>
    {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException
        {
            for (Text value : values)
            {
                context.write(key, new Text(value.toString()));
            }
        }
    }

    public static void main(String[] args)
    {
        try
        {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "getfeed");
            job.setJarByClass(GetFeed.class);
            job.setMapperClass(FeedMapper.class);
            job.setCombinerClass(FeedReducer.class);
            job.setReducerClass(FeedReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
        catch (Exception er)
        {
            System.out.println(er);
        }
    }
}

Compile the program and package it as a .jar

$ mkdir feeds
$ export CLASSPATH="/usr/local/hadoop-2.7.2/share/hadoop/common/hadoop-common-2.7.2.jar:/usr/local/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.7.2.jar:/usr/local/hadoop-2.7.2/share/hadoop/common/lib/commons-cli-1.2.jar"
$ source ~/.bashrc
$ javac -cp $CLASSPATH:rome-1.0.jar:jdom-1.1.1.jar:. -d feeds/ GetFeed.java
$ jar -cvf Feeds.jar -C feeds/ .
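The map tasks also need the ROME and JDOM classes at runtime, which the commands above do not provide on their own. One common approach (a sketch, assuming both jars sit in the current directory) is to bundle them in a lib/ directory inside the job jar, which Hadoop adds to the task classpath; jar tf then verifies the packaging:
$ mkdir feeds/lib
$ cp rome-1.0.jar jdom-1.1.1.jar feeds/lib/
$ jar -cvf Feeds.jar -C feeds/ .
$ jar tf Feeds.jar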

Storing the input file in HDFS
$ hadoop fs -mkdir /inputOEP
$ hadoop fs -copyFromLocal input.txt /inputOEP/
Do not create /outputOEP in advance: the job fails if the output directory already exists.
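To verify the upload (an optional check):
$ hadoop fs -ls /inputOEP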

input.txt
http://www.business-standard.com/rss/current-affairs-115.rss
http://www.indianexpress.com/rss/721/india.xml

Starting the MapReduce job
$ hadoop jar Feeds.jar GetFeed /inputOEP /outputOEP

Checking the output (the new MapReduce API names reducer output files part-r-00000, not part-00000)
$ hadoop fs -cat /outputOEP/part-r-00000
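To copy the result to the local filesystem instead:
$ hadoop fs -copyToLocal /outputOEP/part-r-00000 .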

Output: one line per feed entry, with the feed title as the key and the headline as the value, separated by a tab.

