NYSE Result

1.
NYSE:
-- Load the NYSE dividend records with the default loader (PigStorage, tab-delimited).
-- dividend is declared as float so that AVG and ORDER BY operate numerically;
-- an untyped field defaults to bytearray, which ORDER compares byte by byte.
divs = load '/D:/Downloads D/NYSE_dividends.txt' as (exchange:chararray, symbol:chararray, date:chararray, dividend:float);

-- RANK prepends a sequential row number, exposed as the field rank_divs.
ranked = rank divs;

/* Keep every row except the one ranked 1 (the first row).
   NOTE(review): presumably this skips a header line; confirm against the data file. */
dividends = filter ranked by (rank_divs > 1);

-- Average dividend per stock symbol.
grouped = group dividends by symbol;
avg = foreach grouped generate group, AVG(dividends.dividend);
dump avg;
store avg into 'average_dividend';

-- For each symbol, keep its three largest dividend records.
top3 = foreach grouped {
    sorted = order dividends by dividend desc;
    top = limit sorted 3;
    generate group, flatten(top);
};
dump top3; /* dump command is used to see what is there in top3 */
NYSE Result
2. PIG Script:
-- Load the files required for the analysis into relations a and b.
-- PigStorage(',') splits each line of the text file on commas; both files are
-- declared with three integer fields.
a = load '/user/maria_dev/Data/Pig_Tutorial/A.int' using PigStorage(',') as (a1:int, a2:int, a3:int);
b = load '/user/maria_dev/Data/Pig_Tutorial/B.int' using PigStorage(',') as (b1:int, b2:int, b3:int);
DUMP a;
DUMP b;

-- UNION merges the contents of the two relations; the merged relation is stored in c.
c = UNION a, b;
DUMP c;

-- SPLIT checks the first field of c (positional notation $0):
-- tuples with $0 == 0 are stored in d, tuples with $0 == 1 are stored in e.
SPLIT c INTO d IF $0 == 0, e IF $0 == 1;
DUMP d;
DUMP e;

-- FILTER on the second field (positional notation $1): tuples whose value is
-- greater than 3 are stored in relation f.
f = FILTER c BY $1 > 3;
DUMP f;

-- GROUP by the third field (positional notation $2): tuples that share the same
-- value are collected into one group.
g = GROUP c BY $2;
DUMP g;

-- DESCRIBE prints the schema of a relation, i.e. the name and data type of each field.
DESCRIBE c;
DESCRIBE g;

-- GROUP ... ALL puts every tuple of c into a single group.
h = GROUP c ALL;
DUMP h;

-- $1 of h is the bag holding the grouped tuples of c, so COUNT($1) yields the
-- number of tuples that were grouped; the result is stored in relation i.
i = FOREACH h GENERATE COUNT($1);
dump i; -- Count of the tuples grouped into h

-- COGROUP is similar to GROUP but works on several relations at once:
-- each output tuple holds the key plus one bag per input relation.
j = COGROUP a BY $2, b BY $2;
DUMP j;
DESCRIBE j;

-- INNER on b drops every group whose bag from b is empty, so only keys that occur
-- in b remain, grouped by the third field (positional notation $2).
j = COGROUP a BY $2, b BY $2 INNER;
dump j;

-- INNER on both inputs keeps only keys that occur in both relations,
-- grouped by the third field.
j = COGROUP a BY $2 INNER, b BY $2 INNER;
dump j;

-- JOIN merges the two relations into one, pairing tuples that share the same
-- value in their third field.
j = JOIN a BY $2, b BY $2;
dump j;
DESCRIBE j;

-- Project the second field and the product of the second and third fields.
-- NOTE(review): this relies on c carrying a's field names after the UNION;
-- confirm with the DESCRIBE c output above.
k = FOREACH c GENERATE a2, a2 * a3;
DUMP k;

-- For every group in g, keep the group key and the (a1, a2) columns of its bag.
k = FOREACH g GENERATE group, c.(a1,a2);
DUMP k;

-- Number of tuples in each group of g.
cnt = FOREACH g GENERATE group, COUNT(c);
DUMP cnt;

-- FLATTEN un-nests the bag, producing one output row per grouped tuple.
k = FOREACH g GENERATE group, FLATTEN(c);
DUMP k;
DESCRIBE k;
-- Load two comma-separated datasets that share the (id, type) layout.
A = LOAD '/user/maria_dev/Data/Pig_Tutorial/A.txt' using PigStorage(',') AS (id :int,type:chararray);
B = LOAD '/user/maria_dev/Data/Pig_Tutorial/B.txt' using PigStorage(',') AS (id :int,type:chararray);
DUMP A;
DUMP B;

-- Inner join: only records with the same id in both datasets.
INNER_JOIN = JOIN A BY id, B BY id;
DUMP INNER_JOIN;

-- Left outer join: every record of A, with the matching records looked up from B.
LEFT_JOIN = JOIN A BY id LEFT, B BY id;
DUMP LEFT_JOIN;

-- Right outer join: all the data of the look-up table (B), with only the
-- matching records of the left table (A).
RIGHT_JOIN = JOIN A BY id RIGHT, B BY id;
DUMP RIGHT_JOIN;

-- Full outer join: all the matched and unmatched records out of both datasets.
FULL_JOIN = JOIN A BY id FULL, B BY id;
DUMP FULL_JOIN;

-- Load the comma-separated student records and group them by age.
students = LOAD '/user/maria_dev/Data/Pig_Tutorial/students.txt' using PigStorage(',') as (id:int,
    firstname:chararray, lastname:chararray, age:int, phone:chararray, city:chararray);
dump students;
age_group = GROUP students by age;
dump age_group;
3. WORD COUNT:
/* Load the input data; every record is read into a single chararray field named line. */
-- NOTE(review): the original literal used typographic quotes and had a space after
-- the leading slash; both were treated as copy/paste artifacts. Confirm the path.
lines = LOAD '/D:/Downloads D/' AS (line:chararray);

/* Create the output relation words: TOKENIZE splits a line into a bag of words and
   FLATTEN turns that bag into one row per word. */
words = FOREACH lines GENERATE FLATTEN(TOKENIZE(line)) as word;

/* Collect identical words into one group. */
grouped = GROUP words BY word;

/* Count how many times each word occurred. */
wordcount = FOREACH grouped GENERATE group, COUNT(words) as cnt;

/* Sort the words by their count, most frequent first. ORDER BY needs the field name. */
topwords = ORDER wordcount BY cnt DESC;

/* Print the contents of topwords. */
DUMP topwords;
Word count Result

4. TUTORIAL:
/* Load A.int from the local system: three integers a1, a2 and a3 per line,
   separated by commas. */
a = load '/D:/Downloads D/A.int' using PigStorage(',') as (a1:int, a2:int, a3:int);

/* Load B.int from the local system: three integers b1, b2 and b3 per line,
   separated by commas. */
-- NOTE(review): the original literals mixed typographic and straight quotes, and this
-- one had a space after the leading slash; normalized to the same directory as A.int.
b = load '/D:/Downloads D/B.int' using PigStorage(',') as (b1:int, b2:int, b3:int);

DUMP a;
DUMP b;

/* Combine relations a and b into c. */
c = UNION a, b;
DUMP c;
Tutorial Result

NYSE Result

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

NYSE Result

Uploaded by

Copyright:

Available Formats

1.

-- Load the NYSE dividend records with the default loader (PigStorage, tab-delimited).
-- dividend is declared as float so that AVG and ORDER BY operate numerically;
-- an untyped field defaults to bytearray, which ORDER compares byte by byte.
divs = load '/D:/Downloads D/NYSE_dividends.txt' as (exchange:chararray, symbol:chararray, date:chararray, dividend:float);

-- RANK prepends a sequential row number, exposed as the field rank_divs.
ranked = rank divs;

-- Keep every row except the one ranked 1 (the first row).
-- NOTE(review): presumably this skips a header line; confirm against the data file.
dividends = filter ranked by (rank_divs > 1);

-- Average dividend per stock symbol, written to 'average_dividend'.
grouped = group dividends by symbol;

avg = foreach grouped generate group, AVG(dividends.dividend);

store avg into 'average_dividend';

-- For each symbol, keep its three largest dividend records.
top3 = foreach grouped {
    sorted = order dividends by dividend desc;
    top = limit sorted 3;
    generate group, flatten(top);
};

dump top3; /* dump command is used to see what is there in top3 */

-- Load the two comma-separated inputs; each is declared with three integer fields.
a = LOAD '/user/maria_dev/Data/Pig_Tutorial/A.int' USING PigStorage(',') AS (a1:int, a2:int, a3:int);
b = LOAD '/user/maria_dev/Data/Pig_Tutorial/B.int' USING PigStorage(',') AS (b1:int, b2:int, b3:int);

-- h holds all values grouped together; COUNT($1) counts the tuples in its second field.
i = FOREACH h GENERATE COUNT($1);
DUMP i;

-- j is assigned three times in a row: a plain COGROUP on the third field, a COGROUP
-- with INNER on both inputs, and finally a JOIN on the third field.
j = COGROUP a BY $2, b BY $2;
j = COGROUP a BY $2 INNER, b BY $2 INNER;
j = JOIN a BY $2, b BY $2;

-- Second field of c together with the product of its second and third fields.
k = FOREACH c GENERATE a2, a2 * a3;

-- Per group of g: the (a1, a2) columns of the bag, the tuple count, and the flattened bag.
k = FOREACH g GENERATE group, c.(a1,a2);
cnt = FOREACH g GENERATE group, COUNT(c);
k = FOREACH g GENERATE group, FLATTEN(c);

-- Load dataset B: comma-separated (id, type) records.
B = LOAD '/user/maria_dev/Data/Pig_Tutorial/B.txt' using PigStorage(',') AS (id :int,type:chararray);

-- Load the comma-separated student records. The schema list must be closed and the
-- statement terminated, otherwise the following statement is parsed as part of it.
students = LOAD '/user/maria_dev/Data/Pig_Tutorial/students.txt' using PigStorage(',') as (id:int,
    firstname:chararray, lastname:chararray, age:int, phone:chararray, city:chararray);

-- Collect the students that share the same age into one group.
age_group = GROUP students by age;

-- TOKENIZE splits a line into a bag of words; FLATTEN yields one row per word.
words = FOREACH lines GENERATE FLATTEN(TOKENIZE(line)) as word;

-- Collect identical words into one group.
grouped = GROUP words BY word;

-- Count how many times each word occurred.
wordcount = FOREACH grouped GENERATE group, COUNT(words) as cnt;

-- Sort by the count, most frequent first. ORDER BY needs the field name before DESC.
topwords = ORDER wordcount BY cnt DESC;

Word count Result

-- Load B.int: three comma-separated integers b1, b2 and b3 per line.
-- NOTE(review): the original literal mixed typographic and straight quotes and had a
-- space after the leading slash; both were treated as copy/paste artifacts. Confirm the path.
b = load '/D:/Downloads D/B.int' using PigStorage(',') as (b1:int, b2:int, b3:int);

You might also like