BigData PIG Practicals
- LOAD
- FILTER
- FOREACH ... GENERATE
- SPLIT
- GROUP
- JOIN
- DESCRIBE
- EXPLAIN
- ILLUSTRATE
- DUMP
> pig -x local
> pig -x local [script]
> pig -x mapreduce [script]
[cloudera@quickstart ~]$ pwd
/home/cloudera
[cloudera@quickstart ~]$ pig -x local
grunt>
Input file: /home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/students.txt
grunt> A = load 'Desktop/Basha/Basha2019/PIG_Practicals/students.txt';
grunt> describe A;
Schema for A unknown.
grunt> dump A;
(John,21,2.89)
(Sally,19,2.56)
(Alice,22,3.76)
(Doug,19,1.98)
(Susan,26,3.25)
(John,35,5.00)
(Doug,40,3.50)
(Alice,22,5.25)
grunt> A = load 'Desktop/Basha/Basha2019/PIG_Practicals/students.txt' AS (name:chararray, age:int, gpa:float);
grunt> describe A;
A: {name: chararray,age: int,gpa: float}
grunt> dump A;
(John,21,2.89)
(Sally,19,2.56)
(Alice,22,3.76)
(Doug,19,1.98)
(Susan,26,3.25)
(John,35,5.0)
(Doug,40,3.5)
(Alice,22,5.25)
grunt> R = filter A by (age>=20);
grunt> dump R;
(John,21,2.89)
(Alice,22,3.76)
(Susan,26,3.25)
(John,35,5.0)
(Doug,40,3.5)
(Alice,22,5.25)
grunt> R = filter A by (age>=20) and (gpa>=3.5);
grunt> dump R;
(Alice,22,3.76)
(John,35,5.0)
(Doug,40,3.5)
(Alice,22,5.25)
grunt> illustrate R;
---------------------------------------------------------
| A | name:chararray | age:int | gpa:float |
---------------------------------------------------------
| | John | 35 | 5.0 |
| | John | 21 | 2.89 |
| | Doug | 19 | 1.98 |
---------------------------------------------------------
---------------------------------------------------------
| R | name:chararray | age:int | gpa:float |
---------------------------------------------------------
| | John | 35 | 5.0 |
---------------------------------------------------------
grunt> F = foreach A generate age,gpa;
grunt> dump F;
(21,2.89)
(19,2.56)
(22,3.76)
(19,1.98)
(26,3.25)
(35,5.0)
(40,3.5)
(22,5.25)
grunt> G = group A by age;
grunt> dump G;
(19,{(Doug,19,1.98),(Sally,19,2.56)})
(21,{(John,21,2.89)})
(22,{(Alice,22,5.25),(Alice,22,3.76)})
(26,{(Susan,26,3.25)})
(35,{(John,35,5.0)})
(40,{(Doug,40,3.5)})
grunt> describe G;
G: {group: int,A: {(name: chararray,age: int,gpa: float)}}
grunt> H = foreach G generate group,A.name;
grunt> dump H;
(19,{(Doug),(Sally)})
(21,{(John)})
(22,{(Alice),(Alice)})
(26,{(Susan)})
(35,{(John)})
(40,{(Doug)})
grunt> store A into 'Desktop/Basha/Basha2019/PIG_Practicals/outputdir';
Input(s):
Successfully read records from: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/students.txt"
Output(s):
Successfully stored records in: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/outputdir"
grunt> store H into 'Desktop/Basha/Basha2019/PIG_Practicals/outputdir2' using PigStorage('|');
Input(s):
Successfully read records from: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/students.txt"
Output(s):
Successfully stored records in: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/outputdir2"
19|{(Doug),(Sally)}
21|{(John)}
22|{(Alice),(Alice)}
26|{(Susan)}
35|{(John)}
40|{(Doug)}
[cloudera@quickstart ~]$ pig -x local Desktop/Basha/Basha2019/PIG_Practicals/students.pig
Input(s):
Successfully read records from: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/students.txt"
Output(s):
Successfully stored records in: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/outputdir3"
19|{(Doug),(Sally)}
21|{(John)}
22|{(Alice),(Alice)}
26|{(Susan)}
35|{(John)}
40|{(Doug)}
[cloudera@quickstart ~]$ pwd
/home/cloudera
[cloudera@quickstart ~]$ pig -x local
grunt>
Input file: /home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/students.txt
grunt> A = load 'Desktop/Basha/Basha2019/PIG_Practicals/students.txt';
grunt> describe A;
Schema for A unknown.
grunt> dump A;
(John,21,2.89)
(Sally,19,2.56)
(Alice,22,3.76)
(Doug,19,1.98)
(Susan,26,3.25)
(John,35,5.00)
(Doug,40,3.50)
(Alice,22,5.25)
grunt> A = load 'Desktop/Basha/Basha2019/PIG_Practicals/students.txt' AS (name:chararray, age:int, gpa:float);
grunt> describe A;
A: {name: chararray,age: int,gpa: float}
grunt> dump A;
(John,21,2.89)
(Sally,19,2.56)
(Alice,22,3.76)
(Doug,19,1.98)
(Susan,26,3.25)
(John,35,5.0)
(Doug,40,3.5)
(Alice,22,5.25)
grunt> R = filter A by (age>=20);
grunt> dump R;
(John,21,2.89)
(Alice,22,3.76)
(Susan,26,3.25)
(John,35,5.0)
(Doug,40,3.5)
(Alice,22,5.25)
grunt> R = filter A by (age>=20) and (gpa>=3.5);
grunt> dump R;
(Alice,22,3.76)
(John,35,5.0)
(Doug,40,3.5)
(Alice,22,5.25)
grunt> illustrate R;
---------------------------------------------------------
| A | name:chararray | age:int | gpa:float |
---------------------------------------------------------
| | John | 35 | 5.0 |
| | John | 21 | 2.89 |
| | Doug | 19 | 1.98 |
---------------------------------------------------------
---------------------------------------------------------
| R | name:chararray | age:int | gpa:float |
---------------------------------------------------------
| | John | 35 | 5.0 |
---------------------------------------------------------
grunt> F = foreach A generate age,gpa;
grunt> dump F;
(21,2.89)
(19,2.56)
(22,3.76)
(19,1.98)
(26,3.25)
(35,5.0)
(40,3.5)
(22,5.25)
grunt> G = group A by age;
grunt> dump G;
(19,{(Doug,19,1.98),(Sally,19,2.56)})
(21,{(John,21,2.89)})
(22,{(Alice,22,5.25),(Alice,22,3.76)})
(26,{(Susan,26,3.25)})
(35,{(John,35,5.0)})
(40,{(Doug,40,3.5)})
grunt> describe G;
G: {group: int,A: {(name: chararray,age: int,gpa: float)}}
grunt> H = foreach G generate group,A.name;
grunt> dump H;
(19,{(Doug),(Sally)})
(21,{(John)})
(22,{(Alice),(Alice)})
(26,{(Susan)})
(35,{(John)})
(40,{(Doug)})
grunt> store A into 'Desktop/Basha/Basha2019/PIG_Practicals/outputdir';
Input(s):
Successfully read records from: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/students.txt"
Output(s):
Successfully stored records in: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/outputdir"
grunt> store H into 'Desktop/Basha/Basha2019/PIG_Practicals/outputdir2' using PigStorage('|');
Input(s):
Successfully read records from: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/students.txt"
Output(s):
Successfully stored records in: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/outputdir2"
19|{(Doug),(Sally)}
21|{(John)}
22|{(Alice),(Alice)}
26|{(Susan)}
35|{(John)}
40|{(Doug)}
[cloudera@quickstart ~]$ pig -x local Desktop/Basha/Basha2019/PIG_Practicals/students.pig
Input(s):
Successfully read records from: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/students.txt"
Output(s):
Successfully stored records in: "file:///home/cloudera/Desktop/Basha/Basha2019/PIG_Practicals/outputdir3"
19|{(Doug),(Sally)}
21|{(John)}
22|{(Alice),(Alice)}
26|{(Susan)}
35|{(John)}
40|{(Doug)}
No comments:
Post a Comment