-- A basic Pig Latin project that analyzes movie data and demonstrates read and write operations in various formats.
-- Load the comma-delimited movie file and declare the schema used by the
-- rest of the script.
movies = LOAD 'movie.txt' USING PigStorage(',')
         AS (id:int, title:chararray, year:int, rating:float, views:int);
-- Partition movies by rating: below 3.0 is low-rated, 3.0 and above is
-- high-rated. Using >= on the second branch ensures a movie rated exactly
-- 3.0 lands in highrated instead of matching neither condition and being
-- dropped. Tuples with a null rating still match neither branch.
SPLIT movies INTO lowrated IF rating < 3.0f, highrated IF rating >= 3.0f;
-- Count the movies in each rating bucket. GROUP ... ALL collapses a relation
-- into a single group so the aggregate runs over every tuple.
-- grouplow and grouphigh are reused by later statements in the script.
grouplow = GROUP lowrated ALL;
lowratedcount = FOREACH grouplow GENERATE COUNT(lowrated.id);
DUMP lowratedcount;

grouphigh = GROUP highrated ALL;
highratedcount = FOREACH grouphigh GENERATE COUNT(highrated.id);
DUMP highratedcount;
-- Average view count across all movies, printed to the console.
groupmovies = GROUP movies ALL;
avgmovies = FOREACH groupmovies GENERATE AVG(movies.views);
DUMP avgmovies;
-- Greatest title (string comparison) and greatest view count among the
-- low-rated movies.
-- NOTE(review): the two MAX calls are independent aggregates, so the title
-- returned is not necessarily the title of the most-viewed movie. If that
-- pairing is intended, use a nested ORDER BY views DESC / LIMIT 1 instead.
maxview = FOREACH grouplow GENERATE MAX(lowrated.title), MAX(lowrated.views);
-- Smallest title (string comparison) and smallest view count among the
-- high-rated movies. Built-in function names are case-sensitive in Pig, so
-- the aggregate must be spelled MIN, not min.
-- NOTE(review): the two MIN calls are independent aggregates, so the title
-- returned is not necessarily the title of the least-viewed movie.
minview = FOREACH grouphigh GENERATE MIN(highrated.title), MIN(highrated.views);
-- Per release year: number of low-rated movies and their average rating,
-- sorted by year ascending. GROUP BY does not accept ASC/DESC, so the
-- ordering is applied with a separate ORDER BY on the aggregated relation.
groupyear = GROUP lowrated BY year;
lowyearstats = FOREACH groupyear GENERATE group AS release, COUNT(lowrated.rating) AS count, AVG(lowrated.rating);
maxyear4 = ORDER lowyearstats BY release ASC;
-- Per release year: number of high-rated movies, sorted by year descending.
-- GROUP BY does not accept ASC/DESC, so the ordering is applied with a
-- separate ORDER BY on the aggregated relation.
-- NOTE(review): the field alias "relase" looks like a typo for "release";
-- it is kept unchanged in case other code refers to it by name.
hgroupyear = GROUP highrated BY year;
hyearcounts = FOREACH hgroupyear GENERATE group AS relase, COUNT(highrated.rating) AS count;
hyear = ORDER hyearcounts BY relase DESC;
-- Write the rating buckets out as JSON and as Avro, and the per-year
-- low-rated statistics as comma-delimited text. Each STORE targets its own
-- output directory.
STORE lowrated INTO 'json' USING JsonStorage();
STORE highrated INTO 'jsonh' USING JsonStorage();
STORE lowrated INTO 'avro' USING AvroStorage();
STORE highrated INTO 'havro' USING AvroStorage();
STORE maxyear4 INTO 'text' USING PigStorage(',');