-
Notifications
You must be signed in to change notification settings - Fork 0
/
NYCMapper_IMC.java
121 lines (81 loc) · 3.13 KB
/
NYCMapper_IMC.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/*
* --------------------------------------------------------------------
* Developer Name : Udeshika Dissanayake
* Subject : COSC2637 Big Data Processing
* Assignment : Assignment 1 - Semester 2, 2019
* Student Number : s3400652
* Date : 12/10/2019 *
*--------------------------------------------------------------------
*/
package edu.rmit.cosc2637.s3400652.Assignment;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
//Mapper class with In-Mapper Combiner
//------Data Pattern of Green Taxi data
//2,21/12/2018 15:17,21/12/2018 15:18,N,1,264,264,5,0,3,0.5,0.5,0,0,,0.3,4.3,2,1,
//6th Column = PickUp Location; 7th Column = Drop Off Location
//17th Column = Total fare; 8th Column = Number of Passengers
//------Data Pattern of Yellow Taxi data
//1 01-02-19 0:59 01-02-19 1:07 1 2.1 1 N 48 234 1 9 0.5 0.5 2 0 0.3 12.3 0
//8th Column = PickUp Location; 9th Column = Drop Off Location
//17th Column = Total fare; 4th Column = Number of Passengers
/**
 * Mapper with an in-mapper combiner that sums the total fare per pickup location.
 *
 * <p>Each input value is one comma-separated trip record. Instead of emitting one
 * (location, fare) pair per record, the fares are accumulated in a local map and
 * written out once in {@link #cleanup(Context)}, which reduces the number of
 * intermediate pairs produced by this mapper.
 *
 * <p>Output key: {@code "PULoc_" + <pickup location field>}; output value: the sum
 * of the total-fare field over all records seen by this mapper for that key.
 */
public class NYCMapper_IMC extends Mapper<LongWritable, Text, Text, FloatWritable> //for total fare
{
    /** Zero-based index of the pickup-location field (8th column, yellow taxi layout). */
    private static final int PICKUP_LOCATION_INDEX = 7;

    /** Zero-based index of the total-fare field (17th column). */
    private static final int TOTAL_FARE_INDEX = 16;

    /**
     * Accepts plain non-negative decimals such as {@code 5}, {@code 12.} or {@code 12.3}.
     * Compiled once instead of per record. Header text, empty fields and negative
     * amounts do not match and are therefore skipped.
     */
    private static final java.util.regex.Pattern NON_NEGATIVE_DECIMAL =
            java.util.regex.Pattern.compile("\\d+(\\.\\d*)?");

    /** Text of the record most recently passed to {@link #map}. */
    String line;

    /** Associative array holding the per-location partial sums (local aggregation). */
    Map<Text, FloatWritable> partial_sum_fare = new HashMap<Text, FloatWritable>();

    /**
     * Starts the task with an empty aggregation map.
     *
     * @param context the Hadoop task context (unused here)
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException
    {
        partial_sum_fare = new HashMap<Text, FloatWritable>();
    }

    /**
     * Adds the fare of one trip record to the running total of its pickup location.
     *
     * <p>Records with fewer than 17 fields, or whose total-fare field is not a plain
     * non-negative decimal, are ignored. Nothing is written to {@code context} here;
     * output happens in {@link #cleanup(Context)}.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one comma-separated trip record
     * @param context the Hadoop task context (unused here)
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException
    {
        line = value.toString();
        String[] fields = line.split(",");
        if (fields.length <= TOTAL_FARE_INDEX)
        {
            return; // too short to contain a total-fare column
        }

        // Validate the whole field so that Float.parseFloat below cannot throw
        // on malformed input and abort the task.
        String rawFare = fields[TOTAL_FARE_INDEX].trim();
        if (!NON_NEGATIVE_DECIMAL.matcher(rawFare).matches())
        {
            return;
        }
        float fare = Float.parseFloat(rawFare);

        Text pickupLocation = new Text("PULoc_" + fields[PICKUP_LOCATION_INDEX]);
        FloatWritable runningTotal = partial_sum_fare.get(pickupLocation);
        if (runningTotal == null)
        {
            partial_sum_fare.put(pickupLocation, new FloatWritable(fare));
        }
        else
        {
            // Update the stored accumulator in place; no new object per record.
            runningTotal.set(runningTotal.get() + fare);
        }
    }

    /**
     * Emits one (pickup location, summed fare) pair per key and then empties the map.
     *
     * @param context the Hadoop task context the aggregated pairs are written to
     */
    @Override
    public void cleanup(Context context) throws IOException, InterruptedException
    {
        for (Map.Entry<Text, FloatWritable> entry : partial_sum_fare.entrySet())
        {
            context.write(entry.getKey(), entry.getValue());
        }
        partial_sum_fare.clear();
    }
}