diff --git a/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/avg/level1/metadata.json b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/avg/level1/metadata.json new file mode 100644 index 0000000..26530fa --- /dev/null +++ b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/avg/level1/metadata.json @@ -0,0 +1 @@ +{"level1/s0.csv": 1607310026831041} \ No newline at end of file diff --git a/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/level0/metadata.json b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/level0/metadata.json new file mode 100644 index 0000000..2a30e68 --- /dev/null +++ b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/level0/metadata.json @@ -0,0 +1 @@ +{"level0/s0.csv": 1607310020479006.0, "level0/s1.csv": 1607310936975548.0} \ No newline at end of file diff --git a/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/max/level1/metadata.json b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/max/level1/metadata.json new file mode 100644 index 0000000..0324a3a --- /dev/null +++ b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/max/level1/metadata.json @@ -0,0 +1 @@ +{"level1/s0.csv": 1607310021218054.0} \ No newline at end of file diff --git a/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/metadata.json b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/metadata.json new file mode 100644 index 0000000..b9e570d --- /dev/null +++ b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/metadata.json @@ -0,0 +1 @@ +{"raw_file": "ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z.csv", "levels": {"names": ["level0", "level1"], "level0": {"names": ["level0/s0.csv", "level0/s1.csv"], "frequency": 0.00010911451500060223, "number": 195335}, "level1": {"names": ["level1/s0.csv"], "frequency": 1.0909496393179725e-06, "number": 1953}}, "raw_number": 195335, "start": 
1607310020479006.0, "end": 1607311810662466.0} \ No newline at end of file diff --git a/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/min/level1/metadata.json b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/min/level1/metadata.json new file mode 100644 index 0000000..4a54fd7 --- /dev/null +++ b/backend/ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z/min/level1/metadata.json @@ -0,0 +1 @@ +{"level1/s0.csv": 1607310021084773.0} \ No newline at end of file diff --git a/backend/app.yaml b/backend/app.yaml index 70ef7f5..63ea40f 100644 --- a/backend/app.yaml +++ b/backend/app.yaml @@ -9,7 +9,7 @@ runtime: python37 env: standard manual_scaling: - instances: 1 + instances: 10 # Set App Engine instance class (defaults to F1) # See https://cloud.google.com/appengine/docs/standard/#instance_classes diff --git a/backend/cron.yaml b/backend/cron.yaml index 1a0ef91..ddfb805 100644 --- a/backend/cron.yaml +++ b/backend/cron.yaml @@ -2,4 +2,4 @@ cron: - description: "Scan new files to downsample" url: /downsample target: api - schedule: every 10 mins + schedule: every 5 mins diff --git a/backend/data_fetcher.py b/backend/data_fetcher.py index 8ee552e..2005420 100644 --- a/backend/data_fetcher.py +++ b/backend/data_fetcher.py @@ -15,7 +15,7 @@ """A module for fetching from multiple-level preprocessing.""" import utils - +import time from level_slices_reader import LevelSlices from metadata import Metadata @@ -77,9 +77,17 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end): } ] """ + + prevTime = time.time() + print("fetch data starts", prevTime) + self._metadata = Metadata( self._preprocess_dir, bucket=self._preprocess_bucket) self._metadata.load() + + diff = time.time() - prevTime + prevTime = time.time() + print("meta data done", diff) if timespan_start is None: timespan_start = self._metadata['start'] @@ -100,6 +108,10 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end): target_level = 
self._metadata['levels'][self._metadata['levels'] ['names'][target_level_index]] + diff = time.time() - prevTime + prevTime = time.time() + print("target level located",diff) + level_metadata = Metadata( self._preprocess_dir, strategy, utils.get_level_name( target_level_index), bucket=self._preprocess_bucket) @@ -115,15 +127,62 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end): self._preprocess_dir, utils.get_level_name(target_level_index), single_slice, strategy) for single_slice in target_slices_names] + + diff = time.time() - prevTime + prevTime = time.time() + print("all slice found", diff) + + target_slice_paths_min = [utils.get_slice_path( + self._preprocess_dir, + utils.get_level_name(target_level_index), + single_slice, 'min') for single_slice in target_slices_names] + + target_slice_paths_max = [utils.get_slice_path( + self._preprocess_dir, + utils.get_level_name(target_level_index), + single_slice, 'max') for single_slice in target_slices_names] + + diff = time.time() - prevTime + prevTime = time.time() + print("min max slice found", diff) # Reads records and downsamples. 
target_slices = LevelSlices( target_slice_paths, self._preprocess_bucket) + + + target_slices.read(timespan_start, timespan_end) - number_target_records = target_slices.get_records_count() + diff = time.time() - prevTime + prevTime = time.time() + print("main file read", diff) + + target_slices_min = LevelSlices( + target_slice_paths_min, self._preprocess_bucket) + + target_slices_max = LevelSlices( + target_slice_paths_max, self._preprocess_bucket) + target_slices_min.read(timespan_start, timespan_end) + target_slices_max.read(timespan_start, timespan_end) + + diff = time.time() - prevTime + prevTime = time.time() + print("min max file read", diff) + + minList = target_slices_min.get_min() + maxList = target_slices_max.get_max() + + diff = time.time() - prevTime + prevTime = time.time() + print("min max get", diff) + number_target_records = target_slices.get_records_count() target_slices.downsample(strategy, max_records=number_records) - downsampled_data = target_slices.format_response() + downsampled_data = target_slices.format_response(minList, maxList) + + diff = time.time() - prevTime + prevTime = time.time() + print("downsample finished", diff) number_result_records = target_slices.get_records_count() if number_target_records == 0:
""" + print(data_list) + if not data_list: return -1 diff --git a/backend/level_slices_reader.py b/backend/level_slices_reader.py index d9ff684..3f269f4 100644 --- a/backend/level_slices_reader.py +++ b/backend/level_slices_reader.py @@ -27,6 +27,8 @@ def __init__(self, filenames, bucket=None): self._filenames = filenames self._bucket = bucket self._records = defaultdict(list) + self._minList = defaultdict(float) + self._maxList = defaultdict(float) def read(self, start, end): """Reads and loads records from a set of slices, only records in the range @@ -75,7 +77,7 @@ def downsample(self, strategy, downsample_factor=1, max_records=None): self._records[channel], strategy, downsample_factor) return self._records - def format_response(self): + def format_response(self, minList, maxList): """Gets current data in dict type for http response. Returns: @@ -85,6 +87,30 @@ for channel in self._records.keys(): response.append({ 'name': channel, - 'data': [[record[0], record[1]] for record in self._records[channel]] + 'data': [[record[0], record[1]] for record in self._records[channel]], + 'min': minList[channel], + 'max': maxList[channel], }) return response + + def get_min(self): + if self._records is not None: + for channel in self._records.keys(): + channelData = self._records[channel] + min = channelData[0][1] + for data in channelData: + if data[1] < min: + min = data[1] + self._minList[channel] = min + return self._minList + + def get_max(self): + if self._records is not None: + for channel in self._records.keys(): + channelData = self._records[channel] + max = channelData[0][1] + for data in channelData: + if data[1] > max: + max = data[1] + self._maxList[channel] = max + return self._maxList \ No newline at end of file diff --git a/backend/multiple_level_preprocess.py b/backend/multiple_level_preprocess.py index 0b23432..51703a4 100644 --- a/backend/multiple_level_preprocess.py +++
b/backend/multiple_level_preprocess.py @@ -151,18 +151,22 @@ def _raw_preprocess(self, number_per_slice): level_slice = LevelSlice( slice_name, bucket=self._preprocess_bucket) raw_slice = raw_data.read_next_slice() - print(raw_slice) - if isinstance(raw_slice, str): - return raw_slice - level_slice.save(raw_slice) - raw_start_times.append(raw_slice[0][0]) - - slice_index += 1 - record_count += len(raw_slice) - if timespan_start == -1: - timespan_start = raw_slice[0][0] - timespan_end = raw_slice[-1][0] + if len(raw_slice) > 0 and len(raw_slice[0]) > 0: + print(raw_slice) + if isinstance(raw_slice, str): + return raw_slice + level_slice.save(raw_slice) + raw_start_times.append(raw_slice[0][0]) + slice_index += 1 + record_count += len(raw_slice) + if timespan_start == -1: + timespan_start = raw_slice[0][0] + timespan_end = raw_slice[-1][0] + else: + print('Invalid Slice: ', raw_slice) + slice_index += 1 + record_count += len(raw_slice) self._metadata['raw_number'] = record_count self._metadata['start'] = timespan_start self._metadata['end'] = timespan_end diff --git a/frontend/dist/frontend/index.html b/frontend/dist/frontend/index.html index 48806a7..8fa611d 100644 --- a/frontend/dist/frontend/index.html +++ b/frontend/dist/frontend/index.html @@ -31,8 +31,8 @@ rel="stylesheet" /> - +