Commit

Merge pull request #80 from googleinterns/main

update most recent version

Longzhao-Google authored Nov 21, 2023
2 parents 4b4a46d + a804cbc commit c02eefc
Showing 40 changed files with 4,269 additions and 3,293 deletions.
@@ -0,0 +1 @@
{"level1/s0.csv": 1607310026831041}
@@ -0,0 +1 @@
{"level0/s0.csv": 1607310020479006.0, "level0/s1.csv": 1607310936975548.0}
@@ -0,0 +1 @@
{"level1/s0.csv": 1607310021218054.0}
@@ -0,0 +1 @@
{"raw_file": "ClockworkMapTests-testMapWear__2020-12-07T03:00:20.471Z.csv", "levels": {"names": ["level0", "level1"], "level0": {"names": ["level0/s0.csv", "level0/s1.csv"], "frequency": 0.00010911451500060223, "number": 195335}, "level1": {"names": ["level1/s0.csv"], "frequency": 1.0909496393179725e-06, "number": 1953}}, "raw_number": 195335, "start": 1607310020479006.0, "end": 1607311810662466.0}
@@ -0,0 +1 @@
{"level1/s0.csv": 1607310021084773.0}
2 changes: 1 addition & 1 deletion backend/app.yaml
@@ -9,7 +9,7 @@ runtime: python37
env: standard

manual_scaling:
instances: 1
instances: 10

# Set App Engine instance class (defaults to F1)
# See https://cloud.google.com/appengine/docs/standard/#instance_classes
2 changes: 1 addition & 1 deletion backend/cron.yaml
@@ -2,4 +2,4 @@ cron:
- description: "Scan new files to downsample"
url: /downsample
target: api
schedule: every 10 mins
schedule: every 5 mins
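The cron entry above drives the downsampling pipeline: App Engine cron now hits /downsample on the api service every 5 minutes instead of every 10. A hypothetical handler sketch, assuming a Flask backend; the route path comes from cron.yaml, but the handler body and the scan_and_preprocess() placeholder are illustrative, not code from this repository:

from flask import Flask

app = Flask(__name__)

def scan_and_preprocess():
    """Placeholder for the real work: find new raw files and downsample them."""
    return 0

@app.route("/downsample")
def downsample():
    """Endpoint App Engine cron calls on the schedule declared in cron.yaml."""
    processed = scan_and_preprocess()
    return "preprocessed {} file(s)".format(processed), 200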
67 changes: 64 additions & 3 deletions backend/data_fetcher.py
@@ -15,7 +15,7 @@
"""A module for fetching from multiple-level preprocessing."""

import utils

import time
from level_slices_reader import LevelSlices
from metadata import Metadata

@@ -77,9 +77,17 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end):
}
]
"""

prevTime = time.time()
print("fetch data starts", prevTime)

self._metadata = Metadata(
self._preprocess_dir, bucket=self._preprocess_bucket)
self._metadata.load()

diff = time.time() - prevTime
prevTime = time.time()
print("meta data done", diff)

if timespan_start is None:
timespan_start = self._metadata['start']
@@ -100,6 +108,10 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end):
target_level = self._metadata['levels'][self._metadata['levels']
['names'][target_level_index]]

diff = time.time() - prevTime
prevTime = time.time()
print("target level located",diff)

level_metadata = Metadata(
self._preprocess_dir, strategy, utils.get_level_name(
target_level_index), bucket=self._preprocess_bucket)
@@ -115,15 +127,62 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end):
self._preprocess_dir,
utils.get_level_name(target_level_index),
single_slice, strategy) for single_slice in target_slices_names]

diff = time.time() - prevTime
prevTime = time.time()
print("all slice found", diff)

target_slice_paths_min = [utils.get_slice_path(
self._preprocess_dir,
utils.get_level_name(target_level_index),
single_slice, 'min') for single_slice in target_slices_names]

target_slice_paths_max = [utils.get_slice_path(
self._preprocess_dir,
utils.get_level_name(target_level_index),
single_slice, 'max') for single_slice in target_slices_names]

diff = time.time() - prevTime
prevTime = time.time()
print("min max slice found", diff)

# Reads records and downsamples.
target_slices = LevelSlices(
target_slice_paths, self._preprocess_bucket)



target_slices.read(timespan_start, timespan_end)
number_target_records = target_slices.get_records_count()

diff = time.time() - prevTime
prevTime = time.time()
print("main file read", diff)

target_slices_min = LevelSlices(
target_slice_paths_min, self._preprocess_bucket)

target_slices_max = LevelSlices(
target_slice_paths_max, self._preprocess_bucket)
target_slices_min.read(timespan_start, timespan_end)
target_slices_max.read(timespan_start, timespan_end)

diff = time.time() - prevTime
prevTime = time.time()
print("min max file read", diff)

minList = target_slices_min.get_min()
maxList = target_slices_max.get_max()

diff = time.time() - prevTime
prevTime = time.time()
print("min max get", diff)
number_target_records = target_slices.get_records_count()
target_slices.downsample(strategy, max_records=number_records)
downsampled_data = target_slices.format_response()
downsampled_data = target_slices.format_response(minList, maxList)

diff = time.time() - prevTime
prevTime = time.time()
print("dowmsample finished", diff)
number_result_records = target_slices.get_records_count()

if number_target_records == 0:
@@ -146,6 +205,8 @@ def _binary_search(self, data_list, value, reverse=False):
Returns:
An int of index for the result.
"""
print(data_list)

if not data_list:
return -1

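The fetch() changes above interleave a repeated prevTime / diff / print pattern to log how long each stage takes (metadata load, level lookup, slice reads, min/max extraction, downsampling). A minimal sketch of the same instrumentation folded into one reusable helper; the stage labels echo the prints above, but the timed() context manager is an assumption, not part of this commit:

import time
from contextlib import contextmanager

@contextmanager
def timed(stage):
    """Print how long the wrapped block took, mirroring the ad-hoc prints in fetch()."""
    start = time.time()
    yield
    print(stage, time.time() - start)

# Usage, roughly matching the stages instrumented above:
# with timed("meta data done"):
#     self._metadata.load()
# with timed("main file read"):
#     target_slices.read(timespan_start, timespan_end)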
30 changes: 28 additions & 2 deletions backend/level_slices_reader.py
@@ -27,6 +27,8 @@ def __init__(self, filenames, bucket=None):
self._filenames = filenames
self._bucket = bucket
self._records = defaultdict(list)
self._minList = defaultdict(float)
self._maxList = defaultdict(float)

def read(self, start, end):
"""Reads and loads records from a set of slices, only records in the range
@@ -75,7 +77,7 @@ def downsample(self, strategy, downsample_factor=1, max_records=None):
self._records[channel], strategy, downsample_factor)
return self._records

def format_response(self):
def format_response(self, minList: defaultdict(float), maxList: defaultdict(float)):
"""Gets current data in dict type for http response.
Returns:
@@ -85,6 +87,30 @@ def format_response(self):
for channel in self._records.keys():
response.append({
'name': channel,
'data': [[record[0], record[1]] for record in self._records[channel]]
'data': [[record[0], record[1]] for record in self._records[channel]],
'min': minList[channel],
'max': maxList[channel],
})
return response

def get_min(self):
if self._records is not None:
for channel in self._records.keys():
channelData = self._records[channel]
min = channelData[0][1]
for data in channelData:
if data[1] < min:
min = data[1]
self._minList[channel] = min
return self._minList

def get_max(self):
if self._records is not None:
for channel in self._records.keys():
channelData = self._records[channel]
max = channelData[0][1]
for data in channelData:
if data[1] > max:
max = data[1]
self._maxList[channel] = max
return self._maxList
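get_min() and get_max() above walk every record per channel with explicit loops, and format_response() now attaches one min and one max per channel to the HTTP response. An equivalent sketch using Python builtins over the same layout, a dict of channel name to [timestamp, value] pairs; the shape is inferred from the reader code above, and channel_extremes() is illustrative rather than a helper in this repo:

from collections import defaultdict

def channel_extremes(records):
    """Return (min_by_channel, max_by_channel) for records shaped {channel: [[t, v], ...]}."""
    min_list, max_list = defaultdict(float), defaultdict(float)
    for channel, data in records.items():
        values = [point[1] for point in data]
        if values:
            min_list[channel] = min(values)
            max_list[channel] = max(values)
    return min_list, max_list

# Example with a hypothetical channel:
# channel_extremes({"channel0": [[0, 1.5], [1, 0.2], [2, 3.0]]})
# returns ({"channel0": 0.2}, {"channel0": 3.0})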
26 changes: 15 additions & 11 deletions backend/multiple_level_preprocess.py
@@ -151,18 +151,22 @@ def _raw_preprocess(self, number_per_slice):
level_slice = LevelSlice(
slice_name, bucket=self._preprocess_bucket)
raw_slice = raw_data.read_next_slice()
print(raw_slice)
if isinstance(raw_slice, str):
return raw_slice
level_slice.save(raw_slice)
raw_start_times.append(raw_slice[0][0])

slice_index += 1
record_count += len(raw_slice)
if timespan_start == -1:
timespan_start = raw_slice[0][0]
timespan_end = raw_slice[-1][0]
if len(raw_slice) > 0 and len(raw_slice[0]) > 0:
print(raw_slice)
if isinstance(raw_slice, str):
return raw_slice
level_slice.save(raw_slice)
raw_start_times.append(raw_slice[0][0])

slice_index += 1
record_count += len(raw_slice)
if timespan_start == -1:
timespan_start = raw_slice[0][0]
timespan_end = raw_slice[-1][0]
else:
print('Invalid Slice: ', raw_slice)
slice_index += 1
record_count += len(raw_slice)
self._metadata['raw_number'] = record_count
self._metadata['start'] = timespan_start
self._metadata['end'] = timespan_end
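The _raw_preprocess() change above wraps the per-slice bookkeeping in a check that the slice is non-empty before touching raw_slice[0][0], and logs invalid slices instead of failing on them. A standalone restatement of that guard; is_valid_slice() is not a helper in this repo, it simply names the condition used in the diff:

def is_valid_slice(raw_slice):
    """True when the slice has at least one record and that record is non-empty,
    so reading the first timestamp at raw_slice[0][0] is safe."""
    return len(raw_slice) > 0 and len(raw_slice[0]) > 0

# is_valid_slice([[1607310020479006.0, 1.2]]) -> True
# is_valid_slice([]) -> False; previously raw_slice[0][0] raised IndexError here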
4 changes: 2 additions & 2 deletions frontend/dist/frontend/index.html
@@ -31,8 +31,8 @@
rel="stylesheet"
/>
<script src="https://api-dot-tank-big-data-plotting-285623.googleplex.com/empty.js"></script>
<link rel="stylesheet" href="styles.9cc795de30faa8473576.css"></head>
<link rel="stylesheet" href="styles.f97ec82a51482a781519.css"></head>
<body class="mat-typography">
<app-root></app-root>
<script src="runtime-es2015.0dae8cbc97194c7caed4.js" type="module"></script><script src="runtime-es5.0dae8cbc97194c7caed4.js" nomodule defer></script><script src="polyfills-es5.69c85fcea851aa3b77d9.js" nomodule defer></script><script src="polyfills-es2015.a9dba46e05c1741973db.js" type="module"></script><script src="main-es2015.4e97e65bb9fba75be658.js" type="module"></script><script src="main-es5.4e97e65bb9fba75be658.js" nomodule defer></script></body>
<script src="runtime-es2015.17457c14264390561f33.js" type="module"></script><script src="runtime-es5.17457c14264390561f33.js" nomodule defer></script><script src="polyfills-es5.5e8623bcfdf20c840d43.js" nomodule defer></script><script src="polyfills-es2015.46efc8b3dd2d70094f0e.js" type="module"></script><script src="main-es2015.2253e31ede5e2e5ce4db.js" type="module"></script><script src="main-es5.2253e31ede5e2e5ce4db.js" nomodule defer></script></body>
</html>
1 change: 1 addition & 0 deletions frontend/dist/frontend/main-es2015.2253e31ede5e2e5ce4db.js

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion frontend/dist/frontend/main-es2015.4e97e65bb9fba75be658.js

This file was deleted.

1 change: 1 addition & 0 deletions frontend/dist/frontend/main-es5.2253e31ede5e2e5ce4db.js

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion frontend/dist/frontend/main-es5.4e97e65bb9fba75be658.js

This file was deleted.

1 change: 0 additions & 1 deletion frontend/dist/frontend/styles.9cc795de30faa8473576.css

This file was deleted.

1 change: 1 addition & 0 deletions frontend/dist/frontend/styles.f97ec82a51482a781519.css

Large diffs are not rendered by default.
