Merge pull request #45 from TutteInstitute/chunk_data_files
Chunk data files and increase size on selected points
lmcinnes authored Oct 16, 2024
2 parents 954bba1 + 8f5295c commit 9770d85
Showing 3 changed files with 104 additions and 23 deletions.
103 changes: 87 additions & 16 deletions datamapplot/deckgl_template.html
@@ -223,6 +223,39 @@
return result;
}

function mergeTypedArrays(arrays) {
let totalLength = arrays.reduce((acc, arr) => acc + arr.length, 0);
let result = new arrays[0].constructor(totalLength);
let currentLength = 0;
for (let arr of arrays) {
result.set(arr, currentLength);
currentLength += arr.length;
}
return result;
}
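// Illustrative sketch (editor's addition, not part of the commit): mergeTypedArrays
// concatenates same-typed chunks in order, allocating the result once via
// arrays[0].constructor. For example, assuming two Float32Array chunks:
//   mergeTypedArrays([new Float32Array([1, 2]), new Float32Array([3, 4])])
//   // -> Float32Array [1, 2, 3, 4]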

{% if not inline_data -%}
function combineTypedTableChunks(tableChunks) {
tableChunks.sort((a, b) => a.chunkIndex - b.chunkIndex);
const combinedTable = {};
Object.keys(tableChunks[0].chunkData).forEach((key) => {
const arrays = tableChunks.map((chunk) => chunk.chunkData[key]);
combinedTable[key] = mergeTypedArrays(arrays);
});
return combinedTable;
}

function combineTableChunks(tableChunks) {
tableChunks.sort((a, b) => a.chunkIndex - b.chunkIndex);
const combinedTable = {};
Object.keys(tableChunks[0].chunkData).forEach((key) => {
const arrays = tableChunks.map((chunk) => chunk.chunkData[key]);
combinedTable[key] = arrays.flat();
});
return combinedTable;
}
{% endif -%}
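// Editor's sketch (not part of the commit): chunks can arrive in any order, so both
// combiners sort by chunkIndex before merging column-wise. Assuming a two-chunk
// table with a single "x" column:
//   combineTypedTableChunks([
//     { chunkIndex: 1, chunkData: { x: new Float32Array([3, 4]) } },
//     { chunkIndex: 0, chunkData: { x: new Float32Array([1, 2]) } },
//   ])
//   // -> { x: Float32Array [1, 2, 3, 4] }
// combineTableChunks is the plain-array analogue, merging via Array.prototype.flat().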

if (!("CompressionStream" in window)) {
throw new Error(
"Your browser doesn't support the Compression Streams API " +
@@ -294,12 +327,20 @@
const directoryPath = currentURL.substring(0, currentURL.lastIndexOf('/') + 1);
const originURL = self.location.origin + directoryPath;

const pointDataEncoded = `${originURL}/{{file_prefix}}_point_data.zip`;
const hoverDataEncoded = `${originURL}/{{file_prefix}}_meta_data.zip`;
const labelDataEncoded = `${originURL}/{{file_prefix}}_label_data.zip`;
const pointDataEncoded = [
{% for chunk_index in range(n_data_chunks) -%}
`${originURL}/{{file_prefix}}_point_data_{{chunk_index}}.zip`,
{% endfor -%}
];
const hoverDataEncoded = [
{% for chunk_index in range(n_data_chunks) -%}
`${originURL}/{{file_prefix}}_meta_data_{{chunk_index}}.zip`,
{% endfor -%}
];
const labelDataEncoded = [`${originURL}/{{file_prefix}}_label_data.zip`];
{% if enable_histogram %}
const histogramBinDataEncoded = `${originURL}/{{file_prefix}}_histogram_bin_data.zip`;
const histogramIndexDataEncoded = `${originURL}/{{file_prefix}}_histogram_index_data.zip`;
const histogramBinDataEncoded = [`${originURL}/{{file_prefix}}_histogram_bin_data.zip`];
const histogramIndexDataEncoded = [`${originURL}/{{file_prefix}}_histogram_index_data.zip`];
{% endif %}
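// Editor's note (illustrative, not part of the commit): with n_data_chunks = 2 and the
// default file_prefix of "datamapplot", the Jinja loops above render to, e.g.:
//   const pointDataEncoded = [
//     `${originURL}/datamapplot_point_data_0.zip`,
//     `${originURL}/datamapplot_point_data_1.zip`,
//   ];
// Label and histogram data remain unchunked, hence the one-element arrays.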

// Blob for the parsing worker
@@ -342,14 +383,20 @@
throw error;
}
}
const binaryData = await decompressFile(encodedData);
if (JSONParse) {
const parsedData = JSON.parse(new TextDecoder("utf-8").decode(binaryData));
self.postMessage({ type: "data", data: parsedData });
} else {
// Send the parsed table back to the main thread
self.postMessage({ type: "data", data: binaryData });
}
let processedCount = 0;
const decodedData = encodedData.map(async (file, i) => {
const binaryData = await decompressFile(file);
processedCount += 1;
self.postMessage({ type: "progress", progress: Math.round(((processedCount) / encodedData.length) * 95) });
if (JSONParse) {
const parsedData = JSON.parse(new TextDecoder("utf-8").decode(binaryData));
return { chunkIndex: i, chunkData: parsedData };
} else {
return { chunkIndex: i, chunkData: binaryData };
}
});
self.postMessage({ type: "data", data: await Promise.all(decodedData) });
}
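// Editor's sketch (not part of the commit): the chunks decompress concurrently and may
// finish out of order; Promise.all preserves input order, and each result carries its
// chunkIndex so the main thread can sort before merging. Progress is capped at 95,
// reserving the last 5% for parsing on the main thread; e.g. with 4 chunks the worker
// reports Math.round(1/4 * 95) = 24, then 48, 71, and 95.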
`], { type: 'application/javascript' });
const workerUrl = URL.createObjectURL(parsingWorkerBlob);
@@ -402,7 +449,14 @@
updateProgressBar('point-data-progress', event.data.progress);
} else {
const { data } = event.data;
{% if inline_data %}
const pointData = await simpleArrowParser(data);
{% else %}
const chunkArray = data.map(async ({ chunkIndex, chunkData }) => {
return {chunkIndex: chunkIndex, chunkData: await simpleArrowParser(chunkData)};
});
const pointData = await Promise.all(chunkArray).then(combineTypedTableChunks);
{% endif %}
datamap.addPoints(
pointData,
{
@@ -417,16 +471,17 @@
}
);

document.getElementById("loading").style.display = "none";
updateProgressBar('point-data-progress', 100);
checkAllDataLoaded();

{%- if enable_lasso_selection %}
/* Lasso Selection */
const lassoSelector = new LassoSelectionTool(
datamap,
lassoSelectionCallback
);
{% endif -%}
document.getElementById("loading").style.display = "none";
updateProgressBar('point-data-progress', 100);
checkAllDataLoaded();
}
};
}
@@ -439,7 +494,11 @@
updateProgressBar('label-data-progress', event.data.progress);
} else {
const { data } = event.data;
{% if inline_data -%}
const labelData = data;
{% else -%}
const labelData = data[0].chunkData;
{% endif -%}
datamap.addLabels(labelData, {
labelTextColor: {{label_text_color}},
textMinPixelSize: {{text_min_pixel_size}},
@@ -473,7 +532,11 @@
updateProgressBar('meta-data-progress', event.data.progress);
} else {
const { data } = event.data;
{% if inline_data -%}
const hoverData = data;
{% else -%}
const hoverData = combineTableChunks(data);
{% endif -%}
datamap.addMetaData(hoverData, {
tooltipFunction: {{get_tooltip}},
onClickFunction: {{on_click if on_click else "null"}},
@@ -508,7 +571,11 @@
updateProgressBar('histogram-bin-data-progress', event.data.progress);
} else {
const { data } = event.data;
{% if inline_data -%}
const histogramBinData = data;
{% else -%}
const histogramBinData = data[0].chunkData;
{% endif -%}
resolve(histogramBinData);
updateProgressBar('histogram-bin-data-progress', 100);
checkAllDataLoaded();
@@ -526,7 +593,11 @@
updateProgressBar('histogram-index-data-progress', event.data.progress);
} else {
const { data } = event.data;
{% if inline_data -%}
const histogramIndexData = simpleArrowParser(data);
{% else -%}
const histogramIndexData = simpleArrowParser(data[0].chunkData);
{% endif -%}
resolve(histogramIndexData);
updateProgressBar('histogram-index-data-progress', 100);
checkAllDataLoaded();
16 changes: 11 additions & 5 deletions datamapplot/interactive_rendering.py
@@ -162,7 +162,7 @@ def save_bundle(self, filename):
"""Save an interactive figure to a zip file with name `filename`"""
with zipfile.ZipFile(filename, "w") as zf:
zf.writestr("index.html", self._html_str)
for filename in re.findall(r"/(.*?_data\.zip)", self._html_str):
for filename in re.findall(r"/(.*?_data(?:_\d)?\.zip)", self._html_str):
print(f"Adding {filename} to bundle")
zf.write(filename)
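# Editor's note (illustrative, not part of the commit): the widened pattern now also
# captures chunked names such as "datamapplot_point_data_0.zip" and
# "datamapplot_meta_data_3.zip" alongside unchunked ones like
# "datamapplot_label_data.zip". Note that "(?:_\d)?" matches at most one digit,
# i.e. chunk indices 0 through 9.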

@@ -888,6 +888,7 @@ def render_html(
base64_histogram_bin_data = None
base64_histogram_index_data = None
file_prefix = None
n_chunks = 0
else:
base64_point_data = ""
base64_hover_data = ""
@@ -897,10 +898,14 @@
file_prefix = (
offline_data_prefix if offline_data_prefix is not None else "datamapplot"
)
with gzip.open(f"{file_prefix}_point_data.zip", "wb") as f:
point_data.to_feather(f, compression="uncompressed")
with gzip.open(f"{file_prefix}_meta_data.zip", "wb") as f:
f.write(json.dumps(hover_data.to_dict(orient="list")).encode())
n_chunks = (point_data.shape[0] // 500000) + 1
for i in range(n_chunks):
chunk_start = i * 500000
chunk_end = min((i + 1) * 500000, point_data.shape[0])
with gzip.open(f"{file_prefix}_point_data_{i}.zip", "wb") as f:
point_data[chunk_start:chunk_end].to_feather(f, compression="uncompressed")
with gzip.open(f"{file_prefix}_meta_data_{i}.zip", "wb") as f:
f.write(json.dumps(hover_data[chunk_start:chunk_end].to_dict(orient="list")).encode())
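# Editor's sketch (not part of the commit): chunking arithmetic at the fixed
# 500,000-row chunk size. For a hypothetical 1,200,000-row point_data frame:
#   n_chunks = (1_200_000 // 500_000) + 1  # == 3
#   # chunk 0 -> rows [0, 500_000), chunk 1 -> [500_000, 1_000_000),
#   # chunk 2 -> [1_000_000, 1_200_000)
# When the row count is an exact multiple of 500,000, the formula produces one
# trailing empty chunk (e.g. exactly 1,000,000 rows -> 3 chunks, the last empty).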
label_data_json = label_dataframe.to_json(path_or_buf=None, orient="records")
with gzip.open(f"{file_prefix}_label_data.zip", "wb") as f:
f.write(bytes(label_data_json, "utf-8"))
@@ -1036,6 +1041,7 @@ def render_html(
cluster_boundary_polygons="polygon" in label_dataframe.columns,
cluster_boundary_line_width=cluster_boundary_line_width,
data_bounds=bounds,
n_data_chunks=n_chunks,
on_click=on_click,
enable_lasso_selection=enable_lasso_selection,
get_tooltip=get_tooltip,
8 changes: 6 additions & 2 deletions datamapplot/static/js/datamap.js
@@ -36,7 +36,7 @@ function getInitialViewportSize() {
return { viewportWidth: width, viewportHeight: height };
}

function calculateZoomLevel(bounds, viewportWidth, viewportHeight, padding = 0) {
function calculateZoomLevel(bounds, viewportWidth, viewportHeight, padding = 0.5) {
// Calculate the range of the bounds
const lngRange = bounds[1] - bounds[0];
const latRange = bounds[3] - bounds[2];
@@ -342,6 +342,8 @@ class DataMap {
// Increment update trigger
this.updateTriggerCounter++;

const sizeAdjust = 1/(1 + (Math.sqrt(selectedIndices.size) / Math.log2(this.selected.length)));
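// Editor's sketch (not part of the commit): sizeAdjust damps the highlight boost as the
// selection grows, assuming this.selected.length is the total point count. For a
// hypothetical map of 1,000,000 points with 100 selected:
//   Math.sqrt(100) / Math.log2(1_000_000)  // ≈ 10 / 19.93 ≈ 0.50
//   sizeAdjust = 1 / (1 + 0.50)            // ≈ 0.67
// so radiusMinPixels becomes 2 * (this.pointRadiusMinPixels + 0.67). Selecting all
// points instead gives sizeAdjust ≈ 1 / (1 + 50.2) ≈ 0.02, i.e. almost no extra radius.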

const updatedPointLayer = this.pointLayer.clone({
data: {
...this.pointLayer.props.data,
@@ -350,8 +352,10 @@
getFilterValue: { value: this.selected, size: 1 }
}
},
radiusMinPixels: hasSelectedIndices ? 2 * (this.pointRadiusMinPixels + sizeAdjust) : this.pointRadiusMinPixels,
updateTriggers: {
getFilterValue: this.updateTriggerCounter
getFilterValue: this.updateTriggerCounter,
radiusMinPixels: this.updateTriggerCounter,
}
});

