From 5f56c8325deda2622ca3a7aeb1af550d8b650e38 Mon Sep 17 00:00:00 2001 From: ZabuzaW Date: Tue, 4 Aug 2020 00:21:49 +0200 Subject: [PATCH] Added ChunkMetadata, added LocalChunkCache example --- .../fastcdc4j/examples/LocalChunkCache.java | 49 +++++++++++++ .../fastcdc4j/examples/PatchSummary.java | 42 +++--------- .../fastcdc4j/external/chunking/Chunk.java | 6 ++ .../external/chunking/ChunkMetadata.java | 40 +++++++++++ .../chunking/SimpleChunkMetadata.java | 68 +++++++++++++++++++ 5 files changed, 172 insertions(+), 33 deletions(-) create mode 100644 de.zabuza.fastcdc4j.examples/src/de/zabuza/fastcdc4j/examples/LocalChunkCache.java create mode 100644 de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/external/chunking/ChunkMetadata.java create mode 100644 de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/internal/chunking/SimpleChunkMetadata.java diff --git a/de.zabuza.fastcdc4j.examples/src/de/zabuza/fastcdc4j/examples/LocalChunkCache.java b/de.zabuza.fastcdc4j.examples/src/de/zabuza/fastcdc4j/examples/LocalChunkCache.java new file mode 100644 index 0000000..ab36cdf --- /dev/null +++ b/de.zabuza.fastcdc4j.examples/src/de/zabuza/fastcdc4j/examples/LocalChunkCache.java @@ -0,0 +1,49 @@ +package de.zabuza.fastcdc4j.examples; + +import de.zabuza.fastcdc4j.external.chunking.Chunk; +import de.zabuza.fastcdc4j.external.chunking.ChunkerBuilder; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +/** + * Class offering a {@link #main(String[])} method that chunks a given build and populates a local chunk cache. + * + * @author Daniel Tischner {@literal } + */ +@SuppressWarnings({ "UseOfSystemOutOrSystemErr", "ClassIndependentOfModule", "ClassOnlyUsedInOneModule" }) +enum LocalChunkCache { + ; + + /** + * Starts the application. 
+ * + * @param args Two arguments, the path to the build and the path to the local chunk cache + */ + public static void main(final String[] args) throws IOException { + if (args.length != 2) { + throw new IllegalArgumentException( + "Expected two arguments buildPath and cachePath, where buildPath denotes the path to the build and cachePath the path to the local chunk cache."); + } + final Path buildPath = Path.of(args[0]); + final Path cachePath = Path.of(args[1]); + + int cachedChunks = 0; + int uncachedChunks = 0; + + final var chunker = new ChunkerBuilder().build(); + final var chunks = chunker.chunk(buildPath); + for (final Chunk chunk : chunks) { + final Path chunkPath = cachePath.resolve(chunk.getHexHash()); + if (Files.exists(chunkPath)) { + cachedChunks++; + } else { + Files.write(chunkPath, chunk.getData()); + uncachedChunks++; + } + } + + System.out.printf("%d cached chunks, %d uncached chunks%n", cachedChunks, uncachedChunks); + } +} diff --git a/de.zabuza.fastcdc4j.examples/src/de/zabuza/fastcdc4j/examples/PatchSummary.java b/de.zabuza.fastcdc4j.examples/src/de/zabuza/fastcdc4j/examples/PatchSummary.java index 3497207..f0d6584 100644 --- a/de.zabuza.fastcdc4j.examples/src/de/zabuza/fastcdc4j/examples/PatchSummary.java +++ b/de.zabuza.fastcdc4j.examples/src/de/zabuza/fastcdc4j/examples/PatchSummary.java @@ -49,12 +49,12 @@ private static void executePatchSummary(final String description, final Chunker final Path currentBuild) { final List previousChunks = new ArrayList<>(); chunker.chunk(previousBuild) - .forEach(chunk -> previousChunks.add(new ChunkMetadata(chunk))); + .forEach(chunk -> previousChunks.add(chunk.toChunkMetadata())); final BuildSummary previousBuildSummary = new BuildSummary(previousChunks); final List currentChunks = new ArrayList<>(); chunker.chunk(currentBuild) - .forEach(chunk -> currentChunks.add(new ChunkMetadata(chunk))); + .forEach(chunk -> currentChunks.add(chunk.toChunkMetadata())); final BuildSummary currentBuildSummary = new 
BuildSummary(currentChunks); final PatchSummary summary = new PatchSummary(previousBuildSummary, currentBuildSummary); @@ -128,14 +128,14 @@ private void computePatch() { // Chunks to move currentBuildSummary.getChunks() .filter(previousBuildSummary::containsChunk) - .filter(currentChunk -> previousBuildSummary.getChunk(currentChunk.hexHash).offset - != currentChunk.offset) + .filter(currentChunk -> previousBuildSummary.getChunk(currentChunk.getHexHash()) + .getOffset() != currentChunk.getOffset()) .forEach(chunksToMove::add); // Untouched chunks currentBuildSummary.getChunks() .filter(previousBuildSummary::containsChunk) - .filter(currentChunk -> previousBuildSummary.getChunk(currentChunk.hexHash).offset - == currentChunk.offset) + .filter(currentChunk -> previousBuildSummary.getChunk(currentChunk.getHexHash()) + .getOffset() == currentChunk.getOffset()) .forEach(untouchedChunks::add); patchSize = chunksToAdd.stream() @@ -150,15 +150,15 @@ private static final class BuildSummary { private long totalUniqueSize; private int uniqueChunksCount; - private BuildSummary(final Iterable chunks) { + private BuildSummary(final Iterable chunks) { chunks.forEach(chunk -> { totalChunksCount++; totalSize += chunk.getLength(); - if (hashToChunk.containsKey(chunk.hexHash)) { + if (hashToChunk.containsKey(chunk.getHexHash())) { return; } - hashToChunk.put(chunk.hexHash, chunk); + hashToChunk.put(chunk.getHexHash(), chunk); uniqueChunksCount++; totalUniqueSize += chunk.getLength(); }); @@ -202,28 +202,4 @@ int getUniqueChunksCount() { return uniqueChunksCount; } } - - private static final class ChunkMetadata { - private final String hexHash; - private final int length; - private final long offset; - - private ChunkMetadata(final Chunk chunk) { - hexHash = chunk.getHexHash(); - offset = chunk.getOffset(); - length = chunk.getLength(); - } - - public long getOffset() { - return offset; - } - - String getHexHash() { - return hexHash; - } - - int getLength() { - return length; - } - } } 
diff --git a/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/external/chunking/Chunk.java b/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/external/chunking/Chunk.java index ad64635..01d89dc 100644 --- a/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/external/chunking/Chunk.java +++ b/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/external/chunking/Chunk.java @@ -1,5 +1,7 @@ package de.zabuza.fastcdc4j.external.chunking; +import de.zabuza.fastcdc4j.internal.chunking.SimpleChunkMetadata; + /** * Interface representing chunked data as created by a {@link Chunker}. *
<p>
@@ -45,4 +47,8 @@ public interface Chunk { * @return A hexadecimal hash representation */ String getHexHash(); + + default ChunkMetadata toChunkMetadata() { + return new SimpleChunkMetadata(getOffset(), getLength(), getHash(), getHexHash()); + } } diff --git a/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/external/chunking/ChunkMetadata.java b/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/external/chunking/ChunkMetadata.java new file mode 100644 index 0000000..bc15336 --- /dev/null +++ b/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/external/chunking/ChunkMetadata.java @@ -0,0 +1,40 @@ +package de.zabuza.fastcdc4j.external.chunking; + +/** + * Interface representing metadata of a chunk as created by a {@link Chunker}. + *
<p>
+ * Unlike a {@link Chunk}, metadata does not own its data. + * + * @author Daniel Tischner {@literal } + */ +public interface ChunkMetadata { + /** + * Gets the offset of this chunk, with respect to its source data stream. + * + * @return The offset + */ + long getOffset(); + + /** + * Gets the length of this chunk, i.e. the amount of contained data. + * + * @return The length + */ + int getLength(); + + /** + * A binary hash representation of the contained data. Using the algorithm specified during construction by the + * {@link Chunker}. + * + * @return A binary hash representation + */ + byte[] getHash(); + + /** + * A hexadecimal hash representation of the contained data. Using the algorithm specified during construction by the + * {@link Chunker}. + * + * @return A hexadecimal hash representation + */ + String getHexHash(); +} diff --git a/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/internal/chunking/SimpleChunkMetadata.java b/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/internal/chunking/SimpleChunkMetadata.java new file mode 100644 index 0000000..95f2b1f --- /dev/null +++ b/de.zabuza.fastcdc4j/src/de/zabuza/fastcdc4j/internal/chunking/SimpleChunkMetadata.java @@ -0,0 +1,68 @@ +package de.zabuza.fastcdc4j.internal.chunking; + +import de.zabuza.fastcdc4j.external.chunking.ChunkMetadata; + +/** + * Implementation of a simple chunk metadata, wrapping given data. + * + * @author Daniel Tischner {@literal } + */ +public final class SimpleChunkMetadata implements ChunkMetadata { + /** + * The offset of this chunk, with respect to its source data stream. + */ + private final long offset; + /** + * The length of this chunk, i.e. the amount of contained data. + */ + private final int length; + /** + * A binary hash representation of the contained data. Using the algorithm specified during construction by the + * {@link de.zabuza.fastcdc4j.external.chunking.Chunker}. + */ + private final byte[] hash; + /** + * A hexadecimal hash representation of the contained data.
Using the algorithm specified during construction by the + * {@link de.zabuza.fastcdc4j.external.chunking.Chunker}. + */ + private final String hexHash; + + /** + * Creates a new simple chunk metadata. + * + * @param offset The offset of this chunk, with respect to its source data stream + * @param length The length of this chunk, i.e. the amount of contained data + * @param hash A binary hash representation of the contained data. Using the algorithm specified during + * construction by the {@link de.zabuza.fastcdc4j.external.chunking.Chunker}. + * @param hexHash A hexadecimal hash representation of the contained data. Using the algorithm specified during + * construction by the {@link de.zabuza.fastcdc4j.external.chunking.Chunker}. + */ + public SimpleChunkMetadata(final long offset, final int length, final byte[] hash, final String hexHash) { + this.offset = offset; + this.length = length; + //noinspection AssignmentOrReturnOfFieldWithMutableType + this.hash = hash; + this.hexHash = hexHash; + } + + @Override + public long getOffset() { + return offset; + } + + @Override + public int getLength() { + return length; + } + + @Override + public byte[] getHash() { + //noinspection AssignmentOrReturnOfFieldWithMutableType + return hash; + } + + @Override + public String getHexHash() { + return hexHash; + } +}