From e682123da22319346f41cc0c883e8696334b2f40 Mon Sep 17 00:00:00 2001
From: Sebastian Berlin
Date: Thu, 4 Apr 2024 08:37:20 +0200
Subject: [PATCH] Upload files in chunks

Adds function for uploading a file in chunks as described on
https://www.mediawiki.org/wiki/API:Upload#Example_3:_Upload_file_in_chunks.
The chunks are created as temporary files and are removed once uploaded.
---
 .../wikibase/editing/MediaFileUtils.java     | 115 ++++++++++++++++++
 .../wikibase/editing/MediaFileUtilsTest.java |  94 ++++++++++++++
 2 files changed, 209 insertions(+)

diff --git a/extensions/wikibase/src/org/openrefine/wikibase/editing/MediaFileUtils.java b/extensions/wikibase/src/org/openrefine/wikibase/editing/MediaFileUtils.java
index 2c132b83dd9d..17a6d2456018 100644
--- a/extensions/wikibase/src/org/openrefine/wikibase/editing/MediaFileUtils.java
+++ b/extensions/wikibase/src/org/openrefine/wikibase/editing/MediaFileUtils.java
@@ -2,8 +2,12 @@ package org.openrefine.wikibase.editing;
 
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -102,6 +106,62 @@ public MediaUploadResponse uploadLocalFile(File path, String fileName, String wi
         return uploadFile(parameters, files);
     }
 
+    /**
+     * Upload a local file to the MediaWiki instance in chunks.
+     *
+     * @param path
+     *            ChunkedFile of the local file
+     * @param fileName
+     *            its filename once stored on the wiki
+     * @param wikitext
+     *            the accompanying wikitext for the file
+     * @param summary
+     *            the edit summary associated with the upload
+     * @param tags
+     *            tags to apply to the edit
+     * @return
+     * @throws IOException
+     * @throws MediaWikiApiErrorException
+     */
+    protected MediaUploadResponse uploadLocalFileChunked(ChunkedFile path, String fileName, String wikitext, String summary,
+            List<String> tags)
+            throws IOException, MediaWikiApiErrorException {
+        MediaUploadResponse response = null;
+        int i = 1;
+        for (File chunk = path.readChunk(); chunk != null; chunk = path.readChunk()) {
+            Map<String, String> parameters = new HashMap<>();
+            parameters.put("action", "upload");
+            parameters.put("token", getCsrfToken());
+            parameters.put("stash", "1");
+            parameters.put("filename", fileName);
+            parameters.put("filesize", String.valueOf(path.getLength()));
+            if (response == null) {
+                // In the first request we don't have offset or file key.
+                parameters.put("offset", "0");
+            } else {
+                parameters.put("offset", String.valueOf(response.offset));
+                parameters.put("filekey", response.filekey);
+            }
+            Map<String, ImmutablePair<String, File>> files = new HashMap<>();
+            String chunkName = "chunk-" + i + ".png";
+            files.put("chunk", new ImmutablePair<>(chunkName, chunk));
+            response = uploadFile(parameters, files);
+            chunk.delete();
+            i++;
+        }
+
+        Map<String, String> parameters = new HashMap<>();
+        parameters.put("action", "upload");
+        parameters.put("token", getCsrfToken());
+        parameters.put("filename", fileName);
+        parameters.put("filekey", response.filekey);
+        parameters.put("tags", String.join("|", tags));
+        parameters.put("comment", summary);
+        parameters.put("text", wikitext);
+
+        return uploadFile(parameters, null);
+    }
+
     /**
      * Upload a file that the MediaWiki server fetches directly from the supplied URL. The URL domain must likely be
      * whitelisted before.
@@ -261,6 +321,10 @@ public static class MediaUploadResponse {
         public String filename;
         @JsonProperty("pageid")
         public long pageid;
+        @JsonProperty("offset")
+        public long offset;
+        @JsonProperty("filekey")
+        public String filekey;
         @JsonProperty("warnings")
         public Map<String, JsonNode> warnings;
 
@@ -306,4 +370,55 @@ public MediaInfoIdValue getMid(ApiConnection connection, String siteIri) throws
             return mid;
         }
     }
+
+    /**
+     * A file read one chunk at a time.
+     */
+    public static class ChunkedFile {
+
+        protected FileInputStream stream;
+        protected final int chunkSize = 5000;
+        protected File path;
+        protected long bytesRead;
+
+        public ChunkedFile(File path) throws FileNotFoundException {
+            this.path = path;
+            stream = new FileInputStream(path);
+            bytesRead = 0;
+        }
+
+        /**
+         * Read the next chunk of the file.
+         *
+         * @return {File} Contains a chunk of the original file. The length in bytes is chunkSize or however much
+         *         remains of the file if the last chunk is read.
+         * @throws IOException
+         */
+        public File readChunk() throws IOException {
+            if (bytesRead >= path.length()) {
+                return null;
+            }
+
+            // Read at most the remaining bytes.
+            int bytesToRead = (int) Math.min(path.length() - bytesRead, chunkSize);
+            byte[] bytes = new byte[bytesToRead];
+            int chunkBytesRead = stream.read(bytes);
+            Path chunk = Files.createTempFile(null, null);
+            Files.write(chunk, bytes);
+            bytesRead += chunkBytesRead;
+
+            return chunk.toFile();
+        }
+
+        /**
+         * Get the length of the file.
+         *
+         * @see File#length() length
+         * @return {long}
+         */
+        public long getLength() {
+            return path.length();
+        }
+    }
 }
diff --git a/extensions/wikibase/tests/src/org/openrefine/wikibase/editing/MediaFileUtilsTest.java b/extensions/wikibase/tests/src/org/openrefine/wikibase/editing/MediaFileUtilsTest.java
index 9ee84dd3cbd8..79bc0a8eca49 100644
--- a/extensions/wikibase/tests/src/org/openrefine/wikibase/editing/MediaFileUtilsTest.java
+++ b/extensions/wikibase/tests/src/org/openrefine/wikibase/editing/MediaFileUtilsTest.java
@@ -3,6 +3,7 @@
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.inOrder;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
@@ -13,6 +14,8 @@
 import java.io.File;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
@@ -21,6 +24,7 @@
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
+import org.apache.commons.lang3.tuple.ImmutablePair;
 import org.mockito.InOrder;
 import org.mockito.Mockito;
 import org.testng.annotations.Test;
@@ -33,6 +37,7 @@
 import com.google.refine.util.ParsingUtilities;
 
+import org.openrefine.wikibase.editing.MediaFileUtils.ChunkedFile;
 import org.openrefine.wikibase.editing.MediaFileUtils.MediaUploadResponse;
 
 public class MediaFileUtilsTest {
@@ -332,4 +337,93 @@ protected void mockCsrfCall(ApiConnection connection) throws IOException, MediaW
         JsonNode tokenJsonResponse = ParsingUtilities.mapper.readTree(csrfResponse);
         when(connection.sendJsonRequest("POST", tokenParams)).thenReturn(tokenJsonResponse);
     }
+
+    @Test
+    public void testUploadLocalFileChunked() throws IOException, MediaWikiApiErrorException {
+        ApiConnection connection = mock(ApiConnection.class);
+        // mock CSRF token request
+        mockCsrfCall(connection);
+
+        ChunkedFile chunkedFile = mock(ChunkedFile.class);
+        when(chunkedFile.getLength()).thenReturn(10001L);
+        Path firstChunk = Files.createTempFile("chunk-1-", ".png");
+        Path secondChunk = Files.createTempFile("chunk-2-", ".png");
+        Path thirdChunk = Files.createTempFile("chunk-3-", ".png");
+        when(chunkedFile.readChunk())
+                .thenReturn(firstChunk.toFile())
+                .thenReturn(secondChunk.toFile())
+                .thenReturn(thirdChunk.toFile())
+                .thenReturn(null);
+
+        // Initialise the upload and upload the first chunk.
+        Map<String, String> firstParams = new HashMap<>();
+        firstParams.put("action", "upload");
+        firstParams.put("filename", "My_test_file.png");
+        firstParams.put("stash", "1");
+        firstParams.put("filesize", "10001");
+        firstParams.put("offset", "0");
+        firstParams.put("token", csrfToken);
+        String firstResponseString = "{\"upload\":{\"offset\":5000,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}";
+        JsonNode firstResponse = ParsingUtilities.mapper.readTree(firstResponseString);
+        Map<String, ImmutablePair<String, File>> firstFiles = new HashMap<>();
+        firstFiles.put("chunk", new ImmutablePair<>("chunk-1.png", firstChunk.toFile()));
+        when(connection.sendJsonRequest(eq("POST"), eq(firstParams), eq(firstFiles))).thenReturn(firstResponse);
+
+        // Upload the second chunk.
+        Map<String, String> secondParams = new HashMap<>();
+        secondParams.put("action", "upload");
+        secondParams.put("filename", "My_test_file.png");
+        secondParams.put("stash", "1");
+        secondParams.put("filesize", "10001");
+        secondParams.put("offset", "5000");
+        secondParams.put("filekey", "filekey.1234.png");
+        secondParams.put("token", csrfToken);
+        String secondResponseString = "{\"upload\":{\"offset\":10000,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}";
+        JsonNode secondResponse = ParsingUtilities.mapper.readTree(secondResponseString);
+        Map<String, ImmutablePair<String, File>> secondFiles = new HashMap<>();
+        secondFiles.put("chunk", new ImmutablePair<>("chunk-2.png", secondChunk.toFile()));
+        when(connection.sendJsonRequest(eq("POST"), eq(secondParams), eq(secondFiles))).thenReturn(secondResponse);
+
+        // Upload the third and final chunk.
+        Map<String, String> thirdParams = new HashMap<>();
+        thirdParams.put("action", "upload");
+        thirdParams.put("filename", "My_test_file.png");
+        thirdParams.put("stash", "1");
+        thirdParams.put("filesize", "10001");
+        thirdParams.put("offset", "10000");
+        thirdParams.put("filekey", "filekey.1234.png");
+        thirdParams.put("token", csrfToken);
+        String thirdResponseString = "{\"upload\":{\"offset\":10001,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}";
+        JsonNode thirdResponse = ParsingUtilities.mapper.readTree(
+                thirdResponseString);
+        Map<String, ImmutablePair<String, File>> thirdFiles = new HashMap<>();
+        thirdFiles.put("chunk", new ImmutablePair<>("chunk-3.png", thirdChunk.toFile()));
+        when(connection.sendJsonRequest(eq("POST"), eq(thirdParams), eq(thirdFiles))).thenReturn(thirdResponse);
+
+        // Finalise the upload.
+        Map<String, String> finalParams = new HashMap<>();
+        finalParams.put("action", "upload");
+        finalParams.put("filename", "My_test_file.png");
+        finalParams.put("filekey", "filekey.1234.png");
+        finalParams.put("tags", "");
+        finalParams.put("comment", "my summary");
+        finalParams.put("text", "my wikitext");
+        finalParams.put("token", csrfToken);
+        JsonNode finalResponse = ParsingUtilities.mapper.readTree(successfulUploadResponse);
+        when(connection.sendJsonRequest(eq("POST"), eq(finalParams), eq(null))).thenReturn(finalResponse);
+
+        MediaFileUtils mediaFileUtils = new MediaFileUtils(connection);
+        MediaUploadResponse response = mediaFileUtils.uploadLocalFileChunked(chunkedFile, "My_test_file.png", "my wikitext", "my summary",
+                Collections.emptyList());
+
+        InOrder inOrder = inOrder(connection);
+        inOrder.verify(connection).sendJsonRequest(eq("POST"), eq(firstParams), eq(firstFiles));
+        inOrder.verify(connection).sendJsonRequest(eq("POST"), eq(secondParams), eq(secondFiles));
+        inOrder.verify(connection).sendJsonRequest(eq("POST"), eq(thirdParams), eq(thirdFiles));
+        inOrder.verify(connection).sendJsonRequest(eq("POST"), eq(finalParams), eq(null));
+        assertEquals(response.filename, "My_test_file.png");
+        assertEquals(response.pageid, 12345L);
+        assertEquals(response.getMid(connection, Datamodel.SITE_WIKIMEDIA_COMMONS),
+                Datamodel.makeWikimediaCommonsMediaInfoIdValue("M12345"));
+    }
 }
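
Reviewer note (not part of the patch): below is a minimal sketch of how the new
chunked upload could be driven by calling code. Because uploadLocalFileChunked is
protected, the sketch sits in the same package as MediaFileUtils. The example class,
the way the ApiConnection is obtained, and the file name, wikitext and summary values
are all assumptions for illustration, not part of this change.

package org.openrefine.wikibase.editing;

import java.io.File;
import java.io.IOException;
import java.util.Collections;

import org.wikidata.wdtk.wikibaseapi.ApiConnection;
import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException;

import org.openrefine.wikibase.editing.MediaFileUtils.ChunkedFile;
import org.openrefine.wikibase.editing.MediaFileUtils.MediaUploadResponse;

/** Hypothetical caller, for illustration only (not part of the patch). */
public class ChunkedUploadExample {

    public static MediaUploadResponse upload(ApiConnection connection, File localFile)
            throws IOException, MediaWikiApiErrorException {
        MediaFileUtils mediaFileUtils = new MediaFileUtils(connection);

        // ChunkedFile hands the file back 5000 bytes at a time (the size
        // hard-coded in ChunkedFile); each chunk is stashed with
        // action=upload&stash=1, then the upload is finalised with the file key.
        ChunkedFile chunked = new ChunkedFile(localFile);

        return mediaFileUtils.uploadLocalFileChunked(
                chunked,
                "My_test_file.png",                      // target filename on the wiki (placeholder)
                "{{Information|description=Example}}",   // accompanying wikitext (placeholder)
                "Chunked upload example",                // edit summary (placeholder)
                Collections.emptyList());                // no edit tags
    }
}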