Skip to content
This repository has been archived by the owner on Apr 15, 2024. It is now read-only.

Commit

Permalink
Upload files in chunks
Browse files Browse the repository at this point in the history
Adds function for uploading a file in chunks as described on
https://www.mediawiki.org/wiki/API:Upload#Example_3:_Upload_file_in_chunks. The
chunks are created as temporary files and are removed once uploaded.
  • Loading branch information
sebastian-berlin-wmse committed Apr 4, 2024
1 parent fd81e5b commit e682123
Show file tree
Hide file tree
Showing 2 changed files with 209 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
package org.openrefine.wikibase.editing;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -102,6 +106,62 @@ public MediaUploadResponse uploadLocalFile(File path, String fileName, String wi
return uploadFile(parameters, files);
}

/**
 * Upload a local file to the MediaWiki instance in chunks.
 * <p>
 * Every chunk obtained from {@link ChunkedFile#readChunk()} is sent in its own {@code action=upload} request with
 * {@code stash=1}. The first request uses offset 0; each following request reuses the {@code offset} and
 * {@code filekey} returned by the previous response. Once all chunks have been sent, a final request without any
 * file attached publishes the stashed file under the given name. The temporary file backing a chunk is deleted as
 * soon as its request has completed, whether it succeeded or failed.
 *
 * @param path
 *            the local file, wrapped so that it can be read one chunk at a time
 * @param fileName
 *            its filename once stored on the wiki
 * @param wikitext
 *            the accompanying wikitext for the file
 * @param summary
 *            the edit summary associated with the upload
 * @param tags
 *            tags to apply to the edit
 * @return the response to the final request, which publishes the stashed file
 * @throws IOException
 *             if no chunk could be read from the file (for instance because it is empty), or if it is thrown while
 *             reading a chunk or sending a request
 * @throws MediaWikiApiErrorException
 *             propagated from the token and upload requests
 */
protected MediaUploadResponse uploadLocalFileChunked(ChunkedFile path, String fileName, String wikitext, String summary,
        List<String> tags)
        throws IOException, MediaWikiApiErrorException {
    MediaUploadResponse response = null;
    int i = 1;
    for (File chunk = path.readChunk(); chunk != null; chunk = path.readChunk()) {
        try {
            Map<String, String> parameters = new HashMap<>();
            parameters.put("action", "upload");
            parameters.put("token", getCsrfToken());
            parameters.put("stash", "1");
            parameters.put("filename", fileName);
            parameters.put("filesize", String.valueOf(path.getLength()));
            if (response == null) {
                // In the first request we don't have offset or file key.
                parameters.put("offset", "0");
            } else {
                parameters.put("offset", String.valueOf(response.offset));
                parameters.put("filekey", response.filekey);
            }
            Map<String, ImmutablePair<String, java.io.File>> files = new HashMap<>();
            // NOTE(review): the ".png" suffix is hard-coded regardless of the real file type; kept as is
            // because existing callers and tests rely on this exact name.
            String chunkName = "chunk-" + i + ".png";
            files.put("chunk", new ImmutablePair<String, File>(chunkName, chunk));
            response = uploadFile(parameters, files);
        } finally {
            // Remove the temporary chunk even when the request failed, so that aborted uploads
            // do not leave files behind in the temporary directory.
            chunk.delete();
        }
        i++;
    }

    if (response == null) {
        // No chunk was produced, so there is no file key to publish.
        throw new IOException("Cannot upload '" + fileName + "' in chunks: no data could be read from the local file");
    }

    // Publish the stashed file: same file key, but no chunk attached this time.
    Map<String, String> parameters = new HashMap<>();
    parameters.put("action", "upload");
    parameters.put("token", getCsrfToken());
    parameters.put("filename", fileName);
    parameters.put("filekey", response.filekey);
    parameters.put("tags", String.join("|", tags));
    parameters.put("comment", summary);
    parameters.put("text", wikitext);

    return uploadFile(parameters, null);
}

/**
* Upload a file that the MediaWiki server fetches directly from the supplied URL. The URL domain must likely be
* whitelisted before.
Expand Down Expand Up @@ -261,6 +321,10 @@ public static class MediaUploadResponse {
public String filename;
@JsonProperty("pageid")
public long pageid;
@JsonProperty("offset")
public long offset;
@JsonProperty("filekey")
public String filekey;
@JsonProperty("warnings")
public Map<String, JsonNode> warnings;

Expand Down Expand Up @@ -306,4 +370,55 @@ public MediaInfoIdValue getMid(ApiConnection connection, String siteIri) throws
return mid;
}
}

/**
 * A file read one chunk at a time.
 * <p>
 * Each chunk is materialised as a temporary file which the caller is responsible for deleting. The underlying
 * stream is closed automatically once the whole file has been read; callers which stop reading earlier should
 * call {@link #close()} themselves.
 */

public static class ChunkedFile implements AutoCloseable {

    /** Chunk size, in bytes, used when none is supplied to the constructor. */
    public static final int DEFAULT_CHUNK_SIZE = 5000;

    protected FileInputStream stream;
    protected final int chunkSize;
    protected File path;
    protected long bytesRead;

    /**
     * Open a file for chunked reading using {@link #DEFAULT_CHUNK_SIZE}.
     *
     * @param path
     *            the file to read
     * @throws FileNotFoundException
     *             if the file cannot be opened for reading
     */
    public ChunkedFile(File path) throws FileNotFoundException {
        this(path, DEFAULT_CHUNK_SIZE);
    }

    /**
     * Open a file for chunked reading.
     *
     * @param path
     *            the file to read
     * @param chunkSize
     *            the maximum number of bytes per chunk, must be strictly positive
     * @throws FileNotFoundException
     *             if the file cannot be opened for reading
     * @throws IllegalArgumentException
     *             if chunkSize is not strictly positive
     */
    public ChunkedFile(File path, int chunkSize) throws FileNotFoundException {
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive, got " + chunkSize);
        }
        this.path = path;
        this.chunkSize = chunkSize;
        stream = new FileInputStream(path);
        bytesRead = 0;
    }

    /**
     * Read the next chunk of the file.
     *
     * @return a temporary file containing the next chunk of the original file, or null once the whole file has
     *         been read. The length in bytes is chunkSize or however much remains of the file if the last chunk
     *         is read.
     * @throws IOException
     *             if reading the source file or writing the temporary file fails
     */
    public File readChunk() throws IOException {
        long remaining = path.length() - bytesRead;
        if (remaining <= 0) {
            close();
            return null;
        }

        // Read at most the remaining bytes.
        int bytesToRead = (int) Math.min(remaining, chunkSize);
        byte[] bytes = new byte[bytesToRead];
        // A single read() call may return fewer bytes than requested, so keep reading until the
        // buffer is full or the stream ends.
        int filled = 0;
        while (filled < bytesToRead) {
            int count = stream.read(bytes, filled, bytesToRead - filled);
            if (count < 0) {
                break;
            }
            filled += count;
        }
        if (filled == 0) {
            // The stream ended earlier than the reported length (e.g. the file was truncated).
            close();
            return null;
        }

        Path chunk = Files.createTempFile(null, null);
        try {
            // Only write the bytes actually read, never the unused tail of the buffer.
            Files.write(chunk, filled == bytesToRead ? bytes : java.util.Arrays.copyOf(bytes, filled));
        } catch (IOException e) {
            Files.deleteIfExists(chunk);
            throw e;
        }
        bytesRead += filled;

        return chunk.toFile();
    }

    /**
     * Get length of the file.
     *
     * @see File#length() length
     * @return {long}
     */
    public long getLength() {
        return path.length();
    }

    /**
     * Close the underlying stream. Calling this more than once has no further effect.
     *
     * @throws IOException
     *             if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        stream.close();
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
Expand All @@ -13,6 +14,8 @@
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -21,6 +24,7 @@

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.mockito.InOrder;
import org.mockito.Mockito;
import org.testng.annotations.Test;
Expand All @@ -33,6 +37,7 @@

import com.google.refine.util.ParsingUtilities;

import org.openrefine.wikibase.editing.MediaFileUtils.ChunkedFile;
import org.openrefine.wikibase.editing.MediaFileUtils.MediaUploadResponse;

public class MediaFileUtilsTest {
Expand Down Expand Up @@ -332,4 +337,93 @@ protected void mockCsrfCall(ApiConnection connection) throws IOException, MediaW
JsonNode tokenJsonResponse = ParsingUtilities.mapper.readTree(csrfResponse);
when(connection.sendJsonRequest("POST", tokenParams)).thenReturn(tokenJsonResponse);
}

@Test
public void testUploadLocalFileChunked() throws IOException, MediaWikiApiErrorException {
    // A 10001-byte file is expected to be sent as three stashed chunks followed by one
    // publishing request; every request is stubbed and then verified in order.
    ApiConnection connection = mock(ApiConnection.class);
    mockCsrfCall(connection);

    File chunkOne = Files.createTempFile("chunk-1-", ".png").toFile();
    File chunkTwo = Files.createTempFile("chunk-2-", ".png").toFile();
    File chunkThree = Files.createTempFile("chunk-3-", ".png").toFile();
    ChunkedFile chunkedFile = mock(ChunkedFile.class);
    when(chunkedFile.getLength()).thenReturn(10001L);
    when(chunkedFile.readChunk()).thenReturn(chunkOne, chunkTwo, chunkThree, null);

    // Parameters shared by every request which stashes a chunk.
    Map<String, String> stashParams = new HashMap<>();
    stashParams.put("action", "upload");
    stashParams.put("token", csrfToken);
    stashParams.put("filename", "My_test_file.png");
    stashParams.put("filesize", "10001");
    stashParams.put("stash", "1");

    // First chunk: starts the upload, so there is no file key yet.
    Map<String, String> paramsOne = new HashMap<>(stashParams);
    paramsOne.put("offset", "0");
    Map<String, ImmutablePair<String, java.io.File>> filesOne = Collections.singletonMap("chunk",
            new ImmutablePair<String, File>("chunk-1.png", chunkOne));
    JsonNode responseOne = ParsingUtilities.mapper.readTree(
            "{\"upload\":{\"offset\":5000,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}");
    when(connection.sendJsonRequest(eq("POST"), eq(paramsOne), eq(filesOne))).thenReturn(responseOne);

    // Second chunk: continues from the offset and file key of the first response.
    Map<String, String> paramsTwo = new HashMap<>(stashParams);
    paramsTwo.put("offset", "5000");
    paramsTwo.put("filekey", "filekey.1234.png");
    Map<String, ImmutablePair<String, java.io.File>> filesTwo = Collections.singletonMap("chunk",
            new ImmutablePair<String, File>("chunk-2.png", chunkTwo));
    JsonNode responseTwo = ParsingUtilities.mapper.readTree(
            "{\"upload\":{\"offset\":10000,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}");
    when(connection.sendJsonRequest(eq("POST"), eq(paramsTwo), eq(filesTwo))).thenReturn(responseTwo);

    // Third and last chunk.
    Map<String, String> paramsThree = new HashMap<>(stashParams);
    paramsThree.put("offset", "10000");
    paramsThree.put("filekey", "filekey.1234.png");
    Map<String, ImmutablePair<String, java.io.File>> filesThree = Collections.singletonMap("chunk",
            new ImmutablePair<String, File>("chunk-3.png", chunkThree));
    JsonNode responseThree = ParsingUtilities.mapper.readTree(
            "{\"upload\":{\"offset\":10001,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}");
    when(connection.sendJsonRequest(eq("POST"), eq(paramsThree), eq(filesThree))).thenReturn(responseThree);

    // Publishing request: no chunk attached, carries the page text, summary and tags.
    Map<String, String> publishParams = new HashMap<>();
    publishParams.put("action", "upload");
    publishParams.put("token", csrfToken);
    publishParams.put("filename", "My_test_file.png");
    publishParams.put("filekey", "filekey.1234.png");
    publishParams.put("text", "my wikitext");
    publishParams.put("comment", "my summary");
    publishParams.put("tags", "");
    JsonNode publishResponse = ParsingUtilities.mapper.readTree(successfulUploadResponse);
    when(connection.sendJsonRequest(eq("POST"), eq(publishParams), eq(null))).thenReturn(publishResponse);

    MediaFileUtils SUT = new MediaFileUtils(connection);
    MediaUploadResponse response = SUT.uploadLocalFileChunked(chunkedFile, "My_test_file.png", "my wikitext",
            "my summary", Collections.emptyList());

    InOrder requests = inOrder(connection);
    requests.verify(connection).sendJsonRequest(eq("POST"), eq(paramsOne), eq(filesOne));
    requests.verify(connection).sendJsonRequest(eq("POST"), eq(paramsTwo), eq(filesTwo));
    requests.verify(connection).sendJsonRequest(eq("POST"), eq(paramsThree), eq(filesThree));
    requests.verify(connection).sendJsonRequest(eq("POST"), eq(publishParams), eq(null));
    assertEquals(response.filename, "My_test_file.png");
    assertEquals(response.pageid, 12345L);
    assertEquals(response.getMid(connection, Datamodel.SITE_WIKIMEDIA_COMMONS),
            Datamodel.makeWikimediaCommonsMediaInfoIdValue("M12345"));
}
};;

0 comments on commit e682123

Please sign in to comment.