Skip to content
This repository has been archived by the owner on Apr 15, 2024. It is now read-only.

Chunked uploads #1

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
package org.openrefine.wikibase.editing;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -102,6 +106,62 @@ public MediaUploadResponse uploadLocalFile(File path, String fileName, String wi
return uploadFile(parameters, files);
}

/**
 * Upload a local file to the MediaWiki instance in chunks.
 * <p>
 * Every chunk returned by {@link ChunkedFile#readChunk()} is sent as a stashed upload request. The first request
 * uses offset 0; each following request passes on the offset and file key taken from the previous response. Once
 * no chunk is left, a final request without any file attached publishes the stashed upload under the given file
 * name, together with the wikitext, edit summary and tags.
 * <p>
 * Each temporary chunk file is deleted after its request, whether or not that request succeeded.
 *
 * @param path
 *            the local file to upload, wrapped so that it can be read one chunk at a time
 * @param fileName
 *            its filename once stored on the wiki
 * @param wikitext
 *            the accompanying wikitext for the file
 * @param summary
 *            the edit summary associated with the upload
 * @param tags
 *            tags to apply to the edit
 * @return the response to the final request, which publishes the stashed file
 * @throws IOException
 *             if reading a chunk fails, if the file did not yield a single chunk (for instance because it is
 *             empty), if the server did not return a file key for the stashed chunks, or if raised by the
 *             underlying upload request
 * @throws MediaWikiApiErrorException
 *             if raised by the underlying upload request
 */
protected MediaUploadResponse uploadLocalFileChunked(ChunkedFile path, String fileName, String wikitext, String summary,
        List<String> tags)
        throws IOException, MediaWikiApiErrorException {
    MediaUploadResponse response = null;
    int i = 1;
    for (File chunk = path.readChunk(); chunk != null; chunk = path.readChunk()) {
        try {
            Map<String, String> parameters = new HashMap<>();
            parameters.put("action", "upload");
            parameters.put("token", getCsrfToken());
            parameters.put("stash", "1");
            parameters.put("filename", fileName);
            parameters.put("filesize", String.valueOf(path.getLength()));
            if (response == null) {
                // In the first request we don't have offset or file key.
                parameters.put("offset", "0");
            } else {
                parameters.put("offset", String.valueOf(response.offset));
                parameters.put("filekey", response.filekey);
            }
            Map<String, ImmutablePair<String, java.io.File>> files = new HashMap<>();
            // The chunk name always carries a ".png" extension, whatever the type of the uploaded file.
            String chunkName = "chunk-" + i + ".png";
            files.put("chunk", new ImmutablePair<String, File>(chunkName, chunk));
            response = uploadFile(parameters, files);
        } finally {
            // Remove the temporary chunk even when the request failed, so that failed uploads
            // do not leave files behind.
            chunk.delete();
        }
        i++;
    }

    // Without a single successful chunk request there is nothing stashed to publish.
    if (response == null) {
        throw new IOException("Cannot upload file '" + fileName + "' in chunks: no data could be read from it");
    }
    if (response.filekey == null) {
        throw new IOException("Cannot finalise chunked upload of '" + fileName + "': the server returned no file key");
    }

    Map<String, String> parameters = new HashMap<>();
    parameters.put("action", "upload");
    parameters.put("token", getCsrfToken());
    parameters.put("filename", fileName);
    parameters.put("filekey", response.filekey);
    parameters.put("tags", String.join("|", tags));
    parameters.put("comment", summary);
    parameters.put("text", wikitext);

    return uploadFile(parameters, null);
}

/**
* Upload a file that the MediaWiki server fetches directly from the supplied URL. The URL domain must likely be
* whitelisted before.
Expand Down Expand Up @@ -261,6 +321,10 @@ public static class MediaUploadResponse {
public String filename;
@JsonProperty("pageid")
public long pageid;
@JsonProperty("offset")
public long offset;
@JsonProperty("filekey")
public String filekey;
@JsonProperty("warnings")
public Map<String, JsonNode> warnings;

Expand Down Expand Up @@ -306,4 +370,55 @@ public MediaInfoIdValue getMid(ApiConnection connection, String siteIri) throws
return mid;
}
}

/**
 * A file read one chunk at a time.
 * <p>
 * Each call to {@link #readChunk()} copies the next part of the file into a new temporary file. The underlying
 * stream is closed once the end of the file has been reached; call {@link #close()} to release it earlier, for
 * instance when reading is abandoned half-way.
 */

public static class ChunkedFile implements AutoCloseable {

    /** Chunk size, in bytes, used when none is supplied to the constructor. */
    protected static final int DEFAULT_CHUNK_SIZE = 5000;

    protected FileInputStream stream;
    protected final int chunkSize;
    protected File path;
    protected long bytesRead;
    // Set once the stream has been closed, after which no further chunk is produced.
    private boolean closed;

    /**
     * Open the given file for chunked reading with the default chunk size.
     *
     * @param path
     *            the file to read
     * @throws FileNotFoundException
     *             if the file cannot be opened for reading
     */
    public ChunkedFile(File path) throws FileNotFoundException {
        this(path, DEFAULT_CHUNK_SIZE);
    }

    /**
     * Open the given file for chunked reading.
     *
     * @param path
     *            the file to read
     * @param chunkSize
     *            the maximum number of bytes per chunk, must be positive
     * @throws FileNotFoundException
     *             if the file cannot be opened for reading
     * @throws IllegalArgumentException
     *             if the chunk size is not positive
     */
    public ChunkedFile(File path, int chunkSize) throws FileNotFoundException {
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive, got " + chunkSize);
        }
        this.path = path;
        this.chunkSize = chunkSize;
        stream = new FileInputStream(path);
        bytesRead = 0;
    }

    /**
     * Read the next chunk of the file.
     *
     * @return {File} A temporary file containing a chunk of the original file, or null when nothing is left to
     *         read. The length in bytes is chunkSize or however much remains of the file if the last chunk is
     *         read. The caller is responsible for deleting the returned file.
     * @throws IOException
     *             if reading the file or writing the temporary file fails
     */
    public File readChunk() throws IOException {
        if (closed || bytesRead >= path.length()) {
            close();
            return null;
        }

        // Read at most the remaining bytes.
        int bytesToRead = (int) Math.min(path.length() - bytesRead, chunkSize);
        byte[] bytes = new byte[bytesToRead];
        // A single read() call may deliver fewer bytes than requested, so keep reading until the
        // buffer is full or the stream reports its end.
        int chunkBytesRead = 0;
        while (chunkBytesRead < bytesToRead) {
            int count = stream.read(bytes, chunkBytesRead, bytesToRead - chunkBytesRead);
            if (count < 0) {
                break;
            }
            chunkBytesRead += count;
        }
        if (chunkBytesRead == 0) {
            // The stream ended earlier than the reported file length: nothing left to hand out.
            close();
            return null;
        }

        Path chunk = Files.createTempFile(null, null);
        try {
            // Only write the bytes actually read, never the unused tail of the buffer.
            Files.write(chunk, chunkBytesRead == bytesToRead ? bytes : java.util.Arrays.copyOf(bytes, chunkBytesRead));
        } catch (IOException | RuntimeException e) {
            Files.deleteIfExists(chunk);
            throw e;
        }
        bytesRead += chunkBytesRead;

        return chunk.toFile();
    }

    /**
     * Get length of the file.
     *
     * @see File#length() length
     * @return {long}
     */
    public long getLength() {
        return path.length();
    }

    /**
     * Close the underlying stream. After this call {@link #readChunk()} only returns null. Closing more than once
     * has no further effect.
     *
     * @throws IOException
     *             if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            stream.close();
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
Expand All @@ -13,6 +14,8 @@
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -21,6 +24,7 @@

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.mockito.InOrder;
import org.mockito.Mockito;
import org.testng.annotations.Test;
Expand All @@ -33,6 +37,7 @@

import com.google.refine.util.ParsingUtilities;

import org.openrefine.wikibase.editing.MediaFileUtils.ChunkedFile;
import org.openrefine.wikibase.editing.MediaFileUtils.MediaUploadResponse;

public class MediaFileUtilsTest {
Expand Down Expand Up @@ -332,4 +337,93 @@ protected void mockCsrfCall(ApiConnection connection) throws IOException, MediaW
JsonNode tokenJsonResponse = ParsingUtilities.mapper.readTree(csrfResponse);
when(connection.sendJsonRequest("POST", tokenParams)).thenReturn(tokenJsonResponse);
}

/**
 * Checks that a chunked upload sends one stash request per chunk, passing on the offset and file key of the
 * previous response, followed by a final request that publishes the stashed file.
 */
@Test
public void testUploadLocalFileChunked() throws IOException, MediaWikiApiErrorException {
    // Three temporary files stand in for the chunks handed out by the mocked ChunkedFile.
    Path chunkOnePath = Files.createTempFile("chunk-1-", ".png");
    Path chunkTwoPath = Files.createTempFile("chunk-2-", ".png");
    Path chunkThreePath = Files.createTempFile("chunk-3-", ".png");
    File chunkOne = chunkOnePath.toFile();
    File chunkTwo = chunkTwoPath.toFile();
    File chunkThree = chunkThreePath.toFile();

    ChunkedFile source = mock(ChunkedFile.class);
    when(source.getLength()).thenReturn(10001L);
    when(source.readChunk())
            .thenReturn(chunkOne)
            .thenReturn(chunkTwo)
            .thenReturn(chunkThree)
            .thenReturn(null);

    ApiConnection api = mock(ApiConnection.class);
    // mock CSRF token request
    mockCsrfCall(api);

    // Request 1: starts the upload with the first chunk, so no file key is known yet.
    Map<String, String> paramsOne = new HashMap<>();
    paramsOne.put("token", csrfToken);
    paramsOne.put("action", "upload");
    paramsOne.put("stash", "1");
    paramsOne.put("filename", "My_test_file.png");
    paramsOne.put("filesize", "10001");
    paramsOne.put("offset", "0");
    Map<String, ImmutablePair<String, java.io.File>> filesOne = Collections.singletonMap("chunk",
            new ImmutablePair<String, File>("chunk-1.png", chunkOne));
    JsonNode replyOne = ParsingUtilities.mapper
            .readTree("{\"upload\":{\"offset\":5000,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}");
    when(api.sendJsonRequest(eq("POST"), eq(paramsOne), eq(filesOne))).thenReturn(replyOne);

    // Request 2: same as the first one, plus the offset and file key announced by the first reply.
    Map<String, String> paramsTwo = new HashMap<>(paramsOne);
    paramsTwo.put("offset", "5000");
    paramsTwo.put("filekey", "filekey.1234.png");
    Map<String, ImmutablePair<String, java.io.File>> filesTwo = Collections.singletonMap("chunk",
            new ImmutablePair<String, File>("chunk-2.png", chunkTwo));
    JsonNode replyTwo = ParsingUtilities.mapper
            .readTree("{\"upload\":{\"offset\":10000,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}");
    when(api.sendJsonRequest(eq("POST"), eq(paramsTwo), eq(filesTwo))).thenReturn(replyTwo);

    // Request 3: the last chunk, only the offset differs from the second request.
    Map<String, String> paramsThree = new HashMap<>(paramsTwo);
    paramsThree.put("offset", "10000");
    Map<String, ImmutablePair<String, java.io.File>> filesThree = Collections.singletonMap("chunk",
            new ImmutablePair<String, File>("chunk-3.png", chunkThree));
    JsonNode replyThree = ParsingUtilities.mapper
            .readTree("{\"upload\":{\"offset\":10001,\"result\":\"Continue\",\"filekey\":\"filekey.1234.png\"}}");
    when(api.sendJsonRequest(eq("POST"), eq(paramsThree), eq(filesThree))).thenReturn(replyThree);

    // Request 4: publishes the stashed file, no chunk attached.
    Map<String, String> publishParams = new HashMap<>();
    publishParams.put("token", csrfToken);
    publishParams.put("action", "upload");
    publishParams.put("filename", "My_test_file.png");
    publishParams.put("filekey", "filekey.1234.png");
    publishParams.put("text", "my wikitext");
    publishParams.put("comment", "my summary");
    publishParams.put("tags", "");
    JsonNode publishReply = ParsingUtilities.mapper.readTree(successfulUploadResponse);
    when(api.sendJsonRequest(eq("POST"), eq(publishParams), eq(null))).thenReturn(publishReply);

    MediaFileUtils uploader = new MediaFileUtils(api);
    MediaUploadResponse outcome = uploader.uploadLocalFileChunked(source, "My_test_file.png", "my wikitext",
            "my summary", Collections.emptyList());

    // The four requests must have been issued in exactly this order.
    InOrder sequence = inOrder(api);
    sequence.verify(api).sendJsonRequest(eq("POST"), eq(paramsOne), eq(filesOne));
    sequence.verify(api).sendJsonRequest(eq("POST"), eq(paramsTwo), eq(filesTwo));
    sequence.verify(api).sendJsonRequest(eq("POST"), eq(paramsThree), eq(filesThree));
    sequence.verify(api).sendJsonRequest(eq("POST"), eq(publishParams), eq(null));

    assertEquals(outcome.filename, "My_test_file.png");
    assertEquals(outcome.pageid, 12345L);
    assertEquals(outcome.getMid(api, Datamodel.SITE_WIKIMEDIA_COMMONS),
            Datamodel.makeWikimediaCommonsMediaInfoIdValue("M12345"));
}
};;