Skip to content

Commit

Permalink
fetch tool: --output-file option
Browse files Browse the repository at this point in the history
  • Loading branch information
ato committed Jul 27, 2023
1 parent b706a8e commit 07be2c8
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 9 deletions.
12 changes: 11 additions & 1 deletion src/org/netpreserve/jwarc/WarcCompression.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@

package org.netpreserve.jwarc;

import java.nio.file.Path;

public enum WarcCompression {
NONE, GZIP
NONE, GZIP;

/**
 * Chooses the compression for a file based on its name.
 *
 * @param path the path of the file; only its final name element is inspected
 * @return {@link #GZIP} if the file name ends with ".gz", otherwise {@link #NONE}
 *         (including when the path has no file name element at all)
 */
static WarcCompression forPath(Path path) {
    // Path.getFileName() returns null for a path with zero name elements
    // (for example a filesystem root), so guard against it rather than
    // failing with a NullPointerException.
    Path fileName = path.getFileName();
    if (fileName != null && fileName.toString().endsWith(".gz")) {
        return GZIP;
    }
    return NONE;
}
}
10 changes: 10 additions & 0 deletions src/org/netpreserve/jwarc/WarcWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,16 @@ public WarcWriter(OutputStream stream) throws IOException {
this(Channels.newChannel(stream));
}

/**
 * Opens a WARC file for writing.
 * <p>
 * The file is opened with the WRITE, CREATE and TRUNCATE_EXISTING options
 * (presumably the {@code java.nio.file.StandardOpenOption} constants, statically imported),
 * so it is created when missing and any existing content is replaced.
 * <p>
 * Compression is determined by the file extension via {@link WarcCompression#forPath(Path)}:
 * a file name ending in ".gz" selects {@link WarcCompression#GZIP}, anything else
 * selects {@link WarcCompression#NONE}.
 *
 * @param path the path to the file
 * @throws IOException if an I/O error occurs
 */
public WarcWriter(Path path) throws IOException {
this(FileChannel.open(path, WRITE, CREATE, TRUNCATE_EXISTING), WarcCompression.forPath(path));
}

public synchronized void write(WarcRecord record) throws IOException {
// TODO: buffer headers
position.addAndGet(channel.write(ByteBuffer.wrap(record.serializeHeader())));
Expand Down
19 changes: 14 additions & 5 deletions src/org/netpreserve/jwarc/tools/FetchTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

public class FetchTool {
public static void main(String[] args) throws IOException, URISyntaxException {
FetchOptions options = new FetchOptions();
List<URI> urls = new ArrayList<>();
Path outputFile = null;
for (int i = 0; i < args.length; i++) {
switch (args[i]) {
case "-h":
Expand All @@ -21,10 +24,12 @@ public static void main(String[] args) throws IOException, URISyntaxException {
System.out.println("Fetches a URL while writing the request and response as WARC records");
System.out.println();
System.out.println("Options:");
System.out.println("-A, --user-agent STRING Sets the User-Agent header");
System.out.println(" --read-timeout MILLIS Sets the socket read timeout");
System.out.println(" --max-length BYTES Truncate response after BYTES received");
System.out.println(" --max-time MILLIS Truncate response after MILLIS elapsed");
System.out.println(" -A, --user-agent STRING Sets the User-Agent header");
System.out.println(" --read-timeout MILLIS Sets the socket read timeout");
System.out.println(" --max-length BYTES Truncate response after BYTES received");
System.out.println(" --max-time MILLIS Truncate response after MILLIS elapsed");
System.out.println(" -o, --output-file FILE Write WARC records to FILE instead of stdout");
System.out.println();
System.exit(0);
break;
case "-A":
Expand All @@ -40,6 +45,10 @@ public static void main(String[] args) throws IOException, URISyntaxException {
case "--max-time":
options.maxTime(Integer.parseInt(args[++i]));
break;
case "-o":
case "--output-file":
outputFile = Paths.get(args[++i]);
break;
default:
if (args[i].startsWith("-")) {
System.err.println("Unknown option: " + args[i]);
Expand All @@ -52,7 +61,7 @@ public static void main(String[] args) throws IOException, URISyntaxException {
System.err.println("No URLs specified. Try: jwarc fetch --help");
System.exit(1);
}
try (WarcWriter writer = new WarcWriter(System.out)) {
try (WarcWriter writer = outputFile == null ? new WarcWriter(System.out) : new WarcWriter(outputFile)) {
for (URI url : urls) {
writer.fetch(url, options);
}
Expand Down
2 changes: 1 addition & 1 deletion src/org/netpreserve/jwarc/tools/RecorderTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public static void main(String[] args) throws Exception {
}

ServerSocket serverSocket = new ServerSocket(port);
WarcRecorder warcRecorder = new WarcRecorder(serverSocket, new WarcWriter(outputFile == null ? System.out : Files.newOutputStream(outputFile)));
WarcRecorder warcRecorder = new WarcRecorder(serverSocket, outputFile == null ? new WarcWriter(System.out) : new WarcWriter(outputFile));

if (caCertificateSaveFile != null) {
X509Certificate certificate = warcRecorder.certificateAuthority().certificate();
Expand Down
2 changes: 0 additions & 2 deletions test/org/netpreserve/jwarc/apitests/WarcWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ public void fetch() throws IOException, NoSuchAlgorithmException, URISyntaxExcep

WarcResponse response = (WarcResponse) warcReader.next()
.orElseThrow(() -> new RuntimeException("Missing response record"));
System.out.println(new String(response.serializeHeader()));
//assertEquals(12, response.http().body().size());
assertEquals(256, response.http().status());
assertEquals("present", response.http().headers().first("Test-Header").orElse(null));
assertTrue(response.blockDigest().isPresent());
Expand Down

0 comments on commit 07be2c8

Please sign in to comment.