Skip to content

Commit

Permalink
track zenodo/github query responses when using [elton track] in addition to [elton ls]
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Jan 2, 2025
1 parent fa30156 commit f2bb29e
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 64 deletions.
56 changes: 2 additions & 54 deletions src/main/java/org/globalbioticinteractions/elton/cmd/CmdList.java
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
package org.globalbioticinteractions.elton.cmd;

import bio.guoda.preston.HashType;
import bio.guoda.preston.RefNodeConstants;
import bio.guoda.preston.RefNodeFactory;
import bio.guoda.preston.store.BlobStoreAppendOnly;
import bio.guoda.preston.store.KeyTo1LevelPath;
import bio.guoda.preston.store.KeyValueStoreLocalFileSystem;
import bio.guoda.preston.store.ValidatingKeyValueStreamContentAddressedFactory;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Literal;
import org.eol.globi.service.ResourceService;
import org.eol.globi.util.ResourceServiceRemote;
import org.globalbioticinteractions.elton.util.ResourceServiceRemote;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetRegistry;
import org.globalbioticinteractions.dataset.DatasetRegistryException;
Expand All @@ -20,21 +14,14 @@
import org.globalbioticinteractions.dataset.DatasetRegistryZenodo;
import org.globalbioticinteractions.elton.store.AccessLogger;
import org.globalbioticinteractions.elton.store.ActivityListener;
import org.globalbioticinteractions.elton.store.ActivityProxy;
import org.globalbioticinteractions.elton.store.ProvLoggerWithClock;
import org.globalbioticinteractions.elton.util.DatasetRegistryUtil;
import picocli.CommandLine;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.function.Supplier;

@CommandLine.Command(
name = "list",
Expand Down Expand Up @@ -69,7 +56,7 @@ public void run(PrintStream out) {
File tmpDir = new File(getWorkDir());

ResourceService serviceRemote = getEnableProvMode()
? getResourceServiceWithProv(inputStreamFactory, activityListener, tmpDir)
? getResourceServiceRemoteWithProv(inputStreamFactory, activityListener, tmpDir)
: new ResourceServiceRemote(inputStreamFactory, tmpDir);

List<DatasetRegistry> registries =
Expand Down Expand Up @@ -114,45 +101,6 @@ public void run(PrintStream out) {
}
}

/**
 * Builds a {@link ResourceService} that retrieves resources remotely, copies the
 * retrieved content into a {@link BlobStoreAppendOnly} located in the data
 * directory, and notifies both a {@link ProvLoggerWithClock} and the provided
 * activity listener when each retrieval starts and completes.
 *
 * @param inputStreamFactory factory passed on to the wrapped {@link ResourceServiceRemote}
 * @param activityListener   listener notified alongside the provenance logger
 * @param tmpDir             directory passed on to the wrapped {@link ResourceServiceRemote}
 * @return a resource service whose returned streams are read from the blob store
 */
private ResourceService getResourceServiceWithProv(InputStreamFactoryLogging inputStreamFactory, ActivityListener activityListener, File tmpDir) {
    File dataFolder = new File(getDataDir());

    KeyTo1LevelPath keyToPath = new KeyTo1LevelPath(dataFolder.toURI());
    BlobStoreAppendOnly blobStore = new BlobStoreAppendOnly(
            new KeyValueStoreLocalFileSystem(
                    dataFolder,
                    keyToPath,
                    new ValidatingKeyValueStreamContentAddressedFactory()
            ),
            true,
            HashType.sha256
    );

    return new ResourceService() {

        private final ResourceServiceRemote resourceServiceRemote = new ResourceServiceRemote(inputStreamFactory, tmpDir);

        // logger timestamps its statements using the current date/time
        private final ProvLoggerWithClock logger = new ProvLoggerWithClock(getStatementListener(), new Supplier<Literal>() {
            @Override
            public Literal get() {
                return RefNodeFactory.nowDateTimeLiteral();
            }
        });

        @Override
        public InputStream retrieve(URI uri) throws IOException {
            IRI activityId = getActivityIdFactory().get();
            IRI request = RefNodeFactory.toIRI(uri);
            final ActivityListener proxy = new ActivityProxy(Arrays.asList(logger, activityListener));
            proxy.onStarted(getActivityContext().getActivity(), activityId, request);
            IRI put;
            // Close the remote stream once its content has been handed to the blob
            // store; the caller only ever sees the stream served by the blob store.
            try (InputStream retrieve = resourceServiceRemote.retrieve(uri)) {
                put = blobStore.put(retrieve);
            }
            proxy.onCompleted(getActivityContext().getActivity(), activityId, request, put, null);
            return blobStore.get(put);
        }
    };
}

private List<DatasetRegistry> getOnlineAndOfflineRegistries(DatasetRegistry registryLocal, ResourceService resourceServiceRemote) {
List<DatasetRegistry> onlineAndOffline = new ArrayList<>();

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,28 @@
package org.globalbioticinteractions.elton.cmd;

import bio.guoda.preston.HashType;
import bio.guoda.preston.RefNodeFactory;
import bio.guoda.preston.store.BlobStoreAppendOnly;
import bio.guoda.preston.store.KeyTo1LevelPath;
import bio.guoda.preston.store.KeyValueStoreLocalFileSystem;
import bio.guoda.preston.store.ValidatingKeyValueStreamContentAddressedFactory;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Literal;
import org.eol.globi.service.ResourceService;
import org.globalbioticinteractions.elton.util.ResourceServiceRemote;
import org.globalbioticinteractions.elton.store.ActivityListener;
import org.globalbioticinteractions.elton.store.ActivityProxy;
import org.globalbioticinteractions.elton.store.ProvLoggerWithClock;
import picocli.CommandLine;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Supplier;

public abstract class CmdRegistry extends CmdDefaultParams {
@CommandLine.Option(names = {"--registries", "--registry"},
Expand All @@ -21,4 +40,43 @@ public void setRegistryNames(List<String> registryNames) {
/**
 * @return the registry names currently held by this command
 */
public List<String> getRegistryNames() {
return registryNames;
}

/**
 * Creates a {@link ResourceService} that retrieves resources remotely, stores the
 * retrieved content in a {@link BlobStoreAppendOnly} located in the data directory,
 * and reports the start and completion of each retrieval to a
 * {@link ProvLoggerWithClock} as well as to the provided activity listener.
 *
 * @param inputStreamFactory factory handed to the underlying {@link ResourceServiceRemote}
 * @param activityListener   listener notified, together with the provenance logger,
 *                           when a retrieval starts and completes
 * @param tmpDir             directory handed to the underlying {@link ResourceServiceRemote}
 * @return resource service that returns content from the blob store after
 *         retrieving it remotely
 */
protected ResourceService getResourceServiceRemoteWithProv(InputStreamFactoryLogging inputStreamFactory, ActivityListener activityListener, File tmpDir) {
    File dataFolder = new File(getDataDir());

    KeyTo1LevelPath keyToPath = new KeyTo1LevelPath(dataFolder.toURI());
    BlobStoreAppendOnly blobStore = new BlobStoreAppendOnly(
            new KeyValueStoreLocalFileSystem(
                    dataFolder,
                    keyToPath,
                    new ValidatingKeyValueStreamContentAddressedFactory()
            ),
            true,
            HashType.sha256
    );

    return new ResourceService() {

        private final ResourceServiceRemote resourceServiceRemote = new ResourceServiceRemote(inputStreamFactory, tmpDir);

        // provenance logger stamped with the current date/time
        private final ProvLoggerWithClock logger = new ProvLoggerWithClock(getStatementListener(), new Supplier<Literal>() {
            @Override
            public Literal get() {
                return RefNodeFactory.nowDateTimeLiteral();
            }
        });

        @Override
        public InputStream retrieve(URI uri) throws IOException {
            IRI activityId = getActivityIdFactory().get();
            IRI request = RefNodeFactory.toIRI(uri);
            final ActivityListener proxy = new ActivityProxy(Arrays.asList(logger, activityListener));
            proxy.onStarted(getActivityContext().getActivity(), activityId, request);

            IRI contentId;
            // The remote stream is only needed while copying into the blob store;
            // release it here instead of leaking it, since callers receive the
            // blob store's stream rather than this one.
            try (InputStream remoteContent = resourceServiceRemote.retrieve(uri)) {
                contentId = blobStore.put(remoteContent);
            }

            proxy.onCompleted(getActivityContext().getActivity(), activityId, request, contentId, null);
            return blobStore.get(contentId);
        }
    };
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.apache.commons.io.FileUtils;
import org.eol.globi.data.NodeFactory;
import org.eol.globi.data.StudyImporterException;
import org.eol.globi.service.ResourceService;
import org.eol.globi.util.DatasetImportUtil;
import org.globalbioticinteractions.cache.CacheUtil;
import org.globalbioticinteractions.dataset.Dataset;
Expand All @@ -22,7 +23,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;

@CommandLine.Command(
name = "sync",
Expand All @@ -44,6 +44,11 @@ public String getDescription() {
protected void doRun() {
InputStreamFactoryLogging inputStreamFactory = createInputStreamFactory();

ResourceService resourceService = getEnableProvMode()
? getResourceServiceRemoteWithProv(inputStreamFactory, getActivityListener(), new File(getWorkDir()))
: null;


List<DatasetRegistry> registries = new ArrayList<>();
for (String registryName : getRegistryNames()) {
DatasetRegistryFactoryImpl datasetRegistryFactory = new DatasetRegistryFactoryImpl(
Expand All @@ -53,8 +58,8 @@ protected void doRun() {
getProvDir(),
getActivityListener(),
getActivityContext(),
getActivityIdFactory()
);
getActivityIdFactory(),
resourceService);
try {
DatasetRegistry registry = datasetRegistryFactory.createRegistryByName(registryName);
registries.add(registry);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public class DatasetRegistryFactoryImpl implements DatasetRegistryFactory {

private final String provDir;
private final ActivityListener activityListener;
private final ResourceService resourceService;
private ActivityContext activityContext;
private Supplier<IRI> iriSupplier;

Expand All @@ -49,15 +50,16 @@ public DatasetRegistryFactoryImpl(
String provDir,
ActivityListener activityListener,
ActivityContext activityContext,
Supplier<IRI> iriSupplier
) {
Supplier<IRI> iriSupplier,
ResourceService resourceService) {
this.workDir = workDir;
this.inputStreamFactory = inputStreamFactory;
this.dataDir = dataDir;
this.provDir = provDir;
this.activityListener = activityListener;
this.activityContext = activityContext;
this.iriSupplier = iriSupplier;
this.resourceService = resourceService;

}

Expand All @@ -70,7 +72,9 @@ public DatasetRegistry createRegistryByName(String name) throws DatasetRegistryE
try {
Class<?>[] paramTypes = {URI.class, ResourceService.class, ContentPathFactory.class, String.class, String.class, ActivityListener.class, ActivityContext.class, Supplier.class};
Optional<Constructor<? extends DatasetRegistry>> constructor = constructorFor(registryClass, paramTypes);
ResourceService resourceService = new ResourceServiceLocalAndRemote(inputStreamFactory, new File(getDataDir()));
ResourceService resourceService = this.resourceService == null
? new ResourceServiceLocalAndRemote(inputStreamFactory, new File(getWorkDir()))
: this.resourceService;
if (!constructor.isPresent()) {
Class<?>[] paramTypesShort = {ResourceService.class};
Optional<Constructor<? extends DatasetRegistry>> constructor2 = constructorFor(registryClass, paramTypesShort);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.globalbioticinteractions.elton.util;

import org.eol.globi.service.ResourceService;
import org.eol.globi.util.InputStreamFactory;
import org.eol.globi.util.ResourceServiceFactoryRemote;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

/**
 * {@link ResourceService} that hands each request to whichever service
 * {@link ResourceServiceFactoryRemote} selects for the requested resource.
 */
public class ResourceServiceRemote implements ResourceService {

    private final InputStreamFactory factory;
    private final File cacheDir;

    /**
     * @param factory input stream factory passed to the remote service factory
     * @param tmpDir  directory passed to the remote service factory
     */
    public ResourceServiceRemote(InputStreamFactory factory, File tmpDir) {
        this.factory = factory;
        this.cacheDir = tmpDir;
    }

    /**
     * Retrieves the content of the given resource.
     *
     * @param resourceName identifier of the resource to retrieve
     * @return the stream produced by the selected service, or {@code null}
     *         when {@code resourceName} is {@code null}
     * @throws IOException when no service is available for the resource, or
     *                     when the selected service fails to retrieve it
     */
    @Override
    public InputStream retrieve(URI resourceName) throws IOException {
        if (resourceName == null) {
            return null;
        }

        ResourceServiceFactoryRemote serviceFactory = new ResourceServiceFactoryRemote(factory, cacheDir);
        ResourceService delegate = serviceFactory.serviceForResource(resourceName);
        if (delegate == null) {
            throw new IOException("cannot retrieve content of unsupported resource identifier [" + resourceName + "]");
        }

        return delegate.retrieve(resourceName);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
import org.globalbioticinteractions.dataset.DatasetRegistryException;
import org.globalbioticinteractions.elton.store.ActivityListener;
import org.hamcrest.core.Is;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
Expand All @@ -18,20 +22,25 @@

public class DatasetRegistryFactoryImplTest {

@Rule
public TemporaryFolder folder = new TemporaryFolder();

@Test
public void listAndCreateSupportedRegistries() throws DatasetRegistryException {
public void listAndCreateSupportedRegistries() throws DatasetRegistryException, IOException {
File tmpDir = folder.newFolder("tmp");

List<DatasetRegistry> registries = new ArrayList<>();
Set<String> supportedRegistries = DatasetRegistryFactoryImpl.getSupportedRegistries();
for (String supportedRegistry : supportedRegistries) {
DatasetRegistry registry = new DatasetRegistryFactoryImpl(
URI.create("some:uri"),
tmpDir.toURI(),
in -> in,
"someDataDir",
"someProvDir",
getListener(),
getCtx(),
getActivityIdFactory()
).createRegistryByName(supportedRegistry);
getActivityIdFactory(),
null).createRegistryByName(supportedRegistry);
registries.add(registry);
}
assertThat(registries.size(), Is.is(supportedRegistries.size()));
Expand Down

0 comments on commit f2bb29e

Please sign in to comment.