Skip to content

Commit

Permalink
Implement analysis migration for VDI
Browse files Browse the repository at this point in the history
  • Loading branch information
dmgaldi committed Mar 27, 2024
1 parent fed8330 commit e8ed6cc
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Model/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.irods.jargon</groupId>
<artifactId>jargon-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ public AnalysisRow(ResultSet rs, DBPlatform platform) throws SQLException {
_numVisualizations = rs.getInt("num_visualizations");
}

/**
 * Builds a row directly from already-known values rather than from a query result.
 *
 * @param analysisId        ID of the analysis this row represents
 * @param datasetId         ID of the dataset the analysis refers to
 * @param descriptor        analysis descriptor JSON
 * @param numFilters        filter count to store on the row
 * @param numComputations   computation count to store on the row
 * @param numVisualizations visualization count to store on the row
 */
public AnalysisRow(
    String analysisId,
    String datasetId,
    JSONObject descriptor,
    int numFilters,
    int numComputations,
    int numVisualizations) {
  // The counts are taken as given; they are not recomputed from the descriptor here.
  this._analysisId = analysisId;
  this._datasetId = datasetId;
  this._descriptor = descriptor;
  this._numFilters = numFilters;
  this._numComputations = numComputations;
  this._numVisualizations = numVisualizations;
}

public Object[] toOrderedValues() {
return new Object[] {
_datasetId, _descriptor.toString(), _numFilters, _numComputations, _numVisualizations, _analysisId
Expand Down Expand Up @@ -61,6 +70,22 @@ public JSONObject getDescriptor() {
return _descriptor;
}

/**
 * @return the analysis ID held by this row
 */
public String getAnalysisId() {
  return this._analysisId;
}

/**
 * @return the filter count held by this row
 */
public int getNumFilters() {
  return this._numFilters;
}

/**
 * @return the computation count held by this row
 */
public int getNumComputations() {
  return this._numComputations;
}

/**
 * @return the visualization count held by this row
 */
public int getNumVisualizations() {
  return this._numVisualizations;
}

/**
* Sets a new descriptor and refreshes the stats (number of filters, computations,
* and visualizations) on this analysis.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.log4j.Logger;
import org.gusdb.fgputil.json.JsonUtil;
import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Analysis updater plugin that rewrites the dataset ID of analyses referring to legacy user
 * datasets so that they refer to the corresponding VDI dataset instead.
 *
 * <p>The legacy-to-VDI mapping is loaded from a tinydb JSON file passed with
 * {@code --tinyDb=<path>}. Rows whose legacy ID is not present in that file are returned
 * unchanged and counted; the count is reported by {@link #dumpStatistics()}.
 *
 * <p>Unless {@code --liveRun} (or {@code --liveRun=true}) is passed, {@code _writeToDb} is set to
 * {@code false}.
 */
public class VDIMigrationPlugin extends AbstractAnalysisUpdater {
  private static final Logger LOG = Logger.getLogger(VDIMigrationPlugin.class);
  public static final String UD_DATASET_ID_PREFIX = "EDAUD_";

  // Command line flags understood by configure().
  private static final String TINY_DB_ARG = "--tinyDb";
  private static final String LIVE_RUN_ARG = "--liveRun";

  // Field names read from the tinydb file.
  private static final String TINY_DB_ROOT_FIELD = "_default";
  private static final String LEGACY_ID_FIELD = "udId";
  private static final String VDI_ID_FIELD = "vdiId";

  private Map<String, String> legacyIdToVdiId;

  // NOTE(review): plain int counter; if the framework calls processRecord from several threads
  // this would need to become atomic -- confirm against AbstractAnalysisUpdater's caller.
  private int missingFromVdiCount = 0;

  /**
   * Maps the row's legacy dataset ID to its VDI counterpart.
   *
   * @param nextRow row to migrate
   * @return a result wrapping a copy of the row with the VDI dataset ID, or wrapping the original
   *         row unchanged when the legacy ID has no entry in the tinydb mapping
   */
  @Override
  public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception {
    final String legacyDatasetId = nextRow.getDatasetId();

    // Strip only a leading prefix. String.replace would also remove any later occurrence of the
    // prefix text inside the ID and so look up the wrong key.
    final String legacyUdId = legacyDatasetId.startsWith(UD_DATASET_ID_PREFIX)
        ? legacyDatasetId.substring(UD_DATASET_ID_PREFIX.length())
        : legacyDatasetId;
    final String vdiId = legacyIdToVdiId.get(legacyUdId);

    if (vdiId == null) {
      LOG.warn("Unable to find legacy ID " + legacyUdId + " in the tinydb file.");
      missingFromVdiCount++;
      return new TableRowInterfaces.RowResult<>(nextRow);
    }

    // Prepend UD prefix to VDI ID. The prefix is prepended in the view that maps stable VDI IDs to the unstable study
    // ID, which is the currency of EDA.
    final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId;

    // Create a copy with just the dataset ID updated to VDI counterpart.
    final AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, nextRow.getDescriptor(),
        nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations());

    return new TableRowInterfaces.RowResult<>(out);
  }

  /**
   * Logs how many rows could not be migrated because their legacy ID was missing from the
   * tinydb mapping. Logs nothing when every row was found.
   */
  @Override
  public void dumpStatistics() {
    if (missingFromVdiCount > 0) {
      LOG.warn("Failed to migrate " + missingFromVdiCount + " datasets, they were not found in the provided tinydb file.");
    }
  }

  /**
   * Reads the plugin's command line arguments and loads the legacy-to-VDI mapping.
   *
   * @param wdkModel       model supplied by the framework (not used by this plugin)
   * @param additionalArgs arguments in the format {@code --<argname>=<argvalue>}; an argument
   *                       without {@code =} is treated as a boolean flag with value {@code "true"}
   * @throws IllegalArgumentException if {@code --tinyDb} is absent or has an empty value, or if
   *                                  the tinydb file has no usable {@code "_default"} object
   * @throws IllegalStateException    if the same argument name is given more than once
   */
  @Override
  public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception {
    // Split on the first "=" only, so that values which themselves contain "=" (e.g. file paths)
    // are kept intact instead of being truncated.
    final Map<String, String> args = additionalArgs.stream()
        .map(arg -> arg.split("=", 2))
        .collect(Collectors.toMap(
            pair -> pair[0].trim(),
            pair -> pair.length > 1 ? pair[1].trim() : "true")); // A flag without an "=" is a boolean. Set true if present.

    // Validate required arg.
    final String tinyDbPath = args.get(TINY_DB_ARG);
    if (tinyDbPath == null) {
      throw new IllegalArgumentException("Missing required flag " + TINY_DB_ARG);
    }
    if (tinyDbPath.isEmpty()) {
      throw new IllegalArgumentException("Flag " + TINY_DB_ARG + " requires a file path value");
    }

    this.legacyIdToVdiId = readLegacyStudyIdToVdiId(new File(tinyDbPath));

    // Default to dryrun to avoid incidental migrations when testing.
    this._writeToDb = Boolean.parseBoolean(args.getOrDefault(LIVE_RUN_ARG, "false"));
  }

  /**
   * Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers.
   *
   * <p>Example file format:
   *
   * <pre>
   * {
   *   "_default": {
   *     "1": {
   *       "type": "owner",
   *       "udId": 1234,
   *       "vdiId": "123XyZ",
   *       "msg": null,
   *       "time": "Fri Mar 26 00:00:00 2024"
   *     }
   *   }
   * }
   * </pre>
   *
   * <p>Records whose {@code udId} or {@code vdiId} is missing or JSON {@code null} are skipped
   * with a warning; otherwise a null {@code vdiId} would be read as the text {@code "null"} and
   * rows would be migrated to a bogus dataset ID.
   *
   * @param tinyDbFile TinyDB file, output of the migration script run to migrate legacy UDs into VDI.
   * @return Map of legacy UD Ids to VDI Ids.
   * @throws IllegalArgumentException if the file has no {@code "_default"} JSON object
   * @throws RuntimeException         if the file cannot be read or parsed
   */
  private Map<String, String> readLegacyStudyIdToVdiId(File tinyDbFile) {
    final JsonNode root;
    try {
      root = JsonUtil.Jackson.readTree(tinyDbFile);
    } catch (IOException e) {
      throw new RuntimeException("Unable to read tinydb file " + tinyDbFile.getAbsolutePath(), e);
    }

    final JsonNode dbRoot = root == null ? null : root.get(TINY_DB_ROOT_FIELD);
    if (dbRoot == null || !dbRoot.isObject()) {
      throw new IllegalArgumentException("tinydb file " + tinyDbFile.getAbsolutePath()
          + " does not contain a \"" + TINY_DB_ROOT_FIELD + "\" object.");
    }

    final Map<String, String> mapping = new HashMap<>();
    int skippedRecords = 0;

    // Iterate through each field in the "_default" node.
    // Ignore the numeric index keys and extract the udId and vdiId fields to create mapping.
    final Iterator<Map.Entry<String, JsonNode>> fieldIterator = dbRoot.fields();
    while (fieldIterator.hasNext()) {
      final Map.Entry<String, JsonNode> entry = fieldIterator.next();
      final JsonNode record = entry.getValue();

      if (!record.hasNonNull(LEGACY_ID_FIELD) || !record.hasNonNull(VDI_ID_FIELD)) {
        LOG.warn("Skipping tinydb record " + entry.getKey() + ": missing or null "
            + LEGACY_ID_FIELD + "/" + VDI_ID_FIELD + ".");
        skippedRecords++;
        continue;
      }

      mapping.put(record.get(LEGACY_ID_FIELD).asText(), record.get(VDI_ID_FIELD).asText());
    }

    if (skippedRecords > 0) {
      LOG.warn("Skipped " + skippedRecords + " tinydb records without a usable legacy/VDI ID pair.");
    }
    LOG.info("Extracted a mapping of " + mapping.size() + " legacy to VDI identifiers.");
    return mapping;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;
import org.json.JSONObject;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

import java.io.File;
import java.util.List;
import java.util.Objects;

/**
 * Unit tests for {@link VDIMigrationPlugin}, driven by the tinydb fixture
 * {@code migration-unit-test-1.json} on the test classpath.
 */
public class VDIMigrationPluginTest {
  private static final String TINY_DB_RESOURCE = "migration-unit-test-1.json";

  private WdkModel mockedModel;
  private ClassLoader classLoader;

  @Before
  public void setup() {
    classLoader = getClass().getClassLoader();
    mockedModel = Mockito.mock(WdkModel.class);
  }

  /** A dataset ID whose legacy ID is in the fixture is rewritten to the prefixed VDI ID. */
  @Test
  public void test() throws Exception {
    final VDIMigrationPlugin migrationPlugin = configuredPlugin();
    TableRowInterfaces.RowResult<AnalysisRow> result = migrationPlugin.processRecord(
        new AnalysisRow("x",
            "EDAUD_1234",
            new JSONObject(),
            3,
            4,
            5));
    Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId());
  }

  /** A dataset ID whose legacy ID is absent from the fixture is returned unchanged. */
  @Test
  public void testUnmappedDatasetIsLeftUnchanged() throws Exception {
    final VDIMigrationPlugin migrationPlugin = configuredPlugin();
    TableRowInterfaces.RowResult<AnalysisRow> result = migrationPlugin.processRecord(
        new AnalysisRow("y",
            "EDAUD_9999",
            new JSONObject(),
            0,
            0,
            0));
    Assert.assertEquals("EDAUD_9999", result.getRow().getDatasetId());
  }

  /** Creates a plugin configured with the fixture file as its tinydb input. */
  private VDIMigrationPlugin configuredPlugin() throws Exception {
    // Convert via URI rather than URL.getFile(): getFile() returns the URL-encoded path, which
    // does not exist on disk when the checkout path contains spaces or other escaped characters.
    final File file = new File(Objects.requireNonNull(
        classLoader.getResource(TINY_DB_RESOURCE),
        "Test resource not found on classpath: " + TINY_DB_RESOURCE).toURI());
    final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin();
    final List<String> args = List.of("--tinyDb=" + file.getPath());
    migrationPlugin.configure(mockedModel, args);
    return migrationPlugin;
  }
}
11 changes: 11 additions & 0 deletions Model/src/test/resources/migration-unit-test-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"_default": {
"1": {
"type": "owner",
"udId": 1234,
"vdiId": "123XyZ",
"msg": null,
"time": "Fri Mar 26 00:00:00 2024"
}
}
}

0 comments on commit e8ed6cc

Please sign in to comment.