Skip to content

Commit

Permalink
Implement analysis migration for VDI (#83)
Browse files Browse the repository at this point in the history
  • Loading branch information
dmgaldi authored Apr 10, 2024
1 parent 22f77b3 commit 7982b74
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Model/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.irods.jargon</groupId>
<artifactId>jargon-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package org.gusdb.wdk.model.fix;

import com.fasterxml.jackson.databind.JsonNode;
import org.gusdb.fgputil.json.JsonUtil;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Reads the tinydb JSON file written by the legacy user dataset (UD) to VDI migration and
 * exposes its contents as a lookup from legacy UD identifiers to VDI identifiers.
 */
public class VdiMigrationFileReader {
  private final File file;

  /**
   * @param file tinydb JSON file to read; it is not opened until
   *             {@link #readLegacyStudyIdToVdiId()} is called.
   */
  public VdiMigrationFileReader(File file) {
    this.file = file;
  }

  /**
   * Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers.
   *
   * Example file format:
   *
   * {
   *   "_default": {
   *     "1": {
   *       "type": "owner",
   *       "udId": 1234,
   *       "vdiId": "123XyZ",
   *       "msg": null,
   *       "time": "Fri Mar 26 00:00:00 2024"
   *     }
   *   }
   * }
   *
   * Records whose "udId" or "vdiId" is absent or JSON null are skipped: they carry no usable
   * mapping, and converting a JSON null to text would otherwise yield the literal string "null".
   *
   * @return Map of legacy UD Ids to VDI Ids.
   * @throws IllegalStateException if the file has no top-level "_default" object.
   * @throws RuntimeException wrapping the IOException if the file cannot be read or parsed.
   */
  public Map<String, String> readLegacyStudyIdToVdiId() {
    try {
      JsonNode root = JsonUtil.Jackson.readTree(file);
      JsonNode dbRoot = root == null ? null : root.get("_default");

      // Fail with a descriptive message rather than a bare NullPointerException further down.
      if (dbRoot == null || !dbRoot.isObject()) {
        throw new IllegalStateException(
            "File " + file + " does not contain a top-level \"_default\" object.");
      }

      Map<String, String> mapping = new HashMap<>();
      Iterator<Map.Entry<String, JsonNode>> fieldIterator = dbRoot.fields();

      // Iterate through each field in the "_default" node.
      // Ignore the numeric index keys and extract the udId and vdiId fields to create mapping.
      while (fieldIterator.hasNext()) {
        JsonNode record = fieldIterator.next().getValue();

        // Skip records without a usable pair of identifiers.
        if (!record.hasNonNull("udId") || !record.hasNonNull("vdiId")) {
          continue;
        }

        mapping.put(record.get("udId").asText(), record.get("vdiId").asText());
      }

      return mapping;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@ public void update(final List<T> modifiedRows) {

// if updates enabled, execute argument batch
if (!UPDATES_DISABLED) {
LOG.info("would have run update. " + batch);
new SQLRunner(_userDs, _writers.get(i).getWriteSql(
_schema), true, "migration-update-rows").executeUpdateBatch(batch);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ public AnalysisRow(ResultSet rs, DBPlatform platform) throws SQLException {
_numVisualizations = rs.getInt("num_visualizations");
}

/**
 * Builds a row directly from the supplied values instead of reading them from a result set.
 * The three counts are stored exactly as given; they are not recomputed from the descriptor.
 *
 * @param analysisId        value stored as this row's analysis ID
 * @param datasetId         value stored as this row's dataset ID
 * @param descriptor        value stored as this row's descriptor
 * @param numFilters        value stored as the filter count
 * @param numComputations   value stored as the computation count
 * @param numVisualizations value stored as the visualization count
 */
public AnalysisRow(String analysisId, String datasetId, JSONObject descriptor, int numFilters, int numComputations, int numVisualizations) {
  // Identifiers
  this._analysisId = analysisId;
  this._datasetId = datasetId;

  // Descriptor and the statistics that accompany it
  this._descriptor = descriptor;
  this._numFilters = numFilters;
  this._numComputations = numComputations;
  this._numVisualizations = numVisualizations;
}

public Object[] toOrderedValues() {
return new Object[] {
_datasetId, _descriptor.toString(), _numFilters, _numComputations, _numVisualizations, _analysisId
Expand Down Expand Up @@ -61,6 +70,22 @@ public JSONObject getDescriptor() {
return _descriptor;
}

/**
 * @return the analysis ID held by this row
 */
public String getAnalysisId() {
  return this._analysisId;
}

/**
 * @return the filter count currently stored on this row
 */
public int getNumFilters() {
  return this._numFilters;
}

/**
 * @return the computation count currently stored on this row
 */
public int getNumComputations() {
  return this._numComputations;
}

/**
 * @return the visualization count currently stored on this row
 */
public int getNumVisualizations() {
  return this._numVisualizations;
}

/**
* Sets a new descriptor and refreshes the stats (number of filters, computations,
* and visualizations) on this analysis.
Expand All @@ -72,6 +97,10 @@ public void setDescriptor(JSONObject descriptor) {
refreshStats();
}

/**
 * Replaces the dataset ID held by this row. No other field is touched.
 *
 * @param datasetId new dataset ID
 */
public void setDatasetId(String datasetId) {
  this._datasetId = datasetId;
}

public void refreshStats() {
_numFilters = _descriptor.getJSONObject("subset").getJSONArray("descriptor").length();
JSONArray computations = Optional.ofNullable(_descriptor.optJSONArray("computations")).orElse(new JSONArray());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.log4j.Logger;
import org.gusdb.fgputil.json.JsonUtil;
import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.VdiMigrationFileReader;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
 * Analysis updater that rewrites the dataset ID of each analysis row from its legacy user
 * dataset (UD) form to the corresponding VDI form, using the mapping in a tinydb file.
 *
 * Arguments (see {@link #configure(WdkModel, List)}):
 *   --tinyDb=<path>  required; tinydb file containing the legacy UD ID to VDI ID mapping
 *   --liveRun        optional; when present (or set to "true") rows are flagged to be written
 */
public class VDIMigrationPlugin extends AbstractAnalysisUpdater {
  private static final Logger LOG = Logger.getLogger(VDIMigrationPlugin.class);
  public static final String UD_DATASET_ID_PREFIX = "EDAUD_";

  // Populated by configure(); maps legacy UD ID (without prefix) to VDI ID.
  private Map<String, String> legacyIdToVdiId;
  private final AtomicInteger missingFromVdiCount = new AtomicInteger(0);

  /**
   * Looks up the row's dataset ID (minus the UD prefix) in the tinydb mapping. If a VDI ID is
   * found, the row's dataset ID is replaced with the prefixed VDI ID and the result is flagged
   * for writing according to the configured live-run setting. If no mapping is found, the row is
   * returned unchanged, a warning is logged and the miss is counted.
   *
   * @param nextRow row to process; modified in place when a mapping exists
   * @return result wrapping the (possibly modified) row
   */
  @Override
  public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception {
    final String legacyDatasetId = nextRow.getDatasetId();

    // Strip the prefix only where it leads the ID; String.replace would also remove any
    // later occurrence of the same text inside the identifier.
    final String legacyUdId = legacyDatasetId.startsWith(UD_DATASET_ID_PREFIX)
        ? legacyDatasetId.substring(UD_DATASET_ID_PREFIX.length())
        : legacyDatasetId;
    final String vdiId = legacyIdToVdiId.get(legacyUdId);

    if (vdiId == null) {
      LOG.warn("Unable to find legacy ID " + legacyUdId + " in the tinydb file.");
      missingFromVdiCount.incrementAndGet();
      return new TableRowInterfaces.RowResult<>(nextRow);
    }

    // Prepend UD prefix to VDI ID. The prefix is prepended in the view that maps stable VDI IDs to the unstable study
    // ID, which is the currency of EDA.
    final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId;
    nextRow.setDatasetId(vdiDatasetId);

    return new TableRowInterfaces.RowResult<>(nextRow)
        .setShouldWrite(_writeToDb);
  }

  /**
   * Logs a warning with the number of rows whose legacy ID had no entry in the tinydb file,
   * if there were any.
   */
  @Override
  public void dumpStatistics() {
    if (missingFromVdiCount.get() > 0) {
      LOG.warn("Failed to migrate " + missingFromVdiCount.get() + " datasets, they were not found in the provided tinydb file.");
    }
  }

  /**
   * Parses the plugin arguments, loads the legacy-to-VDI mapping from the tinydb file and
   * records whether this is a live run.
   *
   * @param wdkModel       not used by this plugin
   * @param additionalArgs arguments in the format --<argname>=<argvalue>, or a bare --<argname>
   *                       for boolean flags
   * @throws IllegalArgumentException if --tinyDb is not supplied
   */
  @Override
  public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception {
    // Parse args in the format --<argname>=<argvalue>
    // Split on the first "=" only so that values which themselves contain "=" (e.g. a file
    // path) are preserved intact.
    final Map<String, String> args = additionalArgs.stream()
        .map(arg -> Arrays.stream(arg.split("=", 2))
            .map(String::trim) // Trim whitespace from args
            .collect(Collectors.toList()))
        .collect(Collectors.toMap(
            pair -> pair.get(0),
            // A flag without a value is a boolean. Set true if present.
            pair -> pair.size() > 1 && !pair.get(1).isEmpty() ? pair.get(1) : "true"));

    // Validate required arg.
    if (!args.containsKey("--tinyDb")) {
      throw new IllegalArgumentException("Missing required flag --tinyDb");
    }

    final File tinyDbFile = new File(args.get("--tinyDb"));
    VdiMigrationFileReader reader = new VdiMigrationFileReader(tinyDbFile);

    this.legacyIdToVdiId = reader.readLegacyStudyIdToVdiId();
    LOG.info("Extracted a mapping of " + this.legacyIdToVdiId.size() + " legacy to VDI identifiers.");

    // Default to dryrun to avoid incidental migrations when testing.
    this._writeToDb = Boolean.parseBoolean(args.getOrDefault("--liveRun", "false"));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;
import org.json.JSONObject;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

import java.io.File;
import java.util.List;
import java.util.Objects;

/**
 * Unit tests for {@link VDIMigrationPlugin}: verifies that a legacy dataset ID is rewritten to
 * its VDI equivalent and that the write flag follows the --liveRun argument.
 */
public class VDIMigrationPluginTest {
  private static final String TINY_DB_RESOURCE = "migration-unit-test-1.json";

  private WdkModel mockedModel;
  private ClassLoader classLoader;

  @Before
  public void setup() {
    classLoader = getClass().getClassLoader();
    mockedModel = Mockito.mock(WdkModel.class);
  }

  /** With --liveRun supplied, the migrated row must be flagged for writing. */
  @Test
  public void testUpdateEnabled() throws Exception {
    final TableRowInterfaces.RowResult<AnalysisRow> result =
        migrateSampleRow(List.of(tinyDbArg(), "--liveRun"));
    Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId());
    Assert.assertTrue(result.shouldWrite());
  }

  /** Without --liveRun the plugin defaults to a dry run: the row is migrated but not written. */
  @Test
  public void testUpdateDisabled() throws Exception {
    final TableRowInterfaces.RowResult<AnalysisRow> result =
        migrateSampleRow(List.of(tinyDbArg()));
    Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId());
    Assert.assertFalse(result.shouldWrite());
  }

  /**
   * Builds the --tinyDb argument pointing at the test resource. The resource URL is converted
   * through a URI so that percent-encoded characters (e.g. spaces in the checkout path) are
   * decoded into a valid file-system path.
   */
  private String tinyDbArg() throws Exception {
    final File file = new File(Objects.requireNonNull(classLoader.getResource(TINY_DB_RESOURCE)).toURI());
    return "--tinyDb=" + file.getPath();
  }

  /** Configures a fresh plugin with the given args and runs it over a row with dataset ID EDAUD_1234. */
  private TableRowInterfaces.RowResult<AnalysisRow> migrateSampleRow(List<String> args) throws Exception {
    final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin();
    migrationPlugin.configure(mockedModel, args);
    return migrationPlugin.processRecord(
        new AnalysisRow("x",
            "EDAUD_1234",
            new JSONObject(),
            3,
            4,
            5));
  }
}
11 changes: 11 additions & 0 deletions Model/src/test/resources/migration-unit-test-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"_default": {
"1": {
"type": "owner",
"udId": 1234,
"vdiId": "123XyZ",
"msg": null,
"time": "Fri Mar 26 00:00:00 2024"
}
}
}

0 comments on commit 7982b74

Please sign in to comment.