Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement analysis migration for VDI #83

Merged
merged 3 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Model/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.irods.jargon</groupId>
<artifactId>jargon-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package org.gusdb.wdk.model.fix;

import com.fasterxml.jackson.databind.JsonNode;
import org.gusdb.fgputil.json.JsonUtil;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Reads the TinyDB file and exposes the mapping from legacy user dataset (UD) identifiers to
 * VDI identifiers that it records.
 */
public class VdiMigrationFileReader {
  private static final String DEFAULT_TABLE_KEY = "_default";
  private static final String LEGACY_ID_KEY = "udId";
  private static final String VDI_ID_KEY = "vdiId";

  private final File file;

  /**
   * @param file TinyDB JSON file to read; it is only opened when
   *             {@link #readLegacyStudyIdToVdiId()} is called.
   */
  public VdiMigrationFileReader(File file) {
    this.file = file;
  }

  /**
   * Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers.
   *
   * Example file format:
   *
   * <pre>{@code
   * {
   *   "_default": {
   *     "1": {
   *       "type": "owner",
   *       "udId": 1234,
   *       "vdiId": "123XyZ",
   *       "msg": null,
   *       "time": "Fri Mar 26 00:00:00 2024"
   *     }
   *   }
   * }
   * }</pre>
   *
   * The numeric index keys under "_default" are ignored. Records that lack a "udId" or a "vdiId",
   * or carry an explicit JSON null for either, are skipped: converting a JSON null to text would
   * otherwise yield the literal string "null" as an identifier.
   *
   * @return Map of legacy UD Ids to VDI Ids (both as text); empty if no usable record is present.
   * @throws IllegalStateException if the file has no "_default" table.
   * @throws RuntimeException if the file cannot be read or parsed; the I/O error is the cause.
   */
  public Map<String, String> readLegacyStudyIdToVdiId() {
    final JsonNode root;
    try {
      root = JsonUtil.Jackson.readTree(file);
    } catch (IOException e) {
      throw new RuntimeException("Unable to read TinyDB file " + file, e);
    }

    final JsonNode dbRoot = root == null ? null : root.get(DEFAULT_TABLE_KEY);
    if (dbRoot == null) {
      throw new IllegalStateException(
          "TinyDB file " + file + " does not contain a \"" + DEFAULT_TABLE_KEY + "\" table.");
    }

    final Map<String, String> mapping = new HashMap<>();

    // Iterating a JSON object node visits its values, which is all that is needed here.
    for (JsonNode entry : dbRoot) {
      if (entry.hasNonNull(LEGACY_ID_KEY) && entry.hasNonNull(VDI_ID_KEY)) {
        mapping.put(entry.get(LEGACY_ID_KEY).asText(), entry.get(VDI_ID_KEY).asText());
      }
    }

    return mapping;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@ public void update(final List<T> modifiedRows) {

// if updates enabled, execute argument batch
if (!UPDATES_DISABLED) {
LOG.info("would have run update. " + batch);
new SQLRunner(_userDs, _writers.get(i).getWriteSql(
_schema), true, "migration-update-rows").executeUpdateBatch(batch);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ public AnalysisRow(ResultSet rs, DBPlatform platform) throws SQLException {
_numVisualizations = rs.getInt("num_visualizations");
}

/**
 * Builds a row directly from explicit values instead of reading them from a result set.
 * The three counts are stored exactly as passed in; they are not recomputed from the descriptor.
 *
 * @param analysisId        ID of the analysis
 * @param datasetId         ID of the dataset the analysis refers to
 * @param descriptor        analysis descriptor JSON
 * @param numFilters        number of filters recorded for the analysis
 * @param numComputations   number of computations recorded for the analysis
 * @param numVisualizations number of visualizations recorded for the analysis
 */
public AnalysisRow(
    String analysisId,
    String datasetId,
    JSONObject descriptor,
    int numFilters,
    int numComputations,
    int numVisualizations) {
  this._analysisId = analysisId;
  this._datasetId = datasetId;
  this._descriptor = descriptor;
  this._numFilters = numFilters;
  this._numComputations = numComputations;
  this._numVisualizations = numVisualizations;
}

public Object[] toOrderedValues() {
return new Object[] {
_datasetId, _descriptor.toString(), _numFilters, _numComputations, _numVisualizations, _analysisId
Expand Down Expand Up @@ -61,6 +70,22 @@ public JSONObject getDescriptor() {
return _descriptor;
}

/**
 * @return the analysis ID held by this row
 */
public String getAnalysisId() {
  return this._analysisId;
}

/**
 * @return the filter count currently stored on this row
 */
public int getNumFilters() {
  return this._numFilters;
}

/**
 * @return the computation count currently stored on this row
 */
public int getNumComputations() {
  return this._numComputations;
}

/**
 * @return the visualization count currently stored on this row
 */
public int getNumVisualizations() {
  return this._numVisualizations;
}

/**
* Sets a new descriptor and refreshes the stats (number of filters, computations,
* and visualizations) on this analysis.
Expand All @@ -72,6 +97,10 @@ public void setDescriptor(JSONObject descriptor) {
refreshStats();
}

/**
 * Replaces the dataset ID held by this row. No other field is touched.
 *
 * @param datasetId new dataset ID
 */
public void setDatasetId(String datasetId) {
  this._datasetId = datasetId;
}

public void refreshStats() {
_numFilters = _descriptor.getJSONObject("subset").getJSONArray("descriptor").length();
JSONArray computations = Optional.ofNullable(_descriptor.optJSONArray("computations")).orElse(new JSONArray());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.log4j.Logger;
import org.gusdb.fgputil.json.JsonUtil;
import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.VdiMigrationFileReader;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

public class VDIMigrationPlugin extends AbstractAnalysisUpdater {
private static final Logger LOG = Logger.getLogger(VDIMigrationPlugin.class);
public static final String UD_DATASET_ID_PREFIX = "EDAUD_";

private Map<String, String> legacyIdToVdiId;
private final AtomicInteger missingFromVdiCount = new AtomicInteger(0);

@Override
public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception {
final String legacyDatasetId = nextRow.getDatasetId();
final String legacyUdId = legacyDatasetId.replace(UD_DATASET_ID_PREFIX, "");
final String vdiId = legacyIdToVdiId.get(legacyUdId);

if (vdiId == null) {
LOG.warn("Unable to find legacy ID " + legacyUdId + " in the tinydb file.");
missingFromVdiCount.incrementAndGet();
return new TableRowInterfaces.RowResult<>(nextRow);
}

// Append UD prefix to VDI ID. The prefix is prepended in the view that maps stable VDI IDs to the unstable study
// ID, which is the currency of EDA.
final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we were planning to change the previx to VDIID_ or something once we moved to it? Weren't there multiple hour-long conversations about VD vs VDID vs VDIID?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just for referring to EDA user datasets that are managed by VDI. I think keeping it as UD should be fine. We discussed in scrum this morning.

nextRow.setDatasetId(vdiDatasetId);

return new TableRowInterfaces.RowResult<>(nextRow)
.setShouldWrite(_writeToDb);
}

@Override
public void dumpStatistics() {
if (missingFromVdiCount.get() > 0) {
LOG.warn("Failed to migrate " + missingFromVdiCount + " datasets, they were not found in the provided tinydb file.");
}
}

@Override
public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception {
// Parse args in the format --<argname>=<argvalue>
final Map<String, String> args = additionalArgs.stream()
.map(arg -> Arrays.stream(arg.split("="))
.map(String::trim) // Trim whitespace from args
.collect(Collectors.toList()))
.collect(Collectors.toMap(
pair -> pair.get(0),
pair -> pair.size() > 1 ? pair.get(1) : "true")); // A flag without an "=" is a boolean. Set true if present.

// Validate required arg.
if (!args.containsKey("--tinyDb")) {
throw new IllegalArgumentException("Missing required flag --tinyDb");
}

final File tinyDbFile = new File(args.get("--tinyDb"));
VdiMigrationFileReader reader = new VdiMigrationFileReader(tinyDbFile);

this.legacyIdToVdiId = reader.readLegacyStudyIdToVdiId();

// Default to dryrun to avoid incidental migrations when testing.
this._writeToDb = Boolean.parseBoolean(args.getOrDefault("--liveRun", "false"));
}

/**
* Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers.
*
* Example file format:
*
* {
* "_default": {
* "1": {
* "type": "owner",
* "udId": 1234,
* "vdiId": "123XyZ",
* "msg": null,
* "time": "Fri Mar 26 00:00:00 2024"
* }
* }
*
* @param tinyDbFile TinyDB file, output of the migration script run to migrate legacy UDs into VDI.
* @return Map of legacy UD Ids to VDI Ids.
*/
private Map<String, String> readLegacyStudyIdToVdiId(File tinyDbFile) {
try {
JsonNode root = JsonUtil.Jackson.readTree(tinyDbFile);
JsonNode dbRoot = root.get("_default");

Map<String, String> mapping = new HashMap<>();
Iterator<Map.Entry<String, JsonNode>> fieldIterator = dbRoot.fields();

// Iterate through each field in the "_default" node.
// Ignore the numeric index keys and extract the udId and vdiId fields to create mapping.
while (fieldIterator.hasNext()) {
Map.Entry<String, JsonNode> entry = fieldIterator.next();
mapping.put(entry.getValue().get("udId").asText(), entry.getValue().get("vdiId").asText());
}

LOG.info("Extracted a mapping of " + mapping.size() + " legacy to VDI identifiers.");
return mapping;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;
import org.json.JSONObject;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

import java.io.File;
import java.util.List;
import java.util.Objects;

public class VDIMigrationPluginTest {
private WdkModel mockedModel;
private ClassLoader classLoader;

@Before
public void setup() {
classLoader = getClass().getClassLoader();
mockedModel = Mockito.mock(WdkModel.class);
}

@Test
public void testUpdateEnabled() throws Exception {
final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile());
final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin();
final List<String> args = List.of("--tinyDb=" + file.getPath());
migrationPlugin.configure(mockedModel, args);
TableRowInterfaces.RowResult<AnalysisRow> result = migrationPlugin.processRecord(
new AnalysisRow("x",
"EDAUD_1234",
new JSONObject(),
3,
4,
5));
Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So maybe also check that update flag is true here. And can add a case where it should not be true.

Assert.assertFalse(result.shouldWrite());
}

@Test
public void testUpdateDisabled() throws Exception {
final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile());
final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin();
final List<String> args = List.of("--tinyDb=" + file.getPath(), "--liveRun");
migrationPlugin.configure(mockedModel, args);
TableRowInterfaces.RowResult<AnalysisRow> result = migrationPlugin.processRecord(
new AnalysisRow("x",
"EDAUD_1234",
new JSONObject(),
3,
4,
5));
Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId());
Assert.assertTrue(result.shouldWrite());
}
}
11 changes: 11 additions & 0 deletions Model/src/test/resources/migration-unit-test-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"_default": {
"1": {
"type": "owner",
"udId": 1234,
"vdiId": "123XyZ",
"msg": null,
"time": "Fri Mar 26 00:00:00 2024"
}
}
}
Loading