-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement analysis migration for VDI (#83)
- Loading branch information
Showing
7 changed files
with
285 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package org.gusdb.wdk.model.fix; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import org.gusdb.fgputil.json.JsonUtil; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.HashMap; | ||
import java.util.Iterator; | ||
import java.util.Map; | ||
|
||
public class VdiMigrationFileReader { | ||
private File file; | ||
|
||
public VdiMigrationFileReader(File file) { | ||
this.file = file; | ||
} | ||
|
||
/** | ||
* Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers. | ||
* | ||
* Example file format: | ||
* | ||
* { | ||
* "_default": { | ||
* "1": { | ||
* "type": "owner", | ||
* "udId": 1234, | ||
* "vdiId": "123XyZ", | ||
* "msg": null, | ||
* "time": "Fri Mar 26 00:00:00 2024" | ||
* } | ||
* } | ||
* | ||
* @return Map of legacy UD Ids to VDI Ids. | ||
*/ | ||
public Map<String, String> readLegacyStudyIdToVdiId() { | ||
try { | ||
JsonNode root = JsonUtil.Jackson.readTree(file); | ||
JsonNode dbRoot = root.get("_default"); | ||
|
||
Map<String, String> mapping = new HashMap<>(); | ||
Iterator<Map.Entry<String, JsonNode>> fieldIterator = dbRoot.fields(); | ||
|
||
// Iterate through each field in the "_default" node. | ||
// Ignore the numeric index keys and extract the udId and vdiId fields to create mapping. | ||
while (fieldIterator.hasNext()) { | ||
Map.Entry<String, JsonNode> entry = fieldIterator.next(); | ||
mapping.put(entry.getValue().get("udId").asText(), entry.getValue().get("vdiId").asText()); | ||
} | ||
|
||
return mapping; | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
122 changes: 122 additions & 0 deletions
122
...l/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import org.apache.log4j.Logger; | ||
import org.gusdb.fgputil.json.JsonUtil; | ||
import org.gusdb.wdk.model.WdkModel; | ||
import org.gusdb.wdk.model.fix.VdiMigrationFileReader; | ||
import org.gusdb.wdk.model.fix.table.TableRowInterfaces; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
import java.util.stream.Collectors; | ||
|
||
public class VDIMigrationPlugin extends AbstractAnalysisUpdater { | ||
private static final Logger LOG = Logger.getLogger(VDIMigrationPlugin.class); | ||
public static final String UD_DATASET_ID_PREFIX = "EDAUD_"; | ||
|
||
private Map<String, String> legacyIdToVdiId; | ||
private final AtomicInteger missingFromVdiCount = new AtomicInteger(0); | ||
|
||
@Override | ||
public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception { | ||
final String legacyDatasetId = nextRow.getDatasetId(); | ||
final String legacyUdId = legacyDatasetId.replace(UD_DATASET_ID_PREFIX, ""); | ||
final String vdiId = legacyIdToVdiId.get(legacyUdId); | ||
|
||
if (vdiId == null) { | ||
LOG.warn("Unable to find legacy ID " + legacyUdId + " in the tinydb file."); | ||
missingFromVdiCount.incrementAndGet(); | ||
return new TableRowInterfaces.RowResult<>(nextRow); | ||
} | ||
|
||
// Append UD prefix to VDI ID. The prefix is prepended in the view that maps stable VDI IDs to the unstable study | ||
// ID, which is the currency of EDA. | ||
final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId; | ||
nextRow.setDatasetId(vdiDatasetId); | ||
|
||
return new TableRowInterfaces.RowResult<>(nextRow) | ||
.setShouldWrite(_writeToDb); | ||
} | ||
|
||
@Override | ||
public void dumpStatistics() { | ||
if (missingFromVdiCount.get() > 0) { | ||
LOG.warn("Failed to migrate " + missingFromVdiCount + " datasets, they were not found in the provided tinydb file."); | ||
} | ||
} | ||
|
||
@Override | ||
public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception { | ||
// Parse args in the format --<argname>=<argvalue> | ||
final Map<String, String> args = additionalArgs.stream() | ||
.map(arg -> Arrays.stream(arg.split("=")) | ||
.map(String::trim) // Trim whitespace from args | ||
.collect(Collectors.toList())) | ||
.collect(Collectors.toMap( | ||
pair -> pair.get(0), | ||
pair -> pair.size() > 1 ? pair.get(1) : "true")); // A flag without an "=" is a boolean. Set true if present. | ||
|
||
// Validate required arg. | ||
if (!args.containsKey("--tinyDb")) { | ||
throw new IllegalArgumentException("Missing required flag --tinyDb"); | ||
} | ||
|
||
final File tinyDbFile = new File(args.get("--tinyDb")); | ||
VdiMigrationFileReader reader = new VdiMigrationFileReader(tinyDbFile); | ||
|
||
this.legacyIdToVdiId = reader.readLegacyStudyIdToVdiId(); | ||
|
||
// Default to dryrun to avoid incidental migrations when testing. | ||
this._writeToDb = Boolean.parseBoolean(args.getOrDefault("--liveRun", "false")); | ||
} | ||
|
||
/** | ||
* Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers. | ||
* | ||
* Example file format: | ||
* | ||
* { | ||
* "_default": { | ||
* "1": { | ||
* "type": "owner", | ||
* "udId": 1234, | ||
* "vdiId": "123XyZ", | ||
* "msg": null, | ||
* "time": "Fri Mar 26 00:00:00 2024" | ||
* } | ||
* } | ||
* | ||
* @param tinyDbFile TinyDB file, output of the migration script run to migrate legacy UDs into VDI. | ||
* @return Map of legacy UD Ids to VDI Ids. | ||
*/ | ||
private Map<String, String> readLegacyStudyIdToVdiId(File tinyDbFile) { | ||
try { | ||
JsonNode root = JsonUtil.Jackson.readTree(tinyDbFile); | ||
JsonNode dbRoot = root.get("_default"); | ||
|
||
Map<String, String> mapping = new HashMap<>(); | ||
Iterator<Map.Entry<String, JsonNode>> fieldIterator = dbRoot.fields(); | ||
|
||
// Iterate through each field in the "_default" node. | ||
// Ignore the numeric index keys and extract the udId and vdiId fields to create mapping. | ||
while (fieldIterator.hasNext()) { | ||
Map.Entry<String, JsonNode> entry = fieldIterator.next(); | ||
mapping.put(entry.getValue().get("udId").asText(), entry.getValue().get("vdiId").asText()); | ||
} | ||
|
||
LOG.info("Extracted a mapping of " + mapping.size() + " legacy to VDI identifiers."); | ||
return mapping; | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
} |
59 changes: 59 additions & 0 deletions
59
...c/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; | ||
|
||
import org.gusdb.wdk.model.WdkModel; | ||
import org.gusdb.wdk.model.fix.table.TableRowInterfaces; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; | ||
import org.json.JSONObject; | ||
import org.junit.Assert; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
import org.mockito.Mockito; | ||
|
||
import java.io.File; | ||
import java.util.List; | ||
import java.util.Objects; | ||
|
||
public class VDIMigrationPluginTest { | ||
private WdkModel mockedModel; | ||
private ClassLoader classLoader; | ||
|
||
@Before | ||
public void setup() { | ||
classLoader = getClass().getClassLoader(); | ||
mockedModel = Mockito.mock(WdkModel.class); | ||
} | ||
|
||
@Test | ||
public void testUpdateEnabled() throws Exception { | ||
final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); | ||
final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); | ||
final List<String> args = List.of("--tinyDb=" + file.getPath()); | ||
migrationPlugin.configure(mockedModel, args); | ||
TableRowInterfaces.RowResult<AnalysisRow> result = migrationPlugin.processRecord( | ||
new AnalysisRow("x", | ||
"EDAUD_1234", | ||
new JSONObject(), | ||
3, | ||
4, | ||
5)); | ||
Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId()); | ||
Assert.assertFalse(result.shouldWrite()); | ||
} | ||
|
||
@Test | ||
public void testUpdateDisabled() throws Exception { | ||
final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); | ||
final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); | ||
final List<String> args = List.of("--tinyDb=" + file.getPath(), "--liveRun"); | ||
migrationPlugin.configure(mockedModel, args); | ||
TableRowInterfaces.RowResult<AnalysisRow> result = migrationPlugin.processRecord( | ||
new AnalysisRow("x", | ||
"EDAUD_1234", | ||
new JSONObject(), | ||
3, | ||
4, | ||
5)); | ||
Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId()); | ||
Assert.assertTrue(result.shouldWrite()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"_default": { | ||
"1": { | ||
"type": "owner", | ||
"udId": 1234, | ||
"vdiId": "123XyZ", | ||
"msg": null, | ||
"time": "Fri Mar 26 00:00:00 2024" | ||
} | ||
} | ||
} |