-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement analysis migration for VDI #83
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import org.apache.log4j.Logger; | ||
import org.gusdb.fgputil.json.JsonUtil; | ||
import org.gusdb.wdk.model.WdkModel; | ||
import org.gusdb.wdk.model.fix.table.TableRowInterfaces; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; | ||
|
||
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
|
||
public class VDIMigrationPlugin extends AbstractAnalysisUpdater { | ||
private static final Logger LOG = Logger.getLogger(VDIMigrationPlugin.class); | ||
public static final String UD_DATASET_ID_PREFIX = "EDAUD_"; | ||
|
||
private Map<String, String> legacyIdToVdiId; | ||
private int missingFromVdiCount = 0; | ||
|
||
@Override | ||
public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception { | ||
final String legacyDatasetId = nextRow.getDatasetId(); | ||
final String legacyUdId = legacyDatasetId.replace(UD_DATASET_ID_PREFIX, ""); | ||
final String vdiId = legacyIdToVdiId.get(legacyUdId); | ||
|
||
if (vdiId == null) { | ||
LOG.warn("Unable to find legacy ID " + legacyUdId + " in the tinydb file."); | ||
missingFromVdiCount++; | ||
return new TableRowInterfaces.RowResult<>(nextRow); | ||
} | ||
|
||
// Append UD prefix to VDI ID. The prefix is prepended in the view that maps stable VDI IDs to the unstable study | ||
// ID, which is the currency of EDA. | ||
final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought we were planning to change the previx to VDIID_ or something once we moved to it? Weren't there multiple hour-long conversations about VD vs VDID vs VDIID? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just for referring to EDA user datasets that are managed by VDI. I think keeping it as UD should be fine. We discussed in scrum this morning. |
||
|
||
// Create a copy with just the dataset ID updated to VDI counterpart. | ||
AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, nextRow.getDescriptor(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't have to do this here. You could just make a datasetId setter and modify nextRow. The incoming object is a throwaway to the "framework". Might save time. |
||
nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations()); | ||
|
||
return new TableRowInterfaces.RowResult<>(out); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Want to set the result object's update flag to true here unless _writeToDb is false. |
||
} | ||
|
||
@Override | ||
public void dumpStatistics() { | ||
if (missingFromVdiCount > 0) { | ||
LOG.warn("Failed to migrate " + missingFromVdiCount + " datasets, they were not found in the provided tinydb file."); | ||
} | ||
} | ||
|
||
@Override | ||
public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception { | ||
// Parse args in the format --<argname>=<argvalue> | ||
final Map<String, String> args = additionalArgs.stream() | ||
.map(arg -> Arrays.stream(arg.split("=")) | ||
.map(String::trim) // Trim whitespace from args | ||
.collect(Collectors.toList())) | ||
.collect(Collectors.toMap( | ||
pair -> pair.get(0), | ||
pair -> pair.size() > 1 ? pair.get(1) : "true")); // A flag without an "=" is a boolean. Set true if present. | ||
|
||
// Validate required arg. | ||
if (!args.containsKey("--tinyDb")) { | ||
throw new IllegalArgumentException("Missing required flag --tinyDb"); | ||
} | ||
final File tinyDbFile = new File(args.get("--tinyDb")); | ||
|
||
this.legacyIdToVdiId = readLegacyStudyIdToVdiId(tinyDbFile); | ||
|
||
// Default to dryrun to avoid incidental migrations when testing. | ||
this._writeToDb = Boolean.parseBoolean(args.getOrDefault("--liveRun", "false")); | ||
} | ||
|
||
/** | ||
* Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers. | ||
* | ||
* Example file format: | ||
* | ||
* { | ||
* "_default": { | ||
* "1": { | ||
* "type": "owner", | ||
* "udId": 1234, | ||
* "vdiId": "123XyZ", | ||
* "msg": null, | ||
* "time": "Fri Mar 26 00:00:00 2024" | ||
* } | ||
* } | ||
* | ||
* @param tinyDbFile TinyDB file, output of the migration script run to migrate legacy UDs into VDI. | ||
* @return Map of legacy UD Ids to VDI Ids. | ||
*/ | ||
private Map<String, String> readLegacyStudyIdToVdiId(File tinyDbFile) { | ||
try { | ||
JsonNode root = JsonUtil.Jackson.readTree(tinyDbFile); | ||
JsonNode dbRoot = root.get("_default"); | ||
|
||
Map<String, String> mapping = new HashMap<>(); | ||
Iterator<Map.Entry<String, JsonNode>> fieldIterator = dbRoot.fields(); | ||
|
||
// Iterate through each field in the "_default" node. | ||
// Ignore the numeric index keys and extract the udId and vdiId fields to create mapping. | ||
while (fieldIterator.hasNext()) { | ||
Map.Entry<String, JsonNode> entry = fieldIterator.next(); | ||
mapping.put(entry.getValue().get("udId").asText(), entry.getValue().get("vdiId").asText()); | ||
} | ||
|
||
LOG.info("Extracted a mapping of " + mapping.size() + " legacy to VDI identifiers."); | ||
return mapping; | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
private enum CliArg { | ||
|
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; | ||
|
||
import org.gusdb.wdk.model.WdkModel; | ||
import org.gusdb.wdk.model.fix.table.TableRowInterfaces; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; | ||
import org.json.JSONObject; | ||
import org.junit.Assert; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
import org.mockito.Mockito; | ||
|
||
import java.io.File; | ||
import java.util.List; | ||
import java.util.Objects; | ||
|
||
public class VDIMigrationPluginTest { | ||
private WdkModel mockedModel; | ||
private ClassLoader classLoader; | ||
|
||
@Before | ||
public void setup() { | ||
classLoader = getClass().getClassLoader(); | ||
mockedModel = Mockito.mock(WdkModel.class); | ||
} | ||
|
||
@Test | ||
public void test() throws Exception { | ||
final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); | ||
final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); | ||
final List<String> args = List.of("--tinyDb=" + file.getPath()); | ||
migrationPlugin.configure(mockedModel, args); | ||
TableRowInterfaces.RowResult<AnalysisRow> result = migrationPlugin.processRecord( | ||
new AnalysisRow("x", | ||
"EDAUD_1234", | ||
new JSONObject(), | ||
3, | ||
4, | ||
5)); | ||
Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So maybe also check that update flag is true here. And can add a case where it should not be true. |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"_default": { | ||
"1": { | ||
"type": "owner", | ||
"udId": 1234, | ||
"vdiId": "123XyZ", | ||
"msg": null, | ||
"time": "Fri Mar 26 00:00:00 2024" | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably want AtomicInteger here. I don't think I've consistently used it in the past and it's sorta been ok, but...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Forgot the executor was multi-threaded, fixed