Skip to content

Commit

Permalink
Merge pull request #70 from VEuPathDB/add-differential-expression
Browse files Browse the repository at this point in the history
add differential expression
  • Loading branch information
asizemore authored Dec 5, 2024
2 parents 5e7b101 + 078a499 commit 66c7075
Show file tree
Hide file tree
Showing 12 changed files with 415 additions and 4 deletions.
38 changes: 38 additions & 0 deletions api.raml
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,29 @@ securitySchemes:
body:
application/json: lib.DifferentialAbundanceStatsResponse

# Differential expression compute resource.
# POST submits a compute job described by a DifferentialExpressionPluginRequest
# and responds with a JobResponse.
/differentialexpression:
post:
queryParameters:
# Optional boolean query parameter; defaults to true when omitted.
# The service implementation forwards it to submitJob.
# NOTE(review): presumably controls whether the job starts immediately - confirm.
autostart:
type: boolean
required: false
default: true
body:
application/json: lib.DifferentialExpressionPluginRequest
responses:
200:
body:
application/json: lib.JobResponse
# Statistics sub-resource: takes the same request body as the job submission
# endpoint and responds with a DifferentialExpressionStatsResponse.
/statistics:
post:
body:
application/json: lib.DifferentialExpressionPluginRequest
responses:
200:
body:
application/json: lib.DifferentialExpressionStatsResponse


/correlation:
post:
queryParameters:
Expand Down Expand Up @@ -1276,6 +1299,21 @@ securitySchemes:
application/json:
type: lib.DifferentialAbundanceStatsResponse

# Visualization endpoints that sit on top of the differential expression compute.
/differentialexpression/visualizations:
displayName: Visualizations for interrogating differentially expressed genes

# Volcano plot: the request carries the differential expression compute config,
# and the response reuses the compute's statistics response type.
/volcanoplot:
post:
description: Returns data required to create a volcanoplot from a differential expression analysis.
body:
application/json:
type: lib.DifferentialExpressionVolcanoplotPostRequest
responses:
200:
body:
application/json:
type: lib.DifferentialExpressionStatsResponse

/correlation/visualizations:
displayName: Visualizations for discovering correlations between various assay data

Expand Down
44 changes: 42 additions & 2 deletions schema/library.raml
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,7 @@ types:
type: object
properties:
collectionVariable: CollectionSpec
comparator: ComparatorSpec
comparator: DifferentialAbundanceComparatorSpec
differentialAbundanceMethod: DifferentialAbundanceMethod
pValueFloor:
type: string
Expand All @@ -1159,7 +1159,7 @@ types:
- DESeq
- ANCOMBC
- Maaslin
ComparatorSpec:
DifferentialAbundanceComparatorSpec:
type: object
properties:
variable: VariableSpec
Expand All @@ -1177,6 +1177,41 @@ types:
properties:
effectSizeLabel: string
statistics: DifferentialAbundanceStats
# Request body for the differential expression compute: a ComputeRequestBase
# extended with a differential-expression-specific `config`.
DifferentialExpressionPluginRequest:
type: ComputeRequestBase
properties:
config: DifferentialExpressionComputeConfig
# Configuration of a single differential expression run.
#   collectionVariable            - the collection to analyze
#   comparator                    - the variable and the two groups (A and B) to compare
#   differentialExpressionMethod  - which method to run
#   pValueFloor                   - optional, carried as a string; the compute plugin
#                                   falls back to "1e-200" when it is omitted
DifferentialExpressionComputeConfig:
type: object
properties:
collectionVariable: CollectionSpec
comparator: ComparatorSpec
differentialExpressionMethod: DifferentialExpressionMethod
pValueFloor:
type: string
required: false
# Supported methods; DESeq is currently the only allowed value.
DifferentialExpressionMethod:
type: string
enum:
- DESeq
# Comparison definition: one variable plus two lists of labeled ranges,
# one list per group.
ComparatorSpec:
type: object
properties:
variable: VariableSpec
groupA: LabeledRange[]
groupB: LabeledRange[]
# One entry of the statistics result. Every field, including the numeric-looking
# ones, is declared as a string.
DifferentialExpressionPoint:
type: object
properties:
effectSize: string
pValue: string
adjustedPValue: string
pointId: string
# Statistics response: a label for the effect size plus the list of points.
DifferentialExpressionStatsResponse:
type: object
properties:
effectSizeLabel: string
statistics: DifferentialExpressionPoint[]
ExamplePluginRequest:
type: ComputeRequestBase
properties:
Expand Down Expand Up @@ -1383,6 +1418,11 @@ types:
properties:
computeConfig: DifferentialAbundanceComputeConfig
config: EmptyDataPluginSpec
# Request body for the differential expression volcano plot endpoint.
#   computeConfig - the same config type used by the differential expression compute
#   config        - EmptyDataPluginSpec
#                   NOTE(review): presumably means the plot has no options of its own - confirm.
DifferentialExpressionVolcanoplotPostRequest:
type: DataPluginRequestBase
properties:
computeConfig: DifferentialExpressionComputeConfig
config: EmptyDataPluginSpec
ContinuousVariableMetadataPostRequest:
type: DataPluginRequestBase
additionalProperties: false
Expand Down
4 changes: 2 additions & 2 deletions schema/url/compute/computes/differentialAbundance.raml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ types:
DifferentialAbundanceComputeConfig:
properties:
collectionVariable: CollectionSpec
comparator: ComparatorSpec
comparator: DifferentialAbundanceComparatorSpec
differentialAbundanceMethod: DifferentialAbundanceMethod
pValueFloor:
type: string
Expand All @@ -20,7 +20,7 @@ types:
type: string
enum: ['DESeq','ANCOMBC','Maaslin']

ComparatorSpec:
DifferentialAbundanceComparatorSpec:
type: object
properties:
variable: VariableSpec
Expand Down
40 changes: 40 additions & 0 deletions schema/url/compute/computes/differentialExpression.raml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#%RAML 1.0 Library

types:

# Request body for the differential expression compute: a ComputeRequestBase
# extended with a differential-expression-specific `config`.
DifferentialExpressionPluginRequest:
type: ComputeRequestBase
properties:
config: DifferentialExpressionComputeConfig

# Configuration of a single differential expression run.
#   collectionVariable            - the collection to analyze
#   comparator                    - the variable and the two groups (A and B) to compare
#   differentialExpressionMethod  - which method to run
#   pValueFloor                   - optional, carried as a string; the compute plugin
#                                   falls back to "1e-200" when it is omitted
DifferentialExpressionComputeConfig:
properties:
collectionVariable: CollectionSpec
comparator: ComparatorSpec
differentialExpressionMethod: DifferentialExpressionMethod
pValueFloor:
type: string
required: false

# Supported methods; DESeq is currently the only allowed value.
DifferentialExpressionMethod:
type: string
enum: ['DESeq']

# Comparison definition: one variable plus two lists of labeled ranges,
# one list per group.
ComparatorSpec:
type: object
properties:
variable: VariableSpec
groupA: LabeledRange[]
groupB: LabeledRange[]

# One entry of the statistics result. Every field, including the numeric-looking
# ones, is declared as a string.
DifferentialExpressionPoint:
properties:
effectSize: string
pValue: string
adjustedPValue: string
pointId: string

# Statistics response: a label for the effect size plus the list of points.
DifferentialExpressionStatsResponse:
properties:
effectSizeLabel: string
statistics: DifferentialExpressionPoint[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#%RAML 1.0 Library

types:

# Request body for the differential expression volcano plot endpoint.
#   computeConfig - the same config type used by the differential expression compute
#   config        - EmptyDataPluginSpec
#                   NOTE(review): presumably means the plot has no options of its own - confirm.
DifferentialExpressionVolcanoplotPostRequest:
type: DataPluginRequestBase
properties:
computeConfig: DifferentialExpressionComputeConfig
config: EmptyDataPluginSpec
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.veupathdb.service.eda.compute.plugins.betadiv.BetaDivPluginProvider;
import org.veupathdb.service.eda.compute.plugins.correlation.CorrelationPluginProvider;
import org.veupathdb.service.eda.compute.plugins.differentialabundance.DifferentialAbundancePluginProvider;
import org.veupathdb.service.eda.compute.plugins.differentialexpression.DifferentialExpressionPluginProvider;
import org.veupathdb.service.eda.compute.plugins.example.ExamplePluginProvider;
import org.veupathdb.service.eda.compute.plugins.rankedabundance.RankedAbundancePluginProvider;
import org.veupathdb.service.eda.compute.plugins.selfcorrelation.SelfCorrelationPluginProvider;
Expand Down Expand Up @@ -133,6 +134,23 @@ public PostComputesDifferentialabundanceStatisticsResponse postComputesDifferent
}));
}

/**
 * Submits a differential expression compute job and wraps the value returned
 * by submitJob in a 200 application/json response.
 *
 * @param autostart value of the autostart query parameter, passed through to submitJob
 * @param entity    the differential expression compute request body
 */
@Override
public PostComputesDifferentialexpressionResponse postComputesDifferentialexpression(Boolean autostart, DifferentialExpressionPluginRequest entity) {
  DifferentialExpressionPluginProvider provider = new DifferentialExpressionPluginProvider();
  return PostComputesDifferentialexpressionResponse.respond200WithApplicationJson(
      submitJob(provider, entity, autostart));
}

/**
 * Responds with the STATISTICS result file of a differential expression job.
 * The file streamer is only looked up and written when the response stream's
 * callback runs; an IOException raised while writing is rethrown as a
 * RuntimeException.
 *
 * @param entity the differential expression compute request body identifying the job
 */
@Override
public PostComputesDifferentialexpressionStatisticsResponse postComputesDifferentialexpressionStatistics(DifferentialExpressionPluginRequest entity) {
  DifferentialExpressionStatsResponseStream statsStream = new DifferentialExpressionStatsResponseStream(out -> {
    try {
      getResultFileStreamer(new DifferentialExpressionPluginProvider(), STATISTICS, entity).write(out);
    }
    catch (IOException ioFailure) {
      // The callback cannot throw a checked exception, so wrap it.
      throw new RuntimeException(ioFailure);
    }
  });
  return PostComputesDifferentialexpressionStatisticsResponse.respond200WithApplicationJson(statsStream);
}

@Override
public PostComputesCorrelationResponse postComputesCorrelation(Boolean autostart, CorrelationPluginRequest entity) {
return PostComputesCorrelationResponse.respond200WithApplicationJson(submitJob(new CorrelationPluginProvider(), entity, autostart));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.veupathdb.service.eda.compute.plugins.example.ExamplePluginProvider;
import org.veupathdb.service.eda.compute.plugins.rankedabundance.RankedAbundancePluginProvider;
import org.veupathdb.service.eda.compute.plugins.differentialabundance.DifferentialAbundancePluginProvider;
import org.veupathdb.service.eda.compute.plugins.differentialexpression.DifferentialExpressionPluginProvider;
import org.veupathdb.service.eda.generated.model.ComputeRequestBase;
import org.veupathdb.service.eda.generated.model.PluginOverview;
import org.veupathdb.service.eda.generated.model.PluginOverviewImpl;
Expand Down Expand Up @@ -50,6 +51,7 @@ public final class PluginRegistry {
new BetaDivPluginProvider(),
new RankedAbundancePluginProvider(),
new DifferentialAbundancePluginProvider(),
new DifferentialExpressionPluginProvider(),
new CorrelationPluginProvider(),
new SelfCorrelationPluginProvider()
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package org.veupathdb.service.eda.compute.plugins.differentialexpression;

import org.gusdb.fgputil.ListBuilder;
import org.jetbrains.annotations.NotNull;
import org.veupathdb.service.eda.common.client.spec.StreamSpec;
import org.veupathdb.service.eda.common.model.CollectionDef;
import org.veupathdb.service.eda.common.model.EntityDef;
import org.veupathdb.service.eda.common.model.ReferenceMetadata;
import org.veupathdb.service.eda.common.model.VariableDef;
import org.veupathdb.service.eda.common.plugin.util.PluginUtil;
import org.veupathdb.service.eda.compute.RServe;
import org.veupathdb.service.eda.compute.plugins.AbstractPlugin;
import org.veupathdb.service.eda.compute.plugins.PluginContext;
import org.veupathdb.service.eda.generated.model.LabeledRange;
import org.veupathdb.service.eda.generated.model.DifferentialExpressionComputeConfig;
import org.veupathdb.service.eda.generated.model.DifferentialExpressionPluginRequest;
import org.veupathdb.service.eda.generated.model.CollectionSpec;
import org.veupathdb.service.eda.generated.model.VariableSpec;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import static org.veupathdb.service.eda.common.plugin.util.PluginUtil.singleQuote;

public class DifferentialExpressionPlugin extends AbstractPlugin<DifferentialExpressionPluginRequest, DifferentialExpressionComputeConfig> {

private static final String INPUT_DATA = "differential_expression_input";

public DifferentialExpressionPlugin(@NotNull PluginContext<DifferentialExpressionPluginRequest, DifferentialExpressionComputeConfig> context) {
super(context);
}

@NotNull
@Override
public List<StreamSpec> getStreamSpecs() {
return List.of(new StreamSpec(INPUT_DATA, getConfig().getCollectionVariable().getEntityId())
.addVars(getUtil().getCollectionMembers(getConfig().getCollectionVariable()))
.addVar(getConfig().getComparator().getVariable())
);
}

@Override
protected void execute() {

DifferentialExpressionComputeConfig computeConfig = getConfig();
PluginUtil util = getUtil();
ReferenceMetadata meta = getContext().getReferenceMetadata();

CollectionSpec collectionSpec = computeConfig.getCollectionVariable();
CollectionDef collection = meta.getCollection(collectionSpec).orElseThrow();
String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
String entityId = collectionSpec.getEntityId();
EntityDef entity = meta.getEntity(entityId).orElseThrow();
VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
String computeEntityIdColName = util.toColNameOrEmpty(computeEntityIdVarSpec);
String method = computeConfig.getDifferentialExpressionMethod().getValue().equals("DESeq") ? "DESeq" : "unknown";
VariableSpec comparisonVariableSpec = computeConfig.getComparator().getVariable();
String comparisonVariableDataShape = util.getVariableDataShape(comparisonVariableSpec);
List<LabeledRange> groupA = computeConfig.getComparator().getGroupA();
List<LabeledRange> groupB = computeConfig.getComparator().getGroupB();
String pValueFloor = computeConfig.getPValueFloor() != null ? computeConfig.getPValueFloor() : "1e-200"; // Same default as set in the frontend and microbiomeComputations

// Get record id columns
List<VariableDef> idColumns = new ArrayList<>();
for (EntityDef ancestor : meta.getAncestors(entity)) {
idColumns.add(ancestor.getIdColumnDef());
}

HashMap<String, InputStream> dataStream = new HashMap<>();
dataStream.put(INPUT_DATA, getWorkspace().openStream(INPUT_DATA));

RServe.useRConnectionWithRemoteFiles(dataStream, connection -> {
connection.voidEval("print('starting differential expression computation')");

// Read in the count data
List<VariableSpec> computeInputVars = ListBuilder.asList(computeEntityIdVarSpec);
computeInputVars.addAll(util.getCollectionMembers(collectionSpec));
computeInputVars.addAll(idColumns);
connection.voidEval(util.getVoidEvalFreadCommand(INPUT_DATA, computeInputVars));
connection.voidEval("countData <- " + INPUT_DATA); // Renaming here so we can go get the sampleMetadata later

// Read in the sample metadata
List<VariableSpec> sampleMetadataVars = ListBuilder.asList(comparisonVariableSpec);
sampleMetadataVars.add(computeEntityIdVarSpec);
connection.voidEval(util.getVoidEvalFreadCommand(INPUT_DATA, sampleMetadataVars));
connection.voidEval("sampleMetadata <- veupathUtils::SampleMetadata(data = " + INPUT_DATA
+ ", recordIdColumn = " + singleQuote(computeEntityIdColName)
+ ")");


// Turn the list of id columns into an array of strings for R
List<String> dotNotatedIdColumns = idColumns.stream().map(VariableDef::toDotNotation).toList();
StringBuilder dotNotatedIdColumnsString = new StringBuilder("c(");
boolean first = true;
for (String idCol : dotNotatedIdColumns) {
if (first) {
first = false;
dotNotatedIdColumnsString.append(singleQuote(idCol));
} else {
dotNotatedIdColumnsString.append(",").append(singleQuote(idCol));
}
}
dotNotatedIdColumnsString.append(")");

// Turn the comparator bin lists into a string for R
String rGroupA = util.getRBinListAsString(groupA);
String rGroupB = util.getRBinListAsString(groupB);


// Create the comparator and input data objects
connection.voidEval("comparator <- veupathUtils::Comparator(" +
"variable=veupathUtils::VariableMetadata(" +
"variableSpec=veupathUtils::VariableSpec(" +
"variableId='" + comparisonVariableSpec.getVariableId() + "'," +
"entityId='" + comparisonVariableSpec.getEntityId() + "')," +
"dataShape = veupathUtils::DataShape(value = '" + comparisonVariableDataShape + "')" +
")," +
"groupA=" + rGroupA + "," +
"groupB=" + rGroupB +
")");

// TEMPORARY HACK FOR TESTING
// We dont have rnaseq data loaded yet (to my knowledge) so we are going to use mbio data and
// just convert it to counts. This is a hack and should be removed when we have real data
connection.voidEval("taxaColNames <- names(countData[, -c('" + computeEntityIdColName + "', as.character(" + dotNotatedIdColumnsString + "))])");
connection.voidEval("countData[, (taxaColNames) := lapply(.SD,function(x) {round(x*1000)}), .SDcols=taxaColNames]");
// END OF TEMP FOR TESTING

connection.voidEval("countDataCollection <- veupathUtils::CountDataCollection(name=" + singleQuote(collectionMemberType) +
", data=countData" +
", sampleMetadata=sampleMetadata" +
", recordIdColumn=" + singleQuote(computeEntityIdColName) +
", ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +
", imputeZero=TRUE)");


connection.voidEval("computeResult <- veupathUtils::differentialExpression(" +
"collection=countDataCollection" +
", comparator=comparator" +
", method=" + singleQuote(method) +
", pValueFloor=as.numeric(" + singleQuote(pValueFloor) + ")" +
", verbose=TRUE)");


String statsCmd = "writeStatistics(computeResult, NULL, TRUE)";

getWorkspace().writeStatisticsResult(connection, statsCmd);
});
}
}
Loading

0 comments on commit 66c7075

Please sign in to comment.