Skip to content

Commit

Permalink
Merge pull request #83 from waterflow80/md-table-element-collection
Browse files Browse the repository at this point in the history
Added a metadata table - one-to-many relationship enhanced
  • Loading branch information
waterflow80 authored May 22, 2024
2 parents 07a38d8 + ea6e9e6 commit e9548b9
Show file tree
Hide file tree
Showing 12 changed files with 151 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,16 @@ public ResponseEntity<?> getSeqColByDigestAndLevel(
required = true) @PathVariable String digest,
@Parameter(name = "level",
description = "The desired output's level (1 or 2)",
example = "1") @RequestParam(required = false) String level) {
example = "1") @RequestParam(required = false) String level,
@Parameter(name = "metadata",
description = "A boolean value that indicates if we need the metadata of the given seqcol digest",
example = "true, 1, yes")
@RequestParam(required = false, defaultValue = "false") boolean metadata) {
if (metadata) {
return new ResponseEntity<>(
seqColService.getSeqColMetadataBySeqColDigest(digest), HttpStatus.OK
);
}
if (level == null) level = "none";
try {
switch (level) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ public abstract class SeqColEntity {

protected String digest; // The level 0 digest

protected NamingConvention namingConvention;


public enum NamingConvention {
ENA, GENBANK, UCSC, TEST
Expand Down
17 changes: 0 additions & 17 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColId.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,21 @@
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;

import javax.persistence.Basic;
import javax.persistence.CollectionTable;
import javax.persistence.Column;
import javax.persistence.ElementCollection;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.FetchType;
import javax.persistence.Id;
import javax.persistence.IdClass;
import javax.persistence.JoinColumn;
import javax.persistence.Table;
import java.util.HashSet;
import java.util.Set;

@Entity
@NoArgsConstructor
@Data
@Table(name = "sequence_collections_L1")
@IdClass(SeqColId.class)
public class SeqColLevelOneEntity extends SeqColEntity{

@Id
Expand All @@ -32,15 +33,19 @@ public class SeqColLevelOneEntity extends SeqColEntity{
@Basic(fetch = FetchType.LAZY)
private JSONLevelOne seqColLevel1Object;

@Id
@Column(nullable = false)
@Enumerated(EnumType.STRING)
protected NamingConvention namingConvention;
@ElementCollection(fetch = FetchType.LAZY)
@CollectionTable(name = "seqcol_md", joinColumns =
@JoinColumn(name = "digest", nullable = false, updatable = false))
private Set<SeqColMetadataEntity> metadata;

public SeqColLevelOneEntity(String digest, NamingConvention namingConvention, JSONLevelOne jsonLevelOne){
super(digest, namingConvention);
public SeqColLevelOneEntity(String digest, JSONLevelOne jsonLevelOne){
super(digest);
this.seqColLevel1Object = jsonLevelOne;
this.namingConvention = namingConvention;
}

/**
 * Add one metadata record to this level-one entity.
 * The backing set is created on first use, so callers never have to initialise it.
 */
public void addMetadata(SeqColMetadataEntity seqColMetadataEntity) {
    if (this.metadata == null) {
        this.metadata = new HashSet<>();
    }
    this.metadata.add(seqColMetadataEntity);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,4 @@ public SeqColLevelTwoEntity setDigest(String digest) {
this.digest = digest;
return this;
}

public SeqColLevelTwoEntity setNamingConvention(NamingConvention convention) {
this.namingConvention = convention;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package uk.ac.ebi.eva.evaseqcol.entities;

import lombok.Data;
import lombok.NoArgsConstructor;
import org.hibernate.annotations.CreationTimestamp;

import javax.persistence.Column;
import javax.persistence.Embeddable;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import java.util.Date;

/**
 * Embeddable metadata record attached to a sequence collection.
 * Its fields map to the columns source_id, source_url, naming_convention and created_on.
 * The three setters declared explicitly below are fluent (they return this) so that
 * instances can be built by chaining calls.
 * NOTE(review): presumably Lombok skips generating setters that are already declared here - confirm.
 */
@Data
@Embeddable
@NoArgsConstructor
public class SeqColMetadataEntity {

// Identifier of the source the collection was built from.
@Column(name = "source_id")
private String sourceIdentifier; // E.g. INSDC accession

// URL of the source; nullable at the column level.
@Column(name = "source_url")
private String sourceUrl;

// Stored as the enum constant's name (EnumType.STRING), not its ordinal.
@Enumerated(EnumType.STRING)
@Column(name = "naming_convention")
private SeqColEntity.NamingConvention namingConvention;

// Initialised to the instantiation time; the four-argument constructor below
// overwrites it with whatever value the caller supplies (including null).
@Column(name = "created_on", nullable = false)
@Temporal(TemporalType.TIMESTAMP)
@CreationTimestamp
private Date createdOn = new Date();

/**
 * Create a fully populated metadata record.
 *
 * @param sourceIdentifier identifier of the source (e.g. an INSDC accession)
 * @param sourceUrl URL of the source
 * @param namingConvention naming convention of the described collection
 * @param createdOn creation timestamp; stored as given, without copying
 */
public SeqColMetadataEntity(String sourceIdentifier, String sourceUrl, SeqColEntity.NamingConvention namingConvention,
Date createdOn) {
this.sourceIdentifier = sourceIdentifier;
this.sourceUrl = sourceUrl;
this.namingConvention = namingConvention;
this.createdOn = createdOn;
}

/** Fluent setter for the naming convention; returns this instance for chaining. */
public SeqColMetadataEntity setNamingConvention(SeqColEntity.NamingConvention namingConvention) {
this.namingConvention = namingConvention;
return this;
}

/** Fluent setter for the source identifier; returns this instance for chaining. */
public SeqColMetadataEntity setSourceIdentifier(String sourceIdentifier) {
this.sourceIdentifier = sourceIdentifier;
return this;
}

/** Fluent setter for the source URL; returns this instance for chaining. */
public SeqColMetadataEntity setSourceUrl(String sourceUrl) {
this.sourceUrl = sourceUrl;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package uk.ac.ebi.eva.evaseqcol.repo;

import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Repository;

import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;

import java.util.List;

@Repository
public interface SeqColLevelOneRepository extends JpaRepository<SeqColLevelOneEntity, String> {
SeqColLevelOneEntity findSeqColLevelOneEntityByDigest(String digest);
Expand All @@ -14,4 +17,10 @@ public interface SeqColLevelOneRepository extends JpaRepository<SeqColLevelOneEn
void removeSeqColLevelOneEntityByDigest(String digest);

void deleteAll();

/**
 * Fetch the metadata rows of the seqcol_md table that belong to the given digest.
 * Each returned array holds, in this order: source_id, source_url, naming_convention, created_on.
 *
 * @param digest value matched against the digest column of seqcol_md
 * @return one Object[] per matching row
 */
@Query(value = "select source_id, source_url, naming_convention, created_on from seqcol_md where digest = ?1", nativeQuery = true)
List<Object[]> findMetadataBySeqColDigest(String digest);

/**
 * Fetch every row of the seqcol_md table.
 * Each returned array holds, in this order: source_id, source_url, naming_convention, created_on.
 *
 * @return one Object[] per row; the query has no filter, so the whole table is read
 */
@Query(value = "select source_id, source_url, naming_convention, created_on from seqcol_md", nativeQuery = true)
List<Object[]> findAllMetadata();
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.digests.DigestCalculator;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.repo.SeqColLevelOneRepository;
import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;
import uk.ac.ebi.eva.evaseqcol.utils.JSONStringListExtData;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

@Service
public class SeqColLevelOneService {
Expand Down Expand Up @@ -62,12 +66,17 @@ public List<SeqColLevelOneEntity> getAllSeqColLevelOneObjects(){

/**
* Construct a seqCol level 1 entity out of three seqCol level 2 entities that
* hold names, lengths and sequences objects*/
* hold names, lengths and sequences objects
* TODO: Change the signature of this method and make it accept metadata object instead of namingconvention and source id*/
public SeqColLevelOneEntity constructSeqColLevelOne(List<SeqColExtendedDataEntity<List<String>>> stringListExtendedDataEntities,
List<SeqColExtendedDataEntity<List<Integer>>> integerListExtendedDataEntities,
SeqColEntity.NamingConvention convention) throws IOException {
SeqColEntity.NamingConvention convention, String sourceId) throws IOException {
SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity();
JSONLevelOne jsonLevelOne = new JSONLevelOne();
SeqColMetadataEntity metadata = new SeqColMetadataEntity()
.setNamingConvention(convention)
.setSourceIdentifier(sourceId);
levelOneEntity.addMetadata(metadata);

// Looping over List<String> types
for (SeqColExtendedDataEntity<List<String>> dataEntity: stringListExtendedDataEntities) {
Expand Down Expand Up @@ -99,14 +108,13 @@ public SeqColLevelOneEntity constructSeqColLevelOne(List<SeqColExtendedDataEntit
levelOneEntity.setSeqColLevel1Object(jsonLevelOne);
String digest0 = digestCalculator.getSha512Digest(levelOneEntity.toString());
levelOneEntity.setDigest(digest0);
levelOneEntity.setNamingConvention(convention);
return levelOneEntity;
}

/**
* Construct a Level 1 seqCol out of a Level 2 seqCol*/
public SeqColLevelOneEntity constructSeqColLevelOne(
SeqColLevelTwoEntity levelTwoEntity, SeqColEntity.NamingConvention convention) throws IOException {
SeqColLevelTwoEntity levelTwoEntity, SeqColEntity.NamingConvention convention, String sourceId) throws IOException {
DigestCalculator digestCalculator = new DigestCalculator();
JSONExtData<List<String>> sequencesExtData = new JSONStringListExtData(levelTwoEntity.getSequences());
JSONExtData<List<Integer>> lengthsExtData = new JSONIntegerListExtData(levelTwoEntity.getLengths());
Expand Down Expand Up @@ -151,7 +159,7 @@ public SeqColLevelOneEntity constructSeqColLevelOne(
lengthsExtEntity
);

return constructSeqColLevelOne(stringListExtendedDataEntities,integerListExtendedDataEntities, convention);
return constructSeqColLevelOne(stringListExtendedDataEntities,integerListExtendedDataEntities, convention, sourceId);
}

/**
Expand Down Expand Up @@ -208,4 +216,20 @@ public List<SeqColExtendedDataEntity<List<Integer>>> constructIntegerListExtData
return integerListExtendedDataEntities;
}

/**
 * Convert one row returned by the native metadata queries into a {@link SeqColMetadataEntity}.
 * Expected layout of the row: [0] source_id, [1] source_url, [2] naming_convention, [3] created_on.
 *
 * @param tuple a result row with at least four elements in the layout above
 * @return the metadata entity built from the row
 * @throws IllegalArgumentException if the stored naming convention is not a known enum constant
 */
public SeqColMetadataEntity transformToMetadataEntity(Object[] tuple) {
    // The naming_convention column is nullable, and valueOf(null) would throw a
    // NullPointerException, so a NULL column is mapped to a null convention instead.
    String conventionName = (String) tuple[2];
    SeqColEntity.NamingConvention convention =
            conventionName == null ? null : SeqColEntity.NamingConvention.valueOf(conventionName);
    return new SeqColMetadataEntity(
            (String) tuple[0],
            (String) tuple[1],
            convention,
            (Date) tuple[3]
    );
}

/**
 * Load every stored metadata row and convert each one into a {@link SeqColMetadataEntity}.
 *
 * @return the converted metadata records, one per stored row
 */
public List<SeqColMetadataEntity> getAllMetadata() {
    List<Object[]> rows = repository.findAllMetadata();
    return rows.stream()
            .map(row -> transformToMetadataEntity(row))
            .collect(Collectors.toList());
}

/**
 * Load the metadata rows stored for the given seqcol digest and convert each one
 * into a {@link SeqColMetadataEntity}.
 *
 * @param digest the seqcol digest whose metadata is requested
 * @return the converted metadata records; empty when no row matches
 */
public List<SeqColMetadataEntity> getMetadataBySeqcolDigest(String digest) {
    List<Object[]> rows = repository.findMetadataBySeqColDigest(digest);
    return rows.stream()
            .map(row -> transformToMetadataEntity(row))
            .collect(Collectors.toList());
}
}
18 changes: 15 additions & 3 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColExtendedDataEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyAlreadyIngestedException;
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.evaseqcol.exception.AttributeNotDefinedException;
Expand Down Expand Up @@ -124,6 +125,10 @@ public Optional<? extends SeqColEntity> getSeqColByDigestAndLevel(String digest,
}
}

/**
 * Return the metadata records stored for the given seqcol digest.
 * Delegates to the level-one service.
 *
 * @param digest the seqcol digest whose metadata is requested
 * @return the metadata records found for the digest
 */
public List<SeqColMetadataEntity> getSeqColMetadataBySeqColDigest(String digest) {
    List<SeqColMetadataEntity> metadataOfDigest = levelOneService.getMetadataBySeqcolDigest(digest);
    return metadataOfDigest;
}

/**
* Return the service info entity in a Map<String,Object> format
* @see 'https://seqcol.readthedocs.io/en/dev/specification/#21-service-info'
Expand Down Expand Up @@ -165,6 +170,13 @@ public IngestionResultEntity fetchAndInsertAllSeqColInFastaFile(String accession
* assembly report.
* Return the list of level 0 digests of the inserted seqcol objects*/
public IngestionResultEntity fetchAndInsertAllSeqColByAssemblyAccession(String assemblyAccession) throws IOException {
    // Refuse to ingest an assembly whose accession is already recorded as a source id.
    // The comparison is made from the accession's side because source_id is a nullable
    // column: calling equals() on a null stored identifier would throw a NullPointerException.
    // NOTE(review): this loads every metadata row just to test for existence; a dedicated
    // repository query on source_id would avoid reading the whole table.
    boolean sourceIdExists = levelOneService.getAllMetadata().stream()
            .anyMatch(md -> assemblyAccession != null
                    && assemblyAccession.equals(md.getSourceIdentifier()));
    if (sourceIdExists) {
        logger.warn("Seqcol objects for assembly " + assemblyAccession + " have been already ingested... Nothing to ingest !");
        throw new AssemblyAlreadyIngestedException(assemblyAccession);
    }
    // Not ingested yet: fetch the extended data from the NCBI data source, then build and insert the seqcols.
    Optional<Map<String, Object>> seqColDataMap = ncbiSeqColDataSource.getAllPossibleSeqColExtendedData(assemblyAccession);
    return createSeqColObjectsAndInsert(seqColDataMap, assemblyAccession);
}
Expand Down Expand Up @@ -206,8 +218,8 @@ public IngestionResultEntity createSeqColObjectsAndInsert(Optional<Map<String, O

// Constructing seqCol Level One object
SeqColLevelOneEntity levelOneEntity = levelOneService.constructSeqColLevelOne(
seqColStringListExtDataEntities, seqColIntegerListExtDataEntities, extendedNamesEntity.getNamingConvention()
);
seqColStringListExtDataEntities, seqColIntegerListExtDataEntities, extendedNamesEntity.getNamingConvention(),
assemblyAccession);

try {
Optional<String> seqColDigest = insertSeqColL1AndL2( // TODO: Check for possible self invocation problem
Expand All @@ -227,7 +239,7 @@ public IngestionResultEntity createSeqColObjectsAndInsert(Optional<Map<String, O
}
}
if (ingestionResultEntity.getNumberOfInsertedSeqcols() == 0) {
logger.warn("Seqcol objects for assembly " + assemblyAccession + " has been already ingested");
logger.warn("Seqcol objects for assembly " + assemblyAccession + " have been already ingested");
throw new AssemblyAlreadyIngestedException(assemblyAccession);
} else {
return ingestionResultEntity;
Expand Down
10 changes: 8 additions & 2 deletions src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;

import java.util.Arrays;
Expand All @@ -14,20 +15,26 @@
@Component
public class SeqColGenerator {

private final String GCA_ACCESSION = "GCA_000146045.2";

/**
 * Return an example (might not be real) of a seqCol object level 1.
 * The entity carries one metadata record (GENBANK naming convention, the
 * GCA_ACCESSION source identifier and a test source URL), a level-one JSON
 * object filled with hard-coded digests, and a hard-coded level 0 digest.
 * The naming convention is set to GENBANK as an arbitrary choice.
 *
 * @return the populated level-one test entity
 */
public SeqColLevelOneEntity generateLevelOneEntity() {
SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity();
JSONLevelOne jsonLevelOne = new JSONLevelOne();
// Build the metadata record with the fluent setters and attach it to the entity.
SeqColMetadataEntity metadata = new SeqColMetadataEntity()
.setNamingConvention(SeqColEntity.NamingConvention.GENBANK)
.setSourceIdentifier(GCA_ACCESSION)
.setSourceUrl("https://test.ncbi.datasourece.uk");
levelOneEntity.addMetadata(metadata);
// Hard-coded attribute digests of the level-one object.
jsonLevelOne.setNames("mfxUkK3J5y7BGVW7hJWcJ3erxuaMX6xm");
jsonLevelOne.setSequences("dda3Kzi1Wkm2A8I99WietU1R8J4PL-D6");
jsonLevelOne.setLengths("Ms_ixPgQMJaM54dVntLWeovXSO7ljvZh");
jsonLevelOne.setMd5DigestsOfSequences("_6iaYtcWw4TZaowlL7_64Wu9mbHpDUw4");
jsonLevelOne.setSortedNameLengthPairs("QFuKs5Hh8uQwwUtnRxIf8W3zeJoFOp8Z");
levelOneEntity.setSeqColLevel1Object(jsonLevelOne);
levelOneEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq");
// NOTE(review): the naming convention is already carried by the metadata record above;
// confirm the level-one entity still exposes setNamingConvention before keeping this call.
levelOneEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK);
return levelOneEntity;
}

Expand Down Expand Up @@ -128,7 +135,6 @@ public SeqColLevelTwoEntity generateLevelTwoEntity() {
"YfHZgnpuJm4SN3RN4XL1VWWWZwTXtqw5"
));
levelTwoEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq");
levelTwoEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK);
return levelTwoEntity;
}
}
4 changes: 2 additions & 2 deletions src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ public void create() throws IOException {
extendedIntegerListDataEntitiesUcsc =
(List<SeqColExtendedDataEntity<List<Integer>>>) ucscExtendedDataMap.get("integerListExtDataList");
levelOneEntityUcsc = levelOneService.constructSeqColLevelOne(
extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc, SeqColEntity.NamingConvention.UCSC);
extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc, SeqColEntity.NamingConvention.UCSC, GCA_ACCESSION);
Optional<String> resultDigestUcsc = seqColService.addFullSequenceCollection(
levelOneEntityUcsc, extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc);
if (resultDigestUcsc.isPresent()) {
Expand All @@ -163,7 +163,7 @@ public void create() throws IOException {
extendedIntegerListDataEntitiesGenbank = (List<SeqColExtendedDataEntity<List<Integer>>>) genbankExtendedDataMap.get("integerListExtDataList");

levelOneEntityGenbank = levelOneService.constructSeqColLevelOne(
extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank, SeqColEntity.NamingConvention.GENBANK);
extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank, SeqColEntity.NamingConvention.GENBANK, GCA_ACCESSION);
Optional<String> resultDigestGenbank = seqColService.addFullSequenceCollection(
levelOneEntityGenbank, extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank);
if (resultDigestGenbank.isPresent()) {
Expand Down
Loading

0 comments on commit e9548b9

Please sign in to comment.