Skip to content

Commit

Permalink
Implemented the one-to-many relationship for the metadata table using the…
Browse files Browse the repository at this point in the history
  • Loading branch information
waterflow80 committed Apr 20, 2024
1 parent 07a38d8 commit 51a95ac
Show file tree
Hide file tree
Showing 12 changed files with 146 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ public ResponseEntity<?> getSeqColByDigestAndLevel(
return new ResponseEntity<>(HttpStatus.NOT_FOUND);
}

/**
 * Return the metadata the seqCol service holds for the given digest.
 * The response status is always 200 (OK); the body is whatever the service returned.
 *
 * @param digest the digest taken from the request path
 */
@GetMapping(value = "/collection/{digest}/metadata")
public ResponseEntity<?> getSeqColByDigestAndLevelMetadata(@PathVariable String digest) {
    Object metadataBody = seqColService.getSeqColMetadataBySeqColDigest(digest);
    return new ResponseEntity<>(metadataBody, HttpStatus.OK);
}

@GetMapping("/service-info")
public ResponseEntity<?> getServiceInfo() {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ public abstract class SeqColEntity {

protected String digest; // The level 0 digest

protected NamingConvention namingConvention;


public enum NamingConvention {
ENA, GENBANK, UCSC, TEST
Expand Down
17 changes: 0 additions & 17 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColId.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,21 @@
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;

import javax.persistence.Basic;
import javax.persistence.CollectionTable;
import javax.persistence.Column;
import javax.persistence.ElementCollection;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.FetchType;
import javax.persistence.Id;
import javax.persistence.IdClass;
import javax.persistence.JoinColumn;
import javax.persistence.Table;
import java.util.HashSet;
import java.util.Set;

@Entity
@NoArgsConstructor
@Data
@Table(name = "sequence_collections_L1")
@IdClass(SeqColId.class)
public class SeqColLevelOneEntity extends SeqColEntity{

@Id
Expand All @@ -32,15 +33,19 @@ public class SeqColLevelOneEntity extends SeqColEntity{
@Basic(fetch = FetchType.LAZY)
private JSONLevelOne seqColLevel1Object;

@Id
@Column(nullable = false)
@Enumerated(EnumType.STRING)
protected NamingConvention namingConvention;
@ElementCollection(fetch = FetchType.LAZY)
@CollectionTable(name = "seqcol_md", joinColumns =
@JoinColumn(name = "digest", nullable = false, updatable = false))
private Set<SeqColMetadataEntity> metadata;

public SeqColLevelOneEntity(String digest, NamingConvention namingConvention, JSONLevelOne jsonLevelOne){
super(digest, namingConvention);
public SeqColLevelOneEntity(String digest, JSONLevelOne jsonLevelOne){
super(digest);
this.seqColLevel1Object = jsonLevelOne;
this.namingConvention = namingConvention;
}

/**
 * Attach one metadata entry to this seqCol.
 * The backing set is created on first use, so callers never have to initialise it.
 */
public void addMetadata(SeqColMetadataEntity seqColMetadataEntity){
    Set<SeqColMetadataEntity> entries = metadata;
    if (entries == null) {
        entries = new HashSet<>();
        metadata = entries;
    }
    entries.add(seqColMetadataEntity);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,4 @@ public SeqColLevelTwoEntity setDigest(String digest) {
this.digest = digest;
return this;
}

/**
 * Fluent setter: store the naming convention on this entity and return the
 * entity itself so that calls can be chained.
 * NOTE(review): the hunk counts shown for this file (-24,9 +24,4) suggest this
 * method is among the lines the commit deletes — confirm before relying on it.
 */
public SeqColLevelTwoEntity setNamingConvention(NamingConvention convention) {
this.namingConvention = convention;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package uk.ac.ebi.eva.evaseqcol.entities;

import lombok.Data;
import org.hibernate.annotations.CreationTimestamp;

import javax.persistence.Column;
import javax.persistence.Embeddable;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import java.util.Date;

/**
 * Embeddable value object holding the metadata of a seqCol: where it came from
 * (source identifier and url), which naming convention it uses and a timestamp.
 * It is stored as an element of the {@code seqcol_md} collection table declared
 * on SeqColLevelOneEntity.
 *
 * Lombok's {@code @Data} supplies the accessors that are not written out below
 * (for instance {@code getSourceIdentifier()} and {@code setTimestamp(Date)}).
 * The three explicit setters return {@code this} so that they can be chained.
 */
@Data
@Embeddable
public class SeqColMetadataEntity {

// Identifier of the source the seqCol was built from; mapped to column "source_id".
@Column(name = "source_id")
private String sourceIdentifier; // Eg: INSDC Accession

// Url of the source; mapped to column "source_url".
@Column(name = "source_url")
private String sourceUrl;

// Persisted as the enum constant's name (EnumType.STRING) in column "naming_convention".
@Enumerated(EnumType.STRING)
@Column(name = "naming_convention")
private SeqColEntity.NamingConvention namingConvention;

// Never updated once written; the column DDL defaults to CURRENT_TIMESTAMP.
// NOTE(review): @CreationTimestamp is presumably meant to fill this at insert time —
// confirm that it is honoured for a field of an @Embeddable.
@Column(name = "timestamp", updatable = false, columnDefinition="TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
@Temporal(TemporalType.TIMESTAMP)
@CreationTimestamp
private Date timestamp;

/**
 * Set the naming convention.
 * @return this object, to allow chaining
 */
public SeqColMetadataEntity setNamingConvention(SeqColEntity.NamingConvention namingConvention) {
this.namingConvention = namingConvention;
return this;
}

/**
 * Set the source identifier.
 * @return this object, to allow chaining
 */
public SeqColMetadataEntity setSourceIdentifier(String sourceIdentifier) {
this.sourceIdentifier = sourceIdentifier;
return this;
}

/**
 * Set the source url.
 * @return this object, to allow chaining
 */
public SeqColMetadataEntity setSourceUrl(String sourceUrl) {
this.sourceUrl = sourceUrl;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package uk.ac.ebi.eva.evaseqcol.repo;

import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Repository;

import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;

import java.util.List;

@Repository
public interface SeqColLevelOneRepository extends JpaRepository<SeqColLevelOneEntity, String> {
Expand All @@ -14,4 +18,10 @@ public interface SeqColLevelOneRepository extends JpaRepository<SeqColLevelOneEn
void removeSeqColLevelOneEntityByDigest(String digest);

void deleteAll();

/**
 * Native query over the {@code seqcol_md} table returning the rows whose
 * {@code digest} column equals the given digest.
 * Each returned array holds, in this order: source_id, source_url,
 * naming_convention, timestamp.
 *
 * @param digest value bound to the query's first positional parameter
 */
@Query(value = "select source_id, source_url, naming_convention, timestamp from seqcol_md where digest = ?1", nativeQuery = true)
List<Object[]> findMetadataBySeqColDigest(String digest);

/**
 * Native query returning every row of the {@code seqcol_md} table.
 * Each returned array holds, in this order: source_id, source_url,
 * naming_convention, timestamp.
 */
@Query(value = "select source_id, source_url, naming_convention, timestamp from seqcol_md", nativeQuery = true)
List<Object[]> findAllMetadata();
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.digests.DigestCalculator;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.repo.SeqColLevelOneRepository;
import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;
import uk.ac.ebi.eva.evaseqcol.utils.JSONStringListExtData;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -62,12 +65,17 @@ public List<SeqColLevelOneEntity> getAllSeqColLevelOneObjects(){

/**
* Construct a seqCol level 1 entity out of three seqCol level 2 entities that
* hold names, lengths and sequences objects*/
* hold names, lengths and sequences objects
* TODO: Change the signature of this method so that it accepts a metadata object instead of the naming convention and source id*/
public SeqColLevelOneEntity constructSeqColLevelOne(List<SeqColExtendedDataEntity<List<String>>> stringListExtendedDataEntities,
List<SeqColExtendedDataEntity<List<Integer>>> integerListExtendedDataEntities,
SeqColEntity.NamingConvention convention) throws IOException {
SeqColEntity.NamingConvention convention, String sourceId) throws IOException {
SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity();
JSONLevelOne jsonLevelOne = new JSONLevelOne();
SeqColMetadataEntity metadata = new SeqColMetadataEntity()
.setNamingConvention(convention)
.setSourceIdentifier(sourceId);
levelOneEntity.addMetadata(metadata);

// Looping over List<String> types
for (SeqColExtendedDataEntity<List<String>> dataEntity: stringListExtendedDataEntities) {
Expand Down Expand Up @@ -99,14 +107,13 @@ public SeqColLevelOneEntity constructSeqColLevelOne(List<SeqColExtendedDataEntit
levelOneEntity.setSeqColLevel1Object(jsonLevelOne);
String digest0 = digestCalculator.getSha512Digest(levelOneEntity.toString());
levelOneEntity.setDigest(digest0);
levelOneEntity.setNamingConvention(convention);
return levelOneEntity;
}

/**
* Construct a Level 1 seqCol out of a Level 2 seqCol*/
public SeqColLevelOneEntity constructSeqColLevelOne(
SeqColLevelTwoEntity levelTwoEntity, SeqColEntity.NamingConvention convention) throws IOException {
SeqColLevelTwoEntity levelTwoEntity, SeqColEntity.NamingConvention convention, String sourceId) throws IOException {
DigestCalculator digestCalculator = new DigestCalculator();
JSONExtData<List<String>> sequencesExtData = new JSONStringListExtData(levelTwoEntity.getSequences());
JSONExtData<List<Integer>> lengthsExtData = new JSONIntegerListExtData(levelTwoEntity.getLengths());
Expand Down Expand Up @@ -151,7 +158,7 @@ public SeqColLevelOneEntity constructSeqColLevelOne(
lengthsExtEntity
);

return constructSeqColLevelOne(stringListExtendedDataEntities,integerListExtendedDataEntities, convention);
return constructSeqColLevelOne(stringListExtendedDataEntities,integerListExtendedDataEntities, convention, sourceId);
}

/**
Expand Down Expand Up @@ -208,4 +215,28 @@ public List<SeqColExtendedDataEntity<List<Integer>>> constructIntegerListExtData
return integerListExtendedDataEntities;
}

/**
 * Convert raw metadata rows into {@link SeqColMetadataEntity} objects.
 * Every row is read positionally: [0] source identifier, [1] source url,
 * [2] naming convention name, [3] timestamp.
 *
 * @param metadataArray the rows to convert
 * @return one metadata entity per row, in the same order as the rows
 */
public List<SeqColMetadataEntity> metadataObjectArrayListToMetadataList(List<Object[]> metadataArray) {
    List<SeqColMetadataEntity> converted = new ArrayList<>();
    for (Object[] row : metadataArray) {
        String sourceId = (String) row[0];
        String sourceUrl = (String) row[1];
        // The convention is resolved before the timestamp cast, as in the row order
        SeqColEntity.NamingConvention convention =
                SeqColEntity.NamingConvention.valueOf((String) row[2]);
        Date createdAt = (Date) row[3];

        SeqColMetadataEntity entity = new SeqColMetadataEntity()
                .setSourceIdentifier(sourceId)
                .setSourceUrl(sourceUrl)
                .setNamingConvention(convention);
        entity.setTimestamp(createdAt);
        converted.add(entity);
    }
    return converted;
}

/**
 * Read every metadata row from the repository and return them as metadata entities.
 */
public List<SeqColMetadataEntity> getAllMetadata() {
    return metadataObjectArrayListToMetadataList(repository.findAllMetadata());
}

/**
 * Read the metadata rows stored for the given digest and return them as metadata entities.
 */
public List<SeqColMetadataEntity> getMetadataBySeqcolDigest(String digest) {
    return metadataObjectArrayListToMetadataList(repository.findMetadataBySeqColDigest(digest));
}
}
18 changes: 15 additions & 3 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColExtendedDataEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyAlreadyIngestedException;
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.evaseqcol.exception.AttributeNotDefinedException;
Expand Down Expand Up @@ -124,6 +125,10 @@ public Optional<? extends SeqColEntity> getSeqColByDigestAndLevel(String digest,
}
}

/**
 * Return the metadata entries of the seqCol with the given digest,
 * as provided by the level one service.
 */
public List<SeqColMetadataEntity> getSeqColMetadataBySeqColDigest(String digest) {
    List<SeqColMetadataEntity> metadataEntries = levelOneService.getMetadataBySeqcolDigest(digest);
    return metadataEntries;
}

/**
* Return the service info entity in a Map<String,Object> format
* @see 'https://seqcol.readthedocs.io/en/dev/specification/#21-service-info'
Expand Down Expand Up @@ -165,6 +170,13 @@ public IngestionResultEntity fetchAndInsertAllSeqColInFastaFile(String accession
* assembly report.
* Return the list of level 0 digests of the inserted seqcol objects*/
public IngestionResultEntity fetchAndInsertAllSeqColByAssemblyAccession(String assemblyAccession) throws IOException {
    // Refuse to ingest an accession that is already recorded as the source
    // identifier of a stored metadata entry.
    boolean sourceIdExists = levelOneService.getAllMetadata().stream()
            .map(SeqColMetadataEntity::getSourceIdentifier)
            // source_id is a nullable column: skip entries without an identifier
            // instead of failing with a NullPointerException.
            .anyMatch(sourceId -> sourceId != null && sourceId.equals(assemblyAccession));
    if (sourceIdExists) {
        logger.warn("Seqcol objects for assembly " + assemblyAccession + " have been already ingested. Nothing to ingest !");
        throw new AssemblyAlreadyIngestedException(assemblyAccession);
    }
    // Not ingested yet: fetch the extended data from the NCBI data source, then build and insert the seqCols.
    Optional<Map<String, Object>> seqColDataMap = ncbiSeqColDataSource.getAllPossibleSeqColExtendedData(assemblyAccession);
    return createSeqColObjectsAndInsert(seqColDataMap, assemblyAccession);
}
Expand Down Expand Up @@ -206,8 +218,8 @@ public IngestionResultEntity createSeqColObjectsAndInsert(Optional<Map<String, O

// Constructing seqCol Level One object
SeqColLevelOneEntity levelOneEntity = levelOneService.constructSeqColLevelOne(
seqColStringListExtDataEntities, seqColIntegerListExtDataEntities, extendedNamesEntity.getNamingConvention()
);
seqColStringListExtDataEntities, seqColIntegerListExtDataEntities, extendedNamesEntity.getNamingConvention(),
assemblyAccession);

try {
Optional<String> seqColDigest = insertSeqColL1AndL2( // TODO: Check for possible self invocation problem
Expand All @@ -227,7 +239,7 @@ public IngestionResultEntity createSeqColObjectsAndInsert(Optional<Map<String, O
}
}
if (ingestionResultEntity.getNumberOfInsertedSeqcols() == 0) {
logger.warn("Seqcol objects for assembly " + assemblyAccession + " has been already ingested");
logger.warn("Seqcol objects for assembly " + assemblyAccession + " have been already ingested");
throw new AssemblyAlreadyIngestedException(assemblyAccession);
} else {
return ingestionResultEntity;
Expand Down
10 changes: 8 additions & 2 deletions src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataEntity;
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;

import java.util.Arrays;
Expand All @@ -14,20 +15,26 @@
@Component
public class SeqColGenerator {

private final String GCA_ACCESSION = "GCA_000146045.2";

/**
 * Return an example (might not be real) of a seqCol object level 1.
 * The entity gets one metadata entry (GENBANK naming convention, chosen arbitrarily,
 * the GCA_ACCESSION source identifier and a test source url), a level 1 JSON object
 * filled with hard-coded attribute digests, and a hard-coded level 0 digest.
 */
public SeqColLevelOneEntity generateLevelOneEntity() {
SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity();
JSONLevelOne jsonLevelOne = new JSONLevelOne();
// Metadata entry describing the (fake) origin of this seqCol
SeqColMetadataEntity metadata = new SeqColMetadataEntity()
.setNamingConvention(SeqColEntity.NamingConvention.GENBANK)
.setSourceIdentifier(GCA_ACCESSION)
.setSourceUrl("https://test.ncbi.datasourece.uk");
levelOneEntity.addMetadata(metadata);
// Hard-coded digests of the individual seqCol attributes
jsonLevelOne.setNames("mfxUkK3J5y7BGVW7hJWcJ3erxuaMX6xm");
jsonLevelOne.setSequences("dda3Kzi1Wkm2A8I99WietU1R8J4PL-D6");
jsonLevelOne.setLengths("Ms_ixPgQMJaM54dVntLWeovXSO7ljvZh");
jsonLevelOne.setMd5DigestsOfSequences("_6iaYtcWw4TZaowlL7_64Wu9mbHpDUw4");
jsonLevelOne.setSortedNameLengthPairs("QFuKs5Hh8uQwwUtnRxIf8W3zeJoFOp8Z");
levelOneEntity.setSeqColLevel1Object(jsonLevelOne);
levelOneEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq");
// NOTE(review): the hunk counts for this file (-14,20 +15,26) suggest the next line is one the
// commit removes, the convention being carried by the metadata entry instead — confirm.
levelOneEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK);
return levelOneEntity;
}

Expand Down Expand Up @@ -128,7 +135,6 @@ public SeqColLevelTwoEntity generateLevelTwoEntity() {
"YfHZgnpuJm4SN3RN4XL1VWWWZwTXtqw5"
));
levelTwoEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq");
levelTwoEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK);
return levelTwoEntity;
}
}
4 changes: 2 additions & 2 deletions src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ public void create() throws IOException {
extendedIntegerListDataEntitiesUcsc =
(List<SeqColExtendedDataEntity<List<Integer>>>) ucscExtendedDataMap.get("integerListExtDataList");
levelOneEntityUcsc = levelOneService.constructSeqColLevelOne(
extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc, SeqColEntity.NamingConvention.UCSC);
extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc, SeqColEntity.NamingConvention.UCSC, GCA_ACCESSION);
Optional<String> resultDigestUcsc = seqColService.addFullSequenceCollection(
levelOneEntityUcsc, extendedStringListDataEntitiesUcsc, extendedIntegerListDataEntitiesUcsc);
if (resultDigestUcsc.isPresent()) {
Expand All @@ -163,7 +163,7 @@ public void create() throws IOException {
extendedIntegerListDataEntitiesGenbank = (List<SeqColExtendedDataEntity<List<Integer>>>) genbankExtendedDataMap.get("integerListExtDataList");

levelOneEntityGenbank = levelOneService.constructSeqColLevelOne(
extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank, SeqColEntity.NamingConvention.GENBANK);
extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank, SeqColEntity.NamingConvention.GENBANK, GCA_ACCESSION);
Optional<String> resultDigestGenbank = seqColService.addFullSequenceCollection(
levelOneEntityGenbank, extendedStringListDataEntitiesGenbank, extendedIntegerListDataEntitiesGenbank);
if (resultDigestGenbank.isPresent()) {
Expand Down
Loading

0 comments on commit 51a95ac

Please sign in to comment.