Skip to content

Commit

Permalink
Added a meta-data table / entity
Browse files Browse the repository at this point in the history
- made a one-to-one-relationship between the levelOneEntity and the MetaDataEntity
- set seqcol digest & naming_convention as the primary key of the meta-data table
waterflow80 committed Mar 22, 2024
1 parent 07a38d8 commit b80c755
Showing 13 changed files with 195 additions and 26 deletions.
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyAlreadyIngestedException;
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.evaseqcol.exception.DuplicateSeqColException;
import uk.ac.ebi.eva.evaseqcol.exception.DuplicateSeqColWithDifferentMetadata;
import uk.ac.ebi.eva.evaseqcol.exception.IncorrectAccessionException;
import uk.ac.ebi.eva.evaseqcol.model.IngestionResultEntity;
import uk.ac.ebi.eva.evaseqcol.service.SeqColService;
@@ -71,6 +72,8 @@ public ResponseEntity<?> fetchAndInsertSeqColByAssemblyAccession(
return new ResponseEntity<>(e.getMessage(), HttpStatus.INTERNAL_SERVER_ERROR);
} catch (DuplicateSeqColException e) {
return new ResponseEntity<>(e.getMessage(), HttpStatus.CONFLICT);
} catch (DuplicateSeqColWithDifferentMetadata e) {
return new ResponseEntity<>(e.getMessage(), HttpStatus.CREATED); // TODO: review this response
} catch (AssemblyNotFoundException e) {
return new ResponseEntity<>(e.getMessage(), HttpStatus.NOT_FOUND);
} catch (AssemblyAlreadyIngestedException e) {
Original file line number Diff line number Diff line change
@@ -17,8 +17,6 @@ public abstract class SeqColEntity {

protected String digest; // The level 0 digest

protected NamingConvention namingConvention;


public enum NamingConvention {
ENA, GENBANK, UCSC, TEST
Original file line number Diff line number Diff line change
@@ -7,20 +7,23 @@
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;

import javax.persistence.Basic;
import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.FetchType;
import javax.persistence.Id;
import javax.persistence.IdClass;
import javax.persistence.JoinColumn;
import javax.persistence.JoinColumns;
import javax.persistence.OneToOne;
import javax.persistence.Table;

@Entity
@NoArgsConstructor
@Data
@Table(name = "sequence_collections_L1")
@IdClass(SeqColId.class)
public class SeqColLevelOneEntity extends SeqColEntity{

@Id
@@ -32,15 +35,16 @@ public class SeqColLevelOneEntity extends SeqColEntity{
@Basic(fetch = FetchType.LAZY)
private JSONLevelOne seqColLevel1Object;

@Id
@Column(nullable = false)
@Enumerated(EnumType.STRING)
protected NamingConvention namingConvention;

public SeqColLevelOneEntity(String digest, NamingConvention namingConvention, JSONLevelOne jsonLevelOne){
super(digest, namingConvention);
@OneToOne(cascade = CascadeType.ALL)
@JoinColumns({
@JoinColumn(name = "seqcol_digest", referencedColumnName = "seqcol_digest"),
@JoinColumn(name = "source_id", referencedColumnName = "source_identifier")
})
private SeqColMetadata metadata;
public SeqColLevelOneEntity(String digest, JSONLevelOne jsonLevelOne, SeqColMetadata metadata){
super(digest);
this.seqColLevel1Object = jsonLevelOne;
this.namingConvention = namingConvention;
this.metadata = metadata;
}

@Override
Original file line number Diff line number Diff line change
@@ -24,9 +24,4 @@ public SeqColLevelTwoEntity setDigest(String digest) {
this.digest = digest;
return this;
}

public SeqColLevelTwoEntity setNamingConvention(NamingConvention convention) {
this.namingConvention = convention;
return this;
}
}
67 changes: 67 additions & 0 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadata.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package uk.ac.ebi.eva.evaseqcol.entities;

import lombok.Data;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.Id;
import javax.persistence.IdClass;
import javax.persistence.OneToOne;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import java.util.Date;

@Entity
@IdClass(SeqColMetadataId.class)
@Table(name = "seqcol_md")
@Data
public class SeqColMetadata {

@Id
@Column(name = "seqcol_digest")
private String seqColDigest;

@Id
@Enumerated(EnumType.STRING)
@Column(name = "source_identifier")
private SourceIdentifier sourceIdentifier;

private String sourceUrl;

@Enumerated(EnumType.STRING)
private SeqColEntity.NamingConvention namingConvention;

@Column(insertable = false, updatable = false)
@Temporal(TemporalType.TIMESTAMP)
private Date timestamp;

@OneToOne(mappedBy = "metadata")
private SeqColLevelOneEntity seqColLevelOne;

public enum SourceIdentifier {
Insdc
}

public SeqColMetadata setSeqColDigest(String digest) {
this.seqColDigest = digest;
return this;
}

public SeqColMetadata setSourceIdentifier(SourceIdentifier sourceIdentifier) {
this.sourceIdentifier = sourceIdentifier;
return this;
}

public SeqColMetadata setSourceUrl(String sourceUrl) {
this.sourceUrl = sourceUrl;
return this;
}

public SeqColMetadata setNamingConvention(SeqColEntity.NamingConvention namingConvention) {
this.namingConvention = namingConvention;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package uk.ac.ebi.eva.evaseqcol.entities;

import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;

import javax.persistence.Embeddable;
import javax.validation.constraints.NotNull;
import java.io.Serializable;

/**
 * Composite primary-key class for {@link SeqColMetadata}, which references it
 * through {@code @IdClass(SeqColMetadataId.class)}. The two fields mirror, by
 * name and type, the {@code @Id} fields declared on that entity.
 *
 * <p>The all-args constructor generated by Lombok takes the fields in
 * declaration order: {@code (seqColDigest, sourceIdentifier)}.
 */
@EqualsAndHashCode
@Embeddable
@NoArgsConstructor
@AllArgsConstructor
public class SeqColMetadataId implements Serializable {

// Digest of the seqCol object the metadata row belongs to.
@NotNull
private String seqColDigest;
// Source identifier of the metadata row.
@NotNull
private SeqColMetadata.SourceIdentifier sourceIdentifier; // e.g. Insdc

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package uk.ac.ebi.eva.evaseqcol.exception;

/**
 * Unchecked exception reporting that a seqCol with the given digest already
 * exists, but with different metadata.
 */
public class DuplicateSeqColWithDifferentMetadata extends RuntimeException {

    private static final String MESSAGE_HEAD = "A similar seqCol already exists with digest ";
    private static final String MESSAGE_TAIL = " but with different metadata";

    /**
     * @param digest digest of the seqCol that already exists; embedded verbatim in the message
     */
    public DuplicateSeqColWithDifferentMetadata(String digest) {
        super(describe(digest));
    }

    /** Assembles the exception message around the given digest. */
    private static String describe(String digest) {
        return new StringBuilder(MESSAGE_HEAD)
                .append(digest)
                .append(MESSAGE_TAIL)
                .toString();
    }
}
15 changes: 15 additions & 0 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/repo/MetadataRepository.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package uk.ac.ebi.eva.evaseqcol.repo;

import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;

import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadata;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataId;

import java.util.List;

/**
 * Spring Data JPA repository for {@link SeqColMetadata} entities, keyed by the
 * composite id {@link SeqColMetadataId}.
 */
@Repository
public interface MetadataRepository extends JpaRepository<SeqColMetadata, SeqColMetadataId> {

/**
 * Query derived from the method name: selects the metadata entries whose
 * {@code seqColDigest} property equals the given digest.
 *
 * @param digest the seqCol digest to match
 * @return the matching metadata entries
 */
List<SeqColMetadata> findAllBySeqColDigest(String digest);
}
Original file line number Diff line number Diff line change
@@ -14,4 +14,5 @@ public interface SeqColLevelOneRepository extends JpaRepository<SeqColLevelOneEn
void removeSeqColLevelOneEntityByDigest(String digest);

void deleteAll();

}
35 changes: 35 additions & 0 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/service/MetadataService.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package uk.ac.ebi.eva.evaseqcol.service;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadata;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataId;
import uk.ac.ebi.eva.evaseqcol.repo.MetadataRepository;

import java.util.List;
import java.util.Optional;

/**
 * Service layer over {@link MetadataRepository} for storing and looking up
 * {@link SeqColMetadata} entries.
 */
@Service
public class MetadataService {

    @Autowired
    private MetadataRepository repository;

    /**
     * Stores the given metadata unless an entry with the same composite key
     * (seqCol digest + source identifier) is already present.
     *
     * @param metadata the metadata entry to store
     * @return the saved entry, or an empty Optional when an entry with the
     *         same key already exists (in which case nothing is saved)
     */
    public Optional<SeqColMetadata> addMetadata(SeqColMetadata metadata) {
        SeqColMetadataId key =
                new SeqColMetadataId(metadata.getSeqColDigest(), metadata.getSourceIdentifier());
        boolean alreadyStored = repository.existsById(key);
        if (!alreadyStored) {
            SeqColMetadata saved = repository.save(metadata);
            return Optional.of(saved);
        }
        return Optional.empty();
    }

    /**
     * Looks up every metadata entry recorded for the seqCol object with the
     * given digest.
     *
     * @param seqColDigest digest of the seqCol object
     * @return an Optional wrapping the list returned by the repository
     */
    public Optional<List<SeqColMetadata>> getAllMetadataForSeqColByDigest(String seqColDigest) {
        List<SeqColMetadata> entries = repository.findAllBySeqColDigest(seqColDigest);
        return Optional.of(entries);
    }
}
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.digests.DigestCalculator;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadata;
import uk.ac.ebi.eva.evaseqcol.repo.SeqColLevelOneRepository;
import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData;
@@ -32,8 +33,12 @@ public class SeqColLevelOneService {
* Add a new Level 1 sequence collection object and save it to the
* database*/
public Optional<SeqColLevelOneEntity> addSequenceCollectionL1(SeqColLevelOneEntity seqColLevelOne){
SeqColLevelOneEntity seqCol = repository.save(seqColLevelOne);
return Optional.of(seqCol);
if (repository.existsById(seqColLevelOne.getDigest())) {
return Optional.empty();
}
return Optional.of(
repository.save(seqColLevelOne)
);
}

public Optional<SeqColLevelOneEntity> getSeqColLevelOneByDigest(String digest){
@@ -62,12 +67,15 @@ public List<SeqColLevelOneEntity> getAllSeqColLevelOneObjects(){

/**
* Construct a seqCol level 1 entity out of three seqCol level 2 entities that
* hold names, lengths and sequences objects*/
* hold names, lengths and sequences objects
* TODO: Change the signature of this method and make it accept metadata object instead of namingconvention*/
public SeqColLevelOneEntity constructSeqColLevelOne(List<SeqColExtendedDataEntity<List<String>>> stringListExtendedDataEntities,
List<SeqColExtendedDataEntity<List<Integer>>> integerListExtendedDataEntities,
SeqColEntity.NamingConvention convention) throws IOException {
SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity();
JSONLevelOne jsonLevelOne = new JSONLevelOne();
SeqColMetadata metadata = new SeqColMetadata().setNamingConvention(convention)
.setSourceIdentifier(SeqColMetadata.SourceIdentifier.Insdc); // TODO: this should be specified by the method parameter

// Looping over List<String> types
for (SeqColExtendedDataEntity<List<String>> dataEntity: stringListExtendedDataEntities) {
@@ -99,7 +107,8 @@ public SeqColLevelOneEntity constructSeqColLevelOne(List<SeqColExtendedDataEntit
levelOneEntity.setSeqColLevel1Object(jsonLevelOne);
String digest0 = digestCalculator.getSha512Digest(levelOneEntity.toString());
levelOneEntity.setDigest(digest0);
levelOneEntity.setNamingConvention(convention);
metadata.setSeqColDigest(digest0);
levelOneEntity.setMetadata(metadata);
return levelOneEntity;
}

11 changes: 10 additions & 1 deletion src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
import uk.ac.ebi.eva.evaseqcol.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.evaseqcol.exception.AttributeNotDefinedException;
import uk.ac.ebi.eva.evaseqcol.exception.DuplicateSeqColException;
import uk.ac.ebi.eva.evaseqcol.exception.DuplicateSeqColWithDifferentMetadata;
import uk.ac.ebi.eva.evaseqcol.exception.SeqColNotFoundException;
import uk.ac.ebi.eva.evaseqcol.exception.UnableToLoadServiceInfoException;
import uk.ac.ebi.eva.evaseqcol.model.IngestionResultEntity;
@@ -50,16 +51,19 @@ public class SeqColService {
private final SeqColLevelOneService levelOneService;
private final SeqColLevelTwoService levelTwoService;
private final SeqColExtendedDataService extendedDataService;
private final MetadataService metadataService;
private final DigestCalculator digestCalculator = new DigestCalculator();
private final Logger logger = LoggerFactory.getLogger(SeqColService.class);

@Autowired
public SeqColService(NCBISeqColDataSource ncbiSeqColDataSource, SeqColLevelOneService levelOneService,
SeqColLevelTwoService levelTwoService, SeqColExtendedDataService extendedDataService) {
SeqColLevelTwoService levelTwoService, SeqColExtendedDataService extendedDataService,
MetadataService metadataService) {
this.ncbiSeqColDataSource = ncbiSeqColDataSource;
this.levelOneService = levelOneService;
this.levelTwoService = levelTwoService;
this.extendedDataService = extendedDataService;
this.metadataService = metadataService;
}

@Transactional
@@ -73,6 +77,11 @@ public Optional<String> addFullSequenceCollection(
) {
long numSeqCols = levelOneService.countSeqColLevelOneEntitiesByDigest(levelOneEntity.getDigest());
if (numSeqCols > 0) {
// Checking for possibly different metadata
if (metadataService.addMetadata(levelOneEntity.getMetadata()).isPresent()) {
logger.warn("SeqCol with digest " + levelOneEntity.getDigest() + " already exists but with different metadata!");
throw new DuplicateSeqColWithDifferentMetadata(levelOneEntity.getDigest());
}
logger.warn("SeqCol with digest " + levelOneEntity.getDigest() + " already exists !");
throw new DuplicateSeqColException(levelOneEntity.getDigest());
} else {
13 changes: 8 additions & 5 deletions src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColGenerator.java
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadata;
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne;

import java.util.Arrays;
@@ -16,24 +17,27 @@ public class SeqColGenerator {

/**
* Return an example (might not be real) of a seqCol object level 1
* The naming convention is set to GENBANK as a random choice*/
* The naming convention is set to GENBANK as a random choice
* and the source identifier is set to Insdc*/
public SeqColLevelOneEntity generateLevelOneEntity() {
SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity();
JSONLevelOne jsonLevelOne = new JSONLevelOne();
SeqColMetadata metadata = new SeqColMetadata()
.setNamingConvention(SeqColEntity.NamingConvention.GENBANK)
.setSeqColDigest("PgQMkKm2A8I9GVW7hJWcJ3erxuaMbHpD")
.setSourceIdentifier(SeqColMetadata.SourceIdentifier.Insdc);
jsonLevelOne.setNames("mfxUkK3J5y7BGVW7hJWcJ3erxuaMX6xm");
jsonLevelOne.setSequences("dda3Kzi1Wkm2A8I99WietU1R8J4PL-D6");
jsonLevelOne.setLengths("Ms_ixPgQMJaM54dVntLWeovXSO7ljvZh");
jsonLevelOne.setMd5DigestsOfSequences("_6iaYtcWw4TZaowlL7_64Wu9mbHpDUw4");
jsonLevelOne.setSortedNameLengthPairs("QFuKs5Hh8uQwwUtnRxIf8W3zeJoFOp8Z");
levelOneEntity.setSeqColLevel1Object(jsonLevelOne);
levelOneEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq");
levelOneEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK);
levelOneEntity.setMetadata(metadata);
return levelOneEntity;
}

/**
* Return an example (might not be real) of a seqCol object level 2
* The naming convention is set to GENBANK as a random choice
* */
public SeqColLevelTwoEntity generateLevelTwoEntity() {
SeqColLevelTwoEntity levelTwoEntity = new SeqColLevelTwoEntity();
@@ -128,7 +132,6 @@ public SeqColLevelTwoEntity generateLevelTwoEntity() {
"YfHZgnpuJm4SN3RN4XL1VWWWZwTXtqw5"
));
levelTwoEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq");
levelTwoEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK);
return levelTwoEntity;
}
}

0 comments on commit b80c755

Please sign in to comment.