From b80c7554649a00e20d48b53fe67ca225a3d04295 Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 22 Mar 2024 17:59:17 +0100 Subject: [PATCH] Added a meta-data table / entity - made a one-to-one-relationship between the levelOneEntity and the MetaDataEntity - set seqcol digest & source_identifier as the composite primary key of the meta-data table --- .../controller/admin/AdminController.java | 3 + .../eva/evaseqcol/entities/SeqColEntity.java | 2 - .../entities/SeqColLevelOneEntity.java | 22 +++--- .../entities/SeqColLevelTwoEntity.java | 5 -- .../evaseqcol/entities/SeqColMetadata.java | 67 +++++++++++++++++++ .../evaseqcol/entities/SeqColMetadataId.java | 22 ++++++ .../DuplicateSeqColWithDifferentMetadata.java | 8 +++ .../evaseqcol/repo/MetadataRepository.java | 15 +++++ .../repo/SeqColLevelOneRepository.java | 1 + .../evaseqcol/service/MetadataService.java | 35 ++++++++++ .../service/SeqColLevelOneService.java | 17 +++-- .../eva/evaseqcol/service/SeqColService.java | 11 ++- .../ebi/eva/evaseqcol/io/SeqColGenerator.java | 13 ++-- 13 files changed, 195 insertions(+), 26 deletions(-) create mode 100644 src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadata.java create mode 100644 src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadataId.java create mode 100644 src/main/java/uk/ac/ebi/eva/evaseqcol/exception/DuplicateSeqColWithDifferentMetadata.java create mode 100644 src/main/java/uk/ac/ebi/eva/evaseqcol/repo/MetadataRepository.java create mode 100644 src/main/java/uk/ac/ebi/eva/evaseqcol/service/MetadataService.java diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/controller/admin/AdminController.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/controller/admin/AdminController.java index 78e5ecf..6638b60 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/controller/admin/AdminController.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/controller/admin/AdminController.java @@ -17,6 +17,7 @@ import 
uk.ac.ebi.eva.evaseqcol.exception.AssemblyAlreadyIngestedException; import uk.ac.ebi.eva.evaseqcol.exception.AssemblyNotFoundException; import uk.ac.ebi.eva.evaseqcol.exception.DuplicateSeqColException; +import uk.ac.ebi.eva.evaseqcol.exception.DuplicateSeqColWithDifferentMetadata; import uk.ac.ebi.eva.evaseqcol.exception.IncorrectAccessionException; import uk.ac.ebi.eva.evaseqcol.model.IngestionResultEntity; import uk.ac.ebi.eva.evaseqcol.service.SeqColService; @@ -71,6 +72,8 @@ public ResponseEntity fetchAndInsertSeqColByAssemblyAccession( return new ResponseEntity<>(e.getMessage(), HttpStatus.INTERNAL_SERVER_ERROR); } catch (DuplicateSeqColException e) { return new ResponseEntity<>(e.getMessage(), HttpStatus.CONFLICT); + } catch (DuplicateSeqColWithDifferentMetadata e) { + return new ResponseEntity<>(e.getMessage(), HttpStatus.CREATED); // TODO: review this response } catch (AssemblyNotFoundException e) { return new ResponseEntity<>(e.getMessage(), HttpStatus.NOT_FOUND); } catch (AssemblyAlreadyIngestedException e) { diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColEntity.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColEntity.java index 05da0ca..585198a 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColEntity.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColEntity.java @@ -17,8 +17,6 @@ public abstract class SeqColEntity { protected String digest; // The level 0 digest - protected NamingConvention namingConvention; - public enum NamingConvention { ENA, GENBANK, UCSC, TEST diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColLevelOneEntity.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColLevelOneEntity.java index dd47949..cfdc145 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColLevelOneEntity.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColLevelOneEntity.java @@ -7,6 +7,7 @@ import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne; import 
javax.persistence.Basic; +import javax.persistence.CascadeType; import javax.persistence.Column; import javax.persistence.Entity; import javax.persistence.EnumType; @@ -14,13 +15,15 @@ import javax.persistence.FetchType; import javax.persistence.Id; import javax.persistence.IdClass; +import javax.persistence.JoinColumn; +import javax.persistence.JoinColumns; +import javax.persistence.OneToOne; import javax.persistence.Table; @Entity @NoArgsConstructor @Data @Table(name = "sequence_collections_L1") -@IdClass(SeqColId.class) public class SeqColLevelOneEntity extends SeqColEntity{ @Id @@ -32,15 +35,16 @@ public class SeqColLevelOneEntity extends SeqColEntity{ @Basic(fetch = FetchType.LAZY) private JSONLevelOne seqColLevel1Object; - @Id - @Column(nullable = false) - @Enumerated(EnumType.STRING) - protected NamingConvention namingConvention; - - public SeqColLevelOneEntity(String digest, NamingConvention namingConvention, JSONLevelOne jsonLevelOne){ - super(digest, namingConvention); + @OneToOne(cascade = CascadeType.ALL) + @JoinColumns({ + @JoinColumn(name = "seqcol_digest", referencedColumnName = "seqcol_digest"), + @JoinColumn(name = "source_id", referencedColumnName = "source_identifier") + }) + private SeqColMetadata metadata; + public SeqColLevelOneEntity(String digest, JSONLevelOne jsonLevelOne, SeqColMetadata metadata){ + super(digest); this.seqColLevel1Object = jsonLevelOne; - this.namingConvention = namingConvention; + this.metadata = metadata; } @Override diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColLevelTwoEntity.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColLevelTwoEntity.java index e707ae7..a7bea68 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColLevelTwoEntity.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColLevelTwoEntity.java @@ -24,9 +24,4 @@ public SeqColLevelTwoEntity setDigest(String digest) { this.digest = digest; return this; } - - public SeqColLevelTwoEntity 
setNamingConvention(NamingConvention convention) { - this.namingConvention = convention; - return this; - } } diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadata.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadata.java new file mode 100644 index 0000000..b7a7ae6 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadata.java @@ -0,0 +1,67 @@ +package uk.ac.ebi.eva.evaseqcol.entities; + +import lombok.Data; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.IdClass; +import javax.persistence.OneToOne; +import javax.persistence.Table; +import javax.persistence.Temporal; +import javax.persistence.TemporalType; +import java.util.Date; + +@Entity +@IdClass(SeqColMetadataId.class) +@Table(name = "seqcol_md") +@Data +public class SeqColMetadata { + + @Id + @Column(name = "seqcol_digest") + private String seqColDigest; + + @Id + @Enumerated(EnumType.STRING) + @Column(name = "source_identifier") + private SourceIdentifier sourceIdentifier; + + private String sourceUrl; + + @Enumerated(EnumType.STRING) + private SeqColEntity.NamingConvention namingConvention; + + @Column(insertable = false, updatable = false) + @Temporal(TemporalType.TIMESTAMP) + private Date timestamp; + + @OneToOne(mappedBy = "metadata") + private SeqColLevelOneEntity seqColLevelOne; + + public enum SourceIdentifier { + Insdc + } + + public SeqColMetadata setSeqColDigest(String digest) { + this.seqColDigest = digest; + return this; + } + + public SeqColMetadata setSourceIdentifier(SourceIdentifier sourceIdentifier) { + this.sourceIdentifier = sourceIdentifier; + return this; + } + + public SeqColMetadata setSourceUrl(String sourceUrl) { + this.sourceUrl = sourceUrl; + return this; + } + + public SeqColMetadata setNamingConvention(SeqColEntity.NamingConvention namingConvention) { + 
this.namingConvention = namingConvention; + return this; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadataId.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadataId.java new file mode 100644 index 0000000..fcca9c9 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColMetadataId.java @@ -0,0 +1,22 @@ +package uk.ac.ebi.eva.evaseqcol.entities; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; + +import javax.persistence.Embeddable; +import javax.validation.constraints.NotNull; +import java.io.Serializable; + +@EqualsAndHashCode +@Embeddable +@NoArgsConstructor +@AllArgsConstructor +public class SeqColMetadataId implements Serializable { + + @NotNull + private String seqColDigest; + @NotNull + private SeqColMetadata.SourceIdentifier sourceIdentifier; // Eg: INSDC, UCSC, GENBANK, etc.. + +} diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/exception/DuplicateSeqColWithDifferentMetadata.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/exception/DuplicateSeqColWithDifferentMetadata.java new file mode 100644 index 0000000..2a7e0e0 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/exception/DuplicateSeqColWithDifferentMetadata.java @@ -0,0 +1,8 @@ +package uk.ac.ebi.eva.evaseqcol.exception; + +public class DuplicateSeqColWithDifferentMetadata extends RuntimeException{ + + public DuplicateSeqColWithDifferentMetadata(String digest) { + super("A similar seqCol already exists with digest " + digest + " but with different metadata"); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/repo/MetadataRepository.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/repo/MetadataRepository.java new file mode 100644 index 0000000..8a2d791 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/repo/MetadataRepository.java @@ -0,0 +1,15 @@ +package uk.ac.ebi.eva.evaseqcol.repo; + +import org.springframework.data.jpa.repository.JpaRepository; +import 
org.springframework.stereotype.Repository; + +import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadata; +import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadataId; + +import java.util.List; + +@Repository +public interface MetadataRepository extends JpaRepository { + + List findAllBySeqColDigest(String digest); +} diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/repo/SeqColLevelOneRepository.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/repo/SeqColLevelOneRepository.java index 27ff575..58972b3 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/repo/SeqColLevelOneRepository.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/repo/SeqColLevelOneRepository.java @@ -14,4 +14,5 @@ public interface SeqColLevelOneRepository extends JpaRepository addMetadata(SeqColMetadata metadata) { + if (repository.existsById(new SeqColMetadataId(metadata.getSeqColDigest(), metadata.getSourceIdentifier()))){ + return Optional.empty(); + } + return Optional.of( + repository.save(metadata) + ); + } + + /** + * Return the list of all metadata entries for the seqCol object with the given digest*/ + public Optional> getAllMetadataForSeqColByDigest(String seqColDigest) { + return Optional.of( + repository.findAllBySeqColDigest(seqColDigest) + ); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java index 2810409..12adad3 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java @@ -8,6 +8,7 @@ import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity; import uk.ac.ebi.eva.evaseqcol.digests.DigestCalculator; import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity; +import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadata; import uk.ac.ebi.eva.evaseqcol.repo.SeqColLevelOneRepository; import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData; import 
uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData; @@ -32,8 +33,12 @@ public class SeqColLevelOneService { * Add a new Level 1 sequence collection object and save it to the * database*/ public Optional addSequenceCollectionL1(SeqColLevelOneEntity seqColLevelOne){ - SeqColLevelOneEntity seqCol = repository.save(seqColLevelOne); - return Optional.of(seqCol); + if (repository.existsById(seqColLevelOne.getDigest())) { + return Optional.empty(); + } + return Optional.of( + repository.save(seqColLevelOne) + ); } public Optional getSeqColLevelOneByDigest(String digest){ @@ -62,12 +67,15 @@ public List getAllSeqColLevelOneObjects(){ /** * Construct a seqCol level 1 entity out of three seqCol level 2 entities that - * hold names, lengths and sequences objects*/ + * hold names, lengths and sequences objects + * TODO: Change the signature of this method and make it accept metadata object instead of namingconvention*/ public SeqColLevelOneEntity constructSeqColLevelOne(List>> stringListExtendedDataEntities, List>> integerListExtendedDataEntities, SeqColEntity.NamingConvention convention) throws IOException { SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity(); JSONLevelOne jsonLevelOne = new JSONLevelOne(); + SeqColMetadata metadata = new SeqColMetadata().setNamingConvention(convention) + .setSourceIdentifier(SeqColMetadata.SourceIdentifier.Insdc); // TODO: this should be specified by the method parameter // Looping over List types for (SeqColExtendedDataEntity> dataEntity: stringListExtendedDataEntities) { @@ -99,7 +107,8 @@ public SeqColLevelOneEntity constructSeqColLevelOne(List addFullSequenceCollection( ) { long numSeqCols = levelOneService.countSeqColLevelOneEntitiesByDigest(levelOneEntity.getDigest()); if (numSeqCols > 0) { + // Checking for possibly different metadata + if (metadataService.addMetadata(levelOneEntity.getMetadata()).isPresent()) { + logger.warn("SeqCol with digest " + levelOneEntity.getDigest() + " already exists but with different 
metadata!"); + throw new DuplicateSeqColWithDifferentMetadata(levelOneEntity.getDigest()); + } logger.warn("SeqCol with digest " + levelOneEntity.getDigest() + " already exists !"); throw new DuplicateSeqColException(levelOneEntity.getDigest()); } else { diff --git a/src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColGenerator.java b/src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColGenerator.java index e4ccd41..75329bf 100644 --- a/src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColGenerator.java +++ b/src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColGenerator.java @@ -5,6 +5,7 @@ import uk.ac.ebi.eva.evaseqcol.entities.SeqColEntity; import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity; import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity; +import uk.ac.ebi.eva.evaseqcol.entities.SeqColMetadata; import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne; import java.util.Arrays; @@ -16,24 +17,27 @@ public class SeqColGenerator { /** * Return an example (might not be real) of a seqCol object level 1 - * The naming convention is set to GENBANK as a random choice*/ + * The naming convention is set to GENBANK as a random choice + * and source identifier is set Insdc*/ public SeqColLevelOneEntity generateLevelOneEntity() { SeqColLevelOneEntity levelOneEntity = new SeqColLevelOneEntity(); JSONLevelOne jsonLevelOne = new JSONLevelOne(); + SeqColMetadata metadata = new SeqColMetadata() + .setNamingConvention(SeqColEntity.NamingConvention.GENBANK) + .setSeqColDigest("PgQMkKm2A8I9GVW7hJWcJ3erxuaMbHpD") + .setSourceIdentifier(SeqColMetadata.SourceIdentifier.Insdc); jsonLevelOne.setNames("mfxUkK3J5y7BGVW7hJWcJ3erxuaMX6xm"); jsonLevelOne.setSequences("dda3Kzi1Wkm2A8I99WietU1R8J4PL-D6"); jsonLevelOne.setLengths("Ms_ixPgQMJaM54dVntLWeovXSO7ljvZh"); jsonLevelOne.setMd5DigestsOfSequences("_6iaYtcWw4TZaowlL7_64Wu9mbHpDUw4"); jsonLevelOne.setSortedNameLengthPairs("QFuKs5Hh8uQwwUtnRxIf8W3zeJoFOp8Z"); levelOneEntity.setSeqColLevel1Object(jsonLevelOne); - 
levelOneEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq"); - levelOneEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK); + levelOneEntity.setMetadata(metadata); return levelOneEntity; } /** * Return an example (might not be real) of a seqCol object level 2 - * The naming convention is set to GENBANK as a random choice * */ public SeqColLevelTwoEntity generateLevelTwoEntity() { SeqColLevelTwoEntity levelTwoEntity = new SeqColLevelTwoEntity(); @@ -128,7 +132,6 @@ public SeqColLevelTwoEntity generateLevelTwoEntity() { "YfHZgnpuJm4SN3RN4XL1VWWWZwTXtqw5" )); levelTwoEntity.setDigest("3mTg0tAA3PS-R1TzelLVWJ2ilUzoWfVq"); - levelTwoEntity.setNamingConvention(SeqColEntity.NamingConvention.GENBANK); return levelTwoEntity; } }