diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index d5a82a1c9a..a74e60d7f2 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -10,14 +10,13 @@ on: jobs: build: uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + with: + maven_opts: -DCELLBASE.WAR.NAME=cellbase -P default-config-test test: uses: ./.github/workflows/test-analysis.yml needs: build secrets: inherit - with: - report_context: development - report_dir: ${{ github.ref_name }}/cellbase/${{ github.sha }} deploy-maven: uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop @@ -32,4 +31,3 @@ jobs: with: cli: python3 ./build/cloud/docker/docker-build.py push --images base secrets: inherit - diff --git a/.github/workflows/pull-request-merge.yml b/.github/workflows/pull-request-merge.yml index 2ae1584532..4b1c43c682 100644 --- a/.github/workflows/pull-request-merge.yml +++ b/.github/workflows/pull-request-merge.yml @@ -1,4 +1,4 @@ -name: "Merge Workflow" +name: "Pull Request Merge Workflow" on: pull_request: @@ -13,6 +13,4 @@ jobs: uses: opencb/java-common-libs/.github/workflows/delete-docker-hub-workflow.yml@develop with: cli: python3 ./build/cloud/docker/docker-build.py delete --images base --tag ${{ github.head_ref }} - secrets: - DOCKER_HUB_USER: ${{ secrets.DOCKER_HUB_USER }} - DOCKER_HUB_PASSWORD: ${{ secrets.DOCKER_HUB_PASSWORD }} + secrets: inherit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 05191554c3..69003092a0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,32 +9,26 @@ on: jobs: build: uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop - - test: - uses: ./.github/workflows/test-analysis.yml - needs: build - secrets: inherit with: - report_context: xetabase - report_dir: cellbase + maven_opts: -DCELLBASE.WAR.NAME=cellbase deploy-maven: uses: 
opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop - needs: test + needs: build with: maven_opts: -Dcheckstyle.skip -DCELLBASE.WAR.NAME=cellbase secrets: inherit deploy-docker: uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop - needs: test + needs: build with: cli: python3 ./build/cloud/docker/docker-build.py push --images base secrets: inherit deploy-python: uses: opencb/java-common-libs/.github/workflows/deploy-python-workflow.yml@develop - needs: test + needs: build with: cli: bash ./clients/python/python-build.sh push artifact: build-folder diff --git a/.github/workflows/task.yml b/.github/workflows/task.yml index c97f2b9790..6dbd2a9240 100644 --- a/.github/workflows/task.yml +++ b/.github/workflows/task.yml @@ -9,14 +9,13 @@ on: jobs: build: uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + with: + maven_opts: -DCELLBASE.WAR.NAME=cellbase -P default-config-test test: uses: ./.github/workflows/test-analysis.yml needs: build secrets: inherit - with: - report_context: development - report_dir: ${{ github.ref_name }}/cellbase/${{ github.sha }} deploy-docker: uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop @@ -24,5 +23,3 @@ jobs: with: cli: python3 ./build/cloud/docker/docker-build.py push --images base --tag ${{ github.ref_name }} secrets: inherit - - diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index 30d82f2b7a..4c477cbd65 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -52,31 +52,14 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR 
information, if any SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - run: mvn -B verify surefire-report:report -Dcheckstyle.skip org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=opencb_cellbase - - name: Upload result dir - uses: actions/upload-artifact@v3 + run: mvn -B verify surefire-report:report --fail-never -Dcheckstyle.skip org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=opencb_cellbase + - name: Publish Test Report + uses: scacap/action-surefire-report@v1 + ## Skip cancelled() + ## https://docs.github.com/en/actions/learn-github-actions/expressions#cancelled + if: success() || failure() with: - name: workdir - path: "**/target/site" - publish-test: - name: Publish test results - runs-on: ubuntu-22.04 - needs: test - strategy: - matrix: - module: ["cellbase-core", "cellbase-app", "cellbase-client", "cellbase-lib", "cellbase-server"] - steps: - - name: Download result dir - uses: actions/download-artifact@v3 - with: - name: workdir - - name: Deploy unit tests web recursively to remote - uses: garygrossgarten/github-action-scp@release - with: - local: ${{ matrix.module }}/target/site - remote: /var/www/html/reports/${{ inputs.report_context }}/${{ env.xb_version }}/${{ inputs.report_dir }}/unit/${{ matrix.module }} - host: ${{ secrets.SSH_TESTING_SERVER_HOST}} - port: ${{ secrets.SSH_TESTING_SERVER_PORT}} - username: ${{ secrets.SSH_TESTING_SERVER_USER }} - password: ${{ secrets.SSH_TESTING_SERVER_PASSWORD }} - concurrency: 2 + check_name: "Surefire tests report" + report_paths: './**/surefire-reports/TEST-*.xml' + commit: '${{ github.sha }}' + fail_on_test_failures: true diff --git a/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile b/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile index 978c0506f4..0502319f49 100644 --- a/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile +++ b/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile @@ -1,5 +1,6 @@ -## Based on Debian 11 (bullseye) -FROM 
openjdk:11-jre +## Based on Ubuntu 22.04 (jammy) +## We are now using OpenJDK 8u372 to support "cgroup v2", see https://developers.redhat.com/articles/2023/04/19/openjdk-8u372-feature-cgroup-v2-support# +FROM eclipse-temurin:8u372-b07-jre-jammy LABEL org.label-schema.vendor="OpenCB" \ org.label-schema.name="cellbase-base" \ diff --git a/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile.alpine b/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile.alpine deleted file mode 100644 index 9960039781..0000000000 --- a/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile.alpine +++ /dev/null @@ -1,22 +0,0 @@ -FROM openjdk:8-jre-alpine - -LABEL org.label-schema.vendor="OpenCB" \ - org.label-schema.name="cellbase-base" \ - org.label-schema.url="http://docs.opencb.org/display/cellbase" \ - org.label-schema.description="An Open Computational Genomics Analysis platform for big data processing and analysis in genomics" \ - maintainer="Julie Sullivan " \ - org.label-schema.schema-version="1.0" - -ENV CELLBASE_USER cellbase -ENV CELLBASE_HOME /opt/cellbase/ - -RUN apk update && apk upgrade && apk add ca-certificates openssl wget bash \ - && update-ca-certificates \ - && addgroup -S $CELLBASE_USER && adduser -S $CELLBASE_USER -G $CELLBASE_USER -u 1001 - -USER $CELLBASE_USER - -VOLUME /opt/cellbase/conf - -COPY . 
/opt/cellbase -WORKDIR /opt/cellbase diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 48e197d152..ec4ff19a0a 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.5.0 + 5.7.0-SNAPSHOT ../pom.xml diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java index 28a65d5f93..29131a2ff4 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java @@ -36,6 +36,7 @@ public class AdminCliOptionsParser extends CliOptionsParser { private DataReleaseCommandOptions dataReleaseCommandOptions; private DataTokenCommandOptions dataTokenCommandOptions; private LoadCommandOptions loadCommandOptions; + private ExportCommandOptions exportCommandOptions; private CustomiseCommandOptions customiseCommandOptions; private IndexCommandOptions indexCommandOptions; private InstallCommandOptions installCommandOptions; @@ -52,6 +53,7 @@ public AdminCliOptionsParser() { dataReleaseCommandOptions = new DataReleaseCommandOptions(); dataTokenCommandOptions = new DataTokenCommandOptions(); loadCommandOptions = new LoadCommandOptions(); + exportCommandOptions = new ExportCommandOptions(); customiseCommandOptions = new CustomiseCommandOptions(); indexCommandOptions = new IndexCommandOptions(); installCommandOptions = new InstallCommandOptions(); @@ -63,6 +65,7 @@ public AdminCliOptionsParser() { jCommander.addCommand("data-release", dataReleaseCommandOptions); jCommander.addCommand("data-token", dataTokenCommandOptions); jCommander.addCommand("load", loadCommandOptions); + jCommander.addCommand("export", exportCommandOptions); jCommander.addCommand("customise", customiseCommandOptions); jCommander.addCommand("index", indexCommandOptions); jCommander.addCommand("install", 
installCommandOptions); @@ -171,9 +174,9 @@ public class LoadCommandOptions { @ParametersDelegate public CommonCommandOptions commonOptions = commonCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation, " - + "conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed. 'all' loads everything", - required = true, arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation," + + " conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed, pharmacogenomics." + + " 'all' loads everything", required = true, arity = 1) public String data; @Parameter(names = {"-i", "--input"}, required = true, arity = 1, @@ -212,6 +215,41 @@ public class LoadCommandOptions { } + /** Command-line options of the 'export' admin command, consumed by ExportCommandExecutor. */ + @Parameters(commandNames = {"export"}, commandDescription = "Export data into JSON files") + public class ExportCommandOptions { + + @ParametersDelegate + public CommonCommandOptions commonOptions = commonCommandOptions; + + @Parameter(names = {"-d", "--data"}, description = "Data model type to be exported: genome, gene, variation, " + + "conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed. 'all'" + + " exports everything", required = true, arity = 1) + public String data; + + @Parameter(names = {"--db", "--database"}, description = "Database name, e.g., cellbase_hsapiens_grch38_v5", required = true, + arity = 1) + public String database; + + @Parameter(names = {"--data-release"}, description = "Data release for exporting data.", required = true, arity = 1) + public int dataRelease; + + @Parameter(names = {"--token"}, description = "Data token to export licensed data.", arity = 1) + public String token; + + @Parameter(names = {"--gene"}, description = "List of genes (separated by commas). 
Exported data will be related to these genes" + + " (gene coordinates will be taken into account).", required = true, arity = 1) + public String gene; + + @Parameter(names = {"--region"}, description = "List of regions (separated by commas). Exported data will be related to these" + + " regions taking into account their coordinates.", arity = 1) + public String region; + + @Parameter(names = {"-o", "--output"}, required = true, arity = 1, + description = "Output directory where to save the JSON data models.") + public String output; + } + @Parameters(commandNames = {"load"}, commandDescription = "Load the built data models into the database") public class CustomiseCommandOptions { @@ -360,9 +398,13 @@ public DataTokenCommandOptions getDataTokenCommandOptions() { return dataTokenCommandOptions; } public LoadCommandOptions getLoadCommandOptions() { return loadCommandOptions; } + + public ExportCommandOptions getExportCommandOptions() { + return exportCommandOptions; + } public IndexCommandOptions getIndexCommandOptions() { return indexCommandOptions; diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java index 8bb7bbc9eb..cfe4652c80 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java @@ -72,6 +72,9 @@ public static void main(String[] args) { case "load": commandExecutor = new LoadCommandExecutor(cliOptionsParser.getLoadCommandOptions()); break; + case "export": + commandExecutor = new ExportCommandExecutor(cliOptionsParser.getExportCommandOptions()); + break; case "index": commandExecutor = new IndexCommandExecutor(cliOptionsParser.getIndexCommandOptions()); break; diff --git 
a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java index dcd40ba508..8c0d477023 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java @@ -38,6 +38,8 @@ import java.util.Collections; import java.util.List; +import static org.opencb.cellbase.lib.EtlCommons.PHARMGKB_DATA; + /** * Created by imedina on 03/02/15. */ @@ -163,6 +165,9 @@ public void execute() { case EtlCommons.PUBMED_DATA: parser = buildPubMed(); break; + case EtlCommons.PHARMACOGENOMICS_DATA: + parser = buildPharmacogenomics(); + break; default: logger.error("Build option '" + buildCommandOptions.data + "' is not valid"); break; @@ -414,4 +419,22 @@ private CellBaseBuilder buildPubMed() throws IOException { CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pubmedOutputFolder); return new PubMedBuilder(pubmedInputFolder, serializer); } + + private CellBaseBuilder buildPharmacogenomics() throws IOException { + Path inFolder = downloadFolder.resolve(EtlCommons.PHARMACOGENOMICS_DATA); + Path outFolder = buildFolder.resolve(EtlCommons.PHARMACOGENOMICS_DATA); + if (!outFolder.toFile().exists()) { + outFolder.toFile().mkdirs(); + } + + logger.info("Copying PharmGKB version file..."); + if (inFolder.resolve(PHARMGKB_DATA).resolve(EtlCommons.PHARMGKB_VERSION_FILENAME).toFile().exists()) { + Files.copy(inFolder.resolve(PHARMGKB_DATA).resolve(EtlCommons.PHARMGKB_VERSION_FILENAME), + outFolder.resolve(EtlCommons.PHARMGKB_VERSION_FILENAME), + StandardCopyOption.REPLACE_EXISTING); + } + + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(outFolder); + return new PharmGKBBuilder(inFolder, serializer); + } } diff --git 
a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java index 698f2df033..f8197e6558 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java @@ -102,6 +102,9 @@ public void execute() { case EtlCommons.PUBMED_DATA: downloadFiles.addAll(downloader.downloadPubMed()); break; + case EtlCommons.PHARMACOGENOMICS_DATA: + downloadFiles.addAll(downloader.downloadPharmKGB()); + break; default: System.out.println("Value \"" + data + "\" is not allowed for the data parameter. Allowed values" + " are: {genome, gene, gene_disease_association, variation, variation_functional_score," diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java new file mode 100644 index 0000000000..19aff216c0 --- /dev/null +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java @@ -0,0 +1,494 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.app.cli.admin.executors; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.protein.uniprot.v202003jaxb.Entry; +import org.opencb.biodata.models.core.*; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.Repeat; +import org.opencb.cellbase.app.cli.CommandExecutor; +import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser; +import org.opencb.cellbase.core.api.*; +import org.opencb.cellbase.core.api.query.QueryException; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataRelease; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer; +import org.opencb.cellbase.lib.EtlCommons; +import org.opencb.cellbase.lib.iterator.CellBaseIterator; +import org.opencb.cellbase.lib.managers.*; +import org.opencb.commons.datastore.core.QueryOptions; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Collectors; + +import static org.opencb.cellbase.lib.EtlCommons.CLINICAL_VARIANTS_DATA; +import static org.opencb.cellbase.lib.EtlCommons.OBO_DATA; + +/** + * Created by jtarraga on 29/05/23. 
+ */ +public class ExportCommandExecutor extends CommandExecutor { + + private AdminCliOptionsParser.ExportCommandOptions exportCommandOptions; + + private String species; + private String assembly; + + private Path output; + private String[] dataToExport; + private int dataRelease; + private String token; + + private String database; + private CellBaseManagerFactory managerFactory; + + private static final int THRESHOLD_LENGTH = 1000; + + public ExportCommandExecutor(AdminCliOptionsParser.ExportCommandOptions exportCommandOptions) { + super(exportCommandOptions.commonOptions.logLevel, exportCommandOptions.commonOptions.conf); + + this.exportCommandOptions = exportCommandOptions; + + this.dataRelease = exportCommandOptions.dataRelease; + this.token = exportCommandOptions.token; + + this.output = Paths.get(exportCommandOptions.output); + + this.database = exportCommandOptions.database; + String[] splits = database.split("_"); + this.species = splits[1]; + this.assembly = splits[2]; + + if (exportCommandOptions.data.equals("all")) { + this.dataToExport = new String[]{EtlCommons.GENOME_DATA, EtlCommons.GENE_DATA, EtlCommons.REFSEQ_DATA, + EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA, + EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA, + EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA, + OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PUBMED_DATA}; + } else { + this.dataToExport = exportCommandOptions.data.split(","); + } + } + + /** + * Parse specific 'data' command options. 
+ * + * @throws CellBaseException CellBase exception + */ + public void execute() throws CellBaseException { + checkDataRelease(); + + logger.info("Exporting from data release {}", dataRelease); + this.managerFactory = new CellBaseManagerFactory(configuration); + + if (exportCommandOptions.data != null) { + // Get genes + List<String> geneNames = Arrays.asList(exportCommandOptions.gene.split(",")); + GeneManager geneManager = managerFactory.getGeneManager(species, assembly); + GeneQuery geneQuery = new GeneQuery(); + geneQuery.setNames(geneNames); + geneQuery.setSource(Collections.singletonList("ensembl")); + geneQuery.setDataRelease(dataRelease); + List<Gene> genes; + try { + CellBaseDataResult<Gene> geneResults = geneManager.search(geneQuery); + genes = geneResults.getResults(); + } catch (QueryException | IllegalAccessException e) { + throw new CellBaseException(e.getMessage()); + } + if (CollectionUtils.isEmpty(genes)) { + throw new CellBaseException("None gene retrieved from: " + exportCommandOptions.gene); + } + // Extract regions from genes: split each padded gene span into inclusive, non-overlapping chunks + int maxRegionSize = 50000; + List<Region> regions = new ArrayList<>(); + for (Gene gene : genes) { + int start = Math.max(1, gene.getStart() - THRESHOLD_LENGTH); + int end = gene.getEnd() + THRESHOLD_LENGTH; + logger.info("Gene {}: bounds {}:{}-{}", gene.getName(), gene.getChromosome(), start, end); + for (int pos = start; pos <= end; pos += maxRegionSize) { + regions.add(new Region(gene.getChromosome(), pos, Math.min(end, pos + maxRegionSize - 1))); + } + } + + // Add input regions + if (StringUtils.isNotEmpty(exportCommandOptions.region)) { + regions.addAll(Region.parseRegions(exportCommandOptions.region)); + } + + logger.info("{} regions: {}", regions.size(), StringUtils.join(regions.stream().map(r -> r.toString()) + .collect(Collectors.toList()), ",")); + + List<Variant> variants = new ArrayList<>(); + if (areVariantsNeeded()) { + variants = getVariants(regions); + } + + for (String loadOption : dataToExport) { + try { + int counter = 0; + 
logger.info("Exporting '{}' data...", loadOption); + long dbTimeStart = System.currentTimeMillis(); + switch (loadOption) { + case EtlCommons.GENOME_DATA: { + GenomeManager genomeManager = managerFactory.getGenomeManager(species, assembly); + + // Genome sequence + CellBaseDataResult results = genomeManager.getGenomeSequenceRawData(regions, dataRelease); + counter = writeExportedData(results.getResults(), "genome_sequence", output); + + // Genome info + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output); + results = genomeManager.getGenomeInfo(QueryOptions.empty(), dataRelease); + writeExportedData(results.getResults(), "genome_info", serializer); + serializer.close(); + break; + } + case EtlCommons.GENE_DATA: { + // Export data + counter = writeExportedData(genes, "gene", output); + break; + } + case EtlCommons.REFSEQ_DATA: { + // Export data + geneQuery.setSource(Collections.singletonList("refseq")); + geneQuery.setDataRelease(dataRelease); + + CellBaseDataResult results = geneManager.search(geneQuery); + counter = writeExportedData(results.getResults(), "refseq", output); + break; + } + case EtlCommons.VARIATION_DATA: { + // Export data + counter = writeExportedData(variants, "variation_chr_all", output); + break; + } + case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: { + // Export data + VariantManager variantManager = managerFactory.getVariantManager(species, assembly); + CellBaseDataResult results = variantManager.getFunctionalScoreRegion(regions, null, + dataRelease); + counter = writeExportedData(results.getResults(), "cadd", output); + break; + } + case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: { + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output); + ProteinManager proteinManager = managerFactory.getProteinManager(species, assembly); + Map> positionMap = new HashMap<>(); + for (Variant variant : variants) { + if (!positionMap.containsKey(variant.getChromosome())) { + positionMap.put(variant.getChromosome(), 
new ArrayList<>()); + } + positionMap.get(variant.getChromosome()).add(variant.getStart()); + if (positionMap.get(variant.getChromosome()).size() >= 200) { + CellBaseDataResult results = proteinManager + .getMissenseVariantFunctionalScores(variant.getChromosome(), + positionMap.get(variant.getChromosome()), null, dataRelease); + counter += writeExportedData(results.getResults(), "missense_variation_functional_score", serializer); + positionMap.put(variant.getChromosome(), new ArrayList<>()); + } + } + + // Process map + for (Map.Entry> entry : positionMap.entrySet()) { + if (CollectionUtils.isEmpty(entry.getValue())) { + continue; + } + CellBaseDataResult results = proteinManager + .getMissenseVariantFunctionalScores(entry.getKey(), entry.getValue(), null, dataRelease); + counter += writeExportedData(results.getResults(), "missense_variation_functional_score", serializer); + } + serializer.close(); + break; + } + case EtlCommons.CONSERVATION_DATA: { + // Export data + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output); + GenomeManager genomeManager = managerFactory.getGenomeManager(species, assembly); + CellBaseDataResult results = genomeManager.getConservationScoreRegion(regions, null, + dataRelease); + for (GenomicScoreRegion scoreRegion : results.getResults()) { + String chromosome = scoreRegion.getChromosome(); + if (chromosome.equals("M")) { + chromosome = "MT"; + } + serializer.serialize(scoreRegion, "conservation_" + chromosome); + counter++; + } + serializer.close(); + break; + } + case EtlCommons.REGULATION_DATA: { + RegulatoryManager regulatoryManager = managerFactory.getRegulatoryManager(species, assembly); + RegulationQuery query = new RegulationQuery(); + query.setRegions(regions); + query.setDataRelease(dataRelease); + CellBaseDataResult results = regulatoryManager.search(query); + counter = writeExportedData(results.getResults(), "regulatory_region", output); + break; + } + case EtlCommons.PROTEIN_DATA: { + ProteinManager 
proteinManager = managerFactory.getProteinManager(species, assembly); + ProteinQuery query = new ProteinQuery(); + query.setGenes(geneNames); + query.setDataRelease(dataRelease); + CellBaseDataResult results = proteinManager.search(query); + counter = writeExportedData(results.getResults(), "protein", output); + break; + } + case EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA: { + ProteinManager proteinManager = managerFactory.getProteinManager(species, assembly); + Map> transcriptsMap = new HashMap<>(); + for (Gene gene : genes) { + for (Transcript transcript : gene.getTranscripts()) { + if (!transcriptsMap.containsKey(transcript.getChromosome())) { + transcriptsMap.put(transcript.getChromosome(), new ArrayList<>()); + } + transcriptsMap.get(transcript.getChromosome()).add(transcript.getId().split("\\.")[0]); + } + } + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output); + for (Map.Entry> entry : transcriptsMap.entrySet()) { + CellBaseDataResult results = proteinManager.getProteinSubstitutionRawData(entry.getValue(), null, + dataRelease); + counter += writeExportedData(results.getResults(), "prot_func_pred_chr_" + entry.getKey(), output); + } + serializer.close(); + break; + } + case EtlCommons.CLINICAL_VARIANTS_DATA: { + counter = exportClinicalVariantData(regions); + break; + } + case EtlCommons.REPEATS_DATA: { + // Export data + RepeatsManager repeatsManager = managerFactory.getRepeatsManager(species, assembly); + RepeatsQuery repeatsQuery = new RepeatsQuery(); + repeatsQuery.setRegions(regions); + repeatsQuery.setDataRelease(dataRelease); + CellBaseDataResult results = repeatsManager.search(repeatsQuery); + counter = writeExportedData(results.getResults(), "repeats", output); + break; + } + case OBO_DATA: { + counter = exportOntologyData(); + break; + } + case EtlCommons.SPLICE_SCORE_DATA: { + counter = exportSpliceScoreData(variants); + break; + } +// case EtlCommons.PUBMED_DATA: { +// // Load data, create index and update release +// 
loadPubMed(); +// break; +// } + default: + logger.warn("Not valid 'data'. We should not reach this point"); + break; + } + long dbTimeEnd = System.currentTimeMillis(); + logger.info("Exported {} '{}' items in {} ms!", counter, loadOption, dbTimeEnd - dbTimeStart); + } catch (IllegalAccessException | IOException | QueryException e) { + e.printStackTrace(); + } + } + } + } + + private int exportClinicalVariantData(List regions) throws CellBaseException, QueryException, IllegalAccessException, + IOException { + String baseFilename = CLINICAL_VARIANTS_DATA + ".full"; + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output, baseFilename); + ClinicalManager clinicalManager = managerFactory.getClinicalManager(species, assembly); + ClinicalVariantQuery query = new ClinicalVariantQuery(); + query.setDataRelease(dataRelease); + query.setToken(token); + int counter = 0; + for (Region region : regions) { + query.setRegions(Collections.singletonList(region)); + CellBaseDataResult results = clinicalManager.search(query); + logger.info("{} retrieved clinical variant data from region {}", results.getNumResults(), region); + for (Variant variant : results.getResults()) { + serializer.serialize(variant); + counter++; + if (counter % 1000 == 0) { + logger.info("{} clinical variants written....", counter); + } + } + } + serializer.close(); + return counter; + } + + private int exportOntologyData() throws CellBaseException, IOException { + int counter = 0; + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output, OBO_DATA); + OntologyManager ontologyManager = managerFactory.getOntologyManager(species, assembly); + CellBaseIterator iterator = ontologyManager.iterator(new OntologyQuery()); + while (iterator.hasNext()) { + serializer.serialize(iterator.next()); + counter++; + if (counter % 5000 == 0) { + logger.info("{} ontology terms written....", counter); + } + } + serializer.close(); + return counter; + } + + private int 
exportSpliceScoreData(List variants) throws CellBaseException, IOException { + int counter = 0; + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output.resolve("splice_score")); + serializer.getOutdir().resolve("mmsplice").toFile().mkdirs(); + serializer.getOutdir().resolve("spliceai").toFile().mkdirs(); + VariantManager variantManager = managerFactory.getVariantManager(species, assembly); + int maxNumVariants = 200; + for (int start = 0; start < variants.size(); start += maxNumVariants) { + List vars = variants.subList(start, Math.min(start + maxNumVariants, variants.size())); + logger.info("Searching splice scores in variants [{}..{})", start, Math.min(start + maxNumVariants, variants.size())); + List> resultList = variantManager.getSpliceScoreVariant(vars, null, + dataRelease); + for (CellBaseDataResult result : resultList) { + for (SpliceScore spliceScore : result.getResults()) { + switch (spliceScore.getSource()) { + case "MMSplice": { + serializer.serialize(spliceScore, "mmsplice/splice_score_all"); + counter++; + break; + } + case "SpliceAI": { + serializer.serialize(spliceScore, "spliceai/splice_score_all"); + counter++; + break; + } + default: + logger.info("Splice score unknown, skipping it!"); + break; + } + if (counter % 10000 == 0) { + logger.info("{} splice scores written....", counter); + } + } + } + } + serializer.close(); + return counter; + } + + private List getVariants(List regions) throws CellBaseException { + List variants = new ArrayList<>(); + VariantManager variantManager = managerFactory.getVariantManager(species, assembly); + VariantQuery query = new VariantQuery(); + query.setDataRelease(dataRelease); + int batchSize = 10; + for (Region region : regions) { + query.setRegions(Collections.singletonList(region)); + try { + List results = variantManager.search(query).getResults(); + logger.info("{} retrieved variants from region {}", results.size(), region); + variants.addAll(results); + } catch (QueryException | 
IllegalAccessException e) { + throw new CellBaseException("Searching variants: " + e.getMessage()); + } + } + logger.info("Total variants retrieved: {}", variants.size()); + return variants; + } + + private boolean areVariantsNeeded() { + for (String data : dataToExport) { + if (data.equals(EtlCommons.VARIATION_DATA) + || data.equals(EtlCommons.MISSENSE_VARIATION_SCORE_DATA) + || data.equals(EtlCommons.SPLICE_SCORE_DATA)) { + // || data.equals(EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA)) { + return true; + } + } + return false; + } + + private int writeExportedData(List objects, String baseFilename, CellBaseFileSerializer serializer) throws IOException { + int counter = 0; + for (Object object : objects) { + serializer.serialize(object, baseFilename); + counter++; + } + return counter; + } + + private int writeExportedData(List objects, String baseFilename, Path outDir) throws IOException { + checkPath(outDir); + int counter = 0; + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(outDir); + for (Object object : objects) { + serializer.serialize(object, baseFilename); + counter++; + } + serializer.close(); + return counter; + } + + private int writeExportedDataList(List> results, String baseFilename, Path outDir) throws IOException { + checkPath(outDir); + int counter = 0; + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(outDir); + for (CellBaseDataResult result : results) { + for (Object object : result.getResults()) { + serializer.serialize(object, baseFilename); + counter++; + } + } + serializer.close(); + return counter; + } + + private void checkPath(Path outDir) throws IOException { + if (!outDir.toFile().exists()) { + if (!outDir.toFile().mkdirs()) { + throw new IOException("Impossible to create output directory: " + outDir); + } + } + } + + private void checkDataRelease() throws CellBaseException { + // Check data release + DataReleaseManager dataReleaseManager = new DataReleaseManager(database, configuration); + 
CellBaseDataResult dataReleaseResults = dataReleaseManager.getReleases(); + if (CollectionUtils.isEmpty(dataReleaseResults.getResults())) { + throw new CellBaseException("No data releases are available"); + } + + List dataReleaseList = new ArrayList<>(); + for (DataRelease dr : dataReleaseResults.getResults()) { + if (dr.getRelease() == dataRelease) { + return; + } + dataReleaseList.add(dr.getRelease()); + } + + throw new CellBaseException("Invalid data release: " + dataRelease + ". Valid data releases are: " + + StringUtils.join(dataReleaseList, ",")); + } +} diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java index 2eed03257d..5a8fd9417b 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java @@ -80,7 +80,8 @@ public LoadCommandExecutor(AdminCliOptionsParser.LoadCommandOptions loadCommandO EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA, EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA, EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA, - EtlCommons.OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PUBMED_DATA}; + EtlCommons.OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PUBMED_DATA, + EtlCommons.PHARMACOGENOMICS_DATA}; } else { loadOptions = loadCommandOptions.data.split(","); } @@ -125,7 +126,11 @@ public void execute() throws CellBaseException { switch (loadOption) { case EtlCommons.GENOME_DATA: { // Load data - loadIfExists(input.resolve("genome_info.json"), "genome_info"); + if (input.resolve("genome_info.json").toFile().exists()) { + 
loadIfExists(input.resolve("genome_info.json"), "genome_info"); + } else { + loadIfExists(input.resolve("genome_info.json.gz"), "genome_info"); + } loadIfExists(input.resolve("genome_sequence.json.gz"), "genome_sequence"); // Create index @@ -289,6 +294,11 @@ public void execute() throws CellBaseException { loadPubMed(); break; } + case EtlCommons.PHARMACOGENOMICS_DATA: { + // Load data, create index and update release + loadPharmacogenomica(); + break; + } default: logger.warn("Not valid 'data'. We should not reach this point"); break; @@ -546,12 +556,39 @@ private void loadPubMed() throws CellBaseException { // Update release (collection and sources) List sources = Collections.singletonList(pubmedPath.resolve(EtlCommons.PUBMED_VERSION_FILENAME)); - dataReleaseManager.update(dataRelease, "pubmed", EtlCommons.REPEATS_DATA, sources); + dataReleaseManager.update(dataRelease, EtlCommons.PUBMED_DATA, EtlCommons.PUBMED_DATA, sources); } else { logger.warn("PubMed folder {} not found", pubmedPath); } } + private void loadPharmacogenomica() throws IOException, CellBaseException { + Path pharmaPath = input.resolve(EtlCommons.PHARMACOGENOMICS_DATA); + + if (!Files.exists(pharmaPath)) { + logger.warn("Pharmacogenomics folder {} not found to load", pharmaPath); + return; + } + + // Load data + Path pharmaJsonPath = pharmaPath.resolve(EtlCommons.PHARMACOGENOMICS_DATA + ".json.gz"); + logger.info("Loading file '{}'", pharmaJsonPath.toFile().getName()); + try { + loadRunner.load(pharmaJsonPath, EtlCommons.PHARMACOGENOMICS_DATA, dataRelease); + } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException + | IllegalAccessException | ExecutionException | IOException | InterruptedException | CellBaseException + | LoaderException e) { + logger.error("Error loading file '{}': {}", pharmaJsonPath.toFile().getName(), e.toString()); + } + + // Create index + createIndex(EtlCommons.PHARMACOGENOMICS_DATA); + + // Update release (collection 
and sources) + List sources = Collections.singletonList(pharmaPath.resolve(EtlCommons.PHARMGKB_VERSION_FILENAME)); + dataReleaseManager.update(dataRelease, EtlCommons.PHARMACOGENOMICS_DATA, EtlCommons.PHARMACOGENOMICS_DATA, sources); + } + private void createIndex(String collection) { if (!createIndexes) { return; diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/CellBaseCliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/CellBaseCliOptionsParser.java index e73260e7b3..91101246b9 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/CellBaseCliOptionsParser.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/CellBaseCliOptionsParser.java @@ -145,30 +145,30 @@ public class VariantAnnotationCommandOptions { required = false, arity = 1) public String referenceFasta; - @Parameter(names = {"--skip-normalize"}, description = "Skip normalization of input variants. Should not be used" + @Parameter(names = {"--normalize"}, description = "Normalize input variants. Should be used" + " when the input (-i, --input-file) is a VCF file. Normalization includes splitting multi-allele positions " + "read from a VCF, allele trimming and decomposing MNVs. Has" + " no effect if reading variants from a CellBase variation collection " + "(\"--input-variation-collection\") or running a variant annotation benchmark (\"--benchmark\"): in" + " these two cases variant normalization is never carried out.", required = false, arity = 0) - public boolean skipNormalize = false; + public boolean normalize = false; - @Parameter(names = {"--skip-decompose"}, description = "Use this flag to avoid decomposition of " + @Parameter(names = {"--decompose"}, description = "Use this flag to decompose of " + "multi-nucleotide-variants (MNVs) / block substitutions as part of the normalization process. 
If this" - + " flag is NOT activated, as a step during the normalization process reference and alternate alleles" + + " flag is activated, as a step during the normalization process reference and alternate alleles" + " from MNVs/Block substitutions will be aligned and decomposed into their forming simple variants. " - + " This flag has no effect if --skip-normalize is present.", + + " This flag has no effect without normalization.", required = false, arity = 0) - public boolean skipDecompose = false; + public boolean decompose = false; - @Parameter(names = {"--skip-left-align"}, description = "Use this flag to avoid left alignment as part of the" + @Parameter(names = {"--left-align"}, description = "Use this flag left align as part of the" + " normalization process. If this" - + " flag is NOT activated, as a step during the normalization process will left align the variant with" + + " flag is activated, as a step during the normalization process will left align the variant with" + " respect to the reference genome." - + " This flag has no effect if --skip-normalize is present.", + + " This flag has no effect without normalization.", required = false, arity = 0) - public boolean skipLeftAlign = false; + public boolean leftAlign = false; // TODO: remove "phased" CLI parameter in next release. 
Default behavior from here onwards should be // ignorePhase = false diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java index a0c3ca3008..d6e19c61e2 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java @@ -564,7 +564,7 @@ private void checkParameters() throws IOException, CellBaseException { FileUtils.checkDirectory(input); normalize = false; } else { - normalize = !variantAnnotationCommandOptions.skipNormalize; + normalize = variantAnnotationCommandOptions.normalize; FileUtils.checkFile(input); inputFormat = getFileFormat(input); } @@ -574,8 +574,8 @@ private void checkParameters() throws IOException, CellBaseException { } parsePhaseConfiguration(); - decompose = !variantAnnotationCommandOptions.skipDecompose; - leftAlign = !variantAnnotationCommandOptions.skipLeftAlign; + decompose = variantAnnotationCommandOptions.decompose; + leftAlign = variantAnnotationCommandOptions.leftAlign; // Update serverQueryOptions serverQueryOptions.put("checkAminoAcidChange", variantAnnotationCommandOptions.checkAminoAcidChange); @@ -621,7 +621,7 @@ private void checkParameters() throws IOException, CellBaseException { // to the server. Actual normalization and decomposition options are set and processed here in the server code // using this.decompose and this.normalize fields. 
serverQueryOptions.add("normalize", false); - serverQueryOptions.add("skipDecompose", true); + serverQueryOptions.add("decompose", false); if (variantAnnotationCommandOptions.include != null && !variantAnnotationCommandOptions.include.isEmpty()) { serverQueryOptions.add("include", variantAnnotationCommandOptions.include); diff --git a/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java b/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java index 9ce1fcb3d2..22b93d33bd 100644 --- a/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java +++ b/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java @@ -935,9 +935,9 @@ private void cleanUp() throws IOException { variantAnnotationCommandOptions.benchmark = false; variantAnnotationCommandOptions.phased = true; variantAnnotationCommandOptions.input = inputFilename; - variantAnnotationCommandOptions.skipNormalize = false; - variantAnnotationCommandOptions.skipDecompose = !decompose; - variantAnnotationCommandOptions.skipLeftAlign = false; + variantAnnotationCommandOptions.normalize = true; + variantAnnotationCommandOptions.decompose = decompose; + variantAnnotationCommandOptions.leftAlign = true; variantAnnotationCommandOptions.output = Paths.get(OUTPUT_FILENAME).toString(); variantAnnotationCommandOptions.outputFormat = "json"; variantAnnotationCommandOptions.include = "cytobands"; diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 034ba0b665..7dd719a482 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.5.0 + 5.7.0-SNAPSHOT ../pom.xml diff --git a/cellbase-client/src/main/resources/client-configuration.yml b/cellbase-client/src/main/resources/client-configuration.yml index f13e1f33fa..5db9eeaca8 100644 --- a/cellbase-client/src/main/resources/client-configuration.yml +++ 
b/cellbase-client/src/main/resources/client-configuration.yml @@ -1,10 +1,10 @@ --- -version: "v5" +version: "v5.1" defaultSpecies: "hsapiens" ## These are the RESTful configurations parameters rest: hosts: - - "bioinfodev.hpc.cam.ac.uk/cellbase-5.0.0" - timeout: 2000 + - "https://ws.zettagenomics.com/cellbase" + timeout: 10000 diff --git a/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java b/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java index 867d32ffd2..24c6486a04 100644 --- a/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java +++ b/cellbase-client/src/test/java/org/opencb/cellbase/client/rest/VariantClientTest.java @@ -57,6 +57,7 @@ public void getAnnotationId(CellBaseClient cellBaseClient) throws Exception { @ParameterizedTest @ArgumentsSource(CellbaseClientProvider.class) + @Disabled public void getAnnotations(CellBaseClient cellBaseClient) throws Exception { CellBaseDataResponse annotationsGet; diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 0d4d0fba8d..30a4d6ec63 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.5.0 + 5.7.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java index 3d24a3050d..a07f6daa64 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/ParamConstants.java @@ -119,6 +119,7 @@ public Type type() { public static final String ASSEMBLY_DESCRIPTION = "Set the reference genome assembly, e.g. grch38. For a full list of " + "potentially available assemblies, please call the endpoint 'meta/species'"; + public static final String COUNT_DESCRIPTION = "Get the total number of results matching the query. 
"; public static final String SPLIT_RESULT_DESCRIPTION = "If TRUE, each id provided will be a separate result, even if no records are " @@ -174,6 +175,7 @@ public Type type() { + "e.g. Diabetes mellitus,histone kinase activity"; public static final String ONTOLOGY_NAMESPACES = "Comma separated list of namespaces, e.g. human_phenotype,biological_process. " + "For a full list of potentially available namespaces, please call the endpoint 'feature/ontology/distinct?field=namespace'"; + public static final String ONTOLOGY_SOURCES = "Name of ontology, e.g. HP or GO"; public static final String ONTOLOGY_SYNONYMS = "Comma separated list of synonyms, e.g. Cerebellar ataxia"; public static final String ONTOLOGY_XREFS = "Comma separated list of cross references, e.g. MSH:D002524"; @@ -299,6 +301,7 @@ public Type type() { + " Possible values are clinvar or cosmic"; public static final String SOURCE_PARAM = "source"; public static final String SEQUENCE_ONTOLOGY_DESCRIPTION = "Comma separated list of consequence types, " + + "e.g. missense_variant. Exact text matches will be returned. To get list of searchable consequence types, please call" + " the endpoint 'genomic/variant/consequenceTypes'"; public static final String SEQUENCE_ONTOLOGY_PARAM = "consequenceType"; @@ -332,6 +335,7 @@ public Type type() { public static final String MODE_INHERITANCE_PARAM = "modeInheritance"; public static final String ALLELE_ORIGIN_DESCRIPTION = "Comma separated list of allele origin labels. To get the list of searchable " + "allele origin labels, please call the endpoint 'clinical/variant/alleleOriginLabels'"; + public static final String ALLELE_ORIGIN_PARAM = "alleleOrigin"; // --------------------------------------------- @@ -340,9 +344,11 @@ public Type type() { + "19:45411941:T:C,14:38679764:-:GATCTG,1:6635210:G:-," + "2:114340663:GCTGGGCATCCT:ACTGGGCATCCT,1:816505-825225:"; public static final String NORMALISE = "Boolean to indicate whether input variants shall be " - + "normalized or not. 
Normalization process does NOT include decomposing "; - public static final String SKIP_DECOMPOSE = "Boolean to indicate whether input MNVs should be " - + "decomposed or not as part of the normalisation step. MNV decomposition is strongly encouraged."; + + "normalized or not. Normalization process does NOT include decomposing nor left alignment."; + public static final String DECOMPOSE = "Boolean to indicate whether input MNVs should be " + + "decomposed or not as part of the normalisation step."; + public static final String LEFT_ALIGN = "Boolean to indicate whether input ambiguous INDELS should be " + + "left aligned or not as part of the normalisation step."; public static final String IGNORE_PHASE = "Boolean to indicate whether phase data should be taken into account."; public static final String PHASED = "DEPRECATED. Will be removed in next release. Please, use ignorePhase instead. " + " Boolean to indicate whether phase should be considered during the annotation process"; @@ -378,6 +384,7 @@ public Type type() { public static final String REGULATION_FEATURE_TYPES = "Comma separated list of regulatory region types, e.g.: " + "TF_binding_site,histone_acetylation_site. Exact text matches will be returned."; + public static final String CELLTYPE = "Type of cell."; // --------------------------------------------- diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PharmaChemicalQuery.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PharmaChemicalQuery.java new file mode 100644 index 0000000000..c2fec9ceb3 --- /dev/null +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PharmaChemicalQuery.java @@ -0,0 +1,226 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.core.api; + +import org.apache.commons.collections4.CollectionUtils; +import org.opencb.cellbase.core.api.query.AbstractQuery; +import org.opencb.cellbase.core.api.query.QueryException; +import org.opencb.cellbase.core.api.query.QueryParameter; + +import java.util.List; +import java.util.Map; + +public class PharmaChemicalQuery extends AbstractQuery { + + @QueryParameter(id = "id") + private List ids; + + @QueryParameter(id = "name") + private List names; + + @QueryParameter(id = "source", allowedValues = {"PharmGKB"}) + private List sources; + + @QueryParameter(id = "types", alias = {"type"}) + private List types; + + @QueryParameter(id = "variants.variantId", alias = {"variant"}) + private List variants; + + @QueryParameter(id = "variants.location", alias = {"location"}) + private List locations; + + @QueryParameter(id = "variants.chromosome", alias = {"chromosome"}) + private List chromosomes; + + @QueryParameter(id = "variants.haplotypes", alias = {"haplotype"}) + private List hapolotypes; + + @QueryParameter(id = "variants.geneNames", alias = {"geneName"}) + private List geneNames; + + @QueryParameter(id = "variants.phenotypes", alias = {"phenotype"}) + private List phenotypes; + + @QueryParameter(id = "variants.phenotypeTypes", alias = {"phenotypeType"}) + private List phenotypeTypes; + + @QueryParameter(id = "variants.confidence", alias = {"confidence"}) + private List confidences; + + @QueryParameter(id = "variants.evidences.pubmed", alias = {"pubmedId"}) + private List pubmedIds; + + public 
PharmaChemicalQuery() { + } + + public PharmaChemicalQuery(Map params) throws QueryException { + super(params); + + objectMapper.readerForUpdating(this); + objectMapper.readerFor(PharmaChemicalQuery.class); + objectWriter = objectMapper.writerFor(PharmaChemicalQuery.class); + } + + @Override + protected void validateQuery() throws QueryException { + if (CollectionUtils.isNotEmpty(variants)) { + for (String variant : variants) { + if (!variant.startsWith("rs")) { + throw new QueryException("Invalid variant ID: '" + variant + "'; it has to start with rs"); + } + } + } + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("PharmaChemicalQuery{"); + sb.append("ids=").append(ids); + sb.append(", names=").append(names); + sb.append(", sources=").append(sources); + sb.append(", types=").append(types); + sb.append(", variants=").append(variants); + sb.append(", locations=").append(locations); + sb.append(", chromosomes=").append(chromosomes); + sb.append(", hapolotypes=").append(hapolotypes); + sb.append(", geneNames=").append(geneNames); + sb.append(", phenotypes=").append(phenotypes); + sb.append(", phenotypeTypes=").append(phenotypeTypes); + sb.append(", confidences=").append(confidences); + sb.append(", pubmedIds=").append(pubmedIds); + sb.append('}'); + return sb.toString(); + } + + public List getIds() { + return ids; + } + + public PharmaChemicalQuery setIds(List ids) { + this.ids = ids; + return this; + } + + public List getNames() { + return names; + } + + public PharmaChemicalQuery setNames(List names) { + this.names = names; + return this; + } + + public List getSources() { + return sources; + } + + public PharmaChemicalQuery setSources(List sources) { + this.sources = sources; + return this; + } + + public List getTypes() { + return types; + } + + public PharmaChemicalQuery setTypes(List types) { + this.types = types; + return this; + } + + public List getVariants() { + return variants; + } + + public PharmaChemicalQuery 
setVariants(List variants) { + this.variants = variants; + return this; + } + + public List getLocations() { + return locations; + } + + public PharmaChemicalQuery setLocations(List locations) { + this.locations = locations; + return this; + } + + public List getChromosomes() { + return chromosomes; + } + + public PharmaChemicalQuery setChromosomes(List chromosomes) { + this.chromosomes = chromosomes; + return this; + } + + public List getHapolotypes() { + return hapolotypes; + } + + public PharmaChemicalQuery setHapolotypes(List hapolotypes) { + this.hapolotypes = hapolotypes; + return this; + } + + public List getGeneNames() { + return geneNames; + } + + public PharmaChemicalQuery setGeneNames(List geneNames) { + this.geneNames = geneNames; + return this; + } + + public List getPhenotypes() { + return phenotypes; + } + + public PharmaChemicalQuery setPhenotypes(List phenotypes) { + this.phenotypes = phenotypes; + return this; + } + + public List getPhenotypeTypes() { + return phenotypeTypes; + } + + public PharmaChemicalQuery setPhenotypeTypes(List phenotypeTypes) { + this.phenotypeTypes = phenotypeTypes; + return this; + } + + public List getConfidences() { + return confidences; + } + + public PharmaChemicalQuery setConfidences(List confidences) { + this.confidences = confidences; + return this; + } + + public List getPubmedIds() { + return pubmedIds; + } + + public PharmaChemicalQuery setPubmedIds(List pubmedIds) { + this.pubmedIds = pubmedIds; + return this; + } +} diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/query/AbstractQuery.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/query/AbstractQuery.java index 50b0451e5f..c3d2b4e6ae 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/query/AbstractQuery.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/query/AbstractQuery.java @@ -122,7 +122,10 @@ public void updateParams(ObjectMap objectMap) { public void updateParams(Map uriParams) { 
classAttributesToType = getClassAttributesToType(); annotations = getAnnotations(); + try { + validateParams(uriParams, classAttributesToType, annotations); + Map objectHashMap = new HashMap<>(); for (Map.Entry> entry : classAttributesToType.entrySet()) { String fieldNameDotNotation = null; @@ -172,11 +175,48 @@ public void updateParams(Map uriParams) { } } objectMapper.updateValue(this, objectHashMap); - } catch (JsonProcessingException e) { + } catch (JsonProcessingException | QueryException e) { throw new IllegalArgumentException(e); } } + private void validateParams(Map uriParams, Map> classAttributesToType, + Map annotations) throws QueryException { + for (String uriParamName : uriParams.keySet()) { + boolean validUriParamName = false; + for (Map.Entry> entry : classAttributesToType.entrySet()) { + String fieldNameDotNotation = null; + String[] fieldAliases = new String[0]; + String fieldNameCamelCase = entry.getKey(); + QueryParameter queryParameter = annotations.get(fieldNameCamelCase); + if (queryParameter != null) { + fieldNameDotNotation = queryParameter.id(); + fieldAliases = queryParameter.alias(); + } + if (fieldNameDotNotation == null) { + // field has no annotation + continue; + } + String s = fieldNameDotNotation.replace("\\.", "\\\\."); + if (uriParamName.equals(s)) { + validUriParamName = true; + break; + } else { + for (String alias : fieldAliases) { + s = alias.replace("\\.", "\\\\."); + if (uriParamName.equals(s)) { + validUriParamName = true; + break; + } + } + } + } + if (!validUriParamName) { + throw new QueryException("Unknown query parameter '" + uriParamName + "'"); + } + } + } + /** * For this Query class, returns a map of class attributes and the types of those attributes. 
* diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java index e05b19a065..ee4216f560 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java @@ -73,6 +73,7 @@ public class DownloadProperties { private URLProperties goAnnotation; private URLProperties revel; private URLProperties pubmed; + private URLProperties pharmGKB; public EnsemblProperties getEnsembl() { return ensembl; @@ -475,6 +476,15 @@ public DownloadProperties setPubmed(URLProperties pubmed) { return this; } + public URLProperties getPharmGKB() { + return pharmGKB; + } + + public DownloadProperties setPharmGKB(URLProperties pharmGKB) { + this.pharmGKB = pharmGKB; + return this; + } + public DownloadProperties setRefSeqProteinFasta(URLProperties refSeqProteinFasta) { this.refSeqProteinFasta = refSeqProteinFasta; return this; diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 343f14abd4..0f8d199118 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -1,5 +1,5 @@  -version: "v5" +version: "${CELLBASE.VERSION}" apiVersion: "${project.version}" wiki: https://github.com/opencb/cellbase/wiki maintenanceFlagFile: "/tmp/maintenance" @@ -164,6 +164,19 @@ download: host: https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/ files: - pubmed22n[1..1114..4].xml.gz + pharmGKB: + host: https://www.pharmgkb.org/downloads + version: v1 + files: + - https://api.pharmgkb.org/v1/download/file/data/genes.zip + - https://api.pharmgkb.org/v1/download/file/data/chemicals.zip + - https://api.pharmgkb.org/v1/download/file/data/variants.zip + - https://api.pharmgkb.org/v1/download/file/data/guidelineAnnotations.json.zip + - 
https://api.pharmgkb.org/v1/download/file/data/variantAnnotations.zip + - https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip + - https://api.pharmgkb.org/v1/download/file/data/clinicalVariants.zip + - https://api.pharmgkb.org/v1/download/file/data/drugLabels.zip + - https://api.pharmgkb.org/v1/download/file/data/relationships.zip species: vertebrates: - id: hsapiens diff --git a/cellbase-core/src/test/java/org/opencb/cellbase/core/GeneQueryTest.java b/cellbase-core/src/test/java/org/opencb/cellbase/core/GeneQueryTest.java index 0e0c8dbf24..d0d2bf263d 100644 --- a/cellbase-core/src/test/java/org/opencb/cellbase/core/GeneQueryTest.java +++ b/cellbase-core/src/test/java/org/opencb/cellbase/core/GeneQueryTest.java @@ -144,11 +144,11 @@ public void testCount() throws QueryException { } @Test - public void testValidate() throws QueryException, NoSuchFieldException, IllegalAccessException { + public void testValidate() throws QueryException { paramMap.put("id", "1"); paramMap.put("name", "42"); paramMap.put("biotype", "a,b,c"); - paramMap.put("annotation.drugs.gene", "x,y"); + paramMap.put("annotation.drugs.drugName", "x,y"); paramMap.put("limit", "11"); paramMap.put("skip", "-1"); diff --git a/cellbase-core/src/test/resources/configuration.yml b/cellbase-core/src/test/resources/configuration.yml index c9a5d98743..64ce73d692 100644 --- a/cellbase-core/src/test/resources/configuration.yml +++ b/cellbase-core/src/test/resources/configuration.yml @@ -1,4 +1,4 @@ -version: v5 +version: "${CELLBASE.VERSION}" apiVersion: "${project.version}" wiki: https://github.com/opencb/cellbase/wiki maintenanceFlagFile: "/tmp/maintenance" diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index f862a80d5f..9159359ffe 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.5.0 + 5.7.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java
b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index dd2b6caff9..4396f0c2f1 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -48,6 +48,11 @@ public class EtlCommons { public static final String CLINICAL_VARIANTS_DATA = "clinical_variants"; public static final String SPLICE_SCORE_DATA = "splice_score"; + public static final String PHARMACOGENOMICS_DATA = "pharmacogenomics"; + public static final String PHARMGKB_NAME = "PharmGKB"; + public static final String PHARMGKB_DATA = "pharmgkb"; + public static final String PHARMGKB_VERSION_FILENAME = "pharmgkbVersion.json"; + public static final String CLINICAL_VARIANTS_FOLDER = "clinicalVariant"; public static final String CLINVAR_VERSION = "2022.11"; public static final String CLINVAR_DATE = "2022-11"; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java new file mode 100644 index 0000000000..1f7a4836ca --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java @@ -0,0 +1,1016 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.lib.builders; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.core.Xref; +import org.opencb.biodata.models.pharma.*; +import org.opencb.biodata.models.pharma.guideline.BasicObject; +import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.commons.utils.FileUtils; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import java.util.stream.Collectors; + +import static org.opencb.cellbase.lib.EtlCommons.*; + +public class PharmGKBBuilder extends CellBaseBuilder { + + private final Path inputDir; + private final Path pharmGKBDir; + + private static final String CHEMICALS_BASENAME = "chemicals"; + private static final String CHEMICALS_TSV_FILENAME = "chemicals.tsv"; + + private static final String VARIANTS_BASENAME = "variants"; + private static final String VARIANTS_TSV_FILENAME = "variants.tsv"; + + private static final String GENES_BASENAME = "genes"; + private static final String GENES_TSV_FILENAME = "genes.tsv"; + + private static final String CLINICAL_ANNOTATIONS_BASENAME = "clinicalAnnotations"; + private static final String CLINICAL_ANNOTATIONS_TSV_FILENAME = "clinical_annotations.tsv"; + private static final String CLINICAL_ANN_ALLELES_TSV_FILENAME = "clinical_ann_alleles.tsv"; + private static final String CLINICAL_ANN_EVIDENCE_TSV_FILENAME = "clinical_ann_evidence.tsv"; + + private static final String VARIANT_ANNOTATIONS_BASENAME = "variantAnnotations"; + private static final String VARIANT_ANNOTATIONS_TSV_FILENAME = "var_drug_ann.tsv"; + private static final String PHENOTYPE_ANNOTATIONS_TSV_FILENAME = "var_pheno_ann.tsv"; + private static final String FUNCTIONAL_ANNOTATIONS_TSV_FILENAME = "var_fa_ann.tsv"; + private 
static final String STUDY_PARAMETERS_TSV_FILENAME = "study_parameters.tsv"; + + private static final String GUIDELINE_ANNOTATIONS_BASENAME = "guidelineAnnotations"; + + private static final String DRUG_LABELS_BASENAME = "drugLabels"; + private static final String DRUG_LABELS_TSV_FILENAME = "drugLabels.tsv"; + + private static final String RELATIONSHIPS_BASENAME = "relationships"; + private static final String RELATIONSHIPS_TSV_FILENAME = "relationships.tsv"; + + private static final String GUIDELINE_ANNOTATION_EVIDENCE_TYPE = "Guideline Annotation"; + private static final String DRUG_LABEL_ANNOTATION_EVIDENCE_TYPE = "Label Annotation"; + private static final String VARIANT_ANNOTATION_EVIDENCE_TYPE = "Variant Drug Annotation"; + private static final String FUNCTIONAL_ANNOTATION_EVIDENCE_TYPE = "Variant Functional Assay Annotation"; + private static final String PHENOTYPE_ANNOTATION_EVIDENCE_TYPE = "Variant Phenotype Annotation"; + + private static final String LOCATION_KEY = "location"; + private static final String CHROMOSOME_KEY = "chrom"; + private static final String POSITION_KEY = "pos"; + + private static final String GENE_ENTITY = "Gene"; + private static final String CHEMICAL_ENTITY = "Chemical"; + + private static final String PHARMGKB_ID_KEY = "PHARMGKB_ID"; + private static final String PHARMGKB_ASSOCIATION_TYPE_KEY = "PHARMGKB_ASSOCIATION_TYPE"; + private static final String PHARMGKB_LEVEL_OVERRIDE_KEY = "PHARMGKB_LEVEL_OVERRIDE"; + private static final String PHARMGKB_LEVEL_MODIFIERS_KEY = "PHARMGKB_LEVEL_MODIFIERS"; + private static final String PHARMGKB_LAST_UPDATE_DATE_KEY = "PHARMGKB_LAST_UPDATE_DATE"; + private static final String PHARMGKB_IS_VIP_KEY = "PHARMGKB_IS_VIP"; + + public PharmGKBBuilder(Path inputDir, CellBaseFileSerializer serializer) { + super(serializer); + + this.inputDir = inputDir; + this.pharmGKBDir = inputDir.resolve(PHARMGKB_DATA); + } + + @Override + public void parse() throws Exception { + // Check input folder + 
FileUtils.checkDirectory(inputDir); + + // PharmGKB + FileUtils.checkDirectory(pharmGKBDir); + logger.info("Parsing {} files and building the data models...", PHARMGKB_NAME); + + // Parse chemical file + Map chemicalsMap = parseChemicalFile(); + + // Parse clinical annotation files + parseClinicalAnnotationFiles(chemicalsMap); + + // Parse gene file + parseGeneFile(chemicalsMap); + + logger.info("Parsing {} files finished.", PHARMGKB_NAME); + + // Generation the pharmacogenomics JSON file + logger.info("Writing {} JSON file to {} ...", PHARMACOGENOMICS_DATA, serializer.getOutdir()); + int counter = 0; + for (Map.Entry entry : chemicalsMap.entrySet()) { + ((CellBaseFileSerializer) serializer).serialize(entry.getValue(), PHARMACOGENOMICS_DATA); + if (++counter % 1000 == 0) { + logger.info("\t\t {} chemicals/drugs written.", counter); + } + } + serializer.close(); + logger.info("Writing {} JSON file done!", PHARMACOGENOMICS_DATA); + } + + private Map parseChemicalFile() throws IOException { + Path chemicalsFile = pharmGKBDir.resolve(CHEMICALS_BASENAME).resolve(CHEMICALS_TSV_FILENAME); + Map chemicalsMap = new HashMap<>(); + try (BufferedReader br = FileUtils.newBufferedReader(chemicalsFile)) { + // Skip first line, i.e. 
the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + // 0 1 2 3 4 5 6 7 8 + // PharmGKB Accession ID Name Generic Names Trade Names Brand Mixtures Type Cross-references SMILES InChI + // 9 10 11 12 13 14 + // Dosing Guideline External Vocabulary Clinical Annotation Count Variant Annotation Count Pathway Count VIP Count + // 15 16 17 18 + // Dosing Guideline Sources Top Clinical Annotation Level Top FDA Label Testing Level Top Any Drug Label Testing Level + // 19 20 21 22 23 + // Label Has Dosing Info Has Rx Annotation RxNorm Identifiers ATC Identifiers PubChem Compound Identifiers + PharmaChemical pharmaChemical = new PharmaChemical() + .setId(fields[0]) + .setSource(PHARMGKB_NAME) + .setName(fields[1]) + .setSmiles(fields[7]) + .setInChI(fields[8]); + + // Generic Names + if (StringUtils.isNotEmpty(fields[2])) { + pharmaChemical.setGenericNames(stringFieldToList(fields[2])); + } + + // Trade Names + if (StringUtils.isNotEmpty(fields[3])) { + pharmaChemical.setTradeNames(stringFieldToList(fields[3])); + } + + // Brand Mixtures + if (StringUtils.isNotEmpty(fields[4])) { + pharmaChemical.setTradeMixtures(stringFieldToList(fields[4])); + } + + // Types + if (StringUtils.isNotEmpty(fields[5])) { + pharmaChemical.setTypes(Arrays.stream(fields[5].split(",")).map(String::trim).collect(Collectors.toList())); + } + + // We need to keep the name not the ID to map by drug name in the clinical annotation method + chemicalsMap.put(pharmaChemical.getName(), pharmaChemical); + } + } + logger.info("Number of Chemical items read {}", chemicalsMap.size()); + + return chemicalsMap; + } + + /** + * This method parses clinical_annotations.tsv, then it parses alleles and evidences to add them to the first one. 
+ * @param chemicalsMap + * @throws IOException + */ + private void parseClinicalAnnotationFiles(Map chemicalsMap) throws IOException { + Map variantAnnotationMap = new HashMap<>(); + Map> drugToVariantAnnotationIdMap = new HashMap<>(); + + Map> variantMap = parseVariantFile(); + + // clinical_annotations.tsv + try (BufferedReader br = FileUtils.newBufferedReader(pharmGKBDir.resolve(CLINICAL_ANNOTATIONS_BASENAME) + .resolve(CLINICAL_ANNOTATIONS_TSV_FILENAME))) { + // Skip first line, i.e. the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + + // Sanity check + if (StringUtils.isEmpty(fields[0])) { + logger.warn("Clinical annotation ID is missing in clinical annotations line: {}", line); + continue; + } + + // 0 1 2 3 4 5 6 + // Clinical Annotation ID Variant/Haplotypes Gene Level of Evidence Level Override Level Modifiers Score + // 7 8 9 10 11 12 13 + // Phenotype Category PMID Count Evidence Count Drug(s) Phenotype(s) Latest History Date (YYYY-MM-DD) URL + // 14 + // Specialty Population + PharmaVariantAnnotation pharmaVariantAnnotation = new PharmaVariantAnnotation() + .setConfidence(fields[3]) + .setScore(fields[6]) + .setUrl(fields[13]) + .setPopulation(fields[14]); + + // Variant or haplotypes + if (StringUtils.isNotEmpty(fields[1])) { + if (isHaplotype(fields[1])) { + // Haplotype + pharmaVariantAnnotation.setHaplotypes(getHaplotypeList(fields[1])); + } else { + // Variant + pharmaVariantAnnotation.setVariantId(fields[1]); + } + } + + // Genes + if (StringUtils.isNotEmpty(fields[2])) { + pharmaVariantAnnotation.setGeneNames(Arrays.asList(fields[2].split(";"))); + } + + if (StringUtils.isNotEmpty(fields[7])) { + pharmaVariantAnnotation.setPhenotypeTypes(Arrays.asList(fields[7].split(";"))); + } + + if (StringUtils.isNotEmpty(fields[11])) { + pharmaVariantAnnotation.setPhenotypes(Arrays.asList(fields[11].split(";"))); + } + + Map attributes = new HashMap<>(); + 
attributes.put(PHARMGKB_ID_KEY, fields[0]); + attributes.put(PHARMGKB_LEVEL_OVERRIDE_KEY, fields[4]); + attributes.put(PHARMGKB_LEVEL_MODIFIERS_KEY, fields[5]); + attributes.put(PHARMGKB_LAST_UPDATE_DATE_KEY, fields[12]); + pharmaVariantAnnotation.setAttributes(attributes); + + // Add some fields from the variant map + if (variantMap.containsKey(pharmaVariantAnnotation.getVariantId())) { + pharmaVariantAnnotation.setLocation((String) variantMap.get(pharmaVariantAnnotation.getVariantId()).get(LOCATION_KEY)); + pharmaVariantAnnotation + .setChromosome((String) variantMap.get(pharmaVariantAnnotation.getVariantId()).get(CHROMOSOME_KEY)); + pharmaVariantAnnotation.setPosition((int) variantMap.get(pharmaVariantAnnotation.getVariantId()).get(POSITION_KEY)); + } else { + logger.warn("Variant {} from clinical annotation not found in the variant map, so chromosome and position are not set", + pharmaVariantAnnotation.getVariantId()); + } + + // Add the annotation to the annotationMap by annotation ID + variantAnnotationMap.put(fields[0], pharmaVariantAnnotation); + + // Process the drug names to update the drugToClinicalAnnotationId map + // This will be used at the end of the method to update the chemical map + if (StringUtils.isNotEmpty(fields[10])) { + // Drugs are separated by semicolon + String[] drugs = fields[10].split(";"); + for (String drug : drugs) { + if (!drugToVariantAnnotationIdMap.containsKey(drug)) { + // Add the drug to the map + drugToVariantAnnotationIdMap.put(drug, new ArrayList<>()); + } + // Add the clinical annotation ID to that drug + drugToVariantAnnotationIdMap.get(drug).add(fields[0]); + } + } + } + } + + // Update the clinical annotation map by parsing the clinical annotation evidences + parseClinicalAnnotationEvidenceFile(variantAnnotationMap); + + // Update the clinical annotation map by parsing the clinical annotation alleles + parseClinicalAnnotationAlleleFile(variantAnnotationMap); + + // Update chemicals map by adding the clinical annotation 
+ for (Map.Entry> entry : drugToVariantAnnotationIdMap.entrySet()) { + if (chemicalsMap.containsKey(entry.getKey())) { + for (String variantAnnotationId : entry.getValue()) { + chemicalsMap.get(entry.getKey()).getVariants().add(variantAnnotationMap.get(variantAnnotationId)); + } + } else { + logger.warn("Drug '{}' not found in the chemicals map", entry.getKey()); + } + } + } + + private Map> parseVariantFile() throws IOException { + Map> variantMap = new HashMap<>(); + // Parse the variant file (i.e., variants.tsv) + Path varPath = pharmGKBDir.resolve(VARIANTS_BASENAME).resolve(VARIANTS_TSV_FILENAME); + try (BufferedReader br = FileUtils.newBufferedReader(varPath)) { + // Skip first line, i.e. the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + String variantName = fields[1]; + + // Sanity check + if (StringUtils.isEmpty(variantName)) { + logger.warn("Variant name is missing in variant line: {}", line); + continue; + } + + if (variantMap.containsKey(variantName)) { + logger.warn("Variant name is duplicated in variant line: {}", line); + continue; + } + + // 0 1 2 3 4 5 6 + // Variant ID Variant Name Gene IDs Gene Symbols Location Variant Annotation count Clinical Annotation count + // 7 8 9 10 + // Level 1/2 Clinical Annotation count Guideline Annotation count Label Annotation count Synonyms + String location = fields[4]; + if (StringUtils.isEmpty(location)) { + logger.warn("Location is missing for Variant name {}", variantName); + continue; + } + if (!location.startsWith("NC_")) { + logger.warn("Unknown location {}, it has to be a RefSeq ID", location); + continue; + } + Map attrMap = new HashMap<>(); + String[] splits = location.split("[_\\.:]"); + try { + int chrom = Integer.parseInt(splits[1]); + if (chrom >= 1 && chrom <= 22) { + attrMap.put(CHROMOSOME_KEY, String.valueOf(chrom)); + } else if (chrom == 23) { + attrMap.put(CHROMOSOME_KEY, "X"); + } else if (chrom == 24) { + 
attrMap.put(CHROMOSOME_KEY, "Y"); + } else if (chrom == 12920) { + attrMap.put(CHROMOSOME_KEY, "MT"); + } else { + logger.warn("Unknown chromosome {}", chrom); + continue; + } + } catch (NumberFormatException e) { + logger.warn("Error computing chromosome from location {}: {}", location, e.getMessage()); + continue; + } + try { + int position = Integer.parseInt(splits[3]); + attrMap.put(POSITION_KEY, position); + } catch (NumberFormatException e) { + logger.warn("Error computing chromosome position from location {}: {}", location, e.getMessage()); + continue; + } + attrMap.put(LOCATION_KEY, attrMap.get(CHROMOSOME_KEY) + ":" + attrMap.get(POSITION_KEY)); + + // Add it to the variant map + variantMap.put(variantName, attrMap); + } + } + logger.info("Number of variants = {}", variantMap.size()); + + return variantMap; + } + + private void parseClinicalAnnotationEvidenceFile(Map variantAnnotationMap) throws IOException { + // For CellBase, variant annotation correponds to the PharmGKB clinical annotation + // Processing clinical annotation evidences implies to process the variant annotation, guideline annotations, + // drug label annotations, phenotype annotations and functional analysis annotations + Map variantAssociationMap = new HashMap<>(); + Map guidelineAnnotationsMap = parseGuidelineAnnotationFiles(); + Map drugLabelAnnotationsMap = parseDrugLabelAnnotationFile(); + + // Parse study parameters and update the variant, phenotype and functional annotations with the parsed study parameters + parseVariantAnnotationFile(variantAssociationMap); + parsePhenotypeAnnotationFile(variantAssociationMap); + parseFunctionalAnnotationFile(variantAssociationMap); + parseStudyParameterFile(variantAssociationMap); + + // Parse the clinical annotation alleles file (i.e., clinical_ann_alleles.tsv) + Path evidencesPath = pharmGKBDir.resolve(CLINICAL_ANNOTATIONS_BASENAME).resolve(CLINICAL_ANN_EVIDENCE_TSV_FILENAME); + try (BufferedReader br = 
FileUtils.newBufferedReader(evidencesPath)) { + // Skip first line, i.e. the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + String clinicalAnnotationId = fields[0]; + + // Sanity check + if (StringUtils.isEmpty(clinicalAnnotationId)) { + logger.warn("Clinical annotation ID is missing in clinical annotation evidence line: {}", line); + continue; + } + + // 0 1 2 3 4 5 6 + // Clinical Annotation ID Evidence ID Evidence Type Evidence URL PMID Summary Score + String evidenceId = fields[1]; + String evidenceType = fields[2]; + PharmaClinicalEvidence evidence = new PharmaClinicalEvidence() + .setType(evidenceType) + .setUrl(fields[3]) + .setPubmed(fields[4]) + .setSummary(fields[5]) + .setScore(fields[6]); + + switch (evidenceType) { + case VARIANT_ANNOTATION_EVIDENCE_TYPE: + case PHENOTYPE_ANNOTATION_EVIDENCE_TYPE: + case FUNCTIONAL_ANNOTATION_EVIDENCE_TYPE: { + if (variantAssociationMap.containsKey(evidenceId)) { + evidence.getVariantAssociations().add(variantAssociationMap.get(evidenceId)); + } else { + logger.warn("Evidence ID '{}' of type '{}' not found in the variant association map", evidenceId, evidenceType); + } + break; + } + case GUIDELINE_ANNOTATION_EVIDENCE_TYPE: { + if (guidelineAnnotationsMap.containsKey(evidenceId)) { + evidence.getGuidelineAnnotations().add(guidelineAnnotationsMap.get(evidenceId)); + } else { + logger.warn("Evidence ID '{}' of type '{}' not found in the variant annotations map", + evidenceId, evidenceType); + } + break; + } + case DRUG_LABEL_ANNOTATION_EVIDENCE_TYPE: { + if (drugLabelAnnotationsMap.containsKey(evidenceId)) { + evidence.getDrugLabelAnnotations().add(drugLabelAnnotationsMap.get(evidenceId)); + } else { + logger.warn("Evidence ID '{}' of type '{}' not found in the drug label annotations map", + evidenceId, evidenceType); + } + break; + } + default: { + logger.warn("Unknown evidence type '{}': this evidence is skipped. 
Valid evidence types are: {}", + evidenceType, + StringUtils.join( + Arrays.asList(VARIANT_ANNOTATION_EVIDENCE_TYPE, GUIDELINE_ANNOTATION_EVIDENCE_TYPE, + DRUG_LABEL_ANNOTATION_EVIDENCE_TYPE, FUNCTIONAL_ANNOTATION_EVIDENCE_TYPE, + PHENOTYPE_ANNOTATION_EVIDENCE_TYPE), ",")); + break; + } + } + + Map attributes = new HashMap<>(); + attributes.put(PHARMGKB_ID_KEY, fields[0]); + evidence.setAttributes(attributes); + + // Add evidence to clinical annotation + if (variantAnnotationMap.containsKey(clinicalAnnotationId)) { + variantAnnotationMap.get(clinicalAnnotationId).getEvidences().add(evidence); + } else { + logger.warn("Clinical annotation ID {} from clinical annotation evidence not found in clinical annotations", + clinicalAnnotationId); + } + } + } + } + + private void parseClinicalAnnotationAlleleFile(Map variantAnnotationMap) throws IOException { + // Parse the clinical annotation alleles file (i.e., clinical_ann_alleles.tsv) + Path allelesPath = pharmGKBDir.resolve(CLINICAL_ANNOTATIONS_BASENAME).resolve(CLINICAL_ANN_ALLELES_TSV_FILENAME); + try (BufferedReader br = FileUtils.newBufferedReader(allelesPath)) { + // Skip first line, i.e. 
the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + // For CellBase, variant annotation is equivalent to PharmGKB clinical annotation + String variantAnnotationId = fields[0]; + + // Sanity check + if (StringUtils.isEmpty(variantAnnotationId)) { + logger.warn("Clinical annotation ID is missing in clinical annotation alleles line: {}", line); + continue; + } + + // 0 1 2 3 + // Clinical Annotation ID Genotype/Allele Annotation Text Allele Function + PharmaClinicalAllele clinicalAllele = new PharmaClinicalAllele() + .setAllele(fields[1]) + .setAnnotation(fields[2]) + .setDescription(fields[3]); + + Map attributes = new HashMap<>(); + attributes.put(PHARMGKB_ID_KEY, variantAnnotationId); + clinicalAllele.setAttributes(attributes); + + // Add allele to clinical annotation + if (variantAnnotationMap.containsKey(variantAnnotationId)) { + variantAnnotationMap.get(variantAnnotationId).getAlleles().add(clinicalAllele); + } else { + logger.warn("Clinical annotation ID {} from clinical annotation alleles file not found in the clinical annotations map", + variantAnnotationId); + } + } + } + } + + private void parseVariantAnnotationFile(Map variantAssociationMap) throws IOException { + // For CellBase, variant association corresponds to PharmGKB variant annotation + // Parse the variant annotation file (i.e., var_drug_ann.tsv) + Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(VARIANT_ANNOTATIONS_TSV_FILENAME); + int counter = 0; + try (BufferedReader br = FileUtils.newBufferedReader(varDrugPath)) { + // Skip first line, i.e. 
the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + String variantAnnotationId = fields[0]; + + // Sanity check + if (StringUtils.isEmpty(variantAnnotationId)) { + logger.warn("Variant annotation ID is missing in variant annotations line: {}", line); + continue; + } + + // 0 1 2 3 4 5 + // Variant Annotation ID Variant/Haplotypes Gene Drug(s) PMID Phenotype Category + // 6 7 8 9 10 + // Significance Notes Sentence Alleles Specialty Population + PharmaVariantAssociation variantAssociation = new PharmaVariantAssociation() + .setGeneName(fields[2]) + .setPubmed(fields[4]) + .setPhenotypeType(fields[5]) + .setSignificance(fields[6]) + .setDiscussion(fields[7]) + .setDescription(fields[8]) + .setAlleles(fields[9]) + .setPopulation(fields[10]); + + // Variant or haplotypes + if (StringUtils.isNotEmpty(fields[1])) { + if (isHaplotype(fields[1])) { + // Haplotype + variantAssociation.setHaplotypes(getHaplotypeList(fields[1])); + } else { + // Variant + variantAssociation.setVariantId(fields[1]); + } + } + + Map attributes = new HashMap<>(); + attributes.put(PHARMGKB_ID_KEY, fields[0]); + attributes.put(PHARMGKB_ASSOCIATION_TYPE_KEY, VARIANT_ANNOTATION_EVIDENCE_TYPE); + variantAssociation.setAttributes(attributes); + + if (StringUtils.isNotEmpty(fields[3])) { + variantAssociation.setDrugs(stringFieldToList(fields[3])); + } + + // Add the annotation to the variantAnnotationMap by variant and gene + variantAssociationMap.put(variantAnnotationId, variantAssociation); + counter++; + } + } + logger.info("Number of variant annotations = {}", counter); + } + + private Map parseGuidelineAnnotationFiles() throws IOException { + Map guidelineAnnotationMap = new HashMap<>(); + + ObjectMapper mapper = new ObjectMapper(); + ObjectReader objectReader = mapper.readerFor(PharmaGuidelineAnnotation.class); + + // Parse the guideline annotations JSON files + Path guidelinesPath = 
pharmGKBDir.resolve(GUIDELINE_ANNOTATIONS_BASENAME); + FileUtils.checkDirectory(guidelinesPath); + for (File file : Objects.requireNonNull(guidelinesPath.toFile().listFiles())) { + if (file.getName().endsWith("json")) { + PharmaGuidelineAnnotation guidelineAnnotation = objectReader.readValue(file); + if (guidelineAnnotation.getGuideline() != null + && StringUtils.isEmpty(guidelineAnnotation.getGuideline().getId())) { + logger.warn("Guideline ID is missing for guideline filename: {}", file.getName()); + continue; + } + // Add the guideline annotation to the map by guideline ID (= Evidence ID) + guidelineAnnotationMap.put(guidelineAnnotation.getGuideline().getId(), guidelineAnnotation); + } + } + logger.info("Number of guideline annotations = {}", guidelineAnnotationMap.size()); + + return guidelineAnnotationMap; + } + + private Map parseDrugLabelAnnotationFile() throws IOException { + Map drugLabelAnnotationMap = new HashMap<>(); + // Parse the drug labels annotations file (i.e., drugLabels.tsv) + Path drugLabelPath = pharmGKBDir.resolve(DRUG_LABELS_BASENAME).resolve(DRUG_LABELS_TSV_FILENAME); + try (BufferedReader br = FileUtils.newBufferedReader(drugLabelPath)) { + // Skip first line, i.e. 
the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + String drugLabelId = fields[0]; + + // Sanity check + if (StringUtils.isEmpty(drugLabelId)) { + logger.warn("PharmGKB ID is missing in drug label line: {}", line); + continue; + } + + // 0 1 2 3 4 5 6 7 + // PharmGKB ID Name Source Biomarker Flag Testing Level Has Prescribing Info Has Dosing Info Has Alternate Drug + // 8 9 10 11 12 13 + // Cancer Genome Prescribing Chemicals Genes Variants/Haplotypes Latest History Date (YYYY-MM-DD) + PharmaDrugLabelAnnotation labelAnnotation = new PharmaDrugLabelAnnotation() + .setName(fields[1]) + .setSource(fields[2]) + .setBiomarkerFlag(fields[3]) + .setTestingLevel(fields[4]) + .setPrescribingInfo(fields[5]) + .setDosingInfo(fields[6]) + .setAlternateDrug(fields[7]) + .setCancerGenome(fields[8]); + + Map attributes = new HashMap<>(); + attributes.put(PHARMGKB_ID_KEY, drugLabelId); + labelAnnotation.setAttributes(attributes); + + // Add the drug label annotation to the map by ParhmGKB (= Evidence ID) + drugLabelAnnotationMap.put(drugLabelId, labelAnnotation); + } + } + logger.info("Number of drug label annotations = {}", drugLabelAnnotationMap.size()); + + return drugLabelAnnotationMap; + } + + private void parsePhenotypeAnnotationFile(Map variantAssociationMap) throws IOException { + // Parse the variant annotation file (i.e., var_pheno_ann.tsv) + Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(PHENOTYPE_ANNOTATIONS_TSV_FILENAME); + int counter = 0; + try (BufferedReader br = FileUtils.newBufferedReader(varDrugPath)) { + // Skip first line, i.e. 
the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + String variantAnnotationId = fields[0]; + + // Sanity check + if (StringUtils.isEmpty(variantAnnotationId)) { + logger.warn("Variant annotation ID is missing in phenotype annotations line: {}", line); + continue; + } + + // 0 1 2 3 4 5 6 7 8 + // Variant Annotation ID Variant/Haplotypes Gene Drug(s) PMID Phenotype Category Significance Notes Sentence + // 9 10 ..... + // Alleles Specialty Population ..... + PharmaVariantAssociation variantAssociation = new PharmaVariantAssociation() + .setGeneName(fields[2]) + .setPubmed(fields[4]) + .setPhenotypeType(fields[5]) + .setSignificance(fields[6]) + .setDiscussion(fields[7]) + .setDescription(fields[8]) + .setAlleles(fields[9]) + .setPopulation(fields[10]); + + // Variant or haplotypes + if (StringUtils.isNotEmpty(fields[1])) { + if (isHaplotype(fields[1])) { + // Haplotype + variantAssociation.setHaplotypes(getHaplotypeList(fields[1])); + } else { + // Variant + variantAssociation.setVariantId(fields[1]); + } + } + + Map attributes = new HashMap<>(); + attributes.put(PHARMGKB_ID_KEY, variantAnnotationId); + attributes.put(PHARMGKB_ASSOCIATION_TYPE_KEY, PHENOTYPE_ANNOTATION_EVIDENCE_TYPE); + variantAssociation.setAttributes(attributes); + + if (StringUtils.isNotEmpty(fields[3])) { + variantAssociation.setDrugs(stringFieldToList(fields[3])); + } + + // Add the annotation to the variantAnnotationMap by variant and gene + variantAssociationMap.put(variantAnnotationId, variantAssociation); + counter++; + } + } + logger.info("Number of phenotype annotations = {}", counter); + } + + private void parseFunctionalAnnotationFile(Map variantAssociationMap) throws IOException { + // Parse the variant annotation file (i.e., var_fa_ann.tsv) + Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(FUNCTIONAL_ANNOTATIONS_TSV_FILENAME); + int counter = 0; + try (BufferedReader br = 
FileUtils.newBufferedReader(varDrugPath)) { + // Skip first line, i.e. the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + String variantAnnotationId = fields[0]; + + // Sanity check + if (StringUtils.isEmpty(variantAnnotationId)) { + logger.warn("Variant annotation ID is missing in variant annotations line: {}", line); + continue; + } + + // 0 1 2 3 4 5 + // Variant Annotation ID Variant/Haplotypes Gene Drug(s) PMID Phenotype Category + // 6 7 8 9 10 11 ..... + // Significance Notes Sentence Alleles Specialty Population Assay type ..... + PharmaVariantAssociation variantAssociation = new PharmaVariantAssociation() + .setGeneName(fields[2]) + .setPubmed(fields[4]) + .setPhenotypeType(fields[5]) + .setSignificance(fields[6]) + .setDiscussion(fields[7]) + .setDescription(fields[8]) + .setAlleles(fields[9]) + .setPopulation(fields[10]) + .setAssayType(fields[11]); + + // Variant or haplotypes + if (StringUtils.isNotEmpty(fields[1])) { + if (isHaplotype(fields[1])) { + // Haplotype + variantAssociation.setHaplotypes(getHaplotypeList(fields[1])); + } else { + // Variant + variantAssociation.setVariantId(fields[1]); + } + } + + Map attributes = new HashMap<>(); + attributes.put(PHARMGKB_ID_KEY, variantAnnotationId); + attributes.put(PHARMGKB_ASSOCIATION_TYPE_KEY, FUNCTIONAL_ANNOTATION_EVIDENCE_TYPE); + variantAssociation.setAttributes(attributes); + + if (StringUtils.isNotEmpty(fields[3])) { + variantAssociation.setDrugs(stringFieldToList(fields[3])); + } + + // Add the annotation to the variantAnnotationMap by variant and gene + variantAssociationMap.put(variantAnnotationId, variantAssociation); + counter++; + } + } + logger.info("Number of variant annotations = {}", counter); + } + + private void parseStudyParameterFile(Map variantAssociationMap) throws IOException { + Map> studyParametersMap = new HashMap<>(); + // Parse the study parameters file (i.e., study_parameters.tsv) + Path 
studyParamsPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(STUDY_PARAMETERS_TSV_FILENAME); + try (BufferedReader br = FileUtils.newBufferedReader(studyParamsPath)) { + // Skip first line, i.e. the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + String variantAnnotationId = fields[1]; + + // Sanity check + if (StringUtils.isEmpty(variantAnnotationId)) { + logger.warn("Variant annotation ID is missing in study parameters line: {}", line); + continue; + } + + // 0 1 2 3 4 5 + // Study Parameters ID Variant Annotation ID Study Type Study Cases Study Controls Characteristics + // 6 7 8 9 + // Characteristics Type Frequency In Cases Allele Of Frequency In Cases Frequency In Controls + // 10 11 12 13 14 15 + // Allele Of Frequency In Controls P Value Ratio Stat Type Ratio Stat Confidence Interval Start Confidence Interval Stop + // 16 + // Biogeographical Groups + PharmaStudyParameters studyParams = new PharmaStudyParameters() + .setStudyType(fields[2]) + .setStudyCases(fields[3]) + .setStudyControls(fields[4]) + .setCharacteristics(fields[5]) + .setCharacteristicsType(fields[6]) + .setFrequencyInCases(fields[7]) + .setAlleleOfFrequencyInCases(fields[8]) + .setFrequencyInControls(fields[9]) + .setAlleleOfFrequencyInControls(fields[10]) + .setpValue(fields[11]) + .setRatioStatType(fields[12]) + .setRatioStat(fields[13]) + .setConfidenceIntervalStart(fields[14]) + .setConfidenceIntervalStop(fields[15]) + .setBiogeographicalGroups(fields[16]); + + Map attributes = new HashMap<>(); + attributes.put(PHARMGKB_ID_KEY, variantAnnotationId); + studyParams.setAttributes(attributes); + + // Add the study parameters map + if (!studyParametersMap.containsKey(variantAnnotationId)) { + studyParametersMap.put(variantAnnotationId, new ArrayList<>()); + } + studyParametersMap.get(variantAnnotationId).add(studyParams); + } + } + logger.info("Number of study parameters lines = {}", 
studyParametersMap.size()); + + for (Map.Entry> entry : studyParametersMap.entrySet()) { + if (variantAssociationMap.containsKey(entry.getKey())) { + variantAssociationMap.get(entry.getKey()).setStudyParameters(entry.getValue()); + } else { + logger.warn("Study parameters with variant annotation ID {} not found in variant association map", entry.getKey()); + } + } + } + + private void parseGeneFile(Map chemicalsMap) throws IOException { + // To relate genes with chemicals we will take the relationships from: + // 1. From guidelines (from the members 'relatedGenes' and 'relatedChemicals') + // 2. From the file relationships.tsv (from the relationship Gene - Chemical) + + // Create the PharmGKB gene ID map by chemical name + Map> pgkbGeneIdMapByChemicalName = new HashMap<>(); + + // Create and populate guideline annotations map by PharmGKB gene ID + List guidelineAnnotations = new ArrayList<>(parseGuidelineAnnotationFiles().values()); + Map> guidelineAnnotationMapByPgkbGeneId = new HashMap<>(); + for (PharmaGuidelineAnnotation guidelineAnnotation : guidelineAnnotations) { + if (guidelineAnnotation.getGuideline() != null + && CollectionUtils.isNotEmpty(guidelineAnnotation.getGuideline().getRelatedGenes())) { + for (BasicObject relatedGene : guidelineAnnotation.getGuideline().getRelatedGenes()) { + if (StringUtils.isNotEmpty(relatedGene.getId())) { + String pgkbGeneId = relatedGene.getId(); + if (StringUtils.isNotEmpty(pgkbGeneId)) { + // Populate the guideline annotation map by PharmGKB gene ID + if (!guidelineAnnotationMapByPgkbGeneId.containsKey(pgkbGeneId)) { + guidelineAnnotationMapByPgkbGeneId.put(pgkbGeneId, new ArrayList<>()); + } + guidelineAnnotationMapByPgkbGeneId.get(pgkbGeneId).add(guidelineAnnotation); + + // Populate the PharmGKB gene ID map by chemical names + if (CollectionUtils.isNotEmpty(guidelineAnnotation.getGuideline().getRelatedChemicals())) { + for (BasicObject relatedChemical : guidelineAnnotation.getGuideline().getRelatedChemicals()) { + 
String chemicalName = relatedChemical.getName(); + if (StringUtils.isNotEmpty(chemicalName)) { + if (!pgkbGeneIdMapByChemicalName.containsKey(chemicalName)) { + pgkbGeneIdMapByChemicalName.put(chemicalName, new HashSet<>()); + } + pgkbGeneIdMapByChemicalName.get(chemicalName).add(pgkbGeneId); + } + } + } + } + } + } + } + } + + // Parse the genes file (i.e., genes.tsv) + Map geneAnnotationMapByPgkbGeneId = new HashMap<>(); + Path genesPath = pharmGKBDir.resolve(GENES_BASENAME).resolve(GENES_TSV_FILENAME); + try (BufferedReader br = FileUtils.newBufferedReader(genesPath)) { + // Skip first line, i.e. the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + String pgkbGeneId = fields[0]; + + // Sanity check + if (StringUtils.isEmpty(pgkbGeneId)) { + logger.warn("PharmGKB accession ID is missing in genes file line: {}", line); + continue; + } + // 0 1 2 3 4 5 6 7 8 + // PharmGKB Accession Id NCBI Gene ID HGNC ID Ensembl Id Name Symbol Alternate Names Alternate Symbols Is VIP + // 9 10 11 12 13 + // Has Variant Annotation Cross-references Has CPIC Dosing Guideline Chromosome Chromosomal Start - GRCh37 + // 14 15 16 + // Chromosomal Stop - GRCh37 Chromosomal Start - GRCh38 Chromosomal Stop - GRCh38 + PharmaGeneAnnotation geneAnnotation = new PharmaGeneAnnotation() + .setId(pgkbGeneId) + .setName(fields[4]); + + List xrefs = new ArrayList<>(); + if (StringUtils.isNotEmpty(fields[1])) { + xrefs.add(new Xref(fields[1], "NCBI", "NCBI gene ID")); + } + if (StringUtils.isNotEmpty(fields[2])) { + xrefs.add(new Xref(fields[2], "HGNC", "HGNC gene ID")); + } + if (StringUtils.isNotEmpty(fields[3])) { + xrefs.add(new Xref(fields[3], "Ensembl", "Ensembl gene ID")); + } + if (StringUtils.isNotEmpty(fields[5])) { + xrefs.add(new Xref(fields[5], "HGNC", "HGNC gene symbol")); + } + if (CollectionUtils.isNotEmpty(xrefs)) { + geneAnnotation.setXrefs(xrefs); + } + + if (StringUtils.isNotEmpty(fields[9])) { + 
geneAnnotation.setHasVariantAnnotation(fields[9].toLowerCase(Locale.ROOT).equals("yes")); + } + + // Set guidelines by getting them from the guideline annotations map + if (guidelineAnnotationMapByPgkbGeneId.containsKey(pgkbGeneId)) { + geneAnnotation.setGuidelineAnnotations(guidelineAnnotationMapByPgkbGeneId.get(pgkbGeneId)); + } + + Map attributes = new HashMap<>(); + attributes.put(PHARMGKB_IS_VIP_KEY, fields[8]); + geneAnnotation.setAttributes(attributes); + + // Add to the map + if (geneAnnotationMapByPgkbGeneId.containsKey(pgkbGeneId)) { + logger.warn("PharmGKB gene ID {} is duplicated in the PharmGKB file {}", pgkbGeneId, GENES_TSV_FILENAME); + } else { + geneAnnotationMapByPgkbGeneId.put(pgkbGeneId, geneAnnotation); + } + } + } + + // Parse the chemical-gene relationships and update the PharmGKB gene ID map byh chemical name + // In addtion, updata the gene annotation map with additional fields (e.g., evidences, pubmeds...) + parseChemicalGeneRelationships(pgkbGeneIdMapByChemicalName, geneAnnotationMapByPgkbGeneId); + + // Finally, update the chemical map with the gene annotation + for (Map.Entry entry : chemicalsMap.entrySet()) { + String chemicalName = entry.getKey(); + if (pgkbGeneIdMapByChemicalName.containsKey(chemicalName)) { + for (String pgkbGeneId : pgkbGeneIdMapByChemicalName.get(chemicalName)) { + if (geneAnnotationMapByPgkbGeneId.containsKey(pgkbGeneId)) { + entry.getValue().getGenes().add(geneAnnotationMapByPgkbGeneId.get(pgkbGeneId)); + } + } + } + } + + logger.info("Number of parsed genes = {}", geneAnnotationMapByPgkbGeneId.size()); + } + + private void parseChemicalGeneRelationships(Map> pgkbGeneIdMapByChemicalName, + Map geneAnnotationMapByPgkbGeneId) throws IOException { + int counter = 0; + // Parse the genes file (i.e., relationships.tsv) + Path relationshipsPath = pharmGKBDir.resolve(RELATIONSHIPS_BASENAME).resolve(RELATIONSHIPS_TSV_FILENAME); + try (BufferedReader br = FileUtils.newBufferedReader(relationshipsPath)) { + // Skip first 
line, i.e. the header line + String line = br.readLine(); + while ((line = br.readLine()) != null) { + String[] fields = line.split("\t", -1); + + // 0 1 2 3 4 5 6 7 8 0 10 + // Entity1_id Entity1_name Entity1_type Entity2_id Entity2_name Entity2_type Evidence Association PK PD PMIDs + String pgkbGeneId = fields[0]; + String entity1Type = fields[2]; + String chemicalName = fields[4]; + String entity2Type = fields[5]; + if (StringUtils.isNotEmpty(pgkbGeneId) && StringUtils.isNotEmpty(entity1Type) && StringUtils.isNotEmpty(chemicalName) + && StringUtils.isNotEmpty(entity2Type) && entity1Type.equals(GENE_ENTITY) && entity2Type.equals(CHEMICAL_ENTITY)) { + if (!pgkbGeneIdMapByChemicalName.containsKey(chemicalName)) { + pgkbGeneIdMapByChemicalName.put(chemicalName, new HashSet<>()); + } + pgkbGeneIdMapByChemicalName.get(chemicalName).add(pgkbGeneId); + + // Update gene annotation map + if (geneAnnotationMapByPgkbGeneId.containsKey(pgkbGeneId)) { + if (StringUtils.isNotEmpty(fields[6])) { + geneAnnotationMapByPgkbGeneId.get(pgkbGeneId).setEvidences(Arrays.asList(fields[6].split(",", -1))); + } + if (StringUtils.isNotEmpty(fields[7])) { + geneAnnotationMapByPgkbGeneId.get(pgkbGeneId).setConfidence(fields[7]); + } + if (StringUtils.isNotEmpty(fields[10])) { + geneAnnotationMapByPgkbGeneId.get(pgkbGeneId).setPubmed(Arrays.asList(fields[10].split(";", -1))); + } + } else { + logger.warn("PhamGKB gene ID {} found in the file {} but not in the file {}", pgkbGeneId, + RELATIONSHIPS_TSV_FILENAME, GENES_TSV_FILENAME); + } + counter++; + } + } + } + logger.info("Number of parsed {}-{} relationships = {}", GENE_ENTITY, CHEMICAL_ENTITY, counter); + } + + private List stringFieldToList(String field) { + if (field.startsWith("\"")) { + return Arrays + .stream(field.replace("\"\"\"", "\"").replace("\"\"", "\"").replace("\", \"", "\",\"").split("\",\"")) + .map(s -> s.replace("\"", "").trim()) + .collect(Collectors.toList()); + } else { + if (field.contains(", ")) { + return Arrays + 
.stream(field.replace(", ", ",").split(",")) + .map(String::trim) + .collect(Collectors.toList()); + } else { + return Collections.singletonList(field); + } + } + } + + private boolean isHaplotype(String value) { + return (!value.startsWith("rs") && value.contains("*")); + } + + private List getHaplotypeList(String value) { + return Arrays.stream(value.split(",")).map(s -> s.trim()).collect(Collectors.toList()); + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/db/MongoDBManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/db/MongoDBManager.java index 3c3b893308..d78c0446c8 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/db/MongoDBManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/db/MongoDBManager.java @@ -20,15 +20,14 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.bson.Document; -import org.opencb.biodata.models.core.Gene; -import org.opencb.cellbase.core.api.GeneQuery; import org.opencb.cellbase.core.common.Species; import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.DatabaseCredentials; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.core.utils.SpeciesUtils; -import org.opencb.cellbase.lib.impl.core.GeneMongoDBAdaptor; +import org.opencb.cellbase.lib.impl.core.ReleaseMongoDBAdaptor; import org.opencb.commons.datastore.core.DataStoreServerAddress; import org.opencb.commons.datastore.mongodb.MongoDBConfiguration; import org.opencb.commons.datastore.mongodb.MongoDataStore; @@ -50,6 +49,8 @@ public class MongoDBManager { + public static final String DBNAME_SEPARATOR = "_"; + private MongoDataStoreManager mongoDataStoreManager; private CellBaseConfiguration cellBaseConfiguration; @@ -98,7 +99,7 @@ public MongoDataStore 
createMongoDBDatastore(String speciesStr, String assemblyS // cellbase_speciesId_assembly_cellbaseVersion // Example: // cellbase_hsapiens_grch37_v3 - String database = getDatabaseName(species.getId(), species.getAssembly()); + String database = getDatabaseName(species.getId(), species.getAssembly(), cellBaseConfiguration.getVersion()); logger.debug("Database for the species is '{}'", database); return createMongoDBDatastore(database); } catch (CellBaseException e) { @@ -161,7 +162,7 @@ public MongoDataStore createMongoDBDatastore(String database) { return mongoDatastore; } - public String getDatabaseName(String species, String assembly) { + public static String getDatabaseName(String species, String assembly, String version) { if (StringUtils.isEmpty(species) || StringUtils.isEmpty(assembly)) { throw new InvalidParameterException("Species and assembly are required"); } @@ -170,7 +171,18 @@ public String getDatabaseName(String species, String assembly) { .replaceAll("\\.", "") .replaceAll("-", "") .replaceAll("_", ""); - return "cellbase" + "_" + species.toLowerCase() + "_" + cleanAssembly.toLowerCase() + "_" + cellBaseConfiguration.getVersion(); + + // Process version from the configuration file, in order to suffix the database name + // - Production environment, e.g.: if version is "v5", the suffix added wil be "_v5" + // - Test environment, e.g.: if version is "v5.6" or "v5.6.0-SNAPSHOT", the suffix added will be "_v5_6" + String auxVersion = version.replace(".", DBNAME_SEPARATOR).replace("-", DBNAME_SEPARATOR); + String[] split = auxVersion.split(DBNAME_SEPARATOR); + String dbName = "cellbase" + DBNAME_SEPARATOR + species.toLowerCase() + DBNAME_SEPARATOR + cleanAssembly.toLowerCase() + + DBNAME_SEPARATOR + split[0]; + if (split.length > 1) { + dbName += (DBNAME_SEPARATOR + split[1]); + } + return dbName; } public Map getDatabaseStatus(String species, String assembly) { @@ -231,23 +243,19 @@ private String getDataResponseTime(String species, String assembly) { 
MongoDataStore mongoDatastore = createMongoDBDatastore(species, assembly); // TODO: check and get the default data release int dataRelease = 0; - GeneMongoDBAdaptor geneMongoDBAdaptor = new GeneMongoDBAdaptor(mongoDatastore); + ReleaseMongoDBAdaptor releaseMongoDBAdaptor = new ReleaseMongoDBAdaptor(mongoDatastore); +// GeneMongoDBAdaptor geneMongoDBAdaptor = new GeneMongoDBAdaptor(mongoDatastore); try { - GeneQuery geneQuery = new GeneQuery(); - geneQuery.setLimit(1); - CellBaseDataResult cellBaseDataResult = geneMongoDBAdaptor.query(geneQuery); - // Query must return one gene. Otherwise there's a problem - if (cellBaseDataResult.getNumResults() == 1) { - return cellBaseDataResult.getTime() + "ms"; + CellBaseDataResult releases = releaseMongoDBAdaptor.getAll(); + // Query must return at least one data release. Otherwise there's a problem + if (releases.getNumResults() >= 1) { + return releases.getTime() + "ms"; } else { return null; } } catch (MongoTimeoutException e) { e.printStackTrace(); return null; - } catch (CellBaseException e) { - e.printStackTrace(); - return null; } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java index e178a3b4af..a4ade6603e 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java @@ -28,8 +28,8 @@ import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.lib.EtlCommons; import org.opencb.cellbase.core.utils.SpeciesUtils; +import org.opencb.cellbase.lib.EtlCommons; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git 
a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/Downloader.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/Downloader.java index 1ddba56aa9..17022cae4b 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/Downloader.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/Downloader.java @@ -93,4 +93,9 @@ public List downloadPubMed() throws IOException, CellBaseException PubMedDownloadManager manager = new PubMedDownloadManager(species, assembly, outputDirectory, configuration); return manager.download(); } + + public List downloadPharmKGB() throws IOException, CellBaseException, InterruptedException { + PharmGKBDownloadManager manager = new PharmGKBDownloadManager(species, assembly, outputDirectory, configuration); + return manager.download(); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/PharmGKBDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/PharmGKBDownloadManager.java new file mode 100644 index 0000000000..274f6c62a7 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/PharmGKBDownloadManager.java @@ -0,0 +1,88 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.lib.download; + +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.DownloadProperties; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.commons.exec.Command; +import org.opencb.commons.utils.FileUtils; + +import java.io.IOException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.opencb.cellbase.lib.EtlCommons.*; + +public class PharmGKBDownloadManager extends AbstractDownloadManager { + + public PharmGKBDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration) + throws IOException, CellBaseException { + super(species, assembly, targetDirectory, configuration); + } + + @Override + public List download() throws IOException, InterruptedException { + logger.info("Downloading PharmGKB files..."); + DownloadProperties.URLProperties pharmGKB = configuration.getDownload().getPharmGKB(); + Path pharmgkbDownloadFolder = downloadFolder.resolve(PHARMACOGENOMICS_DATA).resolve(PHARMGKB_DATA); + Files.createDirectories(pharmgkbDownloadFolder); + + List urls = new ArrayList<>(); + List downloadFiles = new ArrayList<>(); + for (String url : pharmGKB.getFiles()) { + urls.add(url); + + Path downloadedFileName = Paths.get(new URL(url).getPath()).getFileName(); + Path downloadedFilePath = pharmgkbDownloadFolder.resolve(downloadedFileName); + logger.info("Downloading file {} to {}", url, downloadedFilePath); + DownloadFile downloadFile = downloadFile(url, downloadedFilePath.toString()); + downloadFiles.add(downloadFile); + + // Unzip downloaded file + unzip(downloadedFilePath.getParent(), downloadedFileName.toString(), Collections.emptyList(), + pharmgkbDownloadFolder.resolve(downloadedFileName.toString().split("\\.")[0])); + } + + // Save versions + 
saveVersionData(PHARMACOGENOMICS_DATA, PHARMGKB_NAME, pharmGKB.getVersion(), getTimeStamp(), urls, + pharmgkbDownloadFolder.resolve(PHARMGKB_VERSION_FILENAME)); + + return downloadFiles; + } + + private void unzip(Path inPath, String zipFilename, List outFilenames, Path outPath) throws IOException { + // Check zip file exists + FileUtils.checkFile(inPath.resolve(zipFilename)); + + // Unzip files if output dir does NOT exist + if (!outPath.toFile().exists()) { + logger.info("Unzipping {} into {}", zipFilename, outPath); + Command cmd = new Command("unzip -d " + outPath + " " + inPath.resolve(zipFilename)); + cmd.run(); + // Check if expected files exist + for (String outFilename : outFilenames) { + FileUtils.checkFile(outPath.resolve(outFilename)); + } + } + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/GenomeMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/GenomeMongoDBAdaptor.java index e231e6df95..32c9aba037 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/GenomeMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/GenomeMongoDBAdaptor.java @@ -21,14 +21,12 @@ import com.mongodb.client.model.Projections; import org.bson.Document; import org.bson.conversions.Bson; -import org.opencb.biodata.models.core.Chromosome; -import org.opencb.biodata.models.core.GenomeSequenceFeature; -import org.opencb.biodata.models.core.GenomicScoreRegion; -import org.opencb.biodata.models.core.Region; +import org.opencb.biodata.models.core.*; import org.opencb.biodata.models.variant.avro.Cytoband; import org.opencb.biodata.models.variant.avro.Score; import org.opencb.cellbase.core.ParamConstants; import org.opencb.cellbase.core.api.GenomeQuery; +import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; import org.opencb.cellbase.core.exception.CellBaseException; import 
org.opencb.cellbase.core.result.CellBaseDataResult; @@ -46,6 +44,9 @@ import java.util.*; +import static org.opencb.cellbase.lib.MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; +import static org.opencb.cellbase.lib.MongoDBCollectionConfiguration.GENOME_SEQUENCE_CHUNK_SIZE; + /** * Created by imedina on 07/12/15. */ @@ -188,9 +189,9 @@ public CellBaseDataResult getSequence(Region region, Quer // The first chunk does contain 1 nt less than the rest and is 0-indexed - The rest of chunks contain // GENOME_SEQUENCE_CHUNK_SIZE nts and are 1 indexed (position 0 contains the GENOME_SEQUENCE_CHUNK_SIZE) nt - int startIndex = (region.getStart() < MongoDBCollectionConfiguration.GENOME_SEQUENCE_CHUNK_SIZE) - ? (region.getStart() - 1) % MongoDBCollectionConfiguration.GENOME_SEQUENCE_CHUNK_SIZE - : region.getStart() % MongoDBCollectionConfiguration.GENOME_SEQUENCE_CHUNK_SIZE; + int startIndex = (region.getStart() < GENOME_SEQUENCE_CHUNK_SIZE) + ? (region.getStart() - 1) % GENOME_SEQUENCE_CHUNK_SIZE + : region.getStart() % GENOME_SEQUENCE_CHUNK_SIZE; int length = region.getEnd() - region.getStart() + 1; // If end is out of the right boundary, there will be no chunks containing the right boundary. 
This means the // length of stringBuilder will be < than "end", since the for above will have just appended the chunks @@ -416,6 +417,23 @@ public List> getAllScoresByRegionList(List reg return allScoresByRegionList; } + public CellBaseDataResult getGenomeSequenceRawData(List chunkIds, int dataRelease) + throws CellBaseException { + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease); + + CellBaseQueryOptions queryOptions = new CellBaseQueryOptions(); + queryOptions.setExcludes(Arrays.asList("_id", "_chunkIds")); + Bson projection = getProjection(queryOptions); + + List orBsonList = new ArrayList<>(); + for (String chunkId: chunkIds) { + orBsonList.add(Filters.eq("chunkId", chunkId)); + } + Bson query = Filters.or(orBsonList); + + return new CellBaseDataResult<>(mongoDBCollection.find(query, projection, GenomeSequenceChunk.class, new QueryOptions())); + } + @Deprecated public CellBaseDataResult nativeGet(Query query, QueryOptions options, int dataRelease) throws CellBaseException { Bson bson = parseQuery(query); @@ -429,7 +447,7 @@ private Bson parseQuery(Query query) { List andBsonList = new ArrayList<>(); createRegionQuery(query, ParamConstants.QueryParams.REGION.key(), - MongoDBCollectionConfiguration.GENOME_SEQUENCE_CHUNK_SIZE, andBsonList); + GENOME_SEQUENCE_CHUNK_SIZE, andBsonList); if (andBsonList.size() > 0) { return Filters.and(andBsonList); @@ -550,4 +568,33 @@ public Bson parseQuery(GenomeQuery query) { return new Document(); } } + + public Collection getConservationScoreChunkIds(Region region) { + Set chunkIdSet = new HashSet<>(); + chunkIdSet.add(getChunkIdPrefix(region.getChromosome(), region.getStart(), CONSERVATION_CHUNK_SIZE)); + chunkIdSet.add(getChunkIdPrefix(region.getChromosome(), region.getEnd(), CONSERVATION_CHUNK_SIZE)); + return new ArrayList<>(chunkIdSet); + } + + public CellBaseDataResult getConservationScoreRegion(List chunkIds, CellBaseQueryOptions options, + int dataRelease) + throws 
CellBaseException { + MongoDBCollection mongoDBCollection = getCollectionByRelease(conservationMongoDBCollectionByRelease, dataRelease); + + Bson projection = getProjection(options); + List orBsonList = new ArrayList<>(); + for (String chunkId : chunkIds) { + orBsonList.add(Filters.eq("_chunkIds", chunkId)); + } + Bson bson = Filters.or(orBsonList); + + return new CellBaseDataResult<>(mongoDBCollection.find(bson, projection, GenomicScoreRegion.class, new QueryOptions())); + } + + public Collection getGenomeSequenceChunkId(Region region) { + Set chunkIdSet = new HashSet<>(); + chunkIdSet.add(getChunkIdPrefix(region.getChromosome(), region.getStart(), GENOME_SEQUENCE_CHUNK_SIZE)); + chunkIdSet.add(getChunkIdPrefix(region.getChromosome(), region.getEnd(), GENOME_SEQUENCE_CHUNK_SIZE)); + return new ArrayList<>(chunkIdSet); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MissenseVariationFunctionalScoreMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MissenseVariationFunctionalScoreMongoDBAdaptor.java index 06b254982b..bf649886af 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MissenseVariationFunctionalScoreMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MissenseVariationFunctionalScoreMongoDBAdaptor.java @@ -20,6 +20,7 @@ import org.bson.conversions.Bson; import org.opencb.biodata.models.core.MissenseVariantFunctionalScore; import org.opencb.biodata.models.core.TranscriptMissenseVariantFunctionalScore; +import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.variant.VariantAnnotationUtils; @@ -93,4 +94,24 @@ public CellBaseDataResult getScores(St return new CellBaseDataResult<>(id, -1, new ArrayList<>(), 0, null, 0); } + public CellBaseDataResult getScores(String chromosome, List 
positions, + CellBaseQueryOptions options, int dataRelease) + throws CellBaseException { + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease); + + Bson projection = getProjection(options); + + List orBsonList = new ArrayList<>(); + for (int position : positions) { + orBsonList.add(Filters.eq("position", position)); + } + + List andBsonList = new ArrayList<>(); + andBsonList.add(Filters.eq("chromosome", chromosome)); + andBsonList.add(Filters.or(orBsonList)); + Bson query = Filters.and(andBsonList); + + return new CellBaseDataResult<>(mongoDBCollection.find(query, projection, MissenseVariantFunctionalScore.class, + new QueryOptions())); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactory.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactory.java index 4245bb9da2..e120e0ae51 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactory.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactory.java @@ -91,6 +91,10 @@ public PublicationMongoDBAdaptor getPublicationMongoDBAdaptor() { return new PublicationMongoDBAdaptor(mongoDatastore); } + public PharmacogenomicsMongoDBAdaptor getPharmacogenomicsMongoDBAdaptor() { + return new PharmacogenomicsMongoDBAdaptor(mongoDatastore); + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("MongoDBAdaptorFactory{"); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/PharmacogenomicsMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/PharmacogenomicsMongoDBAdaptor.java new file mode 100644 index 0000000000..aabf539eac --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/PharmacogenomicsMongoDBAdaptor.java @@ -0,0 +1,136 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.impl.core; + +import com.mongodb.client.model.Filters; +import org.bson.Document; +import org.bson.conversions.Bson; +import org.opencb.biodata.models.pharma.PharmaChemical; +import org.opencb.cellbase.core.api.PharmaChemicalQuery; +import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.lib.iterator.CellBaseIterator; +import org.opencb.cellbase.lib.iterator.CellBaseMongoDBIterator; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.datastore.core.QueryParam; +import org.opencb.commons.datastore.mongodb.GenericDocumentComplexConverter; +import org.opencb.commons.datastore.mongodb.MongoDBCollection; +import org.opencb.commons.datastore.mongodb.MongoDBIterator; +import org.opencb.commons.datastore.mongodb.MongoDataStore; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Created by jtarraga on 9/4/23. 
+ */ +public class PharmacogenomicsMongoDBAdaptor extends CellBaseDBAdaptor + implements CellBaseCoreDBAdaptor { + + private static final GenericDocumentComplexConverter CONVERTER; + + static { + CONVERTER = new GenericDocumentComplexConverter<>(PharmaChemical.class); + } + + public PharmacogenomicsMongoDBAdaptor(MongoDataStore mongoDataStore) { + super(mongoDataStore); + + this.init(); + } + + private void init() { + mongoDBCollectionByRelease = buildCollectionByReleaseMap("pharmacogenomics"); + + logger.debug("PharmacogenomicsMongoDBAdaptor initialised"); + } + + @Override + public CellBaseDataResult aggregationStats(PharmaChemicalQuery query) { + logger.error("Not implemented yet"); + return null; + } + + @Override + public List> info(List ids, ProjectionQueryOptions queryOptions, int dataRelease, + String token) throws CellBaseException { + List> results = new ArrayList<>(); + Bson projection = getProjection(queryOptions); + for (String id : ids) { + List orBsonList = new ArrayList<>(ids.size()); + orBsonList.add(Filters.eq("id", id)); + orBsonList.add(Filters.eq("name", id)); + Bson query = Filters.or(orBsonList); + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease); + results.add(new CellBaseDataResult<>(mongoDBCollection.find(query, projection, CONVERTER, new QueryOptions()))); + } + return results; + } + + @Override + public CellBaseIterator iterator(PharmaChemicalQuery query) throws CellBaseException { + Bson bson = parseQuery(query); + QueryOptions queryOptions = query.toQueryOptions(); + Bson projection = getProjection(query); + MongoDBIterator iterator; + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, query.getDataRelease()); + iterator = mongoDBCollection.iterator(null, bson, projection, CONVERTER, queryOptions); + return new CellBaseMongoDBIterator<>(iterator); + } + + @Override + public CellBaseDataResult distinct(PharmaChemicalQuery query) throws 
CellBaseException { + Bson bsonDocument = parseQuery(query); + MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, query.getDataRelease()); + return new CellBaseDataResult<>(mongoDBCollection.distinct(query.getFacet(), bsonDocument, String.class)); + } + + @Override + public CellBaseDataResult groupBy(PharmaChemicalQuery query) throws CellBaseException { + throw new CellBaseException("Not implemented yet"); + } + + public Bson parseQuery(PharmaChemicalQuery pharmaQuery) { + List andBsonList = new ArrayList<>(); + boolean visited = false; + try { + for (Map.Entry entry : pharmaQuery.toObjectMap().entrySet()) { + String dotNotationName = entry.getKey(); + Object value = entry.getValue(); + switch (dotNotationName) { + case "dataRelease": + case "token": + // do nothing + break; + default: + createAndOrQuery(value, dotNotationName, QueryParam.Type.STRING, andBsonList); + break; + } + } + } catch (IllegalAccessException e) { + e.printStackTrace(); + } + logger.debug("pharmacogenomics parsed query: " + andBsonList); + if (andBsonList.size() > 0) { + return Filters.and(andBsonList); + } else { + return new Document(); + } + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/ProteinMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/ProteinMongoDBAdaptor.java index 1460dfe5e1..c5d2bd599f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/ProteinMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/ProteinMongoDBAdaptor.java @@ -28,6 +28,7 @@ import org.opencb.biodata.models.variant.avro.Score; import org.opencb.cellbase.core.api.ProteinQuery; import org.opencb.cellbase.core.api.TranscriptQuery; +import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; import org.opencb.cellbase.core.exception.CellBaseException; import 
org.opencb.cellbase.core.result.CellBaseDataResult; @@ -486,5 +487,28 @@ private ProteinVariantAnnotation processProteinVariantData(ProteinVariantAnnotat return proteinVariantAnnotation; } + public CellBaseDataResult getProteinSubstitutionRawData(List transcriptIds, CellBaseQueryOptions options, + int dataRelease) throws CellBaseException { + MongoDBCollection mongoDBCollection = getCollectionByRelease(proteinSubstitutionMongoDBCollectionByRelease, dataRelease); + // Be sure to exclude the internal field "_id" + Bson projection; + if (options != null) { + options.addExcludes("_id"); + projection = getProjection(options); + options.getExcludes().remove("_id"); + } else { + CellBaseQueryOptions queryOptions = new CellBaseQueryOptions(); + queryOptions.setExcludes(Collections.singletonList("_id")); + projection = getProjection(queryOptions); + } + + List orBsonList = new ArrayList<>(); + for (String transcriptId : transcriptIds) { + orBsonList.add(Filters.eq("transcriptId", transcriptId)); + } + Bson bson = Filters.or(orBsonList); + + return new CellBaseDataResult<>(mongoDBCollection.find(bson, projection, Object.class, new QueryOptions())); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java index e400badb20..599822f789 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptor.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.StringUtils; import org.bson.Document; import org.bson.conversions.Bson; +import org.opencb.biodata.models.core.GenomicScoreRegion; import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.Score; @@ -28,6 +29,7 @@ import org.opencb.biodata.models.variant.avro.VariantType; import 
org.opencb.cellbase.core.ParamConstants; import org.opencb.cellbase.core.api.VariantQuery; +import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; import org.opencb.cellbase.core.api.query.LogicalList; import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; import org.opencb.cellbase.core.exception.CellBaseException; @@ -48,6 +50,8 @@ import java.util.*; import java.util.function.Consumer; +import static org.opencb.cellbase.lib.MongoDBCollectionConfiguration.VARIATION_FUNCTIONAL_SCORE_CHUNK_SIZE; + /** * Created by imedina on 26/11/15. */ @@ -545,7 +549,7 @@ public CellBaseDataResult getFunctionalScoreVariant(Variant variant, Quer String reference = variant.getReference(); String alternate = variant.getAlternate(); - String chunkId = getChunkIdPrefix(chromosome, position, MongoDBCollectionConfiguration.VARIATION_FUNCTIONAL_SCORE_CHUNK_SIZE); + String chunkId = getChunkIdPrefix(chromosome, position, VARIATION_FUNCTIONAL_SCORE_CHUNK_SIZE); // QueryBuilder builder = QueryBuilder.start("_chunkIds").is(chunkId); Document query = new Document("_chunkIds", chunkId); @@ -749,6 +753,28 @@ public List> info(List ids, ProjectionQueryO } return results; } + + public List getFunctionalScoreChunkIds(Region region) { + Set chunkIdSet = new HashSet<>(); + chunkIdSet.add(getChunkIdPrefix(region.getChromosome(), region.getStart(), VARIATION_FUNCTIONAL_SCORE_CHUNK_SIZE)); + chunkIdSet.add(getChunkIdPrefix(region.getChromosome(), region.getEnd(), VARIATION_FUNCTIONAL_SCORE_CHUNK_SIZE)); + return new ArrayList<>(chunkIdSet); + } + + public CellBaseDataResult getFunctionalScoreRegion(List chunkIds, CellBaseQueryOptions options, + int dataRelease) + throws CellBaseException { + MongoDBCollection mongoDBCollection = getCollectionByRelease(caddDBCollectionByRelease, dataRelease); + + Bson projection = getProjection(options); + List orBsonList = new ArrayList<>(); + for (String chunkId : chunkIds) { + orBsonList.add(Filters.eq("_chunkIds", chunkId)); + } + Bson bson = 
Filters.or(orBsonList); + + return new CellBaseDataResult<>(mongoDBCollection.find(bson, projection, GenomicScoreRegion.class, new QueryOptions())); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/loader/LoadRunner.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/loader/LoadRunner.java index 0fbe3f4f5d..3a6605c4f5 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/loader/LoadRunner.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/loader/LoadRunner.java @@ -195,6 +195,8 @@ private int readInputJsonFile(Path inputFile) { logger.info("{} records read from {}", inputFileRecords, inputFile.toString()); } } + br.close(); + // Last batch if (!batch.isEmpty()) { blockingQueue.put(batch); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/loader/MongoDBCellBaseLoader.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/loader/MongoDBCellBaseLoader.java index e673843aba..8ab745feab 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/loader/MongoDBCellBaseLoader.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/loader/MongoDBCellBaseLoader.java @@ -61,6 +61,7 @@ public class MongoDBCellBaseLoader extends CellBaseLoader { private static final String GENOMIC_FEATURES = "genomicFeatures"; private static final String XREFS = "xrefs"; private static final String TRAIT_ASSOCIATION = "traitAssociation"; + private static final String GWAS = "gwas"; private static final String SOMATIC_INFORMATION = "somaticInformation"; private static final String PRIMARY_SITE = "primarySite"; private static final String SITE_SUBTYPE = "siteSubtype"; @@ -314,6 +315,8 @@ private List getTraitFromClinicalVariants(Document document) throws Json } } } + } else if (document.containsKey(GWAS)) { + logger.info("Clinical variant {} contains GWAS info but not association traits", document.get("id")); } else { ObjectMapper jsonObjectMapper = new ObjectMapper(); 
jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); @@ -359,6 +362,8 @@ private List getFeatureXrefsFromClinicalVariants(Document document) thro List evidenceEntryList = (List) document.get(TRAIT_ASSOCIATION); getFeatureXrefsFromClinicalObject(evidenceEntryList, values); getFeatureXrefsFromConsequenceTypes((List) document.get("consequenceTypes"), values); + } else if (document.containsKey(GWAS)) { + logger.info("Clinical variant {} contains GWAS info but not association traits", document.get("id")); } else { ObjectMapper jsonObjectMapper = new ObjectMapper(); jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/CellBaseManagerFactory.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/CellBaseManagerFactory.java index f89120edb6..ba6e90e150 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/CellBaseManagerFactory.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/CellBaseManagerFactory.java @@ -44,6 +44,7 @@ public class CellBaseManagerFactory { private Map ontologyManagers; private FileManager fileManager; private PublicationManager publicationManager; + private Map pharmacogenomicsManagers; private Map dataReleaseManagers; @@ -65,6 +66,7 @@ public CellBaseManagerFactory(CellBaseConfiguration configuration) { tfManagers = new HashMap<>(); ontologyManagers = new HashMap<>(); dataReleaseManagers = new HashMap<>(); + pharmacogenomicsManagers = new HashMap<>(); } private String getMultiKey(String species, String assembly) { @@ -353,4 +355,23 @@ public PublicationManager getPublicationManager() throws CellBaseException { } return publicationManager; } + + public PharmacogenomicsManager getPharmacogenomicsManager(String species) throws CellBaseException { + if (species == null) { + throw new CellBaseException("Species is required."); + } + SpeciesConfiguration.Assembly assembly = 
SpeciesUtils.getDefaultAssembly(configuration, species); + return getPharmacogenomicsManager(species, assembly.getName()); + } + + public PharmacogenomicsManager getPharmacogenomicsManager(String species, String assembly) throws CellBaseException { + String multiKey = getMultiKey(species, assembly); + if (!pharmacogenomicsManagers.containsKey(multiKey)) { + if (!validateSpeciesAssembly(species, assembly)) { + throw new CellBaseException("Invalid species " + species + " or assembly " + assembly); + } + pharmacogenomicsManagers.put(multiKey, new PharmacogenomicsManager(species, assembly, configuration)); + } + return pharmacogenomicsManagers.get(multiKey); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/ClinicalManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/ClinicalManager.java index 404f272944..c9e484b33b 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/ClinicalManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/ClinicalManager.java @@ -72,7 +72,7 @@ public CellBaseDataResult search(ClinicalVariantQuery query) throws Que Set validSources = tokenManager.getValidSources(query.getToken(), DataAccessTokenUtils.UNLICENSED_CLINICAL_DATA); // Check if is necessary to use the token licensed variant iterator - if (validSources.size() != DataAccessTokenUtils.NUM_CLINICAL_SOURCES) { + if (DataAccessTokenUtils.needFiltering(validSources, DataAccessTokenUtils.LICENSED_CLINICAL_DATA)) { return DataAccessTokenUtils.filterDataSources(results, validSources); } else { return results; @@ -97,7 +97,7 @@ public List> info(List ids, CellBaseQueryOpt Set validSources = tokenManager.getValidSources(token, DataAccessTokenUtils.UNLICENSED_CLINICAL_DATA); // Check if is necessary to use the token licensed variant iterator - if (validSources.size() != DataAccessTokenUtils.NUM_CLINICAL_SOURCES) { + if (DataAccessTokenUtils.needFiltering(validSources, 
DataAccessTokenUtils.LICENSED_CLINICAL_DATA)) { return DataAccessTokenUtils.filterDataSources(results, validSources); } else { return results; @@ -109,7 +109,7 @@ public CellBaseIterator iterator(ClinicalVariantQuery query) throws Cel Set validSources = tokenManager.getValidSources(query.getToken(), DataAccessTokenUtils.UNLICENSED_CLINICAL_DATA); // Check if is necessary to use the token licensed variant iterator - if (validSources.size() != DataAccessTokenUtils.NUM_CLINICAL_SOURCES) { + if (DataAccessTokenUtils.needFiltering(validSources, DataAccessTokenUtils.LICENSED_CLINICAL_DATA)) { return new TokenFilteredVariantIterator(getDBAdaptor().iterator(query), validSources); } else { return getDBAdaptor().iterator(query); @@ -133,7 +133,7 @@ public CellBaseDataResult search(Query query, QueryOptions queryOptions } // Check if is necessary to use the token licensed variant iterator - if (validSources.size() != DataAccessTokenUtils.NUM_CLINICAL_SOURCES) { + if (DataAccessTokenUtils.needFiltering(validSources, DataAccessTokenUtils.LICENSED_CLINICAL_DATA)) { return DataAccessTokenUtils.filterDataSources(result, validSources); } else { return result; @@ -192,7 +192,7 @@ public List> getByVariant(List variants, Li } // Check if is necessary to use the token licensed variant iterator - if (validSources.size() != DataAccessTokenUtils.NUM_CLINICAL_SOURCES) { + if (DataAccessTokenUtils.needFiltering(validSources, DataAccessTokenUtils.LICENSED_CLINICAL_DATA)) { return DataAccessTokenUtils.filterDataSources(results, validSources); } else { return results; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/DataReleaseManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/DataReleaseManager.java index 9a81a58b19..d7c924afa1 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/DataReleaseManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/DataReleaseManager.java @@ -107,7 +107,7 @@ public 
DataRelease get(int release) throws CellBaseException { } } } - throw new CellBaseException("Data release '" + release + "' does not exist."); + throw new CellBaseException("Data release '" + release + "' does not exist for species = " + species + ", assembly = " + assembly); } public DataRelease getDefault(String cellBaseVersion) throws CellBaseException { @@ -119,7 +119,8 @@ public DataRelease getDefault(String cellBaseVersion) throws CellBaseException { } } } - throw new CellBaseException("No data release found for CellBase " + cellBaseVersion); + throw new CellBaseException("No data release found for CellBase " + cellBaseVersion + " (species = " + species + ", assembly = " + + assembly + ")"); } public DataRelease update(int release, List versions) throws CellBaseException { @@ -172,7 +173,7 @@ public DataRelease update(int release, String collection, String data, List dr.getRelease()).collect(Collectors.toList()), ",") + ". And use 0 to" - + " use the default data release."); + throw new CellBaseException("Invalid data release " + outRelease + " for species = " + species + ", assembly = " + assembly + + ". 
Valid data releases are: " + StringUtils.join(dataReleases.stream().map(dr -> dr.getRelease()) + .collect(Collectors.toList()), ",")); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/GenomeManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/GenomeManager.java index 38e9d0b19b..0ea9ab8351 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/GenomeManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/GenomeManager.java @@ -17,14 +17,12 @@ package org.opencb.cellbase.lib.managers; import com.google.common.base.Splitter; -import org.opencb.biodata.models.core.Chromosome; -import org.opencb.biodata.models.core.GenomeSequenceFeature; -import org.opencb.biodata.models.core.GenomicScoreRegion; -import org.opencb.biodata.models.core.Region; +import org.opencb.biodata.models.core.*; import org.opencb.biodata.models.variant.avro.Cytoband; import org.opencb.biodata.models.variant.avro.Score; import org.opencb.cellbase.core.ParamConstants; import org.opencb.cellbase.core.api.GenomeQuery; +import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; @@ -34,7 +32,9 @@ import org.opencb.commons.datastore.core.QueryOptions; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; public class GenomeManager extends AbstractManager implements AggregationApi { @@ -139,6 +139,16 @@ public List>> getConservation(Query return queryResultList; } + public CellBaseDataResult getConservationScoreRegion(List regions, CellBaseQueryOptions options, + int dataRelease) throws CellBaseException { + Set chunkIdSet = new HashSet<>(); + for (Region region : regions) { + chunkIdSet.addAll(genomeDBAdaptor.getConservationScoreChunkIds(region)); + } + + return 
genomeDBAdaptor.getConservationScoreRegion(new ArrayList<>(chunkIdSet), options, dataRelease); + } + public List> getAllScoresByRegionList(List regionList, QueryOptions options, int dataRelease) throws CellBaseException { return genomeDBAdaptor.getAllScoresByRegionList(regionList, options, dataRelease); @@ -170,4 +180,13 @@ public List> getCytobands(List regionList, public List> getCytobands(List regionList, int dataRelease) throws CellBaseException { return getCytobands(regionList, null, dataRelease); } + + public CellBaseDataResult getGenomeSequenceRawData(List regions, int dataRelease) + throws CellBaseException { + Set chunkIdSet = new HashSet<>(); + for (Region region : regions) { + chunkIdSet.addAll(genomeDBAdaptor.getGenomeSequenceChunkId(region)); + } + return genomeDBAdaptor.getGenomeSequenceRawData(new ArrayList<>(chunkIdSet), dataRelease); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/PharmacogenomicsManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/PharmacogenomicsManager.java new file mode 100644 index 0000000000..72c564945d --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/PharmacogenomicsManager.java @@ -0,0 +1,57 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.lib.managers; + +import org.opencb.biodata.models.pharma.PharmaChemical; +import org.opencb.cellbase.core.api.PharmaChemicalQuery; +import org.opencb.cellbase.core.api.query.ProjectionQueryOptions; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.lib.impl.core.CellBaseCoreDBAdaptor; +import org.opencb.cellbase.lib.impl.core.PharmacogenomicsMongoDBAdaptor; + +import java.util.List; + +public class PharmacogenomicsManager extends AbstractManager implements AggregationApi { + + private PharmacogenomicsMongoDBAdaptor pharmacogenomicsDBAdaptor; + + public PharmacogenomicsManager(String species, CellBaseConfiguration configuration) throws CellBaseException { + this(species, null, configuration); + } + + public PharmacogenomicsManager(String species, String assembly, CellBaseConfiguration configuration) throws CellBaseException { + super(species, assembly, configuration); + + this.init(); + } + + private void init() { + pharmacogenomicsDBAdaptor = dbAdaptorFactory.getPharmacogenomicsMongoDBAdaptor(); + } + + @Override + public CellBaseCoreDBAdaptor getDBAdaptor() { + return pharmacogenomicsDBAdaptor; + } + + public List> info(List ids, ProjectionQueryOptions query, int dataRelease, + String token) throws CellBaseException { + return pharmacogenomicsDBAdaptor.info(ids, query, dataRelease, token); + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/ProteinManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/ProteinManager.java index 116daa58bf..0505c80ad9 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/ProteinManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/ProteinManager.java @@ -18,6 +18,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import 
org.opencb.biodata.formats.protein.uniprot.v202003jaxb.Entry; +import org.opencb.biodata.models.core.MissenseVariantFunctionalScore; import org.opencb.biodata.models.core.Transcript; import org.opencb.biodata.models.core.TranscriptMissenseVariantFunctionalScore; import org.opencb.biodata.models.variant.Variant; @@ -25,6 +26,7 @@ import org.opencb.biodata.models.variant.avro.Score; import org.opencb.cellbase.core.api.ProteinQuery; import org.opencb.cellbase.core.api.TranscriptQuery; +import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; @@ -116,6 +118,17 @@ public CellBaseDataResult getVariantAnnotation(Variant } return proteinVariantAnnotation; } + + public CellBaseDataResult getProteinSubstitutionRawData(List transcriptIds, CellBaseQueryOptions options, + int dataRelease) throws CellBaseException { + return proteinDBAdaptor.getProteinSubstitutionRawData(transcriptIds, options, dataRelease); + } + + public CellBaseDataResult getMissenseVariantFunctionalScores(String chromosome, List positions, + CellBaseQueryOptions options, + int dataRelease) throws CellBaseException { + return missenseVariationFunctionalScoreMongoDBAdaptor.getScores(chromosome, positions, options, dataRelease); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java index 9dbef6a14c..c478e28f45 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java @@ -17,6 +17,7 @@ package org.opencb.cellbase.lib.managers; import org.opencb.biodata.models.core.Gene; +import org.opencb.biodata.models.core.GenomicScoreRegion; import org.opencb.biodata.models.core.Region; import 
org.opencb.biodata.models.core.SpliceScore; import org.opencb.biodata.models.variant.Variant; @@ -27,6 +28,7 @@ import org.opencb.biodata.models.variant.avro.VariantType; import org.opencb.cellbase.core.ParamConstants; import org.opencb.cellbase.core.api.VariantQuery; +import org.opencb.cellbase.core.api.query.CellBaseQueryOptions; import org.opencb.cellbase.core.api.query.QueryException; import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; @@ -37,6 +39,7 @@ import org.opencb.cellbase.lib.impl.core.VariantMongoDBAdaptor; import org.opencb.cellbase.lib.token.DataAccessTokenUtils; import org.opencb.cellbase.lib.variant.VariantAnnotationUtils; +import org.opencb.cellbase.lib.variant.annotation.CellBaseNormalizerSequenceAdaptor; import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator; import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator; import org.opencb.commons.datastore.core.Query; @@ -106,14 +109,31 @@ public List> getHgvsByVariant(String variants, int da * Normalises a list of variants. 
* * @param variants list of variant strings + * @param decompose boolean to set the decompose MNV behaviour + * @param leftAlign boolean to set the left alignment behaviour * @param dataRelease data release * @return list of normalised variants * @throws CellBaseException if the species is incorrect */ - public CellBaseDataResult getNormalizationByVariant(String variants, int dataRelease) throws CellBaseException { + public CellBaseDataResult getNormalizationByVariant(String variants, boolean decompose, boolean leftAlign, + int dataRelease) throws CellBaseException { List variantList = parseVariants(variants); VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly, dataRelease, "", cellbaseManagerFactory); + + + // Set decompose MNV behaviour + variantAnnotationCalculator.getNormalizer().getConfig().setDecomposeMNVs(decompose); + + // Set left alignment behaviour + if (leftAlign) { + variantAnnotationCalculator.getNormalizer().getConfig().enableLeftAlign(new CellBaseNormalizerSequenceAdaptor(genomeManager, + dataRelease)); + } else { + variantAnnotationCalculator.getNormalizer().getConfig().disableLeftAlign(); + } + + List normalisedVariants = variantAnnotationCalculator.normalizer(variantList); return new CellBaseDataResult<>(variants, 0, new ArrayList<>(), normalisedVariants.size(), normalisedVariants, -1); } @@ -121,7 +141,8 @@ public CellBaseDataResult getNormalizationByVariant(String variants, in public List> getAnnotationByVariant(QueryOptions queryOptions, String variants, Boolean normalize, - Boolean skipDecompose, + Boolean decompose, + Boolean leftAlign, Boolean ignorePhase, @Deprecated Boolean phased, Boolean imprecise, @@ -150,8 +171,11 @@ public List> getAnnotationByVariant(QueryO if (normalize != null) { queryOptions.put("normalize", normalize); } - if (skipDecompose != null) { - queryOptions.put("skipDecompose", skipDecompose); + if (decompose != null) { + queryOptions.put("decompose", decompose); 
+ } + if (leftAlign != null) { + queryOptions.put("leftAlign", leftAlign); } if (imprecise != null) { queryOptions.put("imprecise", imprecise); @@ -292,7 +316,7 @@ public CellBaseDataResult getSpliceScoreVariant(Variant variant, St CellBaseDataResult result = spliceDBAdaptor.getScores(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate(), dataRelease); - if (validSources.size() != DataAccessTokenUtils.NUM_SPLICE_SCORE_SOURCES) { + if (DataAccessTokenUtils.needFiltering(validSources, DataAccessTokenUtils.LICENSED_SPLICE_SCORES_DATA)) { return DataAccessTokenUtils.filterDataSources(result, validSources); } else { return result; @@ -304,17 +328,28 @@ public List> getSpliceScoreVariant(List Set validSources = tokenManager.getValidSources(token, DataAccessTokenUtils.UNLICENSED_SPLICE_SCORES_DATA); List> cellBaseDataResults = new ArrayList<>(variants.size()); - if (validSources.size() != DataAccessTokenUtils.NUM_SPLICE_SCORE_SOURCES) { + if (DataAccessTokenUtils.needFiltering(validSources, DataAccessTokenUtils.LICENSED_SPLICE_SCORES_DATA)) { for (Variant variant : variants) { - cellBaseDataResults.add(spliceDBAdaptor.getScores(variant.getChromosome(), variant.getStart(), variant.getReference(), - variant.getAlternate(), dataRelease)); + cellBaseDataResults.add(DataAccessTokenUtils.filterDataSources(spliceDBAdaptor.getScores(variant.getChromosome(), + variant.getStart(), variant.getReference(), variant.getAlternate(), dataRelease), validSources)); } } else { for (Variant variant : variants) { - cellBaseDataResults.add(DataAccessTokenUtils.filterDataSources(spliceDBAdaptor.getScores(variant.getChromosome(), - variant.getStart(), variant.getReference(), variant.getAlternate(), dataRelease), validSources)); + cellBaseDataResults.add(spliceDBAdaptor.getScores(variant.getChromosome(), variant.getStart(), variant.getReference(), + variant.getAlternate(), dataRelease)); } } return cellBaseDataResults; } + + public CellBaseDataResult 
getFunctionalScoreRegion(List regions, CellBaseQueryOptions options, + int dataRelease) + throws CellBaseException { + Set chunkIdSet = new HashSet<>(); + for (Region region : regions) { + chunkIdSet.addAll(variantDBAdaptor.getFunctionalScoreChunkIds(region)); + } + + return variantDBAdaptor.getFunctionalScoreRegion(new ArrayList<>(chunkIdSet), options, dataRelease); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/token/DataAccessTokenUtils.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/token/DataAccessTokenUtils.java index a50297ed3a..5aa35fdf04 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/token/DataAccessTokenUtils.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/token/DataAccessTokenUtils.java @@ -28,14 +28,25 @@ public final class DataAccessTokenUtils { public static final int NUM_SPLICE_SCORE_SOURCES = 2; + public static final Set LICENSED_SPLICE_SCORES_DATA = new HashSet<>(Collections.singletonList("spliceai")); public static final Set UNLICENSED_SPLICE_SCORES_DATA = new HashSet<>(Collections.singletonList("mmsplice")); public static final int NUM_CLINICAL_SOURCES = 3; + public static final Set LICENSED_CLINICAL_DATA = new HashSet<>(Arrays.asList("cosmic", "hgmd")); public static final Set UNLICENSED_CLINICAL_DATA = new HashSet<>(Collections.singletonList("clinvar")); private DataAccessTokenUtils() { } + public static boolean needFiltering(Set inputSources, Set licensedSources) { + for (String licensedSource : licensedSources) { + if (!inputSources.contains(licensedSource)) { + return true; + } + } + return false; + } + public static CellBaseDataResult filterDataSources(CellBaseDataResult results, Set validSources) { List list = new ArrayList<>(); for (T result : results.getResults()) { diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java 
b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index 0ab1461591..adb587fb7a 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -19,6 +19,7 @@ import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.core.*; +import org.opencb.biodata.models.pharma.PharmaChemical; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings; @@ -36,6 +37,7 @@ import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.managers.*; import org.opencb.cellbase.lib.variant.VariantAnnotationUtils; +import org.opencb.cellbase.lib.variant.annotation.futures.FuturePharmacogenomicsAnnotator; import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator; import org.opencb.commons.datastore.core.QueryOptions; import org.slf4j.Logger; @@ -68,6 +70,7 @@ public class VariantAnnotationCalculator { private ClinicalManager clinicalManager; private RepeatsManager repeatsManager; private ProteinManager proteinManager; + private PharmacogenomicsManager pharmacogenomicsManager; private int dataRelease; private String token; private Set annotatorSet; @@ -75,7 +78,8 @@ public class VariantAnnotationCalculator { private final VariantNormalizer normalizer; private boolean normalize = false; - private boolean decompose = true; + private boolean decompose = false; + private boolean leftAlign = false; private boolean phased = true; private Boolean imprecise = true; private Integer svExtraPadding = 0; @@ -100,6 +104,7 @@ public VariantAnnotationCalculator(String species, String assembly, int dataRele this.proteinManager = 
cellbaseManagerFactory.getProteinManager(species, assembly); this.clinicalManager = cellbaseManagerFactory.getClinicalManager(species, assembly); this.repeatsManager = cellbaseManagerFactory.getRepeatsManager(species, assembly); + this.pharmacogenomicsManager = cellbaseManagerFactory.getPharmacogenomicsManager(species, assembly); // Check data release this.dataRelease = cellbaseManagerFactory.getDataReleaseManager(species, assembly).checkDataRelease(dataRelease); @@ -116,11 +121,16 @@ public VariantAnnotationCalculator(String species, String assembly, int dataRele } private VariantNormalizer.VariantNormalizerConfig getNormalizerConfig() { - return (new VariantNormalizer.VariantNormalizerConfig()) + VariantNormalizer.VariantNormalizerConfig variantNormalizerConfig = new VariantNormalizer.VariantNormalizerConfig() .setReuseVariants(false) .setNormalizeAlleles(false) - .setDecomposeMNVs(decompose) - .enableLeftAlign(new CellBaseNormalizerSequenceAdaptor(genomeManager, dataRelease)); + .setDecomposeMNVs(decompose); + if (leftAlign) { + variantNormalizerConfig.enableLeftAlign(new CellBaseNormalizerSequenceAdaptor(genomeManager, dataRelease)); + } else { + variantNormalizerConfig.disableLeftAlign(); + } + return variantNormalizerConfig; } @Deprecated @@ -183,8 +193,7 @@ private List> generateCellBaseDataResultLi // Return only one result per CellBaseDataResult if either // - size original variant list and normalised one is the same - // - MNV decomposition is switched OFF, i.e. queryOptions.skipDecompose = true and therefore - // this.decompose = false + // - MNV decomposition is switched OFF, i.e. 
queryOptions.decompose = false and therefore if (!decompose || variantList.size() == normalizedVariantList.size()) { for (int i = 0; i < variantList.size(); i++) { CellBaseDataResult cellBaseDataResult = new CellBaseDataResult<>(variantList.get(i).toString(), @@ -430,37 +439,6 @@ private List getTargets(Gene mirna) throws QueryException, Ille return geneMirnaTargets; } -// private boolean isPhased(Variant variant) { -// return (variant.getStudies() != null && !variant.getStudies().isEmpty()) -// && variant.getStudies().get(0).getSampleDataKeys().contains("PS"); -// } -// -// private String getCachedVariationIncludeFields() { -// StringBuilder stringBuilder = new StringBuilder("annotation.chromosome,annotation.start,annotation.reference"); -// stringBuilder.append(",annotation.alternate,annotation.id"); -// -// if (annotatorSet.contains("variation")) { -// stringBuilder.append(",annotation.id,annotation.additionalAttributes.dgvSpecificAttributes"); -// } -// if (annotatorSet.contains("clinical") || annotatorSet.contains("traitAssociation")) { -// stringBuilder.append(",annotation.variantTraitAssociation"); -// } -// if (annotatorSet.contains("conservation")) { -// stringBuilder.append(",annotation.conservation"); -// } -// if (annotatorSet.contains("functionalScore")) { -// stringBuilder.append(",annotation.functionalScore"); -// } -// if (annotatorSet.contains("consequenceType")) { -// stringBuilder.append(",annotation.consequenceTypes,annotation.displayConsequenceType"); -// } -// if (annotatorSet.contains("populationFrequencies")) { -// stringBuilder.append(",annotation.populationFrequencies"); -// } -// -// return stringBuilder.toString(); -// } - private List runAnnotationProcess(List normalizedVariantList, int dataRelease) throws InterruptedException, ExecutionException, QueryException, IllegalAccessException, CellBaseException { long globalStartTime = System.currentTimeMillis(); @@ -531,6 +509,14 @@ private List runAnnotationProcess(List normalizedVar 
spliceScoreFuture = CACHED_THREAD_POOL.submit(futureSpliceScoreAnnotator); } + FuturePharmacogenomicsAnnotator futurePharmacogenomicsAnnotator = null; + Future>> pharmacogenomicsFuture = null; + if (annotatorSet.contains("pharmacogenomics")) { + futurePharmacogenomicsAnnotator = new FuturePharmacogenomicsAnnotator(normalizedVariantList, QueryOptions.empty(), dataRelease, + pharmacogenomicsManager, logger); + pharmacogenomicsFuture = CACHED_THREAD_POOL.submit(futurePharmacogenomicsAnnotator); + } + // We iterate over all variants to get the rest of the annotations and to create the VariantAnnotation objects Queue variantBuffer = new LinkedList<>(); long startTime = System.currentTimeMillis(); @@ -666,10 +652,12 @@ private List runAnnotationProcess(List normalizedVar if (futureCytobandAnnotator != null) { futureCytobandAnnotator.processResults(cytobandFuture, variantAnnotationList); } - if (futureSpliceScoreAnnotator != null) { futureSpliceScoreAnnotator.processResults(spliceScoreFuture, variantAnnotationList); } + if (futurePharmacogenomicsAnnotator != null) { + futurePharmacogenomicsAnnotator.processResults(pharmacogenomicsFuture, variantAnnotationList); + } // Not needed with newCachedThreadPool // fixedThreadPool.shutdown(); @@ -728,12 +716,25 @@ private void parseQueryParam(QueryOptions queryOptions) { normalize = (queryOptions.get("normalize") != null && (Boolean) queryOptions.get("normalize")); logger.debug("normalize = {}", normalize); - // Default behaviour decompose - decompose = (queryOptions.get("skipDecompose") == null || !queryOptions.getBoolean("skipDecompose")); + // Default behaviour decompose MNV + if (queryOptions.containsKey("skipDecompose")) { + throw new IllegalArgumentException("Param 'skipDecompose' is not supported. 
Please, use 'decompose' instead"); + } + decompose = (boolean) queryOptions.getOrDefault("decompose", false); logger.debug("decompose = {}", decompose); // Must update normaliser configuration since normaliser was created on constructor normalizer.getConfig().setDecomposeMNVs(decompose); + // Default behaviour left align + leftAlign = (boolean) queryOptions.getOrDefault("leftAlign", false); + logger.debug("leftAlign = {}", leftAlign); + // Must update normaliser configuration since normaliser was created on constructor + if (leftAlign) { + normalizer.getConfig().enableLeftAlign(new CellBaseNormalizerSequenceAdaptor(genomeManager, dataRelease)); + } else { + normalizer.getConfig().disableLeftAlign(); + } + // New parameter "ignorePhase" present overrides presence of old "phased" parameter if (queryOptions.get("ignorePhase") != null) { phased = !queryOptions.getBoolean("ignorePhase"); @@ -1163,7 +1164,7 @@ private Set getAnnotatorSet(QueryOptions queryOptions) { } else { // 'expression' removed in CB 5.0 annotatorSet = new HashSet<>(Arrays.asList("variation", "traitAssociation", "conservation", "functionalScore", - "consequenceType", "geneDisease", "drugInteraction", "geneConstraints", "mirnaTargets", + "consequenceType", "geneDisease", "drugInteraction", "geneConstraints", "mirnaTargets", "pharmacogenomics", "cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs")); List excludeList = queryOptions.getAsStringList("exclude"); excludeList.forEach(annotatorSet::remove); @@ -1968,5 +1969,17 @@ public void processResults(Future>> spliceF } } } -} + public VariantNormalizer getNormalizer() { + return normalizer; + } + + public String getToken() { + return token; + } + + public VariantAnnotationCalculator setToken(String token) { + this.token = token; + return this; + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FuturePharmacogenomicsAnnotator.java 
b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FuturePharmacogenomicsAnnotator.java new file mode 100644 index 0000000000..7760dbfe03 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/futures/FuturePharmacogenomicsAnnotator.java @@ -0,0 +1,211 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.variant.annotation.futures; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.pharma.*; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.Pharmacogenomics; +import org.opencb.biodata.models.variant.avro.PharmacogenomicsAlleles; +import org.opencb.biodata.models.variant.avro.PharmacogenomicsClinicalAnnotation; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.cellbase.core.api.PharmaChemicalQuery; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.lib.managers.PharmacogenomicsManager; +import org.opencb.commons.datastore.core.QueryOptions; +import org.slf4j.Logger; + +import java.util.*; +import java.util.concurrent.*; +import java.util.stream.Collectors; + +public class FuturePharmacogenomicsAnnotator implements Callable>> { + private PharmacogenomicsManager pharmacogenomicsManager; + + private List variantList; + 
private QueryOptions queryOptions; + private int dataRelease; + + private Logger logger; + + public FuturePharmacogenomicsAnnotator(List variantList, QueryOptions queryOptions, int dataRelease, + PharmacogenomicsManager pharmacogenomicsManager, Logger logger) { + this.pharmacogenomicsManager = pharmacogenomicsManager; + + this.variantList = variantList; + this.queryOptions = queryOptions; + this.dataRelease = dataRelease; + + this.logger = logger; + } + + @Override + public List> call() throws Exception { + long startTime = System.currentTimeMillis(); + + List> cellBaseDataResultList = new ArrayList<>(variantList.size()); + + logger.debug("Pharmacogenomics queries..."); + // Want to return only one CellBaseDataResult object per Variant + List includes = new ArrayList<>(); + includes.add("id"); + includes.add("name"); + includes.add("source"); + includes.add("types"); + includes.add("smiles"); + includes.add("inChI"); + includes.add("variants.variantId"); + includes.add("variants.geneName"); + includes.add("variants.chromosome"); + includes.add("variants.position"); + includes.add("variants.phenotypes"); + includes.add("variants.phenotypeType"); + includes.add("variants.confidence"); + includes.add("variants.score"); + includes.add("variants.url"); + includes.add("variants.evidences.pubmed"); + includes.add("variants.evidences.variantAssociations.description"); + includes.add("variants.evidences.variantAssociations.discussion"); + includes.add("variants.alleles"); + logger.info("Pharmacogenomics variant annotation/search includes: {}", StringUtils.join(includes, ",")); + for (Variant variant : variantList) { + PharmaChemicalQuery query = new PharmaChemicalQuery(); + query.setLocations(Collections.singletonList(variant.getChromosome() + ":" + variant.getStart())); + query.setDataRelease(dataRelease); + query.setIncludes(includes); + cellBaseDataResultList.add(pharmacogenomicsManager.search(query)); + } + logger.info("Pharmacogenomics queries performance in {} ms for 
{} variants", System.currentTimeMillis() - startTime, + variantList.size()); + return cellBaseDataResultList; + } + + public void processResults(Future>> pharmaFuture, + List variantAnnotationList) + throws InterruptedException, ExecutionException { + List> pharmaChemicalCellBaseDataResults; + try { + pharmaChemicalCellBaseDataResults = pharmaFuture.get(30, TimeUnit.SECONDS); + } catch (TimeoutException e) { + pharmaFuture.cancel(true); + throw new ExecutionException("Unable to finish pharmacogenomics query on time", e); + } + + if (CollectionUtils.isNotEmpty(pharmaChemicalCellBaseDataResults)) { + for (int i = 0; i < variantAnnotationList.size(); i++) { + CellBaseDataResult pharmaChemicalResult = pharmaChemicalCellBaseDataResults.get(i); + if (pharmaChemicalResult != null && CollectionUtils.isNotEmpty(pharmaChemicalResult.getResults())) { + List pharmacogenomicsList = new ArrayList<>(); + for (PharmaChemical pharmaChemical : pharmaChemicalResult.getResults()) { + Pharmacogenomics pharmacogenomics = new Pharmacogenomics(); + // Basic annotation fields + pharmacogenomics.setId(pharmaChemical.getId()); + pharmacogenomics.setName(pharmaChemical.getName()); + pharmacogenomics.setSource(pharmaChemical.getSource()); + pharmacogenomics.setTypes(pharmaChemical.getTypes()); + pharmacogenomics.setSmiles(pharmaChemical.getSmiles()); + pharmacogenomics.setInChI(pharmaChemical.getInChI()); + + // Clinical annotation fields + if (CollectionUtils.isNotEmpty(pharmaChemical.getVariants())) { + String varAnnotChrom = variantAnnotationList.get(i).getChromosome(); + int varAnnotStart = variantAnnotationList.get(i).getStart(); + + List resultClinicalAnnotations = new ArrayList<>(); + + // We must filter out those annotations based on different alternate alleles + // 1. 
Construct the HOM ALT genotype + final String queryAllele = + variantAnnotationList.get(i).getAlternate() + variantAnnotationList.get(i).getAlternate(); + for (PharmaVariantAnnotation pharmaVariantAnnotation : pharmaChemical.getVariants()) { + // 2. Check the variant is the same + if (!varAnnotChrom.equals(pharmaVariantAnnotation.getChromosome()) + || varAnnotStart != pharmaVariantAnnotation.getPosition()) { + continue; + } + + // 3. Check if the 'alleles' contains the alternate homozygous genotype, or 'null' or '*', + // otherwise go to next annotation + if (CollectionUtils.isNotEmpty(pharmaVariantAnnotation.getAlleles())) { + boolean found = false; + for (PharmaClinicalAllele allele : pharmaVariantAnnotation.getAlleles()) { + if (allele.getAllele().equalsIgnoreCase(queryAllele) + || allele.getAllele().contains("null") + || allele.getAllele().contains("*")) { + found = true; + break; + } + } + if (!found) { + continue; + } + } + + // 4. Create, build and add the annotation + PharmacogenomicsClinicalAnnotation resultClinicalAnnotation = new PharmacogenomicsClinicalAnnotation(); + resultClinicalAnnotation.setVariantId(pharmaVariantAnnotation.getVariantId()); + + resultClinicalAnnotation.setGeneNames(pharmaVariantAnnotation.getGeneNames()); + resultClinicalAnnotation.setPhenotypes(pharmaVariantAnnotation.getPhenotypes()); + resultClinicalAnnotation.setPhenotypeTypes(pharmaVariantAnnotation.getPhenotypeTypes()); + resultClinicalAnnotation.setConfidence(pharmaVariantAnnotation.getConfidence()); + resultClinicalAnnotation.setScore(pharmaVariantAnnotation.getScore()); + resultClinicalAnnotation.setUrl(pharmaVariantAnnotation.getUrl()); + + if (CollectionUtils.isNotEmpty(pharmaVariantAnnotation.getEvidences())) { + Set pubmeds = new LinkedHashSet<>(); + Set summaries = new LinkedHashSet<>(); + for (PharmaClinicalEvidence evidence : pharmaVariantAnnotation.getEvidences()) { + if (StringUtils.isNotEmpty(evidence.getPubmed())) { + pubmeds.add(evidence.getPubmed()); + } + if 
(CollectionUtils.isNotEmpty(evidence.getVariantAssociations())) { + for (PharmaVariantAssociation variantAssociation : evidence.getVariantAssociations()) { + summaries.add(variantAssociation.getDescription()); + summaries.add(variantAssociation.getDiscussion()); + } + } + } + resultClinicalAnnotation.setPubmed(new ArrayList<>(pubmeds)); + resultClinicalAnnotation.setSummary(String.join(" ", summaries)); + } + + if (CollectionUtils.isNotEmpty(pharmaVariantAnnotation.getAlleles())) { + resultClinicalAnnotation.setAlleles(pharmaVariantAnnotation.getAlleles().stream() + .map(a -> new PharmacogenomicsAlleles(a.getAllele(), a.getAnnotation(), a.getDescription())) + .collect(Collectors.toList()) + ); + } + // Add pharmacogenomics clinical annotation to the list + resultClinicalAnnotations.add(resultClinicalAnnotation); + } + // Set pharmacogenomics clinical annotation + pharmacogenomics.setAnnotations(resultClinicalAnnotations); + } + // Add pharmacogenomics to the list if at least one annotation for the same variant has been found + if (CollectionUtils.isNotEmpty(pharmacogenomics.getAnnotations())) { + pharmacogenomicsList.add(pharmacogenomics); + } + } + // Set the pharmacogenomics data in the variant annotation + variantAnnotationList.get(i).setPharmacogenomics(pharmacogenomicsList); + } + } + } + } +} diff --git a/cellbase-lib/src/main/resources/mongodb-indexes.json b/cellbase-lib/src/main/resources/mongodb-indexes.json index b2b83cc2f4..de81c7b83b 100644 --- a/cellbase-lib/src/main/resources/mongodb-indexes.json +++ b/cellbase-lib/src/main/resources/mongodb-indexes.json @@ -130,3 +130,18 @@ {"collection": "splice_score", "fields": {"chromosome": 1, "position": 1}, "options": {"background": true}} {"collection": "pubmed", "fields": {"medlineCitation.pmid.content": 1}, "options": {"background": true}} + +{"collection": "pharmacogenomics", "fields": {"_chunkIds": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"id": 1}, "options": 
{"background": true}} +{"collection": "pharmacogenomics", "fields": {"name": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"source": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"types": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.variantId": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.location": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.chromosome": 1, "variants.position": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.haplotypes": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.geneNames": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.phenotypes": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.phenotypeType": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.confidence": 1}, "options": {"background": true}} +{"collection": "pharmacogenomics", "fields": {"variants.evidences.pubmed": 1}, "options": {"background": true}} diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java index 3126905331..8666aadb99 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java @@ -17,9 +17,12 @@ package org.opencb.cellbase.lib; import com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.commons.collections4.CollectionUtils; import org.opencb.biodata.models.variant.Variant; +import org.opencb.cellbase.core.common.GitRepositoryState; import 
org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.db.MongoDBManager; import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor; @@ -27,22 +30,25 @@ import org.opencb.cellbase.lib.loader.LoaderException; import org.opencb.cellbase.lib.managers.CellBaseManagerFactory; import org.opencb.cellbase.lib.managers.DataReleaseManager; -import org.opencb.commons.datastore.core.DataStoreServerAddress; -import org.opencb.commons.datastore.mongodb.MongoDBConfiguration; import org.opencb.commons.datastore.mongodb.MongoDataStore; -import org.opencb.commons.datastore.mongodb.MongoDataStoreManager; +import org.opencb.commons.exec.Command; +import org.opencb.commons.utils.URLUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collections; import java.util.List; import java.util.concurrent.ExecutionException; +import static org.opencb.cellbase.lib.db.MongoDBManager.DBNAME_SEPARATOR; + /** * Created by fjlopez on 18/09/15. 
*/ @@ -52,21 +58,33 @@ public class GenericMongoDBAdaptorTest { protected int dataRelease; protected String token; + protected String cellBaseName; + + private static final String DATASET_BASENAME = "cellbase-v5.6-dr4"; + private static final String DATASET_EXTENSION = ".tar.gz"; + private static final String DATASET_URL = "http://reports.test.zettagenomics.com/cellbase/test-data/"; + private static final String DATASET_TMP_DIR = "/tmp/cb"; + private static final String LOCALHOST = "localhost:27017"; protected static final String SPECIES = "hsapiens"; - protected static final String ASSEMBLY = "grch37"; - protected static final String API_VERSION = "v5"; - protected static final String CELLBASE_DBNAME = "cellbase_" + SPECIES + "_" + ASSEMBLY + "_" + API_VERSION; + protected static final String ASSEMBLY = "grch38"; +// protected static final String API_VERSION = "v5"; private static final String MONGODB_CELLBASE_LOADER = "org.opencb.cellbase.lib.loader.MongoDBCellBaseLoader"; protected CellBaseConfiguration cellBaseConfiguration; protected CellBaseManagerFactory cellBaseManagerFactory; + protected String UNIVERSAL_ACDES_TOKEN = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImNvc21pYyI6OTIyMzM3MjAzNjg1NDc3NTgwNywiaGdtZCI6OTIyMzM3MjAzNjg1NDc3NTgwNywic3BsaWNlYWkiOjkyMjMzNzIwMzY4NTQ3NzU4MDd9LCJ2ZXJzaW9uIjoiMS4wIiwic3ViIjoiWkVUVEFHRU5PTUlDUyIsImlhdCI6MTY4Nzk0NDYxN30.9puZMYMlmbH1qdH4tUW6vvjfdYdLcAq-6Ts6CRlnLAs"; protected String HGMD_ACCESS_TOKEN = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImhnbWQiOjkyMjMzNzIwMzY4NTQ3NzU4MDd9LCJ2ZXJzaW9uIjoiMS4wIiwic3ViIjoiWkVUVEEiLCJpYXQiOjE2NzU4NzI1MDd9.f3JgVRt7_VrifNWTaRMW3aQfrKbtDbIxlzoenJRYJo0"; + protected String COSMIC_ACCESS_TOKEN = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImNvc21pYyI6OTIyMzM3MjAzNjg1NDc3NTgwN30sInZlcnNpb24iOiIxLjAiLCJzdWIiOiJaRVRUQUdFTk9NSUNTIiwiaWF0IjoxNjg3OTQ3MDYwfQ.wjEfSmCSxGd4TFuYEzoCUXrDNE7rNPoqfb7BYwRtTlw"; + protected String SPLICEAI_ACCESS_TOKEN = 
"eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7InNwbGljZWFpIjo5MjIzMzcyMDM2ODU0Nzc1ODA3fSwidmVyc2lvbiI6IjEuMCIsInN1YiI6IlpFVFRBR0VOT01JQ1MiLCJpYXQiOjE2ODc5NDcwODR9.8CVEQe313N9dP6lKkqRv__mR854VcCvM2RlFMpPtRrk"; protected String HGMD_COSMIC_ACCESS_TOKEN = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImNvc21pYyI6OTIyMzM3MjAzNjg1NDc3NTgwNywiaGdtZCI6OTIyMzM3MjAzNjg1NDc3NTgwN30sInZlcnNpb24iOiIxLjAiLCJzdWIiOiJaRVRUQSIsImlhdCI6MTY3NTg3MjUyN30.NCCFc4SAhjUsN5UU0wXGY6nCZx8jLglvaO1cNZYI0u4"; + protected String HGMD_SPLICEAI_ACCESS_TOKEN = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImhnbWQiOjkyMjMzNzIwMzY4NTQ3NzU4MDcsInNwbGljZWFpIjo5MjIzMzcyMDM2ODU0Nzc1ODA3fSwidmVyc2lvbiI6IjEuMCIsInN1YiI6IlpFVFRBR0VOT01JQ1MiLCJpYXQiOjE2ODc5NDcxMDh9.Qa_VRbu6dbrrUlqk7ToVQkIA258R4L_kNtLZaeITRFA"; + protected String COSMIC_SPLICEAI_ACCESS_TOKEN = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImNvc21pYyI6OTIyMzM3MjAzNjg1NDc3NTgwNywic3BsaWNlYWkiOjkyMjMzNzIwMzY4NTQ3NzU4MDd9LCJ2ZXJzaW9uIjoiMS4wIiwic3ViIjoiWkVUVEFHRU5PTUlDUyIsImlhdCI6MTY4Nzk0NzEyOX0.7WrMgLVgUP1LKYE6v3tDCVvy4XxQfpMPgVU011t8aPM"; protected LoadRunner loadRunner = null; // protected MongoDBAdaptorFactory dbAdaptorFactory; + protected Logger logger = LoggerFactory.getLogger(this.getClass()); public GenericMongoDBAdaptorTest() { @@ -74,22 +92,22 @@ public GenericMongoDBAdaptorTest() { cellBaseConfiguration = CellBaseConfiguration.load( GenericMongoDBAdaptorTest.class.getClassLoader().getResourceAsStream("configuration.test.yaml"), CellBaseConfiguration.ConfigurationFileFormat.YAML); +// cellBaseConfiguration.getDatabases().getMongodb().setHost("localhost:27037"); +// cellBaseConfiguration.getDatabases().getMongodb().setUser("cellbase"); +// cellBaseConfiguration.getDatabases().getMongodb().setPassword("cellbase"); +// cellBaseConfiguration.getDatabases().getMongodb().getOptions().put("authenticationDatabase", "admin"); +// cellBaseConfiguration.getDatabases().getMongodb().getOptions().put("authenticationMechanism", "SCRAM-SHA-256"); + + String[] versionSplit = 
GitRepositoryState.get().getBuildVersion().split("\\."); + cellBaseConfiguration.setVersion("v" + versionSplit[0] + "." + versionSplit[1]); cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); - dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager("hsapiens", "GRCh37"); - loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, CELLBASE_DBNAME, 2, dataReleaseManager, cellBaseConfiguration); -// dbAdaptorFactory = new MongoDBAdaptorFactory(cellBaseConfiguration); - } catch (Exception e) { - e.printStackTrace(); - } - } - protected void clearDB(String dbName) { - logger.info("Cleaning MongoDB {}", dbName); - try (MongoDataStoreManager mongoManager = new MongoDataStoreManager(Collections.singletonList(new DataStoreServerAddress("localhost", 27017)))) { - MongoDBConfiguration.Builder builder = MongoDBConfiguration.builder(); - MongoDBConfiguration mongoDBConfiguration = builder.build(); - mongoManager.get(dbName, mongoDBConfiguration); - mongoManager.drop(dbName); + cellBaseName = MongoDBManager.getDatabaseName(SPECIES, ASSEMBLY, cellBaseConfiguration.getVersion()); + + loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, cellBaseName, 2, + cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY), cellBaseConfiguration); + + initDB(); } catch (Exception e) { e.printStackTrace(); } @@ -98,128 +116,132 @@ protected void clearDB(String dbName) { protected void initDB() throws IOException, ExecutionException, ClassNotFoundException, InterruptedException, InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException, URISyntaxException, CellBaseException, LoaderException { + dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY); + CellBaseDataResult results = dataReleaseManager.getReleases(); + List dataReleaseList = results.getResults(); + if (CollectionUtils.isEmpty(dataReleaseList)) { + // Download data and populate mongo DB + downloadAndPopulate(); + } else if 
(dataReleaseList.size() != 1) { + throw new CellBaseException("Something wrong with the CellBase dataset, it must contain only ONE data release"); + } else { + dataRelease = dataReleaseList.get(0).getRelease(); + } + } + + private void downloadAndPopulate() throws IOException, ExecutionException, ClassNotFoundException, InterruptedException, InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException, CellBaseException, LoaderException { + // Download and uncompress dataset + URL url = new URL(DATASET_URL + DATASET_BASENAME + DATASET_EXTENSION); + Path tmpPath = Paths.get(DATASET_TMP_DIR); + tmpPath.toFile().mkdirs(); + + URLUtils.download(url, tmpPath); + Path tmpFile = tmpPath.resolve(DATASET_BASENAME + DATASET_EXTENSION); + String commandline = "tar -xvzf " + tmpFile.toAbsolutePath() + " -C " + tmpPath; + logger.info("Running: " + commandline); + Command command = new Command(commandline); + command.run(); + + logger.info("Downloading and decompressing " + tmpFile.toAbsolutePath()); + + Path baseDir = tmpPath.resolve(DATASET_BASENAME); + if (!baseDir.toFile().exists() || !baseDir.toFile().isDirectory()) { + throw new CellBaseException("Something wrong downloading and uncompressing the datasets, please check " + tmpPath); + } + + // Populate mongoDB from the downloaded dataset dataRelease = dataReleaseManager.createRelease().getRelease(); - Path path = Paths.get(getClass() - .getResource("/variant-annotation/gene.test.json.gz").toURI()); - loadRunner.load(path, "gene", dataRelease); - - path = Paths.get(getClass() - .getResource("/hgvs/gene.test.json.gz").toURI()); - loadRunner.load(path, "gene", dataRelease); - dataReleaseManager.update(dataRelease,"gene", "gene", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/variant-annotation/genome_sequence.test.json.gz").toURI()); - loadRunner.load(path, "genome_sequence", dataRelease); - - path = Paths.get(getClass() - 
.getResource("/hgvs/genome_sequence.test.json.gz").toURI()); - loadRunner.load(path, "genome_sequence", dataRelease); - dataReleaseManager.update(dataRelease,"genome_sequence", "genome_sequence", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/variant-annotation/regulatory_region.test.json.gz").toURI()); - loadRunner.load(path, "regulatory_region", dataRelease); - dataReleaseManager.update(dataRelease,"regulatory_region", "regulatory_region", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/variant-annotation/protein.test.json.gz").toURI()); - loadRunner.load(path, "protein", dataRelease); - dataReleaseManager.update(dataRelease,"protein", "protein", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/variant-annotation/prot_func_pred_chr_13.test.json.gz").toURI()); - loadRunner.load(path, "protein_functional_prediction", dataRelease); - - path = Paths.get(getClass() - .getResource("/variant-annotation/prot_func_pred_chr_18.test.json.gz").toURI()); - loadRunner.load(path, "protein_functional_prediction", dataRelease); - - path = Paths.get(getClass() - .getResource("/variant-annotation/prot_func_pred_chr_19.test.json.gz").toURI()); - loadRunner.load(path, "protein_functional_prediction", dataRelease); - - path = Paths.get(getClass() - .getResource("/variant-annotation/prot_func_pred_chr_MT.test.json.gz").toURI()); - loadRunner.load(path, "protein_functional_prediction", dataRelease); - dataReleaseManager.update(dataRelease,"protein_functional_prediction", "protein_functional_prediction", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/variant-annotation/variation_chr1.full.test.json.gz").toURI()); - loadRunner.load(path, "variation", dataRelease); - - path = Paths.get(getClass() - .getResource("/variant-annotation/variation_chr2.full.test.json.gz").toURI()); - loadRunner.load(path, "variation", dataRelease); - - path = Paths.get(getClass() - 
.getResource("/variant-annotation/variation_chr19.full.test.json.gz").toURI()); - loadRunner.load(path, "variation", dataRelease); - - path = Paths.get(getClass() - .getResource("/variant-annotation/variation_chrMT.full.test.json.gz").toURI()); - loadRunner.load(path, "variation", dataRelease); - - path = Paths.get(getClass() - .getResource("/variant-annotation/structuralVariants.json.gz").toURI()); - loadRunner.load(path, "variation", dataRelease); - dataReleaseManager.update(dataRelease,"variation", "variation", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/genome/genome_info.json").toURI()); - loadRunner.load(path, "genome_info", dataRelease); - dataReleaseManager.update(dataRelease,"genome_info", "genome_info", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/variant-annotation/repeats.json.gz").toURI()); - loadRunner.load(path, "repeats", dataRelease); - dataReleaseManager.update(dataRelease,"repeats", "repeats", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/variant-annotation/clinical_variants.test.json.gz").toURI()); - loadRunner.load(path, "clinical_variants", dataRelease); - path = Paths.get(getClass() - .getResource("/variant-annotation/clinical_variants.cosmic.test.json.gz").toURI()); - loadRunner.load(path, "clinical_variants", dataRelease); - dataReleaseManager.update(dataRelease,"clinical_variants", "clinical_variants", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/revel/missense_variation_functional_score.json.gz").toURI()); - loadRunner.load(path, "missense_variation_functional_score", dataRelease); - dataReleaseManager.update(dataRelease, "missense_variation_functional_score", "missense_variation_functional_score", Collections.emptyList()); - - // Create empty collection - createEmptyCollection("refseq", dataRelease); - dataReleaseManager.update(dataRelease, "refseq", "refseq", Collections.emptyList()); - - // Create empty collection - 
createEmptyCollection("conservation", dataRelease); + // Genome: genome_sequence.json.gz, genome_info.json.gz + loadData("genome_info", "genome_info", baseDir.resolve("genome_info.json.gz")); + loadData("genome_sequence", "genome_sequence", baseDir.resolve("genome_sequence.json.gz")); + + // Gene: gene.json.gz, refseq.json.gz + loadData("gene", "gene", baseDir.resolve("gene.json.gz")); + loadData("refseq", "refseq", baseDir.resolve("refseq.json.gz")); + + // Conservation + for (File file : baseDir.toFile().listFiles()) { + if (file.getName().startsWith("conservation_")) { + loadData("conservation", "conservation", file.toPath(), true); + } + } dataReleaseManager.update(dataRelease, "conservation", "conservation", Collections.emptyList()); - // Create empty collection - createEmptyCollection("variation_functional_score", dataRelease); - dataReleaseManager.update(dataRelease, "variation_functional_score", "variation_functional_score", Collections.emptyList()); + // Regulatory regions: regulatory_region.json.gz + loadData("regulatory_region", "regulatory_region", baseDir.resolve("regulatory_region.json.gz")); + + // Protein: protein.json.gz + loadData("protein", "protein", baseDir.resolve("protein.json.gz")); + + // Protein functional prediction + for (File file : baseDir.toFile().listFiles()) { + if (file.getName().startsWith("prot_func_pred_")) { + loadData("protein_functional_prediction", "protein_functional_prediction", file.toPath(), true); + } + } + dataReleaseManager.update(dataRelease, "protein_functional_prediction", "protein_functional_prediction", Collections.emptyList()); + + // Variation: variation_chr_all.json.gz + loadData("variation", "variation", baseDir.resolve("variation_chr_all.json.gz")); - // Create empty collection - createEmptyCollection("splice_score", dataRelease); + // Variant functional score: cadd.json.gz + loadData("variation_functional_score", "variation_functional_score", baseDir.resolve("cadd.json.gz")); + + // Repeats: repeats.json.gz 
+ loadData("repeats", "repeats", baseDir.resolve("repeats.json.gz")); + + // Ontology: ontology.json.gz + loadData("ontology", "ontology", baseDir.resolve("ontology.json.gz")); + + // Missense variation functional scores: missense_variation_functional_score.json.gz + loadData("missense_variation_functional_score", "missense_variation_functional_score", + baseDir.resolve("missense_variation_functional_score.json.gz")); + + // splice_score + loadData("splice_score", "splice_score", baseDir.resolve("splice_score/spliceai/splice_score_all.json.gz"), true); + loadData("splice_score", "splice_score", baseDir.resolve("splice_score/mmsplice/splice_score_all.json.gz"), true); dataReleaseManager.update(dataRelease, "splice_score", "splice_score", Collections.emptyList()); - } - protected void createDataRelease() throws CellBaseException, JsonProcessingException { - cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY).createRelease(); + // clinical_variants.full.json.gz + loadData("clinical_variants", "clinical_variants", baseDir.resolve("clinical_variants.full.json.gz")); + + // Clean temporary dir } - protected void updateDataRelease(int dataRelease, String data, List sources) throws CellBaseException, JsonProcessingException { - cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY).update(dataRelease, data, data, sources); + private void loadData(String collection, String data, Path filePath) throws IOException, ExecutionException, ClassNotFoundException, + InterruptedException, InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException, LoaderException, CellBaseException { + loadData(collection, data, filePath, false); } - protected void createEmptyCollection(String data, int dataRelease) { - MongoDBManager mongoDBManager = new MongoDBManager(cellBaseConfiguration); - MongoDataStore mongoDataStore = mongoDBManager.createMongoDBDatastore(CELLBASE_DBNAME); - 
mongoDataStore.createCollection(CellBaseDBAdaptor.buildCollectionName(data, dataRelease)); + private void loadData(String collection, String data, Path filePath, boolean skipUpdate) throws IOException, ExecutionException, + ClassNotFoundException, InterruptedException, InvocationTargetException, NoSuchMethodException, InstantiationException, + IllegalAccessException, LoaderException, CellBaseException { + if (filePath.toFile().exists()) { + logger.info("Loading (" + collection + ", " + data + ") from file " + filePath); + loadRunner.load(filePath, collection, dataRelease); + if (!skipUpdate) { + dataReleaseManager.update(dataRelease, collection, data, Collections.emptyList()); + } + } else { + logger.error("(" + collection + ", " + data + ") not loading: file " + filePath + "does not exist"); + } } +// protected void createDataRelease() throws CellBaseException, JsonProcessingException { +// cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY).createRelease(); +// } +// +// protected void updateDataRelease(int dataRelease, String data, List sources) throws CellBaseException, JsonProcessingException { +// cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY).update(dataRelease, data, data, sources); +// } +// +// protected void createEmptyCollection(String data, int dataRelease) { +// MongoDBManager mongoDBManager = new MongoDBManager(cellBaseConfiguration); +// MongoDataStore mongoDataStore = mongoDBManager.createMongoDBDatastore(CELLBASE_DBNAME); +// mongoDataStore.createCollection(CellBaseDBAdaptor.buildCollectionName(data, dataRelease)); +// } + protected CellBaseDataResult getByVariant(List> variantCellBaseDataResultList, Variant variant) { for (CellBaseDataResult variantCellBaseDataResult : variantCellBaseDataResultList) { if (variantCellBaseDataResult != null) { diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/SpeciesUtilsTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/SpeciesUtilsTest.java index 
246d41684e..b4ccd693f8 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/SpeciesUtilsTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/SpeciesUtilsTest.java @@ -17,40 +17,22 @@ package org.opencb.cellbase.lib; import org.junit.jupiter.api.Test; -import org.opencb.cellbase.core.utils.SpeciesUtils; -import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.lib.loader.LoadRunner; -import org.opencb.cellbase.lib.managers.DataReleaseManager; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.opencb.cellbase.core.utils.SpeciesUtils; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import static org.junit.jupiter.api.Assertions.assertEquals; -public class SpeciesUtilsTest { - - private static final String LOCALHOST = "localhost:27017"; - protected static final String GRCH37_DBNAME = "cellbase_hsapiens_grch37_v4"; - private static final String MONGODB_CELLBASE_LOADER = "org.opencb.cellbase.lib.loader.MongoDBCellBaseLoader"; - protected CellBaseConfiguration cellBaseConfiguration; - - protected final LoadRunner loadRunner; - - protected Logger logger = LoggerFactory.getLogger(this.getClass()); +public class SpeciesUtilsTest extends GenericMongoDBAdaptorTest { public SpeciesUtilsTest() throws IOException, CellBaseException { - cellBaseConfiguration = CellBaseConfiguration.load( - SpeciesUtilsTest.class.getClassLoader().getResourceAsStream("configuration.test.yaml"), - CellBaseConfiguration.ConfigurationFileFormat.YAML); - - loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, GRCH37_DBNAME, 2, - new DataReleaseManager(GRCH37_DBNAME, cellBaseConfiguration), cellBaseConfiguration); + super(); } @Test @@ -62,7 +44,7 @@ public void testGetSpeciesShortname() { List assemblies = new ArrayList(); SpeciesConfiguration.Assembly 
assembly = new SpeciesConfiguration.Assembly(); - assembly.setName("grch37"); + assembly.setName(ASSEMBLY); assemblies.add(assembly); speciesConfiguration.setAssemblies(assemblies); shortName = SpeciesUtils.getSpeciesShortname(speciesConfiguration); @@ -73,11 +55,11 @@ public void testGetSpeciesShortname() { public void testGetDefaultAssembly() { SpeciesConfiguration.Assembly assembly = null; try { - assembly = SpeciesUtils.getDefaultAssembly(cellBaseConfiguration, "hsapiens"); + assembly = SpeciesUtils.getDefaultAssembly(cellBaseConfiguration, SPECIES); } catch (CellBaseException e) { e.printStackTrace(); } - assertEquals("GRCh37", assembly.getName()); + assertEquals(ASSEMBLY.toLowerCase(Locale.ROOT), assembly.getName().toLowerCase(Locale.ROOT)); SpeciesConfiguration speciesConfiguration = new SpeciesConfiguration(); speciesConfiguration.setScientificName("H. sapiens"); @@ -86,7 +68,6 @@ public void testGetDefaultAssembly() { } catch (CellBaseException e) { e.printStackTrace(); } - assertEquals("GRCh37", assembly.getName()); - + assertEquals(ASSEMBLY.toLowerCase(Locale.ROOT), assembly.getName().toLowerCase(Locale.ROOT)); } } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/builders/PharmGKBBuilderTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/builders/PharmGKBBuilderTest.java new file mode 100644 index 0000000000..d94e322fd7 --- /dev/null +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/builders/PharmGKBBuilderTest.java @@ -0,0 +1,35 @@ +package org.opencb.cellbase.lib.builders; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.*; + +class PharmGKBBuilderTest { + + @Test + public void parseGenericNames() { + String genericName = "\"\"\"(2R,3R,11bR)-9-methoxy-3-(2-methylpropyl)-2,3,4,6,7,11b-hexahydro-1H-benzo[a]quinolizine-2,10-diol\"\", 
\"\"10-o-desmethyl-alpha-htbz\"\"\""; + List names = stringFieldToList(genericName); + assertTrue(names.size() == 2); + assertTrue(names.contains("(2R,3R,11bR)-9-methoxy-3-(2-methylpropyl)-2,3,4,6,7,11b-hexahydro-1H-benzo[a]quinolizine-2,10-diol")); + assertTrue(names.contains("10-o-desmethyl-alpha-htbz")); + } + + private List stringFieldToList(String field) { + if (field.startsWith("\"")) { + return Arrays.stream(field.replace("\"\"\"", "\"").replace("\"\"", "\"").replace("\", \"", "\",\"").split("\",\"")) + .map(s -> s.replace("\"", "").trim()).collect(Collectors.toList()); + } else { + if (field.contains(", ")) { + return Arrays.stream(field.replace(", ", ",").split(",")).map(String::trim).collect(Collectors.toList()); + } else { + return Collections.singletonList(field); + } + } + } +} \ No newline at end of file diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/CellBaseNormalizerSequenceAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/CellBaseNormalizerSequenceAdaptorTest.java index 13b9366d8b..a58b070219 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/CellBaseNormalizerSequenceAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/CellBaseNormalizerSequenceAdaptorTest.java @@ -16,18 +16,11 @@ package org.opencb.cellbase.lib.impl.core; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.cellbase.lib.variant.annotation.CellBaseNormalizerSequenceAdaptor; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Collections; - import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -35,36 +28,13 @@ 
public class CellBaseNormalizerSequenceAdaptorTest extends GenericMongoDBAdapto private CellBaseNormalizerSequenceAdaptor cellBaseNormalizerSequenceAdaptor; - public CellBaseNormalizerSequenceAdaptorTest() { - } - - @BeforeEach - public void setUp() { - try { - clearDB(CELLBASE_DBNAME); - - dataRelease = 1; - createDataRelease(); - - Path path = Paths.get(getClass() - .getResource("/genome/genome_info.json").toURI()); - loadRunner.load(path, "genome_info", dataRelease); - updateDataRelease(dataRelease, "genome_info", Collections.emptyList()); - - path = Paths.get(getClass() - .getResource("/genome/genome_sequence.test.json.gz").toURI()); - loadRunner.load(path, "genome_sequence", dataRelease); - updateDataRelease(dataRelease, "genome_sequence", Collections.emptyList()); - - cellBaseNormalizerSequenceAdaptor = new CellBaseNormalizerSequenceAdaptor( - cellBaseManagerFactory.getGenomeManager(SPECIES, ASSEMBLY), dataRelease); - } catch (Exception e) { - e.printStackTrace(); - } + public CellBaseNormalizerSequenceAdaptorTest() throws CellBaseException { + super(); + cellBaseNormalizerSequenceAdaptor = new CellBaseNormalizerSequenceAdaptor( + cellBaseManagerFactory.getGenomeManager(SPECIES, ASSEMBLY), dataRelease); } @Test - @Disabled public void testGenomicSequenceChromosomeNotPresent() throws Exception { Throwable exception = assertThrows(RuntimeException.class, () -> { cellBaseNormalizerSequenceAdaptor.query("1234", 1, 1999); @@ -73,31 +43,29 @@ public void testGenomicSequenceChromosomeNotPresent() throws Exception { } @Test - @Disabled public void testGenomicSequenceQueryStartEndOutOfRightBound() throws Exception { // Both start & end out of the right bound Throwable exception = assertThrows(RuntimeException.class, () -> { cellBaseNormalizerSequenceAdaptor.query("17", 73973989, 73974999); }); assertEquals("Unable to find entry for 17:73973989-73974999", exception.getMessage()); - } @Test - @Disabled public void testGenomicSequenceQueryEndOutOfRightBound() throws 
Exception { // start within the bounds, end out of the right bound. Should return last 10 nts. - String result = cellBaseNormalizerSequenceAdaptor.query("17", 63973989, 63974999); - assertEquals("TCAAGACCAGC", result); - + // { "sequenceName" : "13", "start" : 39856000, "end" : 39857999 } + // { "sequenceName" : "13", "start" : 39858000, "end" : 39859999 } + String result = cellBaseNormalizerSequenceAdaptor.query("13", 39859989, 39869999); + assertEquals("ACAAATGATTT", result); } @Test - @Disabled public void testGenomicSequenceQueryStartOutOfLeftBound() throws Exception { // the left coordinate is out of bounds, but the right one is not. - String result = cellBaseNormalizerSequenceAdaptor.query("17", 63969989, 63970000); - assertEquals("GGAGAGAGAAA", result); + // { "sequenceName" : "13", "start" : 39856000, "end" : 39857999 } + // { "sequenceName" : "13", "start" : 39858000, "end" : 39859999 } + String result = cellBaseNormalizerSequenceAdaptor.query("13", 39857989, 39858000); + assertEquals("TTTATTAATGGC", result); } - } \ No newline at end of file diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java index fb6e708477..a0dc6a3da2 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java @@ -17,21 +17,21 @@ package org.opencb.cellbase.lib.impl.core; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; import org.bson.conversions.Bson; +import org.eclipse.jetty.util.ajax.JSON; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import 
org.eclipse.jetty.util.ajax.JSON; import org.opencb.biodata.models.core.Gene; -import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.biodata.models.variant.avro.SampleEntry; import org.opencb.cellbase.core.ParamConstants; import org.opencb.cellbase.core.api.ClinicalVariantQuery; +import org.opencb.cellbase.core.api.GeneQuery; +import org.opencb.cellbase.core.api.query.QueryException; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; @@ -55,13 +55,13 @@ */ public class ClinicalMongoDBAdaptorTest extends GenericMongoDBAdaptorTest { - public ClinicalMongoDBAdaptorTest() throws IOException { + public ClinicalMongoDBAdaptorTest() { + super(); } @Test - @Disabled public void parseQueryTest() throws CellBaseException { - ClinicalManager manager = cellBaseManagerFactory.getClinicalManager("hsapiens"); + ClinicalManager manager = cellBaseManagerFactory.getClinicalManager(SPECIES); ClinicalMongoDBAdaptor dbAdaptor = (ClinicalMongoDBAdaptor) manager.getDBAdaptor(); ClinicalVariantQuery query = new ClinicalVariantQuery(); query.setId("12370"); @@ -78,16 +78,11 @@ public void parseQueryTest() throws CellBaseException { @Test @Disabled public void phasedQueriesTest() throws Exception { - clearDB(CELLBASE_DBNAME); - - createDataRelease(); - dataRelease = 1; - - // Load test data - Path path = Paths.get(getClass() - .getResource("/clinicalMongoDBAdaptor/phasedQueries/clinical_variants.full.test.json.gz").toURI()); - loadRunner.load(path, "clinical_variants", dataRelease); - updateDataRelease(dataRelease, "clinical_variants", Collections.emptyList()); +// // Load test data +// Path path = Paths.get(getClass() +// .getResource("/clinicalMongoDBAdaptor/phasedQueries/clinical_variants.full.test.json.gz").toURI()); +// loadRunner.load(path, 
"clinical_variants", dataRelease); +// updateDataRelease(dataRelease, "clinical_variants", Collections.emptyList()); ClinicalManager clinicalManager = cellBaseManagerFactory.getClinicalManager(SPECIES, ASSEMBLY); // Two variants being queried with PS and genotype. The PS is different in each of them. In the database, these @@ -539,16 +534,11 @@ public void phasedQueriesTest() throws Exception { @Test @Disabled public void proteinChangeMatchTest() throws Exception { - clearDB(CELLBASE_DBNAME); - - createDataRelease(); - dataRelease = 1; - - // Load test data - Path path = Paths.get(getClass() - .getResource("/clinicalMongoDBAdaptor/nativeGet/clinical_variants.full.test.json.gz").toURI()); - loadRunner.load(path, "clinical_variants", dataRelease); - updateDataRelease(dataRelease, "clinical_variants", Collections.emptyList()); +// // Load test data +// Path path = Paths.get(getClass() +// .getResource("/clinicalMongoDBAdaptor/nativeGet/clinical_variants.full.test.json.gz").toURI()); +// loadRunner.load(path, "clinical_variants", dataRelease); +// updateDataRelease(dataRelease, "clinical_variants", Collections.emptyList()); ClinicalManager clinicalManager = cellBaseManagerFactory.getClinicalManager(SPECIES, ASSEMBLY); @@ -564,23 +554,27 @@ public void proteinChangeMatchTest() throws Exception { } - private List loadGeneList() throws URISyntaxException, IOException { - ObjectMapper jsonObjectMapper = new ObjectMapper(); - jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); -// jsonObjectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); - Path path = Paths.get(getClass() - .getResource("/clinicalMongoDBAdaptor/gene_list.json.gz").toURI()); - BufferedReader reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(path.toFile())))); - - List geneList = new ArrayList<>(); - String line; - while ((line = 
reader.readLine()) != null) { - Gene gene = jsonObjectMapper.convertValue(JSON.parse(line), Gene.class); - geneList.add(gene); - } - - return geneList; + private List loadGeneList() throws CellBaseException, QueryException, IllegalAccessException { + GeneQuery geneQuery = new GeneQuery(); + geneQuery.setDataRelease(dataRelease); + CellBaseDataResult results = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY).search(geneQuery); + return results.getResults(); +// ObjectMapper jsonObjectMapper = new ObjectMapper(); +// jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); +//// jsonObjectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); +// jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); +// Path path = Paths.get(getClass() +// .getResource("/clinicalMongoDBAdaptor/gene_list.json.gz").toURI()); +// BufferedReader reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(path.toFile())))); +// +// List geneList = new ArrayList<>(); +// String line; +// while ((line = reader.readLine()) != null) { +// Gene gene = jsonObjectMapper.convertValue(JSON.parse(line), Gene.class); +// geneList.add(gene); +// } +// +// return geneList; } // @Test diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/GeneMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/GeneMongoDBAdaptorTest.java index d04ccf6992..9b643ad681 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/GeneMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/GeneMongoDBAdaptorTest.java @@ -50,26 +50,12 @@ public GeneMongoDBAdaptorTest() throws IOException { super(); } - @BeforeEach - public void setUp() throws Exception { - clearDB(CELLBASE_DBNAME); - - createDataRelease(); - dataRelease = 1; - - Path path = Paths.get(getClass().getResource("/gene/gene-test.json.gz").toURI()); - 
loadRunner.load(path, "gene", dataRelease); - updateDataRelease(dataRelease, "gene", Collections.emptyList()); - } - @Test - @Disabled public void testQueryId() throws Exception { -// GeneMongoDBAdaptor geneDBAdaptor = dbAdaptorFactory.getGeneDBAdaptor("hsapiens", "GRCh37"); GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY); Map paramMap = new HashMap<>(); - paramMap.put("id", "ENSG00000223972"); + paramMap.put("id", "ENSG00000248746"); paramMap.put("include", "id,name,start,end"); paramMap.put(AbstractQuery.DATA_RELEASE, String.valueOf(dataRelease)); @@ -80,12 +66,10 @@ public void testQueryId() throws Exception { // WARNING: these values below may slightly change from one data version to another assertEquals(1, cellBaseDataResult.getNumMatches()); assertThat(cellBaseDataResult.getResults().stream().map(gene -> gene.getName()).collect(Collectors.toList()), - CoreMatchers.hasItems("DDX11L1")); - + CoreMatchers.hasItems("ACTN3")); } @Test - @Disabled public void testQuery() throws Exception { GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY); @@ -100,21 +84,12 @@ public void testQuery() throws Exception { CellBaseDataResult cellBaseDataResult = geneManager.search(geneQuery); // WARNING: these values below may slightly change from one data version to another - assertEquals(6, cellBaseDataResult.getNumMatches()); + assertEquals(12, cellBaseDataResult.getNumMatches()); assertThat(cellBaseDataResult.getResults().stream().map(gene -> gene.getName()).collect(Collectors.toList()), - CoreMatchers.hasItems("DDX11L1", "OR4F5", "AL627309.2", "RNU6-1100P", "AP006222.1", "RPL23AP24")); + CoreMatchers.hasItems("APOE", "BRCA1", "CFTR", "CYP2D6", "DMD", "EGFR", "FMR1")); assertThat(cellBaseDataResult.getResults().stream().map(gene -> gene.getId()).collect(Collectors.toList()), - CoreMatchers.hasItems("ENSG00000223972","ENSG00000186092","ENSG00000239906","ENSG00000222623", - "ENSG00000228463","ENSG00000236679")); - - 
// These two genes are UP for synovial membrane - cannot be returned - assertThat(cellBaseDataResult.getResults().stream().map(gene -> gene.getId()).collect(Collectors.toList()), - CoreMatchers.not(CoreMatchers.hasItems("ENSG00000187608", "ENSG00000149968"))); - -// query = new Query(GeneDBAdaptor.QueryParams.ANNOTATION_EXPRESSION_TISSUE.key(), "synovial"); -// query.put(GeneDBAdaptor.QueryParams.ANNOTATION_EXPRESSION_VALUE.key(), "DOWN"); -// queryOptions = new QueryOptions("include", "id,name,annotation.expression"); -// queryOptions.put("limit", "10"); + CoreMatchers.hasItems("ENSG00000130203","ENSG00000012048","ENSG00000001626","ENSG00000100197", + "ENSG00000198947","ENSG00000146648")); geneQuery = new GeneQuery(); geneQuery.setAnnotationExpressionTissue(new LogicalList(Collections.singletonList("brain"))); @@ -130,16 +105,15 @@ public void testQuery() throws Exception { cellBaseDataResult = geneManager.search(geneQuery); boolean found = false; for (Gene gene : cellBaseDataResult.getResults()) { - if (gene.getId().equals("ENSG00000223972")) { + if (gene.getId().equals("ENSG00000130203")) { for (Expression expression : gene.getAnnotation().getExpression()) { - if (expression.getFactorValue().equals("subthalamic nucleus") - && expression.getExperimentId().equals("E-GEOD-7307") + if (expression.getFactorValue().equals("placenta") + && expression.getExperimentId().equals("E-MTAB-37") && expression.getTechnologyPlatform().equals("A-AFFY-44") && expression.getExpression().equals(ExpressionCall.UP)) { found = true; break; } - } } if (found) { @@ -149,15 +123,7 @@ public void testQuery() throws Exception { assertTrue(found); } - -// -// constraints":[{"source":"gnomAD","method":"pLoF","name":"oe_mis","value":0.81001}, -// {"source":"gnomAD","method":"pLoF","name":"oe_syn","value":0.91766}, -// {"source":"gnomAD","method":"pLoF","name":"oe_lof","value":0.85584}]}}, - // exac_pLI 0.17633 - // exac_oe_lof 0.45091 @Test - @Disabled public void testConstraints() throws 
Exception { GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY); @@ -166,7 +132,7 @@ public void testConstraints() throws Exception { paramMap.put(AbstractQuery.DATA_RELEASE, String.valueOf(dataRelease)); GeneQuery geneQuery = new GeneQuery(paramMap); CellBaseDataResult cellBaseDataResult = geneManager.search(geneQuery); - assertEquals(1, cellBaseDataResult.getNumResults()); + assertEquals(12, cellBaseDataResult.getNumResults()); List constraints = cellBaseDataResult.getResults().get(0).getAnnotation().getConstraints(); assertEquals(5, constraints.size()); @@ -175,14 +141,14 @@ public void testConstraints() throws Exception { paramMap.put(AbstractQuery.DATA_RELEASE, String.valueOf(dataRelease)); geneQuery = new GeneQuery(paramMap); cellBaseDataResult = geneManager.search(geneQuery); - assertEquals(1, cellBaseDataResult.getNumResults()); + assertEquals(9, cellBaseDataResult.getNumResults()); paramMap = new HashMap<>(); paramMap.put("constraints", "oe_syn=0.91766"); paramMap.put(AbstractQuery.DATA_RELEASE, String.valueOf(dataRelease)); geneQuery = new GeneQuery(paramMap); cellBaseDataResult = geneManager.search(geneQuery); - assertEquals(1, cellBaseDataResult.getNumResults()); + assertEquals(0, cellBaseDataResult.getNumResults()); paramMap = new HashMap<>(); paramMap.put("constraints", " exac_pLI<0.17633"); @@ -196,6 +162,6 @@ public void testConstraints() throws Exception { paramMap.put(AbstractQuery.DATA_RELEASE, String.valueOf(dataRelease)); geneQuery = new GeneQuery(paramMap); cellBaseDataResult = geneManager.search(geneQuery); - assertEquals(1, cellBaseDataResult.getNumResults()); + assertEquals(7, cellBaseDataResult.getNumResults()); } } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/GenomeMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/GenomeMongoDBAdaptorTest.java index 0aae54f43d..dbc0362d3a 100644 --- 
a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/GenomeMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/GenomeMongoDBAdaptorTest.java @@ -16,9 +16,6 @@ package org.opencb.cellbase.lib.impl.core; -import org.apache.commons.lang3.StringUtils; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.opencb.biodata.models.core.Chromosome; @@ -26,6 +23,7 @@ import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.avro.Cytoband; import org.opencb.cellbase.core.api.GenomeQuery; +import org.opencb.cellbase.core.api.query.QueryException; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; @@ -33,8 +31,6 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -51,28 +47,10 @@ public class GenomeMongoDBAdaptorTest extends GenericMongoDBAdaptorTest { public GenomeMongoDBAdaptorTest() throws Exception { super(); - } - - @BeforeAll - public void setUp() throws Exception { - clearDB(CELLBASE_DBNAME); - - createDataRelease(); - dataRelease = 1; - - Path path = Paths.get(getClass().getResource("/genome/genome_info.json").toURI()); - loadRunner.load(path, "genome_info", dataRelease); - updateDataRelease(dataRelease, "genome_info", Collections.emptyList()); - - path = Paths.get(getClass().getResource("/genome/genome_sequence.test.json.gz").toURI()); - loadRunner.load(path, "genome_sequence", dataRelease); - updateDataRelease(dataRelease, "genome_sequence", Collections.emptyList()); - genomeManager = cellBaseManagerFactory.getGenomeManager(SPECIES, ASSEMBLY); } @Test - @Disabled 
public void getChromosomeInfo() throws Exception { GenomeQuery query = new GenomeQuery(); query.setNames(Collections.singletonList("1")); @@ -85,49 +63,48 @@ public void getChromosomeInfo() throws Exception { } @Test - @Disabled public void getGenomicSequence() throws CellBaseException { - CellBaseDataResult cellBaseDataResult = genomeManager.getGenomicSequence(new Query("region", "1:1-1999"), new QueryOptions(), dataRelease); - assertEquals(StringUtils.repeat("N", 1999), cellBaseDataResult.getResults().get(0).getSequence()); + CellBaseDataResult cellBaseDataResult = genomeManager.getGenomicSequence(new Query("region", "1:1-1999"), + new QueryOptions(), dataRelease); + // Inter-genic regions are not stored in the test dataset (maybe in the future should be stored) + assertEquals(0, cellBaseDataResult.getNumResults()); - cellBaseDataResult = genomeManager.getGenomicSequence(new Query("region", "17:63971994-63972004"), new QueryOptions(), dataRelease); - assertEquals("GAGAAAAAACC", cellBaseDataResult.getResults().get(0).getSequence()); + cellBaseDataResult = genomeManager.getGenomicSequence(new Query("region", "11:65497100-65497110"), new QueryOptions(), dataRelease); + assertEquals("GGTCATTGCTT", cellBaseDataResult.getResults().get(0).getSequence()); - cellBaseDataResult = genomeManager.getGenomicSequence(new Query("region", "13:47933990-47934003"), new QueryOptions(), dataRelease); - assertEquals("TTCATTTTTAGATT", cellBaseDataResult.getResults().get(0).getSequence()); + cellBaseDataResult = genomeManager.getGenomicSequence(new Query("region", "9:126426800-126426815"), new QueryOptions(), + dataRelease); + assertEquals("TAAGAGAGAAACAAGC", cellBaseDataResult.getResults().get(0).getSequence()); } @Test - @Disabled public void testGenomicSequenceChromosomeNotPresent() throws CellBaseException { - CellBaseDataResult cellBaseDataResult = genomeManager - .getSequence(new Region("1234:1-1999"), new QueryOptions(), dataRelease); + CellBaseDataResult cellBaseDataResult = 
genomeManager.getSequence(new Region("1234:1-1999"), + new QueryOptions(), dataRelease); assertEquals(0, cellBaseDataResult.getNumResults()); } @Test - @Disabled - public void testGenomicSequenceQueryOutOfBounds() throws CellBaseException { + public void testGenomicSequenceQueryOutOfBounds() throws CellBaseException, QueryException, IllegalAccessException { // Both start & end out of the right bound CellBaseDataResult cellBaseDataResult = genomeManager - .getSequence(new Region("17", 73973989, 73974999), new QueryOptions(), dataRelease); + .getSequence(new Region("17", 43044999, 43045999), new QueryOptions(), dataRelease); assertEquals(0, cellBaseDataResult.getNumResults()); // start within the bounds, end out of the right bound. Should return last 10 nts. - cellBaseDataResult = genomeManager.getSequence(new Region("17", 63973989, 63974999), new QueryOptions(), dataRelease); + cellBaseDataResult = genomeManager.getSequence(new Region("17", 43043989, 43045999), new QueryOptions(), dataRelease); assertEquals(1, cellBaseDataResult.getNumResults()); - assertEquals("TCAAGACCAGC", cellBaseDataResult.getResults().get(0).getSequence()); + assertEquals("ACAGGGATCTT", cellBaseDataResult.getResults().get(0).getSequence()); // Start out of the left bound, end in bound. should return nts. 
QueryOptions queryOptions = new QueryOptions(); queryOptions.add("count", "true"); - cellBaseDataResult = genomeManager.getSequence(new Region("17", 63960000, 63970000), queryOptions, dataRelease); + cellBaseDataResult = genomeManager.getSequence(new Region("17", 7650000, 7660010), queryOptions, dataRelease); assertEquals(1, cellBaseDataResult.getNumResults()); } @Test - @Disabled public void testGetCytoband() throws CellBaseException { List regions = Arrays.asList(new Region("19:55799900-55803000"), new Region("11:121300000-124030001")); List> cellBaseDataResultList = genomeManager.getCytobands(regions, dataRelease); diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactoryTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactoryTest.java index 25a2040a8a..3e2c755f98 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactoryTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/MongoDBAdaptorFactoryTest.java @@ -16,56 +16,48 @@ package org.opencb.cellbase.lib.impl.core; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; -import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.cellbase.lib.db.MongoDBManager; -import java.io.IOException; import java.security.InvalidParameterException; import static org.junit.jupiter.api.Assertions.*; +import static org.opencb.cellbase.lib.db.MongoDBManager.DBNAME_SEPARATOR; @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class MongoDBAdaptorFactoryTest extends GenericMongoDBAdaptorTest { private MongoDBManager mongoDBManager; - private CellBaseConfiguration cellBaseConfiguration; - - public MongoDBAdaptorFactoryTest() throws IOException { + public MongoDBAdaptorFactoryTest() { super(); - } - - @BeforeAll - public void setUp() throws Exception { - 
dataRelease = 1; - - cellBaseConfiguration = CellBaseConfiguration.load( - MongoDBAdaptorFactoryTest.class.getClassLoader().getResourceAsStream("configuration.test.yaml"), - CellBaseConfiguration.ConfigurationFileFormat.YAML); - - mongoDBManager = new MongoDBManager(cellBaseConfiguration); + this.mongoDBManager = new MongoDBManager(cellBaseConfiguration); } @Test - public void testGetDatabaseName() throws Exception { + public void testGetDatabaseName() { + String auxVersion = cellBaseConfiguration.getVersion().replace(".", DBNAME_SEPARATOR).replace("-", DBNAME_SEPARATOR); + String[] split = auxVersion.split(DBNAME_SEPARATOR); + String version = split[0]; + if (split.length > 1) { + version += (DBNAME_SEPARATOR + split[1]); + } + // provide assembly - String databaseName = mongoDBManager.getDatabaseName("speciesName", "assemblyName"); - assertEquals("cellbase_speciesname_assemblyname_v5", databaseName); + String databaseName = mongoDBManager.getDatabaseName("speciesName", "assemblyName", cellBaseConfiguration.getVersion()); + assertEquals("cellbase_speciesname_assemblyname_" + version, databaseName); // don't provide assembly InvalidParameterException thrown = assertThrows(InvalidParameterException.class, - () -> mongoDBManager.getDatabaseName("speciesName", null), + () -> mongoDBManager.getDatabaseName("speciesName", null, cellBaseConfiguration.getVersion()), "Expected getDatabaseName() to throw an exception, but it didn't"); - assertTrue(thrown.getMessage().contains("Species and assembly are required")); // handle special characters - databaseName = mongoDBManager.getDatabaseName("speciesName", "my_funny.assembly--name"); - assertEquals("cellbase_speciesname_myfunnyassemblyname_v5", databaseName); + databaseName = mongoDBManager.getDatabaseName("speciesName", "my_funny.assembly--name", cellBaseConfiguration.getVersion()); + assertEquals("cellbase_speciesname_myfunnyassemblyname_" + version, databaseName); } } \ No newline at end of file diff --git 
a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ProteinMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ProteinMongoDBAdaptorTest.java index c06eb92eec..3bc9bd6617 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ProteinMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ProteinMongoDBAdaptorTest.java @@ -16,8 +16,6 @@ package org.opencb.cellbase.lib.impl.core; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.opencb.biodata.formats.protein.uniprot.v202003jaxb.Entry; import org.opencb.cellbase.core.api.ProteinQuery; @@ -25,12 +23,11 @@ import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.cellbase.lib.managers.ProteinManager; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -41,24 +38,9 @@ public class ProteinMongoDBAdaptorTest extends GenericMongoDBAdaptorTest { public ProteinMongoDBAdaptorTest() throws Exception { super(); - - setUp(); - } - - @BeforeEach - public void setUp() throws Exception { - clearDB(CELLBASE_DBNAME); - - createDataRelease(); - dataRelease = 1; - - Path path = Paths.get(getClass().getResource("/protein/protein.test.json.gz").toURI()); - loadRunner.load(path, "protein", dataRelease); - updateDataRelease(dataRelease, "protein", Collections.emptyList()); } @Test - @Disabled public void testQuery() throws Exception { ProteinManager proteinManager = cellBaseManagerFactory.getProteinManager(SPECIES, ASSEMBLY); ProteinQuery query = new ProteinQuery(); @@ -67,31 +49,22 @@ public void testQuery() throws Exception { query.setIncludes(new ArrayList<>(Arrays.asList("accession", "name"))); query.setCount(Boolean.TRUE); 
query.setDataRelease(dataRelease); -// QueryOptions queryOptions = new QueryOptions(QueryOptions.EXCLUDE, new ArrayList<>(Arrays.asList("_id", "_chunkIds"))); -// queryOptions.put(QueryOptions.LIMIT, 3); -// queryOptions.put(QueryOptions.INCLUDE, "accession,name"); CellBaseDataResult CellBaseDataResult = proteinManager.search(query); assertEquals(3, CellBaseDataResult.getResults().size()); - assertEquals(4, CellBaseDataResult.getNumMatches()); + assertEquals(17, CellBaseDataResult.getNumMatches()); query = new ProteinQuery(); - query.setAccessions(new ArrayList<>(Arrays.asList("B2R8Q1","Q9UKT9"))); + query.setAccessions(new ArrayList<>(Arrays.asList("P02649","Q86VF7","Q16535"))); query.setDataRelease(dataRelease); -// CellBaseDataResult = proteinDBAdaptor.search(new Query(ProteinDBAdaptor.QueryParams.ACCESSION.key(), -// "B2R8Q1,Q9UKT9"), queryOptions); CellBaseDataResult = proteinManager.search(query); - assertEquals("B2R8Q1", CellBaseDataResult.getResults().get(0).getAccession().get(1)); - assertEquals("Q9UKT9", CellBaseDataResult.getResults().get(1).getAccession().get(0)); -// CellBaseDataResult = proteinDBAdaptor.search(new Query(ProteinDBAdaptor.QueryParams.NAME.key(), -// "MKS1_HUMAN"), queryOptions); + assertTrue(CellBaseDataResult.getResults().get(0).getAccession().contains("P02649")); + assertTrue(CellBaseDataResult.getResults().get(1).getAccession().contains("Q86VF7")); + assertTrue(CellBaseDataResult.getResults().get(2).getAccession().contains("Q16535")); query = new ProteinQuery(); - query.setNames(new ArrayList<>(Collections.singletonList("MKS1_HUMAN"))); + query.setNames(new ArrayList<>(Collections.singletonList("FMR1_HUMAN"))); query.setDataRelease(dataRelease); CellBaseDataResult = proteinManager.search(query); -// CellBaseDataResult = proteinDBAdaptor.query(new Query(ProteinDBAdaptor.QueryParams.NAME.key(), -// "MKS1_HUMAN"), queryOptions); - assertEquals("MKS1_HUMAN", CellBaseDataResult.getResults().get(0).getName().get(0)); + 
assertTrue(CellBaseDataResult.getResults().get(0).getName().contains("FMR1_HUMAN")); } - } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/RemoteDBVariantAnnotationCalculatorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/RemoteDBVariantAnnotationCalculatorTest.java deleted file mode 100644 index f6c67acdab..0000000000 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/RemoteDBVariantAnnotationCalculatorTest.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.opencb.cellbase.lib.impl.core; - -import org.junit.Assume; -import org.junit.Before; -import org.junit.Test; -import org.opencb.biodata.models.variant.Variant; -import org.opencb.biodata.models.variant.avro.ConsequenceType; -import org.opencb.biodata.models.variant.avro.VariantAnnotation; -import org.opencb.cellbase.core.api.query.QueryException; -import org.opencb.cellbase.core.config.CellBaseConfiguration; -import org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.core.result.CellBaseDataResult; -import org.opencb.cellbase.lib.managers.CellBaseManagerFactory; -import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator; -import org.opencb.commons.datastore.core.QueryOptions; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.Paths; -import java.util.concurrent.ExecutionException; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - -public class RemoteDBVariantAnnotationCalculatorTest { - - //------------------------------------------------------------------------ - // IMPORTANT - // To run these tests you need to hava the following config file - // /opt/cellbase/remote.configuration.test.yaml - // (useful if you have a tunnel to the CellBase MongoDB) - //------------------------------------------------------------------------ - - private int dataRelease = 3; - private String species = "hsapiens"; - private 
String assembly = "grch38"; - - private CellBaseConfiguration cellBaseConfiguration; - private CellBaseManagerFactory cellBaseManagerFactory; - private VariantAnnotationCalculator variantAnnotationCalculator; - - private String SPLICEAI_HGMD_COSMIC_TOKEN = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImNvc21pYyI6OTIyMzM3MjAzNjg1NDc3NTgwNywic3BsaWNlYWkiOjkyMjMzNzIwMzY4NTQ3NzU4MDcsImhnbWQiOjkyMjMzNzIwMzY4NTQ3NzU4MDd9LCJ2ZXJzaW9uIjoiMS4wIiwic3ViIjoiVEVTVCIsImlhdCI6MTY3Nzc3NTQzMn0.Z-F2WSRkRMyl_uFkf1lHg4WXr49fZvTLLcEc9LCOapU"; - - public RemoteDBVariantAnnotationCalculatorTest() { - } - - @Before - public void before() throws IOException, CellBaseException { - File configFile = Paths.get("/opt/cellbase/remote.configuration.test.yaml").toFile(); - Assume.assumeTrue(configFile.exists()); - - this.cellBaseConfiguration = CellBaseConfiguration.load(new FileInputStream(configFile.getAbsoluteFile().toString()), - CellBaseConfiguration.ConfigurationFileFormat.YAML); - - this.cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); - } - - @Test - public void testWithoutToken() throws QueryException, ExecutionException, InterruptedException, CellBaseException, IllegalAccessException { - variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly, dataRelease, "", cellBaseManagerFactory); - - Variant variant = new Variant("1:1000012:C:A"); - CellBaseDataResult result = variantAnnotationCalculator.getAnnotationByVariant(variant, QueryOptions.empty()); - VariantAnnotation variantAnnotation = result.first(); - - - for (ConsequenceType ct : variantAnnotation.getConsequenceTypes()) { - if ("ENST00000304952.11".equals(ct.getEnsemblTranscriptId())) { - assertEquals(1, ct.getSpliceScores().size()); - assertEquals("mmsplice", ct.getSpliceScores().get(0).getSource().toLowerCase()); - return; - } - } - fail(); - } - - @Test - public void testSpliceToken() throws QueryException, ExecutionException, InterruptedException, CellBaseException, 
IllegalAccessException { - this.variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly, dataRelease, SPLICEAI_HGMD_COSMIC_TOKEN, cellBaseManagerFactory); - - Variant variant = new Variant("1:1000012:C:A"); - CellBaseDataResult result = variantAnnotationCalculator.getAnnotationByVariant(variant, QueryOptions.empty()); - VariantAnnotation variantAnnotation = result.first(); - - for (ConsequenceType ct : variantAnnotation.getConsequenceTypes()) { - if ("ENST00000304952.11".equals(ct.getEnsemblTranscriptId())) { - assertEquals(2, ct.getSpliceScores().size()); - assertEquals("mmsplice", ct.getSpliceScores().get(0).getSource().toLowerCase()); - assertEquals("spliceai", ct.getSpliceScores().get(1).getSource().toLowerCase()); - return; - } - } - fail(); - } -} diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/RemoteVariantAnnotationTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/RemoteVariantAnnotationTest.java deleted file mode 100644 index c55beb9bc3..0000000000 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/RemoteVariantAnnotationTest.java +++ /dev/null @@ -1,137 +0,0 @@ -package org.opencb.cellbase.lib.impl.core; - -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; -import org.junit.Assume; -import org.junit.Before; -import org.junit.Test; -import org.opencb.biodata.models.variant.Variant; -import org.opencb.biodata.models.variant.avro.VariantAnnotation; -import org.opencb.cellbase.core.api.query.QueryException; -import org.opencb.cellbase.core.config.CellBaseConfiguration; -import org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.core.result.CellBaseDataResult; -import org.opencb.cellbase.lib.managers.CellBaseManagerFactory; -import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator; -import org.opencb.commons.datastore.core.QueryOptions; - -import java.io.File; -import 
java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutionException; - -import static org.junit.Assert.assertEquals; - -public class RemoteVariantAnnotationTest { - - //------------------------------------------------------------------------ - // IMPORTANT - // To run these tests you need to hava the following config file - // /opt/cellbase/remote.configuration.test.yaml - // (useful if you have a tunnel to the CellBase MongoDB) - //------------------------------------------------------------------------ - - private int dataRelease = 3; - private String species = "hsapiens"; - private String assembly = "grch38"; - - private CellBaseConfiguration cellBaseConfiguration; - private CellBaseManagerFactory cellBaseManagerFactory; - private VariantAnnotationCalculator variantAnnotationCalculator; - - public RemoteVariantAnnotationTest() { - } - - @Before - public void before() throws IOException, CellBaseException { - File configFile = Paths.get("/opt/cellbase/remote.configuration.test.yaml").toFile(); - Assume.assumeTrue(configFile.exists()); - - this.cellBaseConfiguration = CellBaseConfiguration.load(new FileInputStream(configFile.getAbsoluteFile().toString()), - CellBaseConfiguration.ConfigurationFileFormat.YAML); - - this.cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); - - this.variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly, dataRelease, "", cellBaseManagerFactory); - - } - - @Test - public void testEmptyConsequenceTypeListDEL() throws QueryException, ExecutionException, InterruptedException, CellBaseException, - IllegalAccessException { - - List> variants = new ArrayList<>(); - variants.add(new ImmutablePair<>(new Variant("10:101008524:G:-"), 29)); - variants.add(new ImmutablePair<>(new Variant("10:101018284:T:-"), 20)); - variants.add(new ImmutablePair<>(new Variant("10:101694601:CCT:-"), 
10)); - variants.add(new ImmutablePair<>(new Variant("10:102834048:A:-"), 13)); - variants.add(new ImmutablePair<>(new Variant("10:102834061:A:-"), 13)); - variants.add(new ImmutablePair<>(new Variant("10:103089679:TCC:-"), 73)); - variants.add(new ImmutablePair<>(new Variant("10:103089679:TCCTCC:-"), 73)); - variants.add(new ImmutablePair<>(new Variant("10:103089691:TCCTCT:-"), 73)); - variants.add(new ImmutablePair<>(new Variant("10:103089694:TCT:-"), 73)); - variants.add(new ImmutablePair<>(new Variant("10:104059681:T:-"), 9)); - variants.add(new ImmutablePair<>(new Variant("10:104145538:T:-"), 6)); - variants.add(new ImmutablePair<>(new Variant("10:118691379:AG:-"), 8)); - - checkAnnotation(variants); - } - - @Test - public void testEmptyConsequenceTypeListSNV() throws QueryException, ExecutionException, InterruptedException, CellBaseException, - IllegalAccessException { - - List> variants = new ArrayList<>(); - variants.add(new ImmutablePair<>(new Variant("10:89332624:T:C"), 30)); - variants.add(new ImmutablePair<>(new Variant("12:56117206:G:T"), 25)); - variants.add(new ImmutablePair<>(new Variant("1:39282328:T:A"), 16)); - variants.add(new ImmutablePair<>(new Variant("1:39254314:G:T"), 14)); - variants.add(new ImmutablePair<>(new Variant("1:39254319:C:T"), 14)); - variants.add(new ImmutablePair<>(new Variant("1:39282366:C:A"), 16)); - variants.add(new ImmutablePair<>(new Variant("1:39254320:G:A"), 14)); - variants.add(new ImmutablePair<>(new Variant("1:39254345:G:T"), 14)); - variants.add(new ImmutablePair<>(new Variant("1:39282374:G:A"), 16)); - variants.add(new ImmutablePair<>(new Variant("1:39254347:T:C"), 14)); - variants.add(new ImmutablePair<>(new Variant("1:39254353:T:A"), 14)); - variants.add(new ImmutablePair<>(new Variant("6:56900543:C:T"), 26)); - variants.add(new ImmutablePair<>(new Variant("6:56900614:C:T"), 26)); - variants.add(new ImmutablePair<>(new Variant("6:56954455:C:T"), 38)); - variants.add(new ImmutablePair<>(new 
Variant("6:56954502:A:C"), 38)); - variants.add(new ImmutablePair<>(new Variant("6:56954528:G:C"), 38)); - variants.add(new ImmutablePair<>(new Variant("8:98883755:A:G"), 15)); - - checkAnnotation(variants); - } - - @Test - public void controlTest() throws QueryException, ExecutionException, InterruptedException, CellBaseException, IllegalAccessException { - Variant variant = new Variant("10:100006605:T:C"); - CellBaseDataResult result = variantAnnotationCalculator.getAnnotationByVariant(variant, QueryOptions.empty()); - assertEquals(1, result.getNumResults()); - VariantAnnotation variantAnnotation = result.first(); - assertEquals(4, variantAnnotation.getHgvs().size()); - assertEquals(4, variantAnnotation.getConsequenceTypes().size()); - assertEquals(3, variantAnnotation.getConservation().size()); - assertEquals(2, variantAnnotation.getFunctionalScore().size()); - } - - private void checkAnnotation(List> variants) throws QueryException, ExecutionException, InterruptedException, - CellBaseException, IllegalAccessException { - - for (Pair pair : variants) { - Variant variant = pair.getKey(); - System.out.println(variant.toStringSimple()); - CellBaseDataResult result = variantAnnotationCalculator.getAnnotationByVariant(variant, QueryOptions.empty()); - VariantAnnotation variantAnnotation = result.first(); - assertEquals(variant.getChromosome(), variantAnnotation.getChromosome()); - assertEquals(variant.getStart(), variantAnnotation.getStart()); - assertEquals(variant.getReference(), variantAnnotation.getReference()); -// System.out.println(variantAnnotation.getConsequenceTypes().size()); - assertEquals(0l + pair.getValue(), variantAnnotation.getConsequenceTypes().size()); - } - } - -} diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/TranscriptMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/TranscriptMongoDBAdaptorTest.java index d464b5dc85..2cbb237b6f 100644 --- 
a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/TranscriptMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/TranscriptMongoDBAdaptorTest.java @@ -16,8 +16,6 @@ package org.opencb.cellbase.lib.impl.core; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.core.Transcript; @@ -26,8 +24,6 @@ import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.cellbase.lib.managers.TranscriptManager; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; @@ -39,80 +35,57 @@ */ public class TranscriptMongoDBAdaptorTest extends GenericMongoDBAdaptorTest { - public TranscriptMongoDBAdaptorTest() throws Exception { + public TranscriptMongoDBAdaptorTest() { super(); } - @BeforeEach - public void setUp() throws Exception { - clearDB(CELLBASE_DBNAME); - - createDataRelease(); - dataRelease = 1; - - Path path = Paths.get(getClass().getResource("/transcript/gene.test.json.gz").toURI()); - loadRunner.load(path, "gene", dataRelease); - updateDataRelease(dataRelease, "gene", Collections.emptyList()); - } - @Test - @Disabled public void testQuery() throws Exception { TranscriptManager transcriptManager = cellBaseManagerFactory.getTranscriptManager(SPECIES, ASSEMBLY); -// Query query = new Query(TranscriptDBAdaptor.QueryParams.REGION.key(), "1:816481-825251"); TranscriptQuery query = new TranscriptQuery(); - Region region = Region.parseRegion("1:816481-825251"); + Region region = Region.parseRegion("19:44905791-44906393"); query.setRegions(new ArrayList<>(Arrays.asList(region))); query.setCount(Boolean.TRUE); query.setDataRelease(dataRelease); CellBaseDataResult cellBaseDataResult = transcriptManager.search(query); - assertEquals(1, cellBaseDataResult.getNumResults()); - //assertEquals(((Document) 
CellBaseDataResult.getResults().get(0)).size(), 18); + assertEquals(5, cellBaseDataResult.getNumResults()); Transcript transcript = cellBaseDataResult.getResults().get(0); - assertEquals("ENST00000594233", transcript.getId()); -// assertEquals(((Document) CellBaseDataResult.getResults().get(0)).get("id"), "ENST00000594233"); + assertTrue(transcript.getId().startsWith("ENST00000446996")); -// query = new Query(TranscriptDBAdaptor.QueryParams.REGION.key(), "1:31851-44817"); - region = Region.parseRegion("1:31851-44817"); + region = Region.parseRegion("19:44905791-44907393"); query = new TranscriptQuery(); query.setCount(Boolean.TRUE); query.setRegions(Collections.singletonList(region)); query.setDataRelease(dataRelease); cellBaseDataResult = transcriptManager.search(query); - assertEquals(2, cellBaseDataResult.getNumResults()); - assertTrue(transcriptIdEquals(cellBaseDataResult, Arrays.asList("ENST00000417324", "ENST00000461467"))); + assertEquals(5, cellBaseDataResult.getNumResults()); + assertTrue(transcriptIdEquals(cellBaseDataResult, Arrays.asList("ENST00000446996", "ENST00000485628", "ENST00000252486", + "ENST00000434152", "ENST00000425718"))); -// query = new Query(TranscriptDBAdaptor.QueryParams.XREFS.key(), "Q9UL59"); query = new TranscriptQuery(); - query.setTranscriptsXrefs(Collections.singletonList("Q9UL59")); + query.setTranscriptsXrefs(Collections.singletonList("A0A087WSZ2")); query.setCount(Boolean.TRUE); query.setIncludes(Collections.singletonList("id")); query.setDataRelease(dataRelease); cellBaseDataResult = transcriptManager.search(query); - assertEquals(2, cellBaseDataResult.getNumResults()); -// assertEquals(1, ((Document) cellBaseDataResult.getResults().get(0)).size()); -// assertEquals(1, ((Document) cellBaseDataResult.getResults().get(1)).size()); + assertEquals(1, cellBaseDataResult.getNumResults()); - assertTrue(transcriptIdEquals(cellBaseDataResult, Arrays.asList("ENST00000278314", "ENST00000536068"))); + assertEquals("ENST00000502692", 
cellBaseDataResult.getResults().get(0).getId().split("\\.")[0]); -// query = new Query(TranscriptDBAdaptor.QueryParams.BIOTYPE.key(), "protein_coding"); query = new TranscriptQuery(); query.setCount(Boolean.TRUE); query.setTranscriptsBiotype(Collections.singletonList("protein_coding")); - query.setTranscriptsXrefs(Collections.singletonList("BRCA2")); -// query.put(TranscriptDBAdaptor.QueryParams.XREFS.key(), "BRCA2"); -// queryOptions = new QueryOptions("include", "transcripts.id"); + query.setTranscriptsXrefs(Collections.singletonList("BRCA1")); query.setIncludes(Collections.singletonList("transcripts.id")); query.setDataRelease(dataRelease); cellBaseDataResult = transcriptManager.search(query); - assertEquals(3, cellBaseDataResult.getNumResults()); - + assertEquals(27, cellBaseDataResult.getNumResults()); } private boolean transcriptIdEquals(CellBaseDataResult cellBaseDataResult, List transcriptIdList) { Set set1 = (Set) cellBaseDataResult.getResults().stream() - .map(result -> ((String) ((Transcript) result).getId())).collect(Collectors.toSet()); + .map(result -> (((Transcript) result).getId().split("\\.")[0])).collect(Collectors.toSet()); Set set2 = new HashSet<>(transcriptIdList); return set1.equals(set2); diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java index b8c384f85a..5f11b6da7d 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java @@ -20,6 +20,7 @@ import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.collections4.CollectionUtils; import org.hamcrest.CoreMatchers; import org.junit.jupiter.api.BeforeAll; import 
org.junit.jupiter.api.Disabled; @@ -50,18 +51,12 @@ public class VariantAnnotationCalculatorTest extends GenericMongoDBAdaptorTest { public VariantAnnotationCalculatorTest() throws Exception { super(); - } - @BeforeAll - public void setUp() throws Exception { + variantAnnotationCalculator = new VariantAnnotationCalculator(SPECIES, ASSEMBLY, dataRelease, token, cellBaseManagerFactory); + jsonObjectMapper = new ObjectMapper(); jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); - - clearDB(CELLBASE_DBNAME); - initDB(); - - variantAnnotationCalculator = new VariantAnnotationCalculator(SPECIES, ASSEMBLY, dataRelease, token, cellBaseManagerFactory); } @Test @@ -647,7 +642,7 @@ private ConsequenceType getConsequenceType(List consequenceType public void testCellBaseDataResultGroupingDecomposedMNVs() throws Exception { QueryOptions queryOptions = (new QueryOptions("normalize", true)); - queryOptions.put("skipDecompose", false); + queryOptions.put("decompose", true); // Creating here a local VariantAnnotationCalculator since this test requires setting normalizer decompose // option to true which probably breaks some other tests. 
@@ -722,12 +717,10 @@ public void testCellBaseDataResultGroupingDecomposedMNVs() throws Exception { assertEquals(Integer.valueOf("33167330"), variantAnnotation.getStart()); assertEquals("C", variantAnnotation.getReference()); assertEquals("T", variantAnnotation.getAlternate()); - - - } @Test + @Disabled public void testPopulationFrequencies() throws Exception { QueryOptions queryOptions = new QueryOptions("normalize", true); @@ -1105,6 +1098,7 @@ public void testNoLicensedClinicalAnnotation() throws Exception { } @Test + @Disabled public void testClinicalAnnotationGwas() throws Exception { QueryOptions queryOptions = new QueryOptions("useCache", false); queryOptions.put("include", "clinical"); @@ -1132,8 +1126,8 @@ public void testClinicalAnnotationGwas() throws Exception { } - - @Test + @Test + @Disabled public void testClinicalAnnotation() throws Exception { QueryOptions queryOptions = new QueryOptions("useCache", false); queryOptions.put("include", "clinical"); @@ -1948,14 +1942,226 @@ private void sortProteinFeatureList(Collection consequenceTypeS } @Test - public void testSpliceAnnotation() throws Exception { + public void testLicensedDataUnisersalTokenAnnotation() throws Exception { QueryOptions queryOptions = new QueryOptions("useCache", false); - queryOptions.put("include", "splice_score"); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); - Variant variant = new Variant("2", 114340663, "G", "A"); + variantAnnotationCalculator.setToken(UNIVERSAL_ACDES_TOKEN); + + Variant variant = new Variant("10", 113588287, "G", "A"); CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator .getAnnotationByVariant(variant, queryOptions); - assertEquals(1, cellBaseDataResult.getNumMatches()); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + + if (!containTraitAssociation(variantAnnotation, "clinvar")) { + fail(); + } + if (!containTraitAssociation(variantAnnotation, "hgmd")) { + fail(); + } + if 
(!containTraitAssociation(variantAnnotation, "cosmic")) { + fail(); + } + if (!containSpliceScore(variantAnnotation, "SpliceAI")) { + fail(); + } + } + + @Test + public void testLicensedDataHgmdTokenAnnotation() throws Exception { + QueryOptions queryOptions = new QueryOptions("useCache", false); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); + + variantAnnotationCalculator.setToken(HGMD_ACCESS_TOKEN); + + Variant variant = new Variant("10", 113588287, "G", "A"); + CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator + .getAnnotationByVariant(variant, queryOptions); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + + if (!containTraitAssociation(variantAnnotation, "clinvar")) { + fail(); + } + if (!containTraitAssociation(variantAnnotation, "hgmd")) { + fail(); + } + if (containTraitAssociation(variantAnnotation, "cosmic")) { + fail(); + } + if (containSpliceScore(variantAnnotation, "SpliceAI")) { + fail(); + } + } + + @Test + public void testLicensedDataCosmicTokenAnnotation() throws Exception { + QueryOptions queryOptions = new QueryOptions("useCache", false); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); + + variantAnnotationCalculator.setToken(COSMIC_ACCESS_TOKEN); + + Variant variant = new Variant("10", 113588287, "G", "A"); + CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator + .getAnnotationByVariant(variant, queryOptions); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + + if (!containTraitAssociation(variantAnnotation, "clinvar")) { + fail(); + } + if (containTraitAssociation(variantAnnotation, "hgmd")) { + fail(); + } + if (!containTraitAssociation(variantAnnotation, "cosmic")) { + fail(); + } + if (containSpliceScore(variantAnnotation, "SpliceAI")) { + fail(); + } + } + + @Test + public void testLicensedDataSpliceTokenAnnotation() throws Exception { + QueryOptions queryOptions = new 
QueryOptions("useCache", false); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); + + variantAnnotationCalculator.setToken(SPLICEAI_ACCESS_TOKEN); + + Variant variant = new Variant("10", 113588287, "G", "A"); + CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator + .getAnnotationByVariant(variant, queryOptions); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + + if (!containTraitAssociation(variantAnnotation, "clinvar")) { + fail(); + } + if (containTraitAssociation(variantAnnotation, "hgmd")) { + fail(); + } + if (containTraitAssociation(variantAnnotation, "cosmic")) { + fail(); + } + if (!containSpliceScore(variantAnnotation, "SpliceAI")) { + fail(); + } + } + + @Test + public void testLicensedDataHgmdCosmicTokenAnnotation() throws Exception { + QueryOptions queryOptions = new QueryOptions("useCache", false); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); + + variantAnnotationCalculator.setToken(HGMD_COSMIC_ACCESS_TOKEN); + + Variant variant = new Variant("10", 113588287, "G", "A"); + CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator + .getAnnotationByVariant(variant, queryOptions); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + + if (!containTraitAssociation(variantAnnotation, "clinvar")) { + fail(); + } + if (!containTraitAssociation(variantAnnotation, "hgmd")) { + fail(); + } + if (!containTraitAssociation(variantAnnotation, "cosmic")) { + fail(); + } + if (containSpliceScore(variantAnnotation, "SpliceAI")) { + fail(); + } + } + + @Test + public void testLicensedDataHgmdSpliceAiTokenAnnotation() throws Exception { + QueryOptions queryOptions = new QueryOptions("useCache", false); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); + + variantAnnotationCalculator.setToken(HGMD_SPLICEAI_ACCESS_TOKEN); + + Variant variant = new Variant("10", 113588287, "G", "A"); + 
CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator + .getAnnotationByVariant(variant, queryOptions); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + + if (!containTraitAssociation(variantAnnotation, "clinvar")) { + fail(); + } + if (!containTraitAssociation(variantAnnotation, "hgmd")) { + fail(); + } + if (containTraitAssociation(variantAnnotation, "cosmic")) { + fail(); + } + if (!containSpliceScore(variantAnnotation, "SpliceAI")) { + fail(); + } } + @Test + public void testLicensedDataCosmicSpliceTokenAnnotation() throws Exception { + QueryOptions queryOptions = new QueryOptions("useCache", false); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); + + variantAnnotationCalculator.setToken(COSMIC_SPLICEAI_ACCESS_TOKEN); + + Variant variant = new Variant("10", 113588287, "G", "A"); + CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator + .getAnnotationByVariant(variant, queryOptions); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + + if (!containTraitAssociation(variantAnnotation, "clinvar")) { + fail(); + } + if (containTraitAssociation(variantAnnotation, "hgmd")) { + fail(); + } + if (!containTraitAssociation(variantAnnotation, "cosmic")) { + fail(); + } + if (!containSpliceScore(variantAnnotation, "SpliceAI")) { + fail(); + } + } + + + private boolean containTraitAssociation(VariantAnnotation variantAnnotation, String source) { + if (variantAnnotation == null) { + return false; + } + if (CollectionUtils.isEmpty(variantAnnotation.getTraitAssociation())) { + return false; + } + for (EvidenceEntry entry : variantAnnotation.getTraitAssociation()) { + if (source.equals(entry.getSource().getName())) { + return true; + } + } + return false; + } + + private boolean containSpliceScore(VariantAnnotation variantAnnotation, String source) { + if (variantAnnotation == null) { + return false; + } + if 
(CollectionUtils.isEmpty(variantAnnotation.getConsequenceTypes())) { + return false; + } + for (ConsequenceType consequenceType : variantAnnotation.getConsequenceTypes()) { + if (CollectionUtils.isNotEmpty(consequenceType.getSpliceScores())) { + for (SpliceScores spliceScore : consequenceType.getSpliceScores()) { + if (source.equals(spliceScore.getSource())) { + return true; + } + } + } + } + return false; + } } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java index a1f04b2386..fdf7e7a0e7 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java @@ -24,9 +24,13 @@ import org.opencb.biodata.models.variant.avro.PopulationFrequency; import org.opencb.biodata.models.variant.avro.SampleEntry; import org.opencb.cellbase.core.ParamConstants; +import org.opencb.cellbase.core.api.VariantQuery; +import org.opencb.cellbase.core.api.query.LogicalList; +import org.opencb.cellbase.core.api.query.QueryException; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; +import org.opencb.cellbase.lib.managers.GeneManager; import org.opencb.cellbase.lib.managers.VariantManager; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -54,29 +58,6 @@ public class VariantMongoDBAdaptorTest extends GenericMongoDBAdaptorTest { public VariantMongoDBAdaptorTest() throws Exception { super(); - setUp(); - } - - public void setUp() throws Exception { - clearDB(CELLBASE_DBNAME); - - createDataRelease(); - dataRelease = 1; - - Path path = Paths.get(getClass() - .getResource("/variation_chr22.full.test.json.gz").toURI()); - 
loadRunner.load(path, "variation", dataRelease); - path = Paths.get(getClass() - .getResource("/variation_chr17.full.test.json.gz").toURI()); - loadRunner.load(path, "variation", dataRelease); - path = Paths.get(getClass() - .getResource("/variation_chr10.full.test.json.gz").toURI()); - loadRunner.load(path, "variation", dataRelease); - path = Paths.get(getClass() - .getResource("/variation_chr1.full.test.json.gz").toURI()); - loadRunner.load(path, "variation", dataRelease); - updateDataRelease(dataRelease, "variation", Collections.emptyList()); - variantManager = cellBaseManagerFactory.getVariantManager(SPECIES, ASSEMBLY); } @@ -92,6 +73,7 @@ public void testGetFunctionalScoreVariant() throws Exception { } @Test + @Disabled public void getPhasedPopulationFrequencyByVariant() throws Exception { VariantBuilder variantBuilder = new VariantBuilder("1", 62165739, @@ -278,37 +260,49 @@ private List getPopulationFrequency(List result = variantManager - .get(new Query(ParamConstants.QueryParams.GENE.key(), "CTA-445C9.14"), queryOptions, dataRelease); - assertEquals(21, result.getNumResults()); - assertThat(result.getResults().stream().map(variant -> variant.getId()).collect(Collectors.toList()), - CoreMatchers.hasItems("rs191188630", "rs191113747", "rs191348407", "rs191952842", - "rs192035553", "rs192722941", "rs192695313", "rs199730247", "rs199753073", "rs199826190", - "rs199934473", "rs200591220", "rs200883222", "rs200830209", "rs200830209", "rs200915243", - "rs200994757", "rs200942224", "rs201498625", "rs201498625")); - CellBaseDataResult resultENSEMBLGene = variantManager - .get(new Query(ParamConstants.QueryParams.GENE.key(), "ENSG00000261188"), queryOptions, dataRelease); - assertEquals(result.getResults(), resultENSEMBLGene.getResults()); - - // ENSEMBL transcript ids are also allowed for the GENE query parameter - this was done on purpose - CellBaseDataResult resultENSEMBLTranscript = variantManager - .get(new Query(ParamConstants.QueryParams.GENE.key(), 
"ENST00000565764"), queryOptions, dataRelease); - assertEquals(20, resultENSEMBLTranscript.getNumResults()); - assertThat(resultENSEMBLTranscript.getResults().stream().map(variant -> variant.getId()).collect(Collectors.toList()), - CoreMatchers.hasItems("rs191188630", "rs191113747", "rs191348407", "rs191952842", "rs192035553", - "rs192722941", "rs192695313", "rs199730247", "rs199753073", "rs199934473", "rs200591220", - "rs200883222", "rs200830209", "rs200830209", "rs200915243", "rs200994757", "rs200942224", - "rs201498625", "rs201498625", "rs201498625")); +// GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY); +// geneManager.search() + VariantQuery variantQuery = new VariantQuery(); + variantQuery.setGenes(new LogicalList<>(Collections.singletonList("BRCA1"))); + variantQuery.setDataRelease(dataRelease); + CellBaseDataResult result = variantManager.search(variantQuery); + for (Variant variant : result.getResults()) { + System.out.println(variant.getId()); + } - CellBaseDataResult geneCellBaseDataResult = variantManager - .get(new Query(ParamConstants.QueryParams.GENE.key(), "CERK"), queryOptions, dataRelease); - assertThat(geneCellBaseDataResult.getResults().stream().map(variant -> variant.getId()).collect(Collectors.toList()), - CoreMatchers.hasItems("rs192195512", "rs193091997", "rs200609865")); + // commented by JT +// CellBaseDataResult result = variantManager +// .get(new Query(ParamConstants.QueryParams.GENE.key(), "BRCA1"), queryOptions, dataRelease); +// assertEquals(21, result.getNumResults()); +// assertThat(result.getResults().stream().map(variant -> variant.getId()).collect(Collectors.toList()), +// CoreMatchers.hasItems("rs191188630", "rs191113747", "rs191348407", "rs191952842", +// "rs192035553", "rs192722941", "rs192695313", "rs199730247", "rs199753073", "rs199826190", +// "rs199934473", "rs200591220", "rs200883222", "rs200830209", "rs200830209", "rs200915243", +// "rs200994757", "rs200942224", "rs201498625", 
"rs201498625")); +// +// CellBaseDataResult resultENSEMBLGene = variantManager +// .get(new Query(ParamConstants.QueryParams.GENE.key(), "ENSG00000261188"), queryOptions, dataRelease); +// assertEquals(result.getResults(), resultENSEMBLGene.getResults()); +// +// // ENSEMBL transcript ids are also allowed for the GENE query parameter - this was done on purpose +// CellBaseDataResult resultENSEMBLTranscript = variantManager +// .get(new Query(ParamConstants.QueryParams.GENE.key(), "ENST00000565764"), queryOptions, dataRelease); +// assertEquals(20, resultENSEMBLTranscript.getNumResults()); +// assertThat(resultENSEMBLTranscript.getResults().stream().map(variant -> variant.getId()).collect(Collectors.toList()), +// CoreMatchers.hasItems("rs191188630", "rs191113747", "rs191348407", "rs191952842", "rs192035553", +// "rs192722941", "rs192695313", "rs199730247", "rs199753073", "rs199934473", "rs200591220", +// "rs200883222", "rs200830209", "rs200830209", "rs200915243", "rs200994757", "rs200942224", +// "rs201498625", "rs201498625", "rs201498625")); +// +// CellBaseDataResult geneCellBaseDataResult = variantManager +// .get(new Query(ParamConstants.QueryParams.GENE.key(), "CERK"), queryOptions, dataRelease); +// assertThat(geneCellBaseDataResult.getResults().stream().map(variant -> variant.getId()).collect(Collectors.toList()), +// CoreMatchers.hasItems("rs192195512", "rs193091997", "rs200609865")); } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/XRefMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/XRefMongoDBAdaptorTest.java index 1c330f5093..942176b7b6 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/XRefMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/XRefMongoDBAdaptorTest.java @@ -16,14 +16,17 @@ package org.opencb.cellbase.lib.impl.core; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import 
org.opencb.biodata.models.core.Xref; +import org.opencb.cellbase.core.api.XrefQuery; +import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.cellbase.lib.managers.XrefManager; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.Collections; +import java.util.List; + +import static org.bson.assertions.Assertions.fail; /** @@ -33,32 +36,25 @@ public class XRefMongoDBAdaptorTest extends GenericMongoDBAdaptorTest { public XRefMongoDBAdaptorTest() throws Exception { super(); - - setUp(); - } - - public void setUp() throws Exception { - clearDB(CELLBASE_DBNAME); - - createDataRelease(); - dataRelease = 1; - - Path path = Paths.get(getClass().getResource("/xref/gene.test.json.gz").toURI()); - loadRunner.load(path, "gene", dataRelease); - updateDataRelease(dataRelease, "gene", Collections.emptyList()); } @Test - @Disabled - public void contains() throws Exception { -// XRefMongoDBAdaptor xRefDBAdaptor = dbAdaptorFactory.getXRefDBAdaptor("hsapiens", "GRCh37", dataRelease); + public void queryTest() throws Exception { XrefManager xrefManager = cellBaseManagerFactory.getXrefManager(SPECIES, ASSEMBLY); -// CellBaseDataResult xrefs = xRefDBAdaptor.contains("BRCA2", new QueryOptions()); -// Set reference = new HashSet<>(Arrays.asList("ENSG00000185515", "ENSG00000139618", "ENSG00000107949", -// "ENSG00000083093", "ENSG00000170037")); -// Set set = (Set) xrefs.getResults().stream() -// .map(result -> ((String) ((Document) result).get("id"))).collect(Collectors.toSet()); -// assertEquals(reference, set); + XrefQuery query = new XrefQuery(); + query.setIds(Collections.singletonList("BRCA1")); + query.setDataRelease(dataRelease); + List> resultList = xrefManager.search(Collections.singletonList(query)); + CellBaseDataResult result = resultList.get(0); + boolean found = false; + for (Xref xref : result.getResults()) { + if (xref.getId().equals("ENSG00000012048")) { + found = true; + 
break; + } + } + if (!found) { + fail(); + } } - } \ No newline at end of file diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java index e027dc395d..c27db8803d 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java @@ -1,6 +1,7 @@ package org.opencb.cellbase.lib.indexer; import org.bson.Document; +import org.junit.Assert; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.opencb.biodata.models.core.Gene; @@ -19,6 +20,7 @@ import org.opencb.commons.datastore.mongodb.MongoDataStore; import java.io.IOException; +import java.net.URISyntaxException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collections; @@ -29,47 +31,33 @@ public class IndexManagerTest extends GenericMongoDBAdaptorTest { private IndexManager indexManager; - private DataReleaseManager dataReleaseManager; - private String databaseName = "cellbase_hsapiens_grch37_v4"; - public IndexManagerTest() { - try { - int release = 1; + public IndexManagerTest() throws URISyntaxException { + super(); - Path path = Paths.get(getClass().getResource("/index/mongodb-indexes.json").toURI()); - indexManager = new IndexManager(databaseName, path, cellBaseConfiguration); - dataReleaseManager = new DataReleaseManager(databaseName, cellBaseConfiguration); - - clearDB(CELLBASE_DBNAME); - - dataReleaseManager.createRelease(); - path = Paths.get(getClass().getResource("/gene/gene-test.json.gz").toURI()); - loadRunner.load(path, "gene", release); - } catch (Exception e) { - e.printStackTrace(); - } + Path path = Paths.get(getClass().getResource("/index/mongodb-indexes.json").toURI()); + indexManager = new IndexManager(cellBaseName, path, cellBaseConfiguration); } @Test - @Disabled public void testIndexes() throws 
IOException, CellBaseException, QueryException, IllegalAccessException { String collectionName = "gene" + CellBaseDBAdaptor.DATA_RELEASE_SEPARATOR + dataRelease; indexManager.createMongoDBIndexes(Collections.singletonList(collectionName), true); MongoDBManager mongoDBManager = new MongoDBManager(cellBaseConfiguration); - MongoDataStore mongoDataStore = mongoDBManager.createMongoDBDatastore("hsapiens", "grch37"); + MongoDataStore mongoDataStore = mongoDBManager.createMongoDBDatastore(SPECIES, ASSEMBLY); MongoDBCollection mongoDBCollection = mongoDataStore.getCollection(collectionName); DataResult index = mongoDBCollection.getIndex(); assertNotNull(index); - CellBaseManagerFactory factory = new CellBaseManagerFactory(cellBaseConfiguration); - GeneManager geneManager = factory.getGeneManager("hsapiens", "grch37"); + GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY); GeneQuery query = new GeneQuery(); - query.setIds(Collections.singletonList("ENSG00000279457")); + query.setNames(Collections.singletonList("BRCA1")); + query.setDataRelease(dataRelease); CellBaseDataResult result = geneManager.search(query); assertEquals(1, result.getNumResults()); - assertEquals("ENSG00000279457", result.getResults().get(0).getId()); - assertEquals("WASH9P", result.getResults().get(0).getName()); + assertEquals("BRCA1", result.getResults().get(0).getName()); + assertEquals("ENSG00000012048", result.getResults().get(0).getId()); } } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/managers/DataAccessTokenSourcesManagerTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/managers/DataAccessTokenSourcesManagerTest.java index 2cc8da2480..b56b4a5301 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/managers/DataAccessTokenSourcesManagerTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/managers/DataAccessTokenSourcesManagerTest.java @@ -20,7 +20,6 @@ public class DataAccessTokenSourcesManagerTest { @Before 
public void before() { String randomStr = "xPacig89igHSieEnveJEi4KCfdEslhmssC3vui1JJQGgDQ0y8v"; -// String randomStr = RandomStringUtils.randomAlphanumeric(SECRET_KEY_MIN_LENGTH); System.out.println("Secret key = " + randomStr); datManager = new DataAccessTokenManager(randomStr); } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/managers/DataReleaseManagerTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/managers/DataReleaseManagerTest.java index 802c2e5b55..48b721ddd1 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/managers/DataReleaseManagerTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/managers/DataReleaseManagerTest.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantAnnotation; @@ -29,24 +30,14 @@ class DataReleaseManagerTest extends GenericMongoDBAdaptorTest { protected DataReleaseManager dataReleaseManager; - private static boolean populated = false; - public DataReleaseManagerTest() throws IOException { + public DataReleaseManagerTest() throws CellBaseException { super(); - } - - @BeforeEach - public void setUp() throws Exception { - if (!populated) { - clearDB(CELLBASE_DBNAME); - initDB(); - } - dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY); - populated = true; } @Test + @Disabled public void testCreate() throws JsonProcessingException { CellBaseDataResult result = dataReleaseManager.getReleases(); DataRelease dr = dataReleaseManager.createRelease(); @@ -54,6 +45,7 @@ public void testCreate() throws JsonProcessingException { } @Test + @Disabled public void testAddActiveByDefaultIn() throws CellBaseException, JsonProcessingException { DataRelease dr = 
dataReleaseManager.createRelease(); dataReleaseManager.update(dr.getRelease(), Arrays.asList("v5.1", "v5.2")); @@ -64,6 +56,7 @@ public void testAddActiveByDefaultIn() throws CellBaseException, JsonProcessingE } @Test + @Disabled public void testChangeActiveByDefaultIn() throws JsonProcessingException, CellBaseException { String version3 = "v5.3"; String version4 = "v5.4"; @@ -89,6 +82,7 @@ public void testChangeActiveByDefaultIn() throws JsonProcessingException, CellBa } @Test + @Disabled public void failLoading() throws IOException, ExecutionException, ClassNotFoundException, InterruptedException, InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException, URISyntaxException, CellBaseException, LoaderException { @@ -105,6 +99,7 @@ public void failLoading() throws IOException, ExecutionException, ClassNotFoundE } @Test + @Disabled public void testMultipleAddActiveByDefaultIn() throws JsonProcessingException, CellBaseException { String version6 = "v5.6"; String version7 = "v5.7"; @@ -125,6 +120,7 @@ public void testMultipleAddActiveByDefaultIn() throws JsonProcessingException, C } @Test + @Disabled public void testRemoveMultipleAddActiveByDefaultIn() throws JsonProcessingException, CellBaseException { String version8 = "v5.8"; String version9 = "v5.9"; @@ -150,6 +146,7 @@ public void testRemoveMultipleAddActiveByDefaultIn() throws JsonProcessingExcept } @Test + @Disabled public void testAnnotationWithDR0() throws CellBaseException, QueryException, ExecutionException, InterruptedException, IllegalAccessException { dataReleaseManager.update(1, Arrays.asList("v5.5")); @@ -167,6 +164,7 @@ public void testAnnotationWithDR0() throws CellBaseException, QueryException, Ex } @Test + @Disabled public void testAnnotationWithInvalidDR() { CellBaseException thrown = Assertions.assertThrows(CellBaseException.class, () -> { VariantAnnotationCalculator annotator = new VariantAnnotationCalculator(SPECIES, ASSEMBLY, -1, token, 
cellBaseManagerFactory); @@ -177,6 +175,7 @@ public void testAnnotationWithInvalidDR() { } @Test + @Disabled public void testAnnotationWithInvalidDR_1() { int dr = 12; CellBaseException thrown = Assertions.assertThrows(CellBaseException.class, () -> { diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/monitor/MonitorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/monitor/MonitorTest.java index 45a7b65227..50026164b1 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/monitor/MonitorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/monitor/MonitorTest.java @@ -37,7 +37,6 @@ public MonitorTest() throws IOException { public void run() throws Exception { // "Local" monitoring all OK - clearDB(CELLBASE_DBNAME); Path path = Paths.get(getClass() .getResource("/gene.test.json.gz").toURI()); loadRunner.load(path, "gene"); @@ -52,7 +51,6 @@ public void run() throws Exception { assertEquals(HealthCheckResponse.Status.OK, health.getStatus()); // Empty gene collection - clearDB(CELLBASE_DBNAME); monitor = new Monitor(cellBaseManagerFactory.getMetaManager()); health = monitor.run("localhost", cellBaseConfiguration, SPECIES, ASSEMBLY, null); assertEquals(HealthCheckResponse.Status.DOWN, health.getStatus()); diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/ClinicalManagerTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/ClinicalManagerTest.java index 7333b710d7..0a6536cf5e 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/ClinicalManagerTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/ClinicalManagerTest.java @@ -20,7 +20,7 @@ import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; import org.junit.Assert; -import org.junit.jupiter.api.BeforeAll; +import org.junit.Test; import org.junit.jupiter.api.TestInstance; import org.opencb.biodata.models.core.Region; import 
org.opencb.biodata.models.variant.Variant; @@ -32,7 +32,6 @@ import org.opencb.cellbase.lib.iterator.CellBaseIterator; import org.opencb.cellbase.lib.managers.ClinicalManager; -import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -41,28 +40,21 @@ public class ClinicalManagerTest extends GenericMongoDBAdaptorTest { private ObjectMapper jsonObjectMapper; private ClinicalManager clinicalManager; - public ClinicalManagerTest() throws IOException { + public ClinicalManagerTest() throws CellBaseException { super(); - } - @BeforeAll - public void setUp() throws Exception { jsonObjectMapper = new ObjectMapper(); jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); clinicalManager = cellBaseManagerFactory.getClinicalManager(SPECIES, ASSEMBLY); - - clearDB(CELLBASE_DBNAME); - initDB(); } - //------------------------------------------------------------------------- // S E A R C H //------------------------------------------------------------------------- -// @Test + @Test public void testLicensedHGMD() throws CellBaseException, QueryException, IllegalAccessException { // Token with licensed HGMD, so only CLINVAR and HGMD are allowed ClinicalVariantQuery query = new ClinicalVariantQuery(); @@ -78,7 +70,7 @@ public void testLicensedHGMD() throws CellBaseException, QueryException, Illegal Assert.assertEquals(2, results.getResults().get(0).getAnnotation().getTraitAssociation().size()); } -// @Test + @Test public void testNotLicensed() throws CellBaseException, QueryException, IllegalAccessException { // Any token, so only CLINVAR is allowed ClinicalVariantQuery query = new ClinicalVariantQuery(); @@ -98,7 +90,7 @@ public void testNotLicensed() throws CellBaseException, QueryException, IllegalA // I T E R A T O R //------------------------------------------------------------------------- -// @Test + @Test public void testIteratorOnlyClinvar() throws 
CellBaseException, QueryException, IllegalAccessException { // Token with licensed HGMD, so only CLINVAR and HGMD are allowed ClinicalVariantQuery query = new ClinicalVariantQuery(); @@ -115,8 +107,8 @@ public void testIteratorOnlyClinvar() throws CellBaseException, QueryException, } } -// @Test - public void testIteratorLicensedHGMD() throws CellBaseException, QueryException, IllegalAccessException { + @Test + public void testIteratorLicensedHGMD() throws CellBaseException { // Token with licensed HGMD, so only CLINVAR and HGMD are allowed ClinicalVariantQuery query = new ClinicalVariantQuery(); @@ -132,6 +124,4 @@ public void testIteratorLicensedHGMD() throws CellBaseException, QueryException, Assert.assertEquals(2, variant.getAnnotation().getTraitAssociation().size()); } } - - } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/VariantManagerTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/VariantManagerTest.java index b660b9fa29..f15293f601 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/VariantManagerTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/VariantManagerTest.java @@ -25,6 +25,7 @@ import org.junit.jupiter.api.TestInstance; import org.opencb.biodata.models.variant.Variant; import org.opencb.cellbase.core.api.VariantQuery; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.cellbase.lib.managers.DataReleaseManager; @@ -41,21 +42,14 @@ public class VariantManagerTest extends GenericMongoDBAdaptorTest { private ObjectMapper jsonObjectMapper; private VariantAnnotationCalculator variantAnnotationCalculator; private VariantManager variantManager; - private DataReleaseManager dataReleaseManager; - public VariantManagerTest() throws IOException { + public VariantManagerTest() throws CellBaseException { super(); - } - 
@BeforeAll - public void setUp() throws Exception { jsonObjectMapper = new ObjectMapper(); jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); - clearDB(CELLBASE_DBNAME); - initDB(); - variantAnnotationCalculator = new VariantAnnotationCalculator(SPECIES, ASSEMBLY, dataRelease, token, cellBaseManagerFactory); variantManager = cellBaseManagerFactory.getVariantManager(SPECIES, ASSEMBLY); } @@ -63,7 +57,7 @@ public void setUp() throws Exception { @Test @Disabled public void testNormalisation() throws Exception { - CellBaseDataResult results = variantManager.getNormalizationByVariant("22:18512237:-:AGTT", dataRelease); + CellBaseDataResult results = variantManager.getNormalizationByVariant("22:18512237:-:AGTT", true, true, dataRelease); assertEquals(1, results.getResults().size()); } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculatorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculatorTest.java index c6abc17041..d834a5115a 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculatorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculatorTest.java @@ -37,25 +37,28 @@ public class HgvsCalculatorTest extends GenericMongoDBAdaptorTest { private GeneMongoDBAdaptor geneDBAdaptor; private GeneManager geneManager; - public HgvsCalculatorTest() throws IOException { + public HgvsCalculatorTest() throws CellBaseException { + super(); + geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY); + hgvsCalculator = new HgvsCalculator(cellBaseManagerFactory.getGenomeManager(SPECIES, ASSEMBLY), dataRelease); } - @BeforeAll - public void init() throws Exception { - clearDB(CELLBASE_DBNAME); - DataReleaseManager dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager("hsapiens", "GRCh37"); - dataRelease = 
dataReleaseManager.createRelease().getRelease(); - Path path = Paths.get(getClass() - .getResource("/hgvs/gene.test.json.gz").toURI()); - loadRunner.load(path, "gene", dataRelease); - path = Paths.get(getClass() - .getResource("/hgvs/genome_sequence.test.json.gz").toURI()); - loadRunner.load(path, "genome_sequence", dataRelease); - CellBaseManagerFactory cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); - geneManager = cellBaseManagerFactory.getGeneManager("hsapiens", "GRCh37"); - hgvsCalculator = new HgvsCalculator(cellBaseManagerFactory.getGenomeManager("hsapiens", "GRCh37"), dataRelease); -// geneDBAdaptor = dbAdaptorFactory.getGeneDBAdaptor("hsapiens", "GRCh37"); - } +// @BeforeAll +// public void init() throws Exception { +// clearDB(CELLBASE_DBNAME); +// DataReleaseManager dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager("hsapiens", "GRCh37"); +// dataRelease = dataReleaseManager.createRelease().getRelease(); +// Path path = Paths.get(getClass() +// .getResource("/hgvs/gene.test.json.gz").toURI()); +// loadRunner.load(path, "gene", dataRelease); +// path = Paths.get(getClass() +// .getResource("/hgvs/genome_sequence.test.json.gz").toURI()); +// loadRunner.load(path, "genome_sequence", dataRelease); +// CellBaseManagerFactory cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); +// geneManager = cellBaseManagerFactory.getGeneManager("hsapiens", "GRCh37"); +// hgvsCalculator = new HgvsCalculator(cellBaseManagerFactory.getGenomeManager("hsapiens", "GRCh37"), dataRelease); +//// geneDBAdaptor = dbAdaptorFactory.getGeneDBAdaptor("hsapiens", "GRCh37"); +// } // @Test // public void testProteinHgvsInsertion() throws Exception { diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/hgvs/HgvsTranscriptCalculatorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/hgvs/HgvsTranscriptCalculatorTest.java index 57bbc9fb36..c40fd75ce4 100644 --- 
a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/hgvs/HgvsTranscriptCalculatorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/variant/hgvs/HgvsTranscriptCalculatorTest.java @@ -13,6 +13,7 @@ import org.opencb.biodata.models.core.Transcript; import org.opencb.biodata.models.variant.Variant; import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.cellbase.lib.impl.core.GenomeMongoDBAdaptor; import org.opencb.cellbase.lib.impl.core.MongoDBAdaptorFactory; @@ -44,47 +45,54 @@ public class HgvsTranscriptCalculatorTest extends GenericMongoDBAdaptorTest { private ObjectMapper jsonObjectMapper; private List geneList; - private GenomeMongoDBAdaptor genomeDBAdaptor; - protected MongoDBAdaptorFactory dbAdaptorFactory; +// private GenomeMongoDBAdaptor genomeDBAdaptor; +// protected MongoDBAdaptorFactory dbAdaptorFactory; private GenomeManager genomeManager; - public HgvsTranscriptCalculatorTest() { + public HgvsTranscriptCalculatorTest() throws CellBaseException { + super(); + jsonObjectMapper = new ObjectMapper(); + jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); + jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); + jsonObjectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + genomeManager = cellBaseManagerFactory.getGenomeManager(SPECIES, ASSEMBLY); } // TODO add KeyError: '1:244856830:T:-', generated an error in the python script - @BeforeAll - public void setUp() { - try { - int release = 1; - - clearDB(CELLBASE_DBNAME); - Path path = Paths.get(getClass().getResource("/hgvs/gene_grch38.test.json.gz").toURI()); - loadRunner.load(path, "gene", release); - path = Paths.get(getClass().getResource("/hgvs/genome_sequence_grch38.test.json.gz").toURI()); - loadRunner.load(path, "genome_sequence", release); - - jsonObjectMapper = 
new ObjectMapper(); - jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); - jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); - jsonObjectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - - geneList = loadGenes(Paths.get(getClass().getResource("/hgvs/gene_grch38.test.json.gz").getFile())); - - CellBaseConfiguration cellBaseConfiguration = CellBaseConfiguration.load( - HgvsTranscriptCalculatorTest.class.getClassLoader().getResourceAsStream("configuration.test.yaml"), - CellBaseConfiguration.ConfigurationFileFormat.YAML); - -// dbAdaptorFactory = new MongoDBAdaptorFactory(cellBaseConfiguration); -// genomeDBAdaptor = dbAdaptorFactory.getGenomeDBAdaptor("hsapiens", "GRCh37"); - - CellBaseManagerFactory cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); - genomeManager = cellBaseManagerFactory.getGenomeManager("hsapiens", "GRCh38"); - } catch (Exception e) { - e.printStackTrace(); - } - } +// @BeforeAll +// public void setUp() { +// try { +// int release = 1; +// +// clearDB(CELLBASE_DBNAME); +// Path path = Paths.get(getClass().getResource("/hgvs/gene_grch38.test.json.gz").toURI()); +// loadRunner.load(path, "gene", release); +// path = Paths.get(getClass().getResource("/hgvs/genome_sequence_grch38.test.json.gz").toURI()); +// loadRunner.load(path, "genome_sequence", release); +// +// jsonObjectMapper = new ObjectMapper(); +// jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); +// jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); +// jsonObjectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); +// +// geneList = loadGenes(Paths.get(getClass().getResource("/hgvs/gene_grch38.test.json.gz").getFile())); +// +// CellBaseConfiguration cellBaseConfiguration = CellBaseConfiguration.load( +// HgvsTranscriptCalculatorTest.class.getClassLoader().getResourceAsStream("configuration.test.yaml"), +// 
CellBaseConfiguration.ConfigurationFileFormat.YAML); +// +//// dbAdaptorFactory = new MongoDBAdaptorFactory(cellBaseConfiguration); +//// genomeDBAdaptor = dbAdaptorFactory.getGenomeDBAdaptor("hsapiens", "GRCh37"); +// +// CellBaseManagerFactory cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); +// genomeManager = cellBaseManagerFactory.getGenomeManager("hsapiens", "GRCh38"); +// } catch (Exception e) { +// e.printStackTrace(); +// } +// } @Test @Disabled diff --git a/cellbase-lib/src/test/resources/configuration.test.yaml b/cellbase-lib/src/test/resources/configuration.test.yaml index b21b4cf489..927283d363 100644 --- a/cellbase-lib/src/test/resources/configuration.test.yaml +++ b/cellbase-lib/src/test/resources/configuration.test.yaml @@ -3,7 +3,7 @@ version: v5 apiVersion: "${project.version}" wiki: https://github.com/opencb/cellbase/wiki maintenanceFlagFile: "/tmp/maintenance" -maintainerContact: javier.lopez@genomicsengland.co.uk +maintainerContact: joaquin.tarraga@zettagenomics.com secretKey: "xPacig89igHSieEnveJEi4KCfdEslhmssC3vui1JJQGgDQ0y8v" databases: mongodb: @@ -109,10 +109,10 @@ species: - id: hsapiens scientificName: Homo sapiens assemblies: - - ensemblVersion: '82_37' - name: GRCh37 - ensemblVersion: '89_38' name: GRCh38 + - ensemblVersion: '82_37' + name: GRCh37 data: - genome - genome_info diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 0f5578209b..614328fdd1 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.5.0 + 5.7.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/GenericRestWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/GenericRestWSServer.java index 1fd334565b..47567929d5 100755 --- a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/GenericRestWSServer.java +++ 
b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/GenericRestWSServer.java @@ -199,8 +199,10 @@ protected int getDataRelease() throws CellBaseException { } } // If no data release is present in the query, then use the default data release - logger.info("No data release present in query: using the default data release '" + defaultDataRelease + "' for CellBase version" - + " '" + version + "'"); + if (!DONT_CHECK_SPECIES.equals(species)) { + logger.info("No data release present in query: using the default data release '" + defaultDataRelease + "' for CellBase version" + + " '" + version + "'"); + } return defaultDataRelease; } @@ -234,9 +236,9 @@ private void checkVersion() throws CellBaseException { // System.out.println("cellBaseConfiguration.getVersion() = " + cellBaseConfiguration.getVersion()); // System.out.println("version = " + version); // System.out.println("*************************************"); - if (!version.startsWith(cellBaseConfiguration.getVersion())) { - logger.error("Version '{}' does not match configuration '{}'", this.version, cellBaseConfiguration.getVersion()); - throw new CellBaseException("Version not valid: '" + version + "'"); + if (!uriInfo.getPath().contains("health") && !version.startsWith(cellBaseConfiguration.getVersion())) { + logger.error("URL version '{}' does not match configuration '{}'", this.version, cellBaseConfiguration.getVersion()); + throw new CellBaseException("URL version not valid: '" + version + "'"); } } diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/clinical/ClinicalWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/clinical/ClinicalWSServer.java index 8e057f8b19..3800d3bbbe 100644 --- a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/clinical/ClinicalWSServer.java +++ b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/clinical/ClinicalWSServer.java @@ -36,12 +36,10 @@ import static 
org.opencb.cellbase.core.ParamConstants.*; -/** - * Created by fjlopez on 06/12/16. - */ -@Path("/{apiVersion}/{species}/clinical") + +@Path("/{apiVersion}/{species}/clinical/variant") @Produces(MediaType.APPLICATION_JSON) -@Api(value = "Clinical", description = "Clinical RESTful Web Services API") +@Api(value = "Clinical Variants", description = "Clinical RESTful Web Services API") public class ClinicalWSServer extends GenericRestWSServer { private ClinicalManager clinicalManager; @@ -66,7 +64,7 @@ public ClinicalWSServer(@PathParam("apiVersion") @ApiParam(name = "apiVersion", } @GET - @Path("/variant/search") + @Path("/search") @ApiOperation(httpMethod = "GET", notes = "No more than 1000 objects are allowed to be returned at a time. " + DOT_NOTATION_NOTE, value = "Retrieves all clinical variants", response = Variant.class, responseContainer = "QueryResponse") @@ -127,7 +125,7 @@ public Response getAll() { } @GET - @Path("/variant/alleleOriginLabels") + @Path("/alleleOriginLabels") @ApiOperation(httpMethod = "GET", notes = "", value = "Retrieves all available allele origin labels", response = Variant.class, responseContainer = "QueryResponse") @@ -140,7 +138,7 @@ public Response getAlleleOriginLabels() { } @GET - @Path("/variant/modeInheritanceLabels") + @Path("/modeInheritanceLabels") @ApiOperation(httpMethod = "GET", notes = "", value = "Retrieves all available mode of inheritance labels", response = Variant.class, responseContainer = "QueryResponse") @@ -153,7 +151,7 @@ public Response getModeInheritanceLabels() { } @GET - @Path("/variant/clinsigLabels") + @Path("/clinsigLabels") @ApiOperation(httpMethod = "GET", notes = "", value = "Retrieves all available clinical significance labels", response = Variant.class, responseContainer = "QueryResponse") @@ -166,7 +164,7 @@ public Response getClinicalSignificanceLabels() { } @GET - @Path("/variant/consistencyLabels") + @Path("/consistencyLabels") @ApiOperation(httpMethod = "GET", notes = "", value = "Retrieves all 
available consistency labels", response = Variant.class, responseContainer = "QueryResponse") @@ -179,7 +177,7 @@ public Response getConsistencyLabels() { } @GET - @Path("/variant/type") + @Path("/type") @ApiOperation(httpMethod = "GET", notes = "", value = "Retrieves all available variant types", response = Variant.class, responseContainer = "QueryResponse") diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/clinical/PharmacogenomicsWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/clinical/PharmacogenomicsWSServer.java new file mode 100644 index 0000000000..983a45f739 --- /dev/null +++ b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/clinical/PharmacogenomicsWSServer.java @@ -0,0 +1,180 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.server.rest.clinical; + +import io.swagger.annotations.*; +import org.opencb.biodata.models.pharma.PharmaChemical; +import org.opencb.cellbase.core.api.PharmaChemicalQuery; +import org.opencb.cellbase.core.api.query.QueryException; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.core.utils.SpeciesUtils; +import org.opencb.cellbase.lib.managers.ClinicalManager; +import org.opencb.cellbase.lib.managers.PharmacogenomicsManager; +import org.opencb.cellbase.server.rest.GenericRestWSServer; + +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.*; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.UriInfo; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import static org.opencb.cellbase.core.ParamConstants.*; + + +@Path("/{apiVersion}/{species}/clinical/pharmacogenomics") +@Produces(MediaType.APPLICATION_JSON) +@Api(value = "Clinical Pharmacogenomics", description = "Clinical RESTful Web Services API") +public class PharmacogenomicsWSServer extends GenericRestWSServer { + + private ClinicalManager clinicalManager; + private PharmacogenomicsManager pharmacogenomicsManager; + + public PharmacogenomicsWSServer(@PathParam("apiVersion") @ApiParam(name = "apiVersion", value = VERSION_DESCRIPTION, + defaultValue = DEFAULT_VERSION) String apiVersion, + @PathParam("species") @ApiParam(name = "species", value = SPECIES_DESCRIPTION) String species, + @ApiParam(name = "assembly", value = ASSEMBLY_DESCRIPTION) @DefaultValue("") @QueryParam("assembly") + String assembly, + @ApiParam(name = "dataRelease", value = DATA_RELEASE_DESCRIPTION) @DefaultValue("0") + @QueryParam("dataRelease") int dataRelease, + @ApiParam(name = "token", value = DATA_ACCESS_TOKEN_DESCRIPTION) @DefaultValue("") @QueryParam("token") + String token, + 
@Context UriInfo uriInfo, @Context HttpServletRequest hsr) + throws QueryException, IOException, CellBaseException { + super(apiVersion, species, uriInfo, hsr); + if (assembly == null) { + assembly = SpeciesUtils.getDefaultAssembly(cellBaseConfiguration, species).getName(); + } + + clinicalManager = cellBaseManagerFactory.getClinicalManager(species, assembly); + pharmacogenomicsManager = cellBaseManagerFactory.getPharmacogenomicsManager(species, assembly); + } + + @GET + @Path("/search") + @ApiOperation(httpMethod = "GET", notes = "No more than 1000 objects are allowed to be returned at a time. " + + DOT_NOTATION_NOTE, + value = "Retrieves all chemicals/drugs", response = PharmaChemical.class, responseContainer = "QueryResponse") + @ApiImplicitParams({ + @ApiImplicitParam(name = "count", value = COUNT_DESCRIPTION, + required = false, dataType = "boolean", paramType = "query", defaultValue = "false", + allowableValues = "false,true"), +// @ApiImplicitParam(name = SOURCE_PARAM, value = SOURCE_DESCRIPTION, +// required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "name", value = "List of chemical/drug names, e.g.: warfarin. In order to get the list of chemical or" + + " drug names, please, call the endpoint pharmacogenomics/distinct?field=names", dataType = "java.util.List", + paramType = "query"), + @ApiImplicitParam(name = "type", value = "List of chemical/drug types, e.g.: Drug,Metabolite. In order to get the list of" + + " chemical or drug types, please, call the endpoint pharmacogenomics/distinct?field=types", + dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "variant", value = "List of variants (dbSNP IDs), e.g.: rs1429376,rs11191561. 
In order to get the list" + + " of variant IDs, please, call the endpoint endpoint pharmacogenomics/distinct?field=variants.variantId", + dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "haplotype", value = "List of haplotypes, e.g.: CYP2A6*1. In order to get the list of gene names," + + "please, call the endpoint endpoint pharmacogenomics/distinct?field=variants.haplotypes", dataType = "java.util.List", + paramType = "query"), + @ApiImplicitParam(name = "geneName", value = "List of gene names, e.g.: NT5C2,VKORC1. In order to get the list of gene names," + + "please, call the endpoint endpoint pharmacogenomics/distinct?field=variants.geneNames", dataType = "java.util.List", + paramType = "query"), + @ApiImplicitParam(name = "location", value = "List of chromosomic coordinates in the format: chromosome:position, e.g.:" + + " 10:103109774", dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "phenotype", value = "List of phenotypes, e.g.: Hemorrhage,Thrombosis. In order to get the list of" + + "phenotype values, please, call the endpoint pharmacogenomics/distinct?field=variants.phenotypes", + dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "phenotypeType", value = "List of phenotype categories (i.e., association phenotype), e.g.: Dosage," + + "Toxicity. In order to get the list of phenotype category values, please, call the endpoint pharmacogenomics/distinct" + + "?field=variants.phenotypeTypes", dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "confidence", value = "List of confidence values. 
Valid values: 1A, 1B, 2A, 2B, 3, 4", + dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "pubmedId", value = "List of evidence PubMed IDs, e.g.: 14765194", dataType = "java.util.List", + paramType = "query"), + @ApiImplicitParam(name = "exclude", value = EXCLUDE_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "include", value = INCLUDE_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "sort", value = SORT_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "order", value = ORDER_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query", + defaultValue = "", allowableValues="ASCENDING,DESCENDING"), + @ApiImplicitParam(name = "limit", value = LIMIT_DESCRIPTION, + required = false, defaultValue = DEFAULT_LIMIT, dataType = "java.util.List", + paramType = "query"), + @ApiImplicitParam(name = "skip", value = SKIP_DESCRIPTION, + required = false, defaultValue = DEFAULT_SKIP, dataType = "java.util.List", + paramType = "query") + }) + public Response getAll() { + try { + PharmaChemicalQuery query = new PharmaChemicalQuery(uriParams); + CellBaseDataResult queryResults = pharmacogenomicsManager.search(query); + + return createOkResponse(queryResults); + } catch (Exception e) { + return createErrorResponse(e); + } + } + + @GET + @Path("/{chemicals}/info") + @ApiOperation(httpMethod = "GET", value = "Get information about the specified chemical(s) or drug(s)", response = PharmaChemical.class, + responseContainer = "QueryResponse") + @ApiImplicitParams({ + @ApiImplicitParam(name = "exclude", value = EXCLUDE_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "include", value = INCLUDE_DESCRIPTION, + required = false, dataType = "java.util.List", paramType = "query") + }) + public Response 
getInfo(@PathParam("chemicals") @ApiParam(name = "chemicals", value = "Chemical/drug names", required = true) + String chemicals) { + try { + PharmaChemicalQuery pharmaQuery = new PharmaChemicalQuery(uriParams); + List> queryResults = pharmacogenomicsManager.info(Arrays.asList(chemicals.split(",")), + pharmaQuery, getDataRelease(), getToken()); + return createOkResponse(queryResults); + } catch (Exception e) { + return createErrorResponse(e); + } + } + + @GET + @Path("/distinct") + @ApiOperation(httpMethod = "GET", notes = "Gets a unique list of values, e.g. variants.location", + value = "Get a unique list of values for a given field.") + @ApiImplicitParams({ + @ApiImplicitParam(name = "type", value = "List of types", + required = false, dataType = "java.util.List", paramType = "query"), + @ApiImplicitParam(name = "gene", value = "List of gene names", + required = false, dataType = "java.util.List", paramType = "query"), + }) + public Response getUniqueValues(@QueryParam("field") @ApiParam(name = "field", required = true, + value = "Name of column to return, e.g. 
variants.location") String field) { + try { + copyToFacet("field", field); + PharmaChemicalQuery query = new PharmaChemicalQuery(uriParams); + CellBaseDataResult queryResults = pharmacogenomicsManager.distinct(query); + return createOkResponse(queryResults); + } catch (Exception e) { + return createErrorResponse(e); + } + } + +} diff --git a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java index f774d3ba5b..4ab8afc52a 100755 --- a/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java +++ b/cellbase-server/src/main/java/org/opencb/cellbase/server/rest/genomic/VariantWSServer.java @@ -94,10 +94,23 @@ public Response getHgvs(@PathParam("variants") @ApiParam(name = "variants", valu @ApiOperation(httpMethod = "GET", value = "FIXME: description needed", response = Map.class, responseContainer = "QueryResponse") public Response getNormalization(@PathParam("variants") @ApiParam(name = "variants", value = RS_IDS, - required = true) String id) { + required = true) String id, + @QueryParam("decompose") + @ApiParam(name = "decompose", + value = "Boolean to indicate whether input MNVs should be " + + "decomposed or not as part of the normalisation step.", + allowableValues = "false,true", + defaultValue = "false") Boolean decompose, + @QueryParam("leftAlign") + @ApiParam(name = "leftAlign", + value = "Boolean to indicate whether input ambiguous INDELS should be " + + "left aligned or not as part of the normalisation step.", + allowableValues = "false,true", + defaultValue = "false") Boolean leftAlign) { try { - CellBaseDataResult queryResults = variantManager.getNormalizationByVariant(id, getDataRelease()); + CellBaseDataResult queryResults = variantManager.getNormalizationByVariant(id, Boolean.TRUE.equals(decompose), + Boolean.TRUE.equals(leftAlign), getDataRelease()); return createOkResponse(queryResults); 
} catch (Exception e) { return createErrorResponse(e); @@ -136,15 +149,20 @@ public Response getAnnotationByVariantsPOST(@ApiParam(name = "variants", value = @ApiParam(name = "normalize", value = "Boolean to indicate whether input variants shall be " + "normalized or not. Normalization process does NOT " - + "include decomposing ", allowableValues = "false,true", - defaultValue = "false", required = false) Boolean normalize, - @QueryParam("skipDecompose") - @ApiParam(name = "skipDecompose", + + "include decomposing MNV nor left alignment", + allowableValues = "false,true", defaultValue = "false") Boolean normalize, + @QueryParam("decompose") + @ApiParam(name = "decompose", value = "Boolean to indicate whether input MNVs should be " - + "decomposed or not as part of the normalisation step." - + " MNV decomposition is strongly encouraged.", + + "decomposed or not as part of the normalisation step.", + allowableValues = "false,true", + defaultValue = "false") Boolean decompose, + @QueryParam("leftAlign") + @ApiParam(name = "leftAlign", + value = "Boolean to indicate whether input ambiguous INDELS should be " + + "left aligned or not as part of the normalisation step.", allowableValues = "false,true", - defaultValue = "false", required = false) Boolean skipDecompose, + defaultValue = "false") Boolean leftAlign, @QueryParam("ignorePhase") @ApiParam(name = "ignorePhase", value = "Boolean to indicate whether phase data should be " @@ -180,7 +198,7 @@ public Response getAnnotationByVariantsPOST(@ApiParam(name = "variants", value = @ApiParam(name = "checkAminoAcidChange", value = "true/false to specify whether variant match in the clinical variant" + " collection should also be performed at the aminoacid change level", - allowableValues = "false,true", + allowableValues = "false,true", defaultValue = "false", required = false) Boolean checkAminoAcidChange, @QueryParam("consequenceTypeSource") @ApiParam(name = "consequenceTypeSource", value = "Gene set, either ensembl 
(default) " @@ -188,9 +206,17 @@ public Response getAnnotationByVariantsPOST(@ApiParam(name = "variants", value = required = false) String consequenceTypeSource ) { + try { + checkNormalizationConfig(); + } catch (IllegalArgumentException e) { + return createErrorResponse(e); + } + + return getAnnotationByVariant(variants, normalize, - skipDecompose, + decompose, + leftAlign, ignorePhase, phased, imprecise, @@ -231,10 +257,14 @@ public Response getAnnotationByVariantsGET(@PathParam("variants") @ApiParam(name = "normalize", value = NORMALISE, allowableValues = "false,true", defaultValue = "true", required = false) Boolean normalize, - @QueryParam("skipDecompose") - @ApiParam(name = "skipDecompose", value = SKIP_DECOMPOSE, + @QueryParam("decompose") + @ApiParam(name = "decompose", value = DECOMPOSE, allowableValues = "false,true", - defaultValue = "false", required = false) Boolean skipDecompose, + defaultValue = "false") Boolean decompose, + @QueryParam("leftAlign") + @ApiParam(name = "leftAlign", value = LEFT_ALIGN, + allowableValues = "false,true", + defaultValue = "false") Boolean leftAlign, @QueryParam("ignorePhase") @ApiParam(name = "ignorePhase", value = IGNORE_PHASE, allowableValues = "false,true", @@ -265,10 +295,18 @@ public Response getAnnotationByVariantsGET(@PathParam("variants") @ApiParam(name = "consequenceTypeSource", value = "Gene set, either ensembl (default) " + "or refseq", allowableValues = "ensembl,refseq", allowMultiple = true, defaultValue = "ensembl", required = false) String consequenceTypeSource + ) { + try { + checkNormalizationConfig(); + } catch (IllegalArgumentException e) { + return createErrorResponse(e); + } + return getAnnotationByVariant(variants, normalize, - skipDecompose, + decompose, + leftAlign, ignorePhase, phased, imprecise, @@ -278,9 +316,26 @@ public Response getAnnotationByVariantsGET(@PathParam("variants") consequenceTypeSource); } + private void checkNormalizationConfig() throws IllegalArgumentException { + if 
(uriParams.containsKey("skipDecompose")) { + throw new IllegalArgumentException("Param 'skipDecompose' is not supported anymore. Please, use 'decompose' instead"); + } + if (uriParams.containsKey("normalize")) { + if (!Boolean.parseBoolean(uriParams.get("normalize"))) { + if (uriParams.containsKey("decompose") && Boolean.parseBoolean(uriParams.get("decompose"))) { + throw new IllegalArgumentException("Incompatible parameter usage: 'normalize'=false and 'decompose'=true"); + } + if (uriParams.containsKey("leftAlign") && Boolean.parseBoolean(uriParams.get("leftAlign"))) { + throw new IllegalArgumentException("Incompatible parameter usage: 'normalize'=false and 'leftAlign'=true"); + } + } + } + } + private Response getAnnotationByVariant(String variants, Boolean normalize, - Boolean skipDecompose, + Boolean decompose, + Boolean leftAlign, Boolean ignorePhase, @Deprecated Boolean phased, Boolean imprecise, @@ -294,8 +349,9 @@ private Response getAnnotationByVariant(String variants, String consequenceTypeSources = (StringUtils.isEmpty(uriParams.get("consequenceTypeSource")) ? 
consequenceTypeSource : uriParams.get("consequenceTypeSource")); List> queryResults = variantManager.getAnnotationByVariant(query.toQueryOptions(), - variants, normalize, skipDecompose, ignorePhase, phased, imprecise, svExtraPadding, cnvExtraPadding, + variants, normalize, decompose, leftAlign, ignorePhase, phased, imprecise, svExtraPadding, cnvExtraPadding, checkAminoAcidChange, consequenceTypeSources, getDataRelease(), getToken()); + return createOkResponse(queryResults); } catch (Exception e) { return createErrorResponse(e); diff --git a/pom.xml b/pom.xml index 65383a0679..3096613ac3 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.5.0 + 5.7.0-SNAPSHOT pom CellBase project @@ -23,8 +23,8 @@ ${project.version} - 4.9.0 - 2.9.0 + 4.11.0-SNAPSHOT + 2.11.0-SNAPSHOT 0.1.0 2.11.4 2.30.1 @@ -339,6 +339,20 @@ maven-site-plugin 3.7.1 + + org.codehaus.mojo + build-helper-maven-plugin + 3.2.0 + + + parse-version + initialize + + parse-version + + + + @@ -427,6 +441,27 @@ true + v${parsedVersion.majorVersion} + cellbase-${project.version} + ${project.basedir}/../build + localhost:27017 + cellbase + cellbase + admin + SCRAM-SHA-256 + secondaryPreferred + 9090 + + + + + + default-config-test + + false + + + v${parsedVersion.majorVersion}.${parsedVersion.minorVersion} cellbase-${project.version} ${project.basedir}/../build localhost:27017