Skip to content

Commit

Permalink
Add --dump-as-json to pubmed_util.pl for debugging
Browse files Browse the repository at this point in the history
Refs #1038
  • Loading branch information
kimrutherford committed Dec 14, 2023
1 parent 71ae2a7 commit d942a97
Showing 1 changed file with 28 additions and 9 deletions.
37 changes: 28 additions & 9 deletions script/pubmed_util.pl
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@
use File::Basename;
use lib qw(lib);
use GDBM_File;
use Storable qw(thaw);
use JSON;

use PomBase::Config;
use PomBase::Chado;
use PomBase::Chado::PubmedUtil;

my $dry_run = 0;
my $do_fields = 0;
my $dump_json = 0;
my $do_help = 0;
my $verbose = 0;

Expand All @@ -32,42 +35,58 @@

# Parse the command-line switches; show the usage message if parsing fails.
# "dump-as-json:s" uses the ":s" form, i.e. a string value that is optional
# as far as Getopt::Long is concerned.  If the value is omitted, $dump_json
# is set to the empty string, which is false in the later option checks.
if (!GetOptions("dry-run|d" => \$dry_run,
"add-missing-fields|f" => \$do_fields,
"dump-as-json:s" => \$dump_json,
"help|h" => \$do_help,
"verbose|v" => \$verbose)) {
usage();
}

sub usage
{
die "$0: needs six arguments:
<config_file> <host> <database> <user> <password> --add-missing-fields
die "usage:
$0 <config_file> <host> <database> <user> <password> <options> --add-missing-fields
OR
$0 <config_file> <host> <database> <user> <password> <options> --dump-as-json <pmid_id>
options:
--add-missing-fields (or -f): access pubmed to add missing title, abstract,
authors, etc. to publications in the publications table (pub)
--dump-as-json: write the details about <pmid_id> as JSON to stdout
";
}

if ($do_help || !$do_fields || @ARGV > 0) {
if ($do_help || !($do_fields || $dump_json) || @ARGV > 0) {
usage();
}

my $chado = PomBase::Chado::db_connect($host, $database, $username, $password);

my $guard = $chado->txn_scope_guard();

my $result = GetOptions ("add-missing-fields|f" => \$do_fields,
"help|h" => \$do_help);
tie my %pubmed_cache, 'GDBM_File', 'pubmed_cache.gdbm', &GDBM_WRCREAT, 0640;

if ($do_fields) {
tie my %pubmed_cache, 'GDBM_File', 'pubmed_cache.gdbm', &GDBM_WRCREAT, 0640;
my $guard = $chado->txn_scope_guard();

my $pubmed_util = PomBase::Chado::PubmedUtil->new(chado => $chado, config => $config,
pubmed_cache => \%pubmed_cache);
my ($missing_count, $loaded_count) = $pubmed_util->add_missing_fields();

print "$missing_count publications have missing fields\n";
print "details added for $loaded_count publications\n";

$guard->commit() unless $dry_run;
}

$guard->commit() unless $dry_run;
# --dump-as-json <pmid_id>: debugging aid that prints the cached details for
# one publication as JSON on stdout.
if ($dump_json) {
# The option value is used directly as the key into the tied pubmed cache.
my $uniquename = $dump_json;
my $raw_cached = $pubmed_cache{$uniquename};
if (defined $raw_cached) {
# The cached value is passed to Storable's thaw() to get the data back
# before encoding it.
my $pub_details = thaw($raw_cached);

# allow_nonref lets the encoder accept a value that is not a hash or
# array reference.
my $json = JSON->new()->allow_nonref();

print $json->encode($pub_details);
} else {
# Nothing stored under this key: report on stderr, print nothing to stdout.
warn "$uniquename is not cached\n";
}
}

0 comments on commit d942a97

Please sign in to comment.