Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prioritize scanning of projects #472

Merged
merged 1 commit into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion lib/MirrorCache/Schema/ResultSet/Project.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,22 @@ use strict;
use warnings;

use base 'DBIx::Class::ResultSet';
use Mojo::File qw(path);

# Stamp a project as "sync scheduled just now".
#
# Sets project.db_sync_last to the database's CURRENT_TIMESTAMP(3) for the row
# whose id equals $project_id. The statement is run on the raw DBI handle of
# this result set's schema storage.
#
# Returns whatever the statement handle's execute() returns.
sub mark_scheduled {
    my ($self, $project_id) = @_;

    my $dbh = $self->result_source->schema->storage->dbh;

    # Plain (non-indenting) heredoc: the terminator must stay at column 0.
    my $sth = $dbh->prepare(<< "END_SQL");
update project
set db_sync_last = CURRENT_TIMESTAMP(3)
where id = ?
END_SQL

    return $sth->execute($project_id);
}


1;
43 changes: 36 additions & 7 deletions lib/MirrorCache/Task/FolderSync.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ sub register {
}

sub _sync {
my ($app, $job, $path) = @_;
my ($app, $job, $path, $recurs) = @_;
return $job->fail('Empty path is not allowed') unless $path;
return $job->fail('Trailing slash is forbidden') if '/' eq substr($path,-1) && $path ne '/';

Expand Down Expand Up @@ -100,15 +100,26 @@ sub _sync {
}
};

my @subfolders;

if ($folder) {
$update_db_last->();
} else {
my $count = 0;
my $sub = sub {
my ($file, $size, $mmode, $mtime, $target) = @_;
$file = $file . '/' if !$root->is_remote && $path ne '/' && $root->is_dir("$path/$file");
$file = $file . '/' if !$root->is_remote && $path eq '/' && $root->is_dir("$path$file");
$file = $file . '/' if $mmode && $root->is_remote && $mmode < 1000;
my $subfolder;
if ($root->is_remote) {
$subfolder = 1 if $mmode && $mmode < 1000;
} else {
if ($path eq '/') {
$subfolder = 1 if $root->is_dir("$path$file");
} else {
$subfolder = 1 if $root->is_dir("$path/$file");
}
}
push @subfolders, $file if $subfolder && $recurs;
$file = $file . '/' if $subfolder;
$count = $count+1;
$schema->resultset('File')->create({folder_id => $folder->id, name => $file, size => $size, mtime => $mtime, target => $target});
$obsrelease->next_file($file, $mtime) if $obsrelease;
Expand Down Expand Up @@ -138,6 +149,10 @@ sub _sync {
}
$schema->resultset('Folder')->request_scan($otherFolder->id) if $otherFolder && ($count || !$otherFolder->scan_requested);
$schema->resultset('Rollout')->add_rollout($proj->{project_id}, $obsrelease->versionmtime, $obsrelease->version, $obsrelease->versionfilename, $proj_prefix) if $obsrelease && $obsrelease->versionfilename;

for my $subfolder (@subfolders) {
$minion->enqueue('folder_sync' => ["$path/$subfolder"]);
}
return;
};
return $job->fail("Couldn't create folder $path in DB") unless $folder && $folder->id;
Expand All @@ -160,9 +175,18 @@ sub _sync {
my $cnt = 0, my $updated = 0;
my $sub = sub {
my ($file, $size, $mmode, $mtime, $target) = @_;
$file = $file . '/' if !$root->is_remote && $path ne '/' && $root->is_dir("$path/$file");
$file = $file . '/' if !$root->is_remote && $path eq '/' && $root->is_dir("$path$file");
$file = $file . '/' if $mmode && $root->is_remote && $mmode < 1000;
my $subfolder;
if ($root->is_remote) {
$subfolder = 1 if $mmode && $mmode < 1000;
} else {
if ($path eq '/') {
$subfolder = 1 if $root->is_dir("$path$file");
} else {
$subfolder = 1 if $root->is_dir("$path/$file");
}
}
push @subfolders, $file if $subfolder && $recurs;
$file = $file . '/' if $subfolder;
if ($dbfileids{$file}) {
my $id = delete $dbfileidstodelete{$file};
if (
Expand Down Expand Up @@ -226,6 +250,11 @@ sub _sync {
$otherFolder->update({sync_last => \"CURRENT_TIMESTAMP(3)", sync_scheduled => \'coalesce(sync_scheduled, CURRENT_TIMESTAMP(3))'}) if $otherFolder;
}
$schema->resultset('Rollout')->add_rollout($proj->{project_id}, $obsrelease->versionmtime, $obsrelease->version, $obsrelease->versionfilename, $proj_prefix) if $obsrelease && $obsrelease->versionfilename;


for my $subfolder (@subfolders) {
$minion->enqueue('folder_sync' => ["$path/$subfolder"]);
}
}

1;
37 changes: 37 additions & 0 deletions lib/MirrorCache/Task/MirrorScanSchedule.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,14 @@ sub register {
}

# Scheduling intervals, all in seconds, each overridable through the
# environment.

# General rescan interval (default 24 hours).
my $RESCAN = int($ENV{MIRRORCACHE_RESCAN_INTERVAL} // 24 * 60 * 60);
# Rescan interval for folders located under a project path (default 4 hours);
# a folder qualifies once its scan_requested is older than this.
my $PROJECT_RESCAN = int($ENV{MIRRORCACHE_PROJECT_RESCAN_INTERVAL} // 4 * 60 * 60);

my $DELAY = int($ENV{MIRRORCACHE_SCHEDULE_RETRY_INTERVAL} // 10);
$DELAY = $DELAY+1 if $DELAY; # period should differ from the same in FolderScanSchedule to avoid deadlocks

# General expiry window (default 14 days).
my $EXPIRE = int($ENV{MIRRORCACHE_SCHEDULE_EXPIRE_INTERVAL} // 14 * 24 * 60 * 60);
# Expiry window for project folders (default 2 days): only folders whose
# "wanted" timestamp is newer than this are prioritised.
# It has its own variable so it can be tuned independently of $EXPIRE; the
# generic variable is still honoured as a fallback so existing deployments
# keep their current behaviour.
my $PROJECT_EXPIRE = int(
    $ENV{MIRRORCACHE_PROJECT_SCHEDULE_EXPIRE_INTERVAL}
    // $ENV{MIRRORCACHE_SCHEDULE_EXPIRE_INTERVAL}
    // 2 * 24 * 60 * 60
);

my $RECKLESS=int($ENV{MIRRORCACHE_RECKLESS} // 0);

# Reckless mode drops the general rescan interval to zero.
$RESCAN=0 if $RECKLESS;
Expand Down Expand Up @@ -57,6 +62,21 @@ sub _run {
my @folders;

if ($schema->pg) {
# prioritize folders belonging to projects
$schema->storage->dbh->prepare(
"update folder set scan_requested = CURRENT_TIMESTAMP(3) where id in
(
select folder.id from folder
join project on folder.path like concat(project.path, '%')
where scan_requested < now() - interval '$PROJECT_RESCAN second' and
scan_requested < scan_scheduled and
wanted > now() - interval '$PROJECT_EXPIRE second'
and project.db_sync_every > 0
order by scan_requested limit 20
)"
)->execute();

# now the rest
$schema->storage->dbh->prepare(
"update folder set scan_requested = CURRENT_TIMESTAMP(3) where id in
(
Expand All @@ -75,6 +95,23 @@ if ($schema->pg) {
rows => $limit
});
} else {
# prioritize folders belonging to projects
$schema->storage->dbh->prepare(
"update folder f
join
(
select folder.id from folder
join project on folder.path like concat(project.path, '%')
where scan_requested < date_sub(CURRENT_TIMESTAMP(3), interval $PROJECT_RESCAN second) and
scan_requested < scan_scheduled and
wanted > date_sub(CURRENT_TIMESTAMP(3), interval $PROJECT_EXPIRE second)
and project.db_sync_every > 0
order by scan_requested limit 20
) x ON x.id = f.id
set scan_requested = CURRENT_TIMESTAMP(3)"
)->execute();

# now the rest
$schema->storage->dbh->prepare(
"update folder f
join
Expand Down
74 changes: 74 additions & 0 deletions lib/MirrorCache/Task/ProjectSyncSchedule.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (C) 2024 SUSE LLC
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, see <http://www.gnu.org/licenses/>.

package MirrorCache::Task::ProjectSyncSchedule;
use Mojo::Base 'Mojolicious::Plugin';

use DateTime;

# Mojolicious plugin hook: registers the Minion task "project_sync_schedule".
# The task handler forwards the arguments it receives to _run, with the
# application object prepended.
sub register {
    my ($self, $app) = @_;

    my $handler = sub { _run($app, @_) };
    return $app->minion->add_task(project_sync_schedule => $handler);
}

# Seconds to wait before the schedule job retries itself; 0 disables the retry
# and lets the job finish after a single pass.
# ($RESYNC, $EXPIRE and $RECKLESS used to be declared here but nothing in this
# file read them, so they were dropped.)
my $DELAY = int($ENV{MIRRORCACHE_SCHEDULE_RETRY_INTERVAL} // 10);

# Minion task body: enqueue a recursive folder_sync job for every project that
# is due for a database sync.
#
# Arguments:
#   $app  - application object; must provide ->minion and ->schema
#   $job  - the running Minion job
#   $once - optional; when it equals 'once' the job finishes instead of
#           re-scheduling itself
#
# Only projects with db_sync_every > 0 are considered. The number of projects
# scheduled in this pass is stored on the job as the note "count".
sub _run {
    my ($app, $job, $once) = @_;

    my $minion = $app->minion;

    # prevent multiple scheduling tasks to run in parallel
    return $job->finish('Previous project sync schedule job is still active')
      unless my $guard = $minion->guard('_project_sync_schedule', 60);

    my $schema = $app->schema;
    my $rs     = $schema->resultset('Project');

    # A project needs a sync once db_sync_last + db_sync_every (hours) is no
    # longer in the future, or when it has never been synced (db_sync_last is
    # NULL, so coalesce falls back to now()).
    # The comparison must be ">=": with "<=" a project would be re-enqueued on
    # every pass while it is NOT yet due, and skipped once it is overdue.
    my $needsync_sql = $schema->pg
      ? "now() >= coalesce(me.db_sync_last + me.db_sync_every * interval '1 hour', now())"
      : "now() >= coalesce(me.db_sync_last + interval me.db_sync_every hour, now())";

    my @projects = $rs->search({
        db_sync_every => { '>', 0 },
    }, {
        '+select' => [ \$needsync_sql ],
        '+as'     => [ 'needsync' ],
    });

    my $cnt = 0;

    for my $project (@projects) {
        next unless $project->get_column('needsync');

        my $path = $project->path;
        $rs->mark_scheduled($project->id);
        # Minion's enqueue takes a single options hash; a second hash would be
        # silently ignored, so priority and notes have to be passed together.
        $minion->enqueue('folder_sync' => [$path, 1] => {priority => 2, notes => {$path => 1}});
        $cnt++;
    }
    $job->note(count => $cnt);

    return $job->finish unless $DELAY;
    return $job->finish if $once && $once eq 'once';
    return $job->retry({delay => $DELAY});
}

1;
3 changes: 2 additions & 1 deletion lib/MirrorCache/WebAPI/Plugin/Backstage.pm
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ sub new {
}

my @permanent_jobs =
qw(folder_sync_schedule_from_misses folder_sync_schedule mirror_scan_schedule_from_misses mirror_scan_schedule_from_path_errors mirror_scan_schedule cleanup stat_agg_schedule mirror_check_from_stat report);
qw(folder_sync_schedule_from_misses folder_sync_schedule mirror_scan_schedule_from_misses mirror_scan_schedule_from_path_errors mirror_scan_schedule project_sync_schedule cleanup stat_agg_schedule mirror_check_from_stat report);

sub register_tasks {
my $self = shift;
Expand Down Expand Up @@ -64,6 +64,7 @@ sub register_tasks {
qw(MirrorCache::Task::FolderSyncSchedule),
qw(MirrorCache::Task::FolderSync),
qw(MirrorCache::Task::FolderTree),
qw(MirrorCache::Task::ProjectSyncSchedule),
qw(MirrorCache::Task::Cleanup),
qw(MirrorCache::Task::Report),
qw(MirrorCache::Task::StatAggSchedule),
Expand Down
54 changes: 54 additions & 0 deletions t/environ/14-project-sync.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!lib/test-in-container-environ.sh
# Environ test for project sync scheduling: one MirrorCache instance (mc) and
# five mirror instances (ap4..ap8). Checks that after project_sync_schedule and
# mirror_scan_schedule have run, a file under /project1 is served via redirect.
set -ex

mc=$(environ mc $(pwd))

$mc/start

ap8=$(environ ap8)
ap7=$(environ ap7)
ap6=$(environ ap6)
ap5=$(environ ap5)
ap4=$(environ ap4)

# Create the same tree on mc and on every mirror: three top-level folders and
# three project directories, each folder holding file1.1.dat and file2.1.dat.
for x in $mc $ap7 $ap8 $ap6 $ap5 $ap4; do
mkdir -p $x/dt/{folder1,folder2,folder3}
mkdir -p $x/dt/project1/{folder1,folder2,folder3}
mkdir -p $x/dt/project2/{folder1,folder2,folder3}
mkdir -p $x/dt/project3/{folder1,folder2,folder3}
echo $x/dt/{folder1,folder2,folder3}/{file1.1,file2.1}.dat | xargs -n 1 touch
echo $x/dt/project1/{folder1,folder2,folder3}/{file1.1,file2.1}.dat | xargs -n 1 touch
echo $x/dt/project2/{folder1,folder2,folder3}/{file1.1,file2.1}.dat | xargs -n 1 touch
echo $x/dt/project3/{folder1,folder2,folder3}/{file1.1,file2.1}.dat | xargs -n 1 touch
done

$ap4/start
$ap5/start
$ap6/start
$ap7/start
$ap8/start

# Make the mirrors diverge for project1:
#   ap7 loses a single file, ap5 loses all of folder2, ap4 loses the project.
rm $ap7/dt/project1/folder2/file2.1.dat
rm -r $ap5/dt/project1/folder2/
rm -r $ap4/dt/project1/

# Register the five mirrors: ap6 and ap7 in us/na, ap8 in de/eu,
# ap5 in cn/as and ap4 in jp/as.
$mc/sql "insert into server(hostname,urldir,enabled,country,region) select '$($ap6/print_address)','','t','us','na'"
$mc/sql "insert into server(hostname,urldir,enabled,country,region) select '$($ap7/print_address)','','t','us','na'"
$mc/sql "insert into server(hostname,urldir,enabled,country,region) select '$($ap8/print_address)','','t','de','eu'"
$mc/sql "insert into server(hostname,urldir,enabled,country,region) select '$($ap5/print_address)','','t','cn','as'"
$mc/sql "insert into server(hostname,urldir,enabled,country,region) select '$($ap4/print_address)','','t','jp','as'"

# Three projects:
#   proj1  - db_sync_every 1, db_sync_last left unset
#   proj 2 - db_sync_every 0
#   proj 3 - db_sync_every 1, db_sync_last set to one hour ago
$mc/sql "insert into project(name,path,db_sync_every) select 'proj1','/project1', 1"
$mc/sql "insert into project(name,path,db_sync_every) select 'proj 2','/project2', 0"
$mc/sql "insert into project(name,path,db_sync_every,db_sync_last) select 'proj 3','/project3', 1, now() - interval '1 hour'"

# Run each scheduler a single time ("once" argument).
# NOTE(review): backstage/shoot presumably processes the jobs queued so far - confirm.
$mc/backstage/job -e project_sync_schedule -a '["once"]'
$mc/backstage/shoot
$mc/backstage/job -e mirror_scan_schedule -a '["once"]'
$mc/backstage/shoot

# Request a project1 file as a client from jp (ap4's country; ap4 no longer has
# project1). The first call only shows the headers in the log; the second one
# fails the test unless the response is a 302 redirect.
$mc/curl -I /download/project1/folder1/file1.1.dat?COUNTRY=jp
$mc/curl -I /download/project1/folder1/file1.1.dat?COUNTRY=jp | grep '302 Found'

echo success
Loading