From 9385abd464ea19e2936b27fb4e01dc4afcd84418 Mon Sep 17 00:00:00 2001 From: Christopher Dilks Date: Tue, 7 Jan 2025 19:35:16 -0500 Subject: [PATCH] ci: add more tests (#65) --- .github/workflows/ci.yml | 42 +++++++++++---------- util/chargeSumRuns.groovy | 2 +- util/syncCheck.groovy | 42 --------------------- util/syncCheck.rb | 77 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 62 deletions(-) delete mode 100644 util/syncCheck.groovy create mode 100755 util/syncCheck.rb diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03ee898..cc784fa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,20 +28,16 @@ jobs: id: datasets working-directory: qadb run: | - ls -d pass*/* | jq -Rs '{"dataset": split("\n")[:-1]}' > list.json - echo "### List of Datasets" >> $GITHUB_STEP_SUMMARY - echo '```json' >> $GITHUB_STEP_SUMMARY - cat list.json | xargs -0 -I{} echo {} >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY - echo datasets=$(jq -c . list.json) >> $GITHUB_OUTPUT + ls -d pass*/* | jq -Rs '{"dataset": split("\n")[:-1]}' | tee datasets.json + echo datasets=$(jq -c . datasets.json) >> $GITHUB_OUTPUT # check consistency between Groovy and C++ APIs - groovy_vs_cpp: + test_dataset: needs: - get_datasets runs-on: ubuntu-latest strategy: - fail-fast: true + fail-fast: false matrix: ${{ fromJson(needs.get_datasets.outputs.datasets) }} steps: - name: checkout @@ -57,21 +53,29 @@ jobs: source environ.sh echo "QADB=${QADB}" >> $GITHUB_ENV echo "JYPATH=${JYPATH}" >> $GITHUB_ENV - - name: compile_cpp_tests + - name: make sure all the table files exist # since pre-commit.ci auto-fix bot will not `git add` *new* files, we fall back to checking their existence here + working-directory: qadb/${{ matrix.dataset }} run: | - cd srcC/tests - make - - name: test_diff_groovy_cpp + for f in qaTree.json.table miscTable.md; do + if [ !
-f $f ]; then + echo "missing table file '$f'; run pre-commit hook manually and commit new file(s)" + exit 1 + fi + done + - name: sync check + run: stdbuf -i0 -e0 -o0 util/syncCheck.rb ${{ matrix.dataset }} + - name: diff the Groovy and C++ QADB dumps run: | - tests/test_diffGroovyCpp.loop.sh ${{matrix.dataset}} - - name: concatenate_artifacts + make -C srcC/tests + tests/test_diffGroovyCpp.loop.sh ${{ matrix.dataset }} + - name: concatenate artifacts id: artifacts run: | - mkdir -p artifacts/${{matrix.dataset}} + mkdir -p artifacts/${{ matrix.dataset }} for lang in cpp groovy ; do - cat tmp/${lang}*.out > artifacts/${{matrix.dataset}}/${lang}.txt ; + cat tmp/${lang}*.out > artifacts/${{ matrix.dataset }}/${lang}.txt ; done - echo "artifact_name=groovy_vs_cpp__$(echo ${{matrix.dataset}} | sed 's;/;_;g')" | tee -a $GITHUB_OUTPUT + echo "artifact_name=groovy_vs_cpp__$(echo ${{ matrix.dataset }} | sed 's;/;_;g')" | tee -a $GITHUB_OUTPUT - name: upload_artifacts uses: actions/upload-artifact@v4 with: @@ -79,10 +83,10 @@ jobs: retention-days: 3 path: artifacts/* - # report status for github status check (successful only if all `groovy_vs_cpp` jobs pass) + # report status for github status check (successful only if all `test_dataset` jobs pass) report: runs-on: ubuntu-latest needs: - - groovy_vs_cpp + - test_dataset steps: - run: echo success diff --git a/util/chargeSumRuns.groovy b/util/chargeSumRuns.groovy index 90c3017..e4c0639 100644 --- a/util/chargeSumRuns.groovy +++ b/util/chargeSumRuns.groovy @@ -1,6 +1,6 @@ // calculate total analyzed charge for a run period, // with specified QA cuts enabled -// note: if syncCheck.groovy errors are present in the run range, +// note: if syncCheck.rb errors are present in the run range, // the final charge value might be a bit wrong... 
import org.jlab.io.hipo.HipoDataSource diff --git a/util/syncCheck.groovy b/util/syncCheck.groovy deleted file mode 100644 index a396ca6..0000000 --- a/util/syncCheck.groovy +++ /dev/null @@ -1,42 +0,0 @@ -// highlights where we have a QADB syncing problem: -// eventnumMax of DST file N is larger than eventnumMin of DST file N+1 -// (more correctly, of N+5, since these are 5-files) - - -// open QADB -import clasqa.QADB -QADB qa = new QADB("latest") - - -def printSep - -// loop through qaTree runs (sorted by run number) -qa.getQaTree().sort{a,b -> - a.key.toInteger() <=> b.key.toInteger() -}.each{ runnum,runTree -> - printSep = false - - // loop through run's files (sorted by file number) - runTree.sort{c,d -> - c.key.toInteger() <=> d.key.toInteger() - }.each { filenum,fileTree -> - def evnumMin = fileTree['evnumMin'] - def evnumMax = fileTree['evnumMax'] - //println "CHECK: $runnum $filenum $evnumMin $evnumMax" - def filenumNxt = filenum.toInteger() + 5 - if(runTree["$filenumNxt"]!=null) { - def fileTreeNxt = runTree["$filenumNxt"] - def evnumMinNxt = fileTreeNxt['evnumMin'] - def evnumMaxNxt = fileTreeNxt['evnumMax'] - if( evnumMax > evnumMinNxt) { - def overlap = evnumMax - evnumMinNxt - println "SYNC ERROR: runnum=$runnum" - println " file $filenum\tevnumMin = $evnumMin\tevnumMax = $evnumMax" - println " file $filenumNxt\tevnumMin = $evnumMinNxt\tevnumMax = $evnumMaxNxt" - println " overlap = $overlap" - printSep = true - } - } - } - if(printSep) println "==================================" -} diff --git a/util/syncCheck.rb b/util/syncCheck.rb new file mode 100755 index 0000000..8049f0e --- /dev/null +++ b/util/syncCheck.rb @@ -0,0 +1,77 @@ +#!/usr/bin/env ruby +# highlights where we have a QADB syncing problem: +# eventnumMax of bin N is larger than eventnumMin of bin N+1 + +require 'json' + +if ARGV.empty? + puts "USAGE: #{$0} [dataset]" + exit(2) +end +raise 'source environment variables first' if ENV['QADB'].nil? 
+dataset = ARGV.first + +qa_tree_file = File.join ENV['QADB'], 'qadb', dataset.split('/'), 'qaTree.json' +raise "#{qa_tree_file} does not exist" unless File.exist? qa_tree_file +qa_tree = JSON.load_file qa_tree_file + +# true if any issue found +found_issue_anywhere = false + +# loop over runs +qa_tree.sort{ |a,b| a.first.to_i <=> b.first.to_i }.each do |runnum, run_tree| + + binnum_prev = nil + found_issue_in_run = false + + # loop over QA bins + run_tree.sort{ |a,b| a.first.to_i <=> b.first.to_i }.each do |binnum, bin_tree| + + # check that min event number <= max + evnumMin = bin_tree['evnumMin'] + evnumMax = bin_tree['evnumMax'] + if evnumMin > evnumMax + $stderr.puts "ERROR run=#{runnum} bin=#{binnum}: evnumMin > evnumMax: #{evnumMin} > #{evnumMax}" + found_issue_anywhere = true + found_issue_in_run = true + end + + # check for overlap with previous bin + unless binnum_prev.nil? + evnumMin_prev = run_tree[binnum_prev]['evnumMin'] + evnumMax_prev = run_tree[binnum_prev]['evnumMax'] + if evnumMax_prev > evnumMin + $stderr.puts """SYNC ERROR: run=#{runnum} bin=#{binnum}: previous bin's evnumMax > this bin's evnumMin: + prev bin: binnum=#{binnum_prev}\tevnumMin=#{evnumMin_prev}\tevnumMax=#{evnumMax_prev} + this bin: binnum=#{binnum}\tevnumMin=#{evnumMin}\tevnumMax=#{evnumMax}""" + found_issue_anywhere = true + found_issue_in_run = true + end + end + + binnum_prev = binnum + + end + + puts "RUN #{runnum} #{found_issue_in_run ? "has issues (see stderr)" : "is okay"}" +end + +if found_issue_anywhere + if [ # datasets which used DST 5-files as QA bins + "pass1/rga_fa18_inbending", + "pass1/rga_fa18_outbending", + "pass1/rga_sp19", + "pass1/rgb_fa19", + "pass1/rgb_sp19", + "pass1/rgb_wi20", + "pass1/rgk_fa18_6.5GeV", + "pass1/rgk_fa18_7.5GeV", + "pass1/rgm_fa21", + ].include?
dataset + $stderr.puts "WARNING: this dataset was done using DST 5-files as QA bins, which inherently causes SYNC ERRORS; now exitting with 0" + exit 0 + else + $stderr.puts "ERROR: this dataset should not have any errors" + exit 1 + end +end