diff --git a/.github/workflows/thefirst.yml b/.github/workflows/thefirst.yml index 0c7ae74..ea58134 100644 --- a/.github/workflows/thefirst.yml +++ b/.github/workflows/thefirst.yml @@ -11,7 +11,7 @@ jobs: matrix: # os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] # align-it in conda is only for linux 64 bit https://anaconda.org/bioconda/align_it/files - os: ['ubuntu-16.04', 'ubuntu-18.04', 'ubuntu-20.04'] + os: ['ubuntu-18.04', 'ubuntu-20.04'] runs-on: ${{ matrix.os }} steps: - name: Checkout diff --git a/annapurna.py b/annapurna.py index 3e2ce98..883c599 100755 --- a/annapurna.py +++ b/annapurna.py @@ -1266,7 +1266,7 @@ def scoreLoop(infile, outputFilename, models, sdffile, ClusteringMethod, Cluster '''For each supplied model in models[] loop over data form infile, predict probabilities and save it in outputFilename ''' printMemInfo() print info.info + "Reading statistics file..." - statComplex = pd.read_csv(infile, delimiter="\t") + statComplex = pd.read_csv(infile, delimiter="\t", dtype={"compound": 'str'}) printMemInfo() print info.info + "Grouping..." statComplexGrouped = statComplex.groupby(['base', 'at2', 'atom_type']) @@ -1274,7 +1274,7 @@ def scoreLoop(infile, outputFilename, models, sdffile, ClusteringMethod, Cluster printMemInfo() print info.info + "Reading Energy data..." - E_ligand = pd.read_csv(outputFilename + ".ligand_energy.csv.bz2", delimiter=",") + E_ligand = pd.read_csv(outputFilename + ".ligand_energy.csv.bz2", delimiter=",", dtype={"compound": 'str'}) for modelName in models: @@ -1289,7 +1289,7 @@ def scoreLoop(infile, outputFilename, models, sdffile, ClusteringMethod, Cluster score(statComplexGrouped, modelName, tempFilename = outputFilename + ".scores.tmp") # WARNING - tu może zrobić chunks? - output = pd.read_csv(outputFilename + ".scores.tmp", delimiter=",") + output = pd.read_csv(outputFilename + ".scores.tmp", delimiter=",", dtype={"compound": 'str'}) # address the issue #3 os.remove(outputFilename + ".scores.tmp") # delete the temp file @@ -1529,7 +1529,7 @@ def mergeOutputFiles(outputFilename, models, groupByName = False): for modelName in models: outfile = outputFilename + "." + modelName + ".csv" print info.info + "Processing file:", outfile - data = pd.read_csv(outfile, delimiter="\t") + data = pd.read_csv(outfile, delimiter="\t", dtype={"compound": 'str'}) data = data.ix[:, ['compoundId', 'compound', 'score'] ] # last number - the column with the actual score data = data.rename(columns={'score': 'AnnapuRNA Score:' + modelName}) @@ -1543,7 +1543,7 @@ def mergeOutputFiles(outputFilename, models, groupByName = False): if groupByName == True: outfileGrouped = outputFilename + "." + modelName + ".grouped.csv" print info.info + "Processing file:", outfileGrouped - dataGrouped = pd.read_csv(outfileGrouped, delimiter="\t") + dataGrouped = pd.read_csv(outfileGrouped, delimiter="\t", dtype={"compound": 'str'}) dataGrouped = dataGrouped.ix[:, ['compoundId', 'compound', 'score'] ] dataGrouped = dataGrouped.reindex_axis(['compound', 'compoundId', 'score'], axis=1)