From 1ed3a246b6df99bfba5ff4eb745d0433149f04f3 Mon Sep 17 00:00:00 2001 From: ursinedeity Date: Tue, 14 Mar 2017 15:14:01 -0700 Subject: [PATCH] update cooler file support (100kb) --- alab/matrix.py | 26 ++++++++++++++++++++++++++ pgsflows/script/buildTADMap.py | 2 ++ 2 files changed, 28 insertions(+) diff --git a/alab/matrix.py b/alab/matrix.py index 9b21eec..dae0a34 100644 --- a/alab/matrix.py +++ b/alab/matrix.py @@ -782,3 +782,29 @@ def loadhic(filename,genome='hg19',resolution=100000,usechr=['#','X'],verbose=Fa #-- return m + +def loadcooler(filename,usechr=['#','X'],verbose=False): + import scipy.sparse + + h5 = h5py.File(filename,'r') + genome = str(h5.attrs['genome-assembly']) + resolution = h5.attrs['bin-size'] + nbins = h5.attrs['nbins'] + + if verbose: + print genome, resolution + + tgenome = utils.genome(genome) + bininfo = tgenome.bininfo(resolution) + sp = scipy.sparse.csr_matrix((h5['pixels']['count'],h5['pixels']['bin2_id'],h5['indexes']['bin1_offset']),shape=(nbins,nbins)) + + m = contactmatrix(len(bininfo.chromList),genome=genome,resolution=resolution,usechr=usechr) + m.matrix = sp.toarray()[:len(m.idx),:len(m.idx)].astype(np.float32) + h5.close() + + t = m.matrix.diagonal().copy() + m.matrix[np.diag_indices(len(m.idx))] = 0 + m.matrix += m.matrix.T + m.matrix[np.diag_indices(len(m.idx))] = t + + return m diff --git a/pgsflows/script/buildTADMap.py b/pgsflows/script/buildTADMap.py index b775396..340522e 100644 --- a/pgsflows/script/buildTADMap.py +++ b/pgsflows/script/buildTADMap.py @@ -48,6 +48,8 @@ def main(matrixfile, domainfile, outputfile, genome, resolution) : #fileFormat): raise IOError,"File %s doesn't exist!\n" % (matrixfile) if os.path.splitext(matrixfile)[1] == '.hic': m = alab.matrix.loadhic(matrixfile,genome=genome,resolution=resolution) + elif os.path.splitext(matrixfile)[1] == '.cool': + m = alab.matrix.loadcooler(matrixfile) else: m = alab.matrix.contactmatrix(matrixfile, genome=genome, resolution=resolution ) #m = None