# Source code for rnaglib.config.build_iso_mat

import os
import sys
import numpy as np

# Directory containing this file, with symlinks resolved.
script_dir = os.path.dirname(os.path.realpath(__file__))
# When this file is executed directly as a script, add the directory two
# levels above it to the import path. This has to run before the `rnaglib`
# import that follows.
# NOTE(review): presumably two levels up is the repository root, so that the
# package resolves without being installed - confirm.
if __name__ == "__main__":
    sys.path.append(os.path.join(script_dir, '..', '..'))

from rnaglib.config.graph_keys import EDGE_MAP_RGLIB

# Labelled copy of the undirected isostericity table: one row and one column
# per edge code (twelve base-pair codes plus the backbone code B53).
# It is kept for human reference only and is not parsed below.
s = """
,CHH,TWH,CWW,THS,CWS,CSS,CWH,CHS,TWS,TSS,TWW,THH,B53
CHH,8.9,12,14.7,14,13.7,12.7,15.1,14.7,16.2,16.6,16.2,14,19
TWH,12,2.6,10.6,9.7,14.3,15.6,11.2,15.2,13.8,15.4,11.9,11.4,19
CWW,14.7,10.6,4.1,8.2,9.2,13.1,14.5,16,12.4,11.3,11.1,15.5,19
THS,14,9.7,8.2,2.1,7,12.7,12,12.1,10,11.9,13.1,15.8,19
CWS,13.7,14.3,9.2,7,3.5,7.4,14.9,12.3,10.9,10.8,14.6,17.7,19
CSS,12.7,15.6,13.1,12.7,7.4,1.3,15.8,12.9,13.8,12,17.1,19,19
CWH,15.1,11.2,14.5,12,14.9,15.8,3.2,8.8,8.4,11.5,10.6,10.8,19
CHS,14.7,15.2,16,12.1,12.3,12.9,8.8,2.4,7.9,11.2,14.7,14.9,19
TWS,16.2,13.8,12.4,10,10.9,13.8,8.4,7.9,3.4,6.4,9.6,13.4,19
TSS,16.6,15.4,11.3,11.9,10.8,12,11.5,11.2,6.4,2.2,9,14.4,19
TWW,16.2,11.9,11.1,13.1,14.6,17.1,10.6,14.7,9.6,9,3.8,9,19
THH,14,11.4,15.5,15.8,17.7,19,10.8,14.9,13.4,14.4,9,4,19
B53,19,19,19,19,19,19,19,19,19,19,19,19,0
"""

# The same table without its header row and label column. This is the copy
# that actually gets parsed; its row/column order must match `keys` below.
s2 = """8.9,12,14.7,14,13.7,12.7,15.1,14.7,16.2,16.6,16.2,14,19
12,2.6,10.6,9.7,14.3,15.6,11.2,15.2,13.8,15.4,11.9,11.4,19
14.7,10.6,4.1,8.2,9.2,13.1,14.5,16,12.4,11.3,11.1,15.5,19
14,9.7,8.2,2.1,7,12.7,12,12.1,10,11.9,13.1,15.8,19
13.7,14.3,9.2,7,3.5,7.4,14.9,12.3,10.9,10.8,14.6,17.7,19
12.7,15.6,13.1,12.7,7.4,1.3,15.8,12.9,13.8,12,17.1,19,19
15.1,11.2,14.5,12,14.9,15.8,3.2,8.8,8.4,11.5,10.6,10.8,19
14.7,15.2,16,12.1,12.3,12.9,8.8,2.4,7.9,11.2,14.7,14.9,19
16.2,13.8,12.4,10,10.9,13.8,8.4,7.9,3.4,6.4,9.6,13.4,19
16.6,15.4,11.3,11.9,10.8,12,11.5,11.2,6.4,2.2,9,14.4,19
16.2,11.9,11.1,13.1,14.6,17.1,10.6,14.7,9.6,9,3.8,9,19
14,11.4,15.5,15.8,17.7,19,10.8,14.9,13.4,14.4,9,4,19
19,19,19,19,19,19,19,19,19,19,19,19,0
"""

lines = s2.splitlines()

# Parse every cell straight to float instead of going through an
# intermediate array of strings.
matrix = np.array(
    [[float(cell) for cell in row.split(',')] for row in lines],
    dtype=float,
)
# Map the raw table values x to exp(-x / 8): a raw value of 0 becomes 1 and
# larger raw values decay towards 0.
matrix = np.exp(-matrix / 8)

# Edge codes in the row/column order of the table, and their index lookup.
keys = "CHH,TWH,CWW,THS,CWS,CSS,CWH,CHS,TWS,TSS,TWW,THH,B53".split(',')
key_map = {bp: i for i, bp in enumerate(keys)}

# Guard against the table and the key list drifting apart when either one is
# edited: lookups through `key_map` would otherwise hit the wrong cells.
if matrix.shape != (len(keys), len(keys)):
    raise ValueError(
        "Isostericity table has shape {} but {} edge codes are declared".format(
            matrix.shape, len(keys)
        )
    )


def get_undirected_iso(bpa, bpb):
    """
    Look up the value stored in the undirected isostericity matrix for two
    directed edges.

    Each code is upper-cased first. A code that is not a key of the
    undirected table is replaced by the same code with its second and third
    characters swapped before the lookup.

    :param bpa: LW edge code
    :type bpa: str
    :param bpb: LW edge code
    :type bpb: str
    :return: isostericity value read from the module-level ``matrix``
    :rtype: float
    :raises KeyError: if a code is still unknown after the swap
    :raises IndexError: if an unknown code has fewer than three characters
    """
    first = bpa.upper()
    second = bpb.upper()
    if first not in key_map:
        # Not a table key as given: try the other orientation of the code.
        first = first[0] + first[2] + first[1]
    if second not in key_map:
        second = second[0] + second[2] + second[1]
    row = key_map[first]
    col = key_map[second]
    return matrix[row, col]
def build_iso():
    """
    Build the directed isostericity matrix.

    Entries are filled with the following heuristic:

    - An edge type compared with itself gets 1: max similarity is self
    - Backbone edges (B53, B35) are set aside: 0 against any non-backbone
      edge, and 0.2 between the two backbone codes (a small cost for
      reversing the direction)
    - Every other pair gets the associated undirected isostericity value

    :return: A float32 np matrix of isostericity values, with rows and
        columns following the iteration order of the EDGE_MAP_RGLIB keys
    """
    backbone = ('B53', 'B35')
    edge_types = list(EDGE_MAP_RGLIB.keys())
    size = len(edge_types)
    directed = np.zeros(shape=(size, size), dtype=np.float32)

    for row, bpa in enumerate(edge_types):
        a_is_backbone = bpa in backbone
        for col, bpb in enumerate(edge_types):
            b_is_backbone = bpb in backbone
            if a_is_backbone != b_is_backbone:
                # Exactly one of the two is a backbone edge.
                value = 0.
            elif bpa == bpb:
                # Same edge type.
                value = 1
            elif a_is_backbone:
                # Both are backbone edges and they differ: B53 versus B35.
                value = 0.2
            else:
                # Two different non-backbone edges: use the undirected table.
                value = get_undirected_iso(bpa, bpb)
            directed[row, col] = value
    return directed
# Build the directed isostericity matrix once, when this module is executed.
iso_mat = build_iso()