import pdb

import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns

# Apply seaborn's "darkgrid" theme to every figure drawn after this point.
sns.set_style("darkgrid")

# Load the combined network from a hard-coded, machine-specific GraphML path.
model = nx.read_graphml(
    "/home/hd/git/research/bioinformatics/data/breast_cancer/networks/cancer_grn_mirna_combined.graphml"
)

# Keep only the edges that join an "hsa-" node (miRNA identifier prefix) with
# an "ENSG" node (gene identifier prefix), in either orientation.
mirna_gene = []
for source, target in model.edges():
    mirna_to_gene = source.startswith("hsa-") and target.startswith("ENSG")
    gene_to_mirna = target.startswith("hsa-") and source.startswith("ENSG")
    if mirna_to_gene or gene_to_mirna:
        mirna_gene.append((source, target))

# Empty directed graph; it is populated from `edges` further down the script.
G = nx.DiGraph()

# Flip every retained pair so that the stored target comes first.
# NOTE(review): the rest of the script compares element 0 of each pair with
# miRNA names, which presumably means the GraphML stores these edges as
# gene -> miRNA. Pairs stored the other way round would end up gene-first
# after the flip -- confirm against the input file.
edges = [(target, source) for source, target in mirna_gene]
print("Number of edges: {}".format(len(edges)))

# Hand-curated miRNA identifiers whose bindings are exported below.
# NOTE(review): the original inline remark reads "top20 percent" while the
# variable is called "low degree" -- confirm which description is intended.
low_degree_mirnas = [  # top20 percent
    "hsa-miR-129-5p",
    "hsa-miR-140-3p",
    "hsa-miR-146b-5p",
    "hsa-miR-188-5p",
    "hsa-miR-193a-5p",
    "hsa-miR-28",
    "hsa-miR-346",
    "hsa-miR-3605-3p",
    "hsa-miR-361",
    "hsa-miR-455-5p",
    "hsa-miR-671-3p",
    "hsa-miR-320b",
    "hsa-miR-193a-3p",
    "hsa-miR-326",
    "hsa-miR-330",
    "hsa-miR-501-3p",
]

# Select the edges whose first element is one of the listed miRNAs. A
# frozenset gives constant-time membership tests instead of scanning the list
# once per edge.
_low_degree_lookup = frozenset(low_degree_mirnas)
_low_degree_rows = [pair for pair in edges if pair[0] in _low_degree_lookup]

# Write one "<first>\t<second>" line per selected edge.
# The file is opened once and closed deterministically by the `with` block;
# previously a new handle was opened for every matching edge and never closed.
# It is only opened when there is something to write, so no empty file is
# created. Append mode is kept, which means re-running the script adds the
# same rows again.
if _low_degree_rows:
    with open("low_degree_significant_mirna_gene_bindings.tab", "a+") as _out:
        for _first, _second in _low_degree_rows:
            print("{}\t{}".format(_first, _second), file=_out)

# Populate the directed graph with the (flipped) miRNA-gene pairs.
G.add_edges_from(edges)

# (node, degree) pairs for every node whose name carries the "hsa-" prefix;
# for a DiGraph, `degree` counts incoming plus outgoing edges.
mirnas = [
    (name, node_degree)
    for name, node_degree in G.degree()
    if name.startswith("hsa-")
]
degree = [node_degree for _, node_degree in mirnas]

# Histogram of the miRNA degrees; the bin count is chosen automatically.
plt.hist(degree, bins="auto", color="gray", rwidth=0.4)
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()

# Names of the miRNA nodes whose degree in G is at most 100.
mirnas_degree_less_thaneq100 = [node[0] for node in mirnas if node[1] <= 100]

# Select the edges whose first element is one of those miRNAs. A frozenset
# gives constant-time membership tests instead of scanning the list once per
# edge.
_le_100_lookup = frozenset(mirnas_degree_less_thaneq100)
_le_100_rows = [pair for pair in edges if pair[0] in _le_100_lookup]

# Write one "<first>\t<second>" line per selected edge.
# The file is opened once and closed deterministically by the `with` block;
# previously a new handle was opened for every matching edge and never closed.
# It is only opened when there is something to write, so no empty file is
# created. Append mode is kept, which means re-running the script adds the
# same rows again.
if _le_100_rows:
    with open("degree_le_100_significant_mirna_gene_bindings.tab", "a+") as _out:
        for _first, _second in _le_100_rows:
            print("{}\t{}".format(_first, _second), file=_out)
