import glob
import os

import networkx as nx
import pandas as pd
import wget


def download(genes):
    """Download one starBase result file per gene symbol.

    Reads the CSV file ``genes`` and, for every entry of its ``symbol``
    column, requests the starBase ``moduleDownload.php`` endpoint (queried
    with ``source=agoClipRNA`` and ``type=txt``) and stores the response in
    the local ``starbase`` directory.  A progress line is printed before each
    request.

    :param genes: path of a CSV file (anything ``pd.read_csv`` accepts) that
        contains a ``symbol`` column
    :returns: None
    :raises KeyError: if the CSV has no ``symbol`` column

    """
    url = "http://starbase.sysu.edu.cn/moduleDownload.php?source=agoClipRNA&type=txt&value=hg19;mRNA;all;1;0;0;1;None;{}"
    out_dir = "starbase"

    diff_genes = pd.read_csv(genes)
    symbols = diff_genes["symbol"]
    total = diff_genes.shape[0]

    # Create the target directory once, up front.  ``exist_ok=True`` replaces
    # the former per-iteration isdir()/mkdir() pair and avoids its
    # check-then-create race.
    os.makedirs(out_dir, exist_ok=True)

    for i, gene in enumerate(symbols, start=1):
        print("{}/{} Download file for {}".format(i, total, gene))
        wget.download(url.format(gene), out_dir, bar=None)


def binding_graph(directory, fname):
    """Creates a gene x miRNA adjacency table from tab delimited files

    Every ``*.txt`` file in ``directory`` is read as a tab separated table
    (text following ``#`` is treated as a comment).  Tables with at most one
    row are skipped; the remaining ones are concatenated and the ``hsa-``
    substring is removed from the ``miRNAname`` column.  For each row a
    directed edge is added from the value in the second column to the value
    in the third column.  Nodes whose name starts with ``ENSG`` are treated
    as genes, all other nodes as miRNAs.

    The resulting table (rows: genes, columns: miRNAs) is written to
    ``fname`` as CSV without the index column and is also returned.

    :param directory: directory of tab delimited files
    :param fname: path of the CSV file to write
    :returns: adjacency table with genes as rows and miRNAs as columns
    :rtype: pd.DataFrame
    :raises ValueError: if no file in ``directory`` yields a usable table

    """
    # Sort the file list: glob order is filesystem dependent, and node (hence
    # row/column) order of the output follows the order rows are read in.
    paths = sorted(glob.glob(os.path.join(directory, "*.txt")))

    frames = []
    for path in paths:
        table = pd.read_csv(path, sep="\t", comment="#")
        # Same threshold as before: tables with zero or one row are ignored.
        if table.shape[0] > 1:
            frames.append(table)

    if not frames:
        raise ValueError(
            "no tab delimited file with more than one row found in "
            "{!r}".format(directory)
        )

    result = pd.concat(frames)
    # Literal (non-regex) removal of the species prefix.
    result["miRNAname"] = result["miRNAname"].str.replace(
        "hsa-", "", regex=False
    )

    # Select the two endpoint columns explicitly by position.  Integer
    # indexing into a string-labelled row Series (``row[1]``) is deprecated
    # in recent pandas versions, and column-wise access also avoids building
    # one Series per row.
    # NOTE(review): presumably column 1 is the miRNA name and column 2 the
    # Ensembl gene id -- confirm against the downloaded file layout.
    sources = result.iloc[:, 1]
    targets = result.iloc[:, 2]

    G = nx.DiGraph()
    G.add_edges_from(zip(sources, targets))

    genes = [n for n in G.nodes if n.startswith("ENSG")]
    mirnas = [n for n in G.nodes if not n.startswith("ENSG")]

    # With miRNAs listed first, the upper-right block of the adjacency
    # matrix holds the miRNA -> gene edges; transpose to get genes as rows.
    df = nx.to_pandas_adjacency(G, nodelist=mirnas + genes)
    df = df.iloc[: len(mirnas), len(mirnas) :]
    df = df.T

    # NOTE(review): index=False drops the gene labels from the CSV; kept
    # as-is so the output format does not change -- confirm this is intended.
    df.to_csv(fname, index=False)
    return df


def main():
    """Script entry point: run ``download`` on ``hsa_gene_names.csv``."""
    gene_list_csv = "hsa_gene_names.csv"
    download(gene_list_csv)


# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
