library(clusterProfiler)
library(org.Hs.eg.db)
library(sqldf)
library(ggplot2)
library(stringr)
# Read one per-stage gene list and tag every row with its regulation
# direction: rows with logFC > 0 are "Up Regulated", all other non-missing
# values (including exactly 0) are "Down Regulated".
read_stage <- function(path) {
  genes <- read.csv(path)
  genes$group <- ifelse(genes$logFC > 0, "Up Regulated", "Down Regulated")
  genes
}

stage1 <- read_stage("stage1_gene_list.csv")
stage2 <- read_stage("stage2_gene_list.csv")
stage3 <- read_stage("stage3_gene_list.csv")
stage4 <- read_stage("stage4_gene_list.csv")

# Stack the four stage tables in order. Stages 3 and 4 are pooled under a
# single "Stage 3 and 4" label; the labels are laid out to match the row
# order produced by the rbind.
mydf <- do.call(rbind, list(stage1, stage2, stage3, stage4))
stage_labels <- c("Stage 1", "Stage 2", "Stage 3 and 4")
stage_sizes <- c(nrow(stage1), nrow(stage2), nrow(stage3) + nrow(stage4))
mydf$othergroup <- rep(stage_labels, times = stage_sizes)

# Translate gene symbols to Entrez IDs. drop = FALSE keeps symbols that
# have no mapping (their ENTREZID is NA) instead of discarding them.
entrez <- bitr(
  mydf$symbol,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Hs.eg.db,
  drop = FALSE
)

# Attach the Entrez ID to every gene row by joining on the gene symbol.
# sqldf resolves `mydf` and `entrez` from the data frames in scope.
entrez_join_sql <- "SELECT mydf.symbol AS symbol, logFC, `group`, othergroup, ENTREZID AS `entrez` FROM mydf JOIN entrez ON mydf.symbol = entrez.SYMBOL"
mydf_kegg <- sqldf(entrez_join_sql)

# Disabled alternative: GO enrichment keyed directly on gene symbols.
# ont <- "MF"
# formula_res <- compareCluster(symbol ~ group + othergroup,
#   data = mydf,
#   fun = "enrichGO", ont = ont, keyType = "SYMBOL", OrgDb = "org.Hs.eg.db"
# )

# KEGG enrichment with one gene cluster per (direction, stage) combination.
cluster_formula <- entrez ~ group + othergroup
formula_res <- compareCluster(
  cluster_formula,
  data = mydf_kegg,
  fun = "enrichKEGG",
  organism = "hsa"
)


# Work on a copy of the enrichment table held in the compareCluster result.
tmp <- formula_res@compareClusterResult

# Abbreviate the direction labels to single letters.
tmp$group <- str_replace_all(
  tmp$group,
  c("Down Regulated" = "D", "Up Regulated" = "U")
)

# Prefix each description with its pathway ID and cap the label at 30
# characters. This is done in one vectorised step rather than a
# `1:nrow(tmp)` loop, which would run for i = 1 and i = 0 on an empty table
# and insert a spurious "NA NA" row. Columns are addressed by name (ID,
# Description) -- the same names the SQL aggregation on this table uses --
# instead of by position 4 and 5.
tmp$Description <- str_trunc(paste(tmp$ID, tmp$Description), 30)

# Collapse the table to one row per pathway ID. GROUP_CONCAT joins the stage
# labels of all rows sharing that ID with commas; the other selected columns
# are not aggregated, so each comes from one row of the group as chosen by
# SQLite. The result keeps the column names of `tmp`.
collapse_sql <- "SELECT Cluster, `group`, GROUP_CONCAT(othergroup), ID, Description, GeneRatio, BgRatio, pvalue, `p.adjust`, qvalue, geneID, Count FROM tmp GROUP BY ID"
tmp2 <- sqldf(collapse_sql)
names(tmp2) <- names(tmp)

# Rows of tmp2 whose concatenated stage label equals `label` exactly.
stage_rows <- function(label) {
  tmp2[tmp2$othergroup == label, ]
}

# Overwrite the stage label of every row in `rows`.
set_stage <- function(rows, label) {
  rows$othergroup <- rep(label, nrow(rows))
  rows
}

# Pathways seen only under "Stage 3 and 4".
late_only <- stage_rows("Stage 3 and 4")

# Pathways seen in both Stage 1 and Stage 2; the concatenation order is not
# fixed, so both spellings are collected (in this order).
early_shared <- set_stage(
  rbind(stage_rows("Stage 1,Stage 2"), stage_rows("Stage 2,Stage 1")),
  "Stage 1 and 2"
)

# Each early stage shows its own pathways plus the shared ones.
stage1_rows <- set_stage(rbind(stage_rows("Stage 1"), early_shared), "Stage 1")
stage2_rows <- set_stage(rbind(stage_rows("Stage 2"), early_shared), "Stage 2")

# Any other label combination (for example a pathway present in both an
# early stage and "Stage 3 and 4") is not carried into `final`.
final <- rbind(stage1_rows, stage2_rows, late_only)

# Swap the reshaped table back into the compareCluster object so that
# dotplot() draws from it.
formula_res@compareClusterResult <- final

# Dot plot with regulation direction on the x axis, one facet per stage.
kegg_plot <- dotplot(
  formula_res,
  x = ~group,
  showCategory = 20,
  font.size = 4
) +
  ggplot2::facet_grid(~othergroup)
kegg_plot

# Write the plot to PDF, naming the plot object explicitly.
ggsave(
  filename = "kegg_brca.pdf",
  plot = kegg_plot,
  device = "pdf",
  dpi = 300
)
