library(tidyverse)
library(FRmatch)
##########
## data ##
##########

load("dat-challenge1.rda")
#################
## basic plots ##
#################

## cluster size
plot_clusterSize(sce.E1 = sce_query, sce.E2 = sce_ref, name.E1 = "query", name.E2 = "ref")

## original distribution
plot_exprDist(sce.E1 = sce_query, sce.E2 = sce_ref, name.E1 = "query", name.E2 = "ref")

###################
## barcode plots ##
###################

# ## create barcode plot folder
# folder_name <- "barcode_plots"
# if(!dir.exists(folder_name)){dir.create(folder_name)}
# 
# ## self barcode
# ref_clusters <- unique(colData(sce_ref)$cluster_membership)
# for(cl in ref_clusters){
#   plot_cluster_by_markers(sce_ref, cluster.name = cl, nsamp = 30, name.E1 = "",
#                           filename = paste0(folder_name,"/",cl,".pdf"))
# }
# query_clusters <- unique(colData(sce_query)$cluster_membership)
# for(cl in query_clusters){
#   plot_cluster_by_markers(sce_query, cluster.name = cl, nsamp = 30, name.E1 = "",
#                           filename = paste0(folder_name,"/",cl,".pdf"))
# }

###################
## normalization ## FINAL: decided not to do since query and ref data distribution look similar
###################

# ## min-max scaling only
# sce_query_norm <- normalization(sce_query)
# sce_ref_norm <- normalization(sce_ref)
# 
# 
# ## create barcode plot folder
# folder_name <- "barcode_plots_normalization"
# if(!dir.exists(folder_name)){dir.create(folder_name)}
# 
# ## self barcode
# ref_clusters <- unique(colData(sce_ref_norm)$cluster_membership)
# for(cl in ref_clusters){
#   plot_cluster_by_markers(sce_ref_norm, cluster.name = cl, nsamp = 30, name.E1 = "",
#                           filename = paste0(folder_name,"/",cl,".pdf"))
# }
# query_clusters <- unique(colData(sce_query_norm)$cluster_membership)
# for(cl in query_clusters){
#   plot_cluster_by_markers(sce_query_norm, cluster.name = cl, nsamp = 30, name.E1 = "",
#                           filename = paste0(folder_name,"/",cl,".pdf"))
# }
#############
## FRmatch ##
#############

rst_q2r <- FRmatch(sce_query, sce_ref, use.cosine=T) ### FINAL: subsamp.size=20, subsamp.iter=1000 (default)
rst_r2q <- FRmatch(sce_ref, sce_query, use.cosine=T) ### FINAL: subsamp.size=20, subsamp.iter=1000 (default)
plot_FRmatch(rst_q2r)

plot_FRmatch(rst_q2r, type="padj")

plot_FRmatch(rst_r2q)

plot_FRmatch(rst_r2q, type="padj")

out_FRmatch <- plot_bi_FRmatch(rst_q2r, rst_r2q, name.E1="", name.E2="", return.value = T) #sig.level = 0.05

plot_bi_FRmatch(rst_r2q, rst_q2r, name.E1="", name.E2="")

## output required lists
out_FRmatch <- out_FRmatch[-nrow(out_FRmatch),] #remove the unassigned row

## unassigned
(Q_unassigned <- names(which(colSums(out_FRmatch)==0)))
[1] "Q_cl_9"  "Q_cl_20" "Q_cl_50" "Q_cl_85" "Q_cl_22" "Q_cl_10" "Q_cl_8" 
(R_unassigned <- names(which(rowSums(out_FRmatch)==0)))
[1] "R_cl_21" "R_cl_40" "R_cl_15" "R_cl_18" "R_cl_6"  "R_cl_24" "R_cl_9" 
## one-to-many
(Q_multi <- names(which(colSums(out_FRmatch)>2)))
[1] "Q_cl_33"
(R_multi <- names(which(rowSums(out_FRmatch)>2)))
[1] "R_cl_11" "R_cl_32" "R_cl_84"
## one-to-one
matches <- get_values(out_FRmatch)
matches_1to1 <- matches %>% filter(!(rowname %in% R_multi)) %>% select(-value)
matches_1to1
## check if one-to-one
# table(matches_1to1$rowname)
# table(matches_1to1$colname)
## write output
write.csv(Q_unassigned, file="Q_unassigned.csv")
write.csv(R_unassigned, file="R_unassigned.csv")
write.csv(Q_multi, file="Q_multi.csv")
write.csv(R_multi, file="R_multi.csv")
write.csv(matches_1to1, file="matches_1to1.csv")
LS0tCnRpdGxlOiAiRGF0YSBjaGFsbGVuZyAxIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciwgaW5jbHVkZT1GQUxTRX0Kcm0obGlzdD1scygpKQpzZXR3ZCgifi9Eb2N1bWVudHMvQklDQ04tQnJhaW5TdGFuZGFyZHMtMjAyMC9BbmxheXNlcy9DZWxsVHlwZU1hcHBpbmdfMjAyMi9jaGFsbGVuZ2UxL0ZSLU1hdGNoX2NoYWxsZW5nZTEvIikKCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIwpgYGAKCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoRlJtYXRjaCkKYGBgCgoKYGBge3J9CiMjIyMjIyMjIyMKIyMgZGF0YSAjIwojIyMjIyMjIyMjCgpsb2FkKCJkYXQtY2hhbGxlbmdlMS5yZGEiKQpgYGAKCgpgYGB7ciwgaW5jbHVkZT1GQUxTRX0KbG9hZCgicnN0X2Nvc2luZS5yZGEiKQpgYGAKCgpgYGB7ciwgZmlnLndpZHRoPTE1LCBmaWcuaGVpZ2h0PTEwfQojIyMjIyMjIyMjIyMjIyMjIwojIyBiYXNpYyBwbG90cyAjIwojIyMjIyMjIyMjIyMjIyMjIwoKIyMgY2x1c3RlciBzaXplCnBsb3RfY2x1c3RlclNpemUoc2NlLkUxID0gc2NlX3F1ZXJ5LCBzY2UuRTIgPSBzY2VfcmVmLCBuYW1lLkUxID0gInF1ZXJ5IiwgbmFtZS5FMiA9ICJyZWYiKQpgYGAKCmBgYHtyfQojIyBvcmlnaW5hbCBkaXN0cmlidXRpb24KcGxvdF9leHByRGlzdChzY2UuRTEgPSBzY2VfcXVlcnksIHNjZS5FMiA9IHNjZV9yZWYsIG5hbWUuRTEgPSAicXVlcnkiLCBuYW1lLkUyID0gInJlZiIpCmBgYAoKCmBgYHtyfQojIyMjIyMjIyMjIyMjIyMjIyMjCiMjIGJhcmNvZGUgcGxvdHMgIyMKIyMjIyMjIyMjIyMjIyMjIyMjIwoKIyAjIyBjcmVhdGUgYmFyY29kZSBwbG90IGZvbGRlcgojIGZvbGRlcl9uYW1lIDwtICJiYXJjb2RlX3Bsb3RzIgojIGlmKCFkaXIuZXhpc3RzKGZvbGRlcl9uYW1lKSl7ZGlyLmNyZWF0ZShmb2xkZXJfbmFtZSl9CiMgCiMgIyMgc2VsZiBiYXJjb2RlCiMgcmVmX2NsdXN0ZXJzIDwtIHVuaXF1ZShjb2xEYXRhKHNjZV9yZWYpJGNsdXN0ZXJfbWVtYmVyc2hpcCkKIyBmb3IoY2wgaW4gcmVmX2NsdXN0ZXJzKXsKIyAgIHBsb3RfY2x1c3Rlcl9ieV9tYXJrZXJzKHNjZV9yZWYsIGNsdXN0ZXIubmFtZSA9IGNsLCBuc2FtcCA9IDMwLCBuYW1lLkUxID0gIiIsCiMgICAgICAgICAgICAgICAgICAgICAgICAgICBmaWxlbmFtZSA9IHBhc3RlMChmb2xkZXJfbmFtZSwiLyIsY2wsIi5wZGYiKSkKIyB9CiMgcXVlcnlfY2x1c3RlcnMgPC0gdW5pcXVlKGNvbERhdGEoc2NlX3F1ZXJ5KSRjbHVzdGVyX21lbWJlcnNoaXApCiMgZm9yKGNsIGluIHF1ZXJ5X2NsdXN0ZXJzKXsKIyAgIHBsb3RfY2x1c3Rlcl9ieV9tYXJrZXJzKHNjZV9xdWVyeSwgY2x1c3Rlci5uYW1lID0gY2wsIG5zYW1wID0gMzAsIG5hbWUuRTEgPSAiIiwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgIGZpbGVuYW1lID0gcGFzdGUwKGZvbGRlcl9uYW1lLCIvIixjbCwiLnBkZiIpKQojIH0KCiMjIyMjIyMjIyMjIyMjIyMjIyMKIyMgbm9ybWFsaXphdGlvbiAjIyBGSU5BTDogZGVjaWRlZCBub3QgdG8gZG8gc2luY2UgcXVlcnkgYW5kIHJlZiBkYXRhIGRpc3RyaWJ1dGlvbiBsb29rIHNpbWlsYXIKIyMjIyMjIyMjIyMjIyMjIyMjIwoKIyAjIyBtaW4tbWF4IHNjYWxpbmcgb25seQojIHNjZV9xdWVyeV9ub3JtIDwtIG5vcm1hbGl6YXRpb24oc2NlX3F1ZXJ5KQojIHNjZV9yZWZfbm9ybSA8LSBub3JtYWxpemF0aW9uKHNjZV9yZWYpCiMgCiMgCiMgIyMgY3JlYXRlIGJhcmNvZGUgcGxvdCBmb2xkZXIKIyBmb2xkZXJfbmFtZSA8LSAiYmFyY29kZV9wbG90c19ub3JtYWxpemF0aW9uIgojIGlmKCFkaXIuZXhpc3RzKGZvbGRlcl9uYW1lKSl7ZGlyLmNyZWF0ZShmb2xkZXJfbmFtZSl9CiMgCiMgIyMgc2VsZiBiYXJjb2RlCiMgcmVmX2NsdXN0ZXJzIDwtIHVuaXF1ZShjb2xEYXRhKHNjZV9yZWZfbm9ybSkkY2x1c3Rlcl9tZW1iZXJzaGlwKQojIGZvcihjbCBpbiByZWZfY2x1c3RlcnMpewojICAgcGxvdF9jbHVzdGVyX2J5X21hcmtlcnMoc2NlX3JlZl9ub3JtLCBjbHVzdGVyLm5hbWUgPSBjbCwgbnNhbXAgPSAzMCwgbmFtZS5FMSA9ICIiLAojICAgICAgICAgICAgICAgICAgICAgICAgICAgZmlsZW5hbWUgPSBwYXN0ZTAoZm9sZGVyX25hbWUsIi8iLGNsLCIucGRmIikpCiMgfQojIHF1ZXJ5X2NsdXN0ZXJzIDwtIHVuaXF1ZShjb2xEYXRhKHNjZV9xdWVyeV9ub3JtKSRjbHVzdGVyX21lbWJlcnNoaXApCiMgZm9yKGNsIGluIHF1ZXJ5X2NsdXN0ZXJzKXsKIyAgIHBsb3RfY2x1c3Rlcl9ieV9tYXJrZXJzKHNjZV9xdWVyeV9ub3JtLCBjbHVzdGVyLm5hbWUgPSBjbCwgbnNhbXAgPSAzMCwgbmFtZS5FMSA9ICIiLAojICAgICAgICAgICAgICAgICAgICAgICAgICAgZmlsZW5hbWUgPSBwYXN0ZTAoZm9sZGVyX25hbWUsIi8iLGNsLCIucGRmIikpCiMgfQpgYGAKCmBgYHtyLCBldmFsPUZBTFNFfQojIyMjIyMjIyMjIyMjCiMjIEZSbWF0Y2ggIyMKIyMjIyMjIyMjIyMjIwoKcnN0X3EyciA8LSBGUm1hdGNoKHNjZV9xdWVyeSwgc2NlX3JlZiwgdXNlLmNvc2luZT1UKSAjIyMgRklOQUw6IHN1YnNhbXAuc2l6ZT0yMCwgc3Vic2FtcC5pdGVyPTEwMDAgKGRlZmF1bHQpCnJzdF9yMnEgPC0gRlJtYXRjaChzY2VfcmVmLCBzY2VfcXVlcnksIHVzZS5jb3NpbmU9VCkgIyMjIEZJTkFMOiBzdWJzYW1wLnNpemU9MjAsIHN1YnNhbXAuaXRlcj0xMDAwIChkZWZhdWx0KQpgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTUsIGZpZy5oZWlnaHQ9MTV9CnBsb3RfRlJtYXRjaChyc3RfcTJyKQpgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTUsIGZpZy5oZWlnaHQ9NX0KcGxvdF9GUm1hdGNoKHJzdF9xMnIsIHR5cGU9InBhZGoiKQpgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTUsIGZpZy5oZWlnaHQ9MTV9CnBsb3RfRlJtYXRjaChyc3RfcjJxKQpgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTUsIGZpZy5oZWlnaHQ9NX0KcGxvdF9GUm1hdGNoKHJzdF9yMnEsIHR5cGU9InBhZGoiKQpgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTUsIGZpZy5oZWlnaHQ9MTV9Cm91dF9GUm1hdGNoIDwtIHBsb3RfYmlfRlJtYXRjaChyc3RfcTJyLCByc3RfcjJxLCBuYW1lLkUxPSIiLCBuYW1lLkUyPSIiLCByZXR1cm4udmFsdWUgPSBUKSAjc2lnLmxldmVsID0gMC4wNQpgYGAKCmBgYHtyLCBmaWcud2lkdGg9MTUsIGZpZy5oZWlnaHQ9MTV9CnBsb3RfYmlfRlJtYXRjaChyc3RfcjJxLCByc3RfcTJyLCBuYW1lLkUxPSIiLCBuYW1lLkUyPSIiKQpgYGAKCmBgYHtyfQojIyBvdXRwdXQgcmVxdWlyZWQgbGlzdHMKb3V0X0ZSbWF0Y2ggPC0gb3V0X0ZSbWF0Y2hbLW5yb3cob3V0X0ZSbWF0Y2gpLF0gI3JlbW92ZSB0aGUgdW5hc3NpZ25lZCByb3cKCiMjIHVuYXNzaWduZWQKKFFfdW5hc3NpZ25lZCA8LSBuYW1lcyh3aGljaChjb2xTdW1zKG91dF9GUm1hdGNoKT09MCkpKQooUl91bmFzc2lnbmVkIDwtIG5hbWVzKHdoaWNoKHJvd1N1bXMob3V0X0ZSbWF0Y2gpPT0wKSkpCgojIyBvbmUtdG8tbWFueQooUV9tdWx0aSA8LSBuYW1lcyh3aGljaChjb2xTdW1zKG91dF9GUm1hdGNoKT4yKSkpCihSX211bHRpIDwtIG5hbWVzKHdoaWNoKHJvd1N1bXMob3V0X0ZSbWF0Y2gpPjIpKSkKCiMjIG9uZS10by1vbmUKbWF0Y2hlcyA8LSBnZXRfdmFsdWVzKG91dF9GUm1hdGNoKQptYXRjaGVzXzF0bzEgPC0gbWF0Y2hlcyAlPiUgZmlsdGVyKCEocm93bmFtZSAlaW4lIFJfbXVsdGkpKSAlPiUgc2VsZWN0KC12YWx1ZSkKbWF0Y2hlc18xdG8xCiMjIGNoZWNrIGlmIG9uZS10by1vbmUKIyB0YWJsZShtYXRjaGVzXzF0bzEkcm93bmFtZSkKIyB0YWJsZShtYXRjaGVzXzF0bzEkY29sbmFtZSkKYGBgCgpgYGB7cn0KIyMgd3JpdGUgb3V0cHV0CndyaXRlLmNzdihRX3VuYXNzaWduZWQsIGZpbGU9IlFfdW5hc3NpZ25lZC5jc3YiKQp3cml0ZS5jc3YoUl91bmFzc2lnbmVkLCBmaWxlPSJSX3VuYXNzaWduZWQuY3N2IikKd3JpdGUuY3N2KFFfbXVsdGksIGZpbGU9IlFfbXVsdGkuY3N2IikKd3JpdGUuY3N2KFJfbXVsdGksIGZpbGU9IlJfbXVsdGkuY3N2IikKd3JpdGUuY3N2KG1hdGNoZXNfMXRvMSwgZmlsZT0ibWF0Y2hlc18xdG8xLmNzdiIpCmBgYAoK