From 663d919427bb13e0e256218322fc078dc5ac30e2 Mon Sep 17 00:00:00 2001
From: Sukanya Denni <sukanya.denni@inrae.fr>
Date: Wed, 23 Nov 2022 13:59:33 +0100
Subject: [PATCH 1/2] added runtime log

---
 workflow/Snakefile            | 20 ++++++++-----
 workflow/rules/00_runtime.smk | 54 +++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 7 deletions(-)
 create mode 100644 workflow/rules/00_runtime.smk

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 10ff558..036b09a 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -26,6 +26,9 @@ include: "rules/06_sym_link_hap.smk"
 ## AUTOMATIC REPORT
 include: "rules/07_report.smk"
 
+## RUNTIME LOG (include after the rules that define rename_report / rename_report_trio)
+include: "rules/00_runtime.smk"
+
 ###### get filenames for workflow ######
 IDS=config["IDS"]
 bamIDS=check_bam(config["root"] + "/" + config["resdir"] + "/" + config["bamdir"], IDS)
@@ -45,8 +48,6 @@ longqc_output = expand(res_path + "/{Bid}/{run}/01_raw_data_QC/02_longQC", zip,
 fastqc_output =  expand(res_path + "/{Fid}/{run}/01_raw_data_QC/01_fastQC/{Fid}_fastqc.{ext}", zip,
     run=FID_RUN, Fid=fastqIDS, ext=["html", "zip"])
 
-
-
 ### REPORT
 REP_ID = for_report(IDS)
 RUNID_REG = run_id(REP_ID)
@@ -64,9 +65,6 @@ BUSCO_LIN_TRIO = busco_lin(REP_TRIO_ID)
 report_trio_output = expand(res_path + "/{runid}/report_trio_{id}.{lin}.html", zip,
     runid=RUNID_TRIO, id=REP_TRIO_ID, lin = BUSCO_LIN_TRIO)
 
-
-
-
 ### SYM LINK
 ## symbolic link to final assembly
 symb_link1 = expand(res_path + "/{runid}/{id}_hap{n}.fa", zip,
@@ -80,7 +78,7 @@ cut_eval1 = expand(res_path + "/{runid}/02_genome_assembly/02_after_purge_dups_a
 cut_eval2 = expand(res_path + "/{runid}/02_genome_assembly/02_after_purge_dups_assembly/00_assembly/{id}_hap{n}/cutoffs_graph_hap{n}.png", zip,
     runid=RUNID_TRIO, id=REP_TRIO_ID, n=["1", "2"])
 
-# BUSCO
+## BUSCO
 busco_reg = expand(res_path + "/{runid}/02_genome_assembly/01_raw_assembly/01_assembly_QC/busco/{id}_hap{n}/short_summary.specific.{lin}.{id}_hap{n}.txt", zip,
     runid=RUNID_REG, id=REP_ID, n=["1", "2"], lin = BUSCO_LIN)
 busco_purged_reg = expand(res_path + "/{runid}/02_genome_assembly/02_after_purge_dups_assembly/01_assembly_QC/busco/{id}_purged_hap{n}/short_summary.specific.{lin}.{id}_purged_hap{n}.txt", zip,
@@ -91,6 +89,12 @@ busco_trio = expand(res_path + "/{runid}/02_genome_assembly/01_raw_assembly/01_a
 busco_purged_trio = expand(res_path + "/{runid}/02_genome_assembly/02_after_purge_dups_assembly/01_assembly_QC/busco/{id}_purged_hap{n}/short_summary.specific.{lin}.{id}_purged_hap{n}.txt", zip,
     runid=RUNID_TRIO, id=REP_TRIO_ID, n=["1", "2"], lin = BUSCO_LIN_TRIO)
 
+## RUNTIME
+time = expand(res_path + "/{runid}/runtime.{id}.{lin}.txt", zip,
+    runid = RUNID_REG, id=REP_ID, lin=BUSCO_LIN)
+time_trio = expand(res_path + "/{runid}/runtime_trio.{id}.{lin}.txt", zip,
+    runid = RUNID_TRIO, id=REP_TRIO_ID, lin=BUSCO_LIN_TRIO)
+
 rule_all_input_list = [
     longqc_output,
     fastqc_output,
@@ -103,7 +107,9 @@ rule_all_input_list = [
     busco_reg,
     busco_purged_reg,
     busco_trio,
-    busco_purged_trio
+    busco_purged_trio,
+    time,
+    time_trio
 ]
 
 #### target files
diff --git a/workflow/rules/00_runtime.smk b/workflow/rules/00_runtime.smk
new file mode 100644
index 0000000..f53da68
--- /dev/null
+++ b/workflow/rules/00_runtime.smk
@@ -0,0 +1,54 @@
+# Write the current Unix timestamp to a temp() file that Snakemake cleans up.
+rule start_time:
+    output:
+        temp(res_path + "/{runid}/runtime.txt")
+    priority: 20
+    run:
+        import time
+        with open(output[0], "w") as out:
+            out.write(str(time.time()))
+
+# Log the wall-clock time elapsed since start_time ran. Only input[0] (the
+# start timestamp) is read; the report input just orders this job after it.
+rule elasped_time:
+    input:
+        rules.start_time.output,
+        rules.rename_report.output
+    output:
+        res_path + "/{runid}/runtime.{id}.{lin}.txt"
+    run:
+        import time
+        from datetime import timedelta
+
+        with open(input[0], "r") as inp:
+            start = float(inp.read())
+
+        # Whole seconds, so the value really is h:mm:ss as the label says.
+        td = timedelta(seconds=round(time.time() - start))
+
+        # No manual cleanup: input[0] is temp(), Snakemake removes it itself.
+        with open(output[0], "w") as out:
+            out.write("Runtime (hh:mm:ss): " + str(td) + "\n")
+
+
+# Trio-mode runtime log: wall-clock time elapsed since start_time ran. Only
+# input[0] is read; the trio report input just orders this job after it.
+rule elasped_time_trio:
+    input:
+        rules.start_time.output,
+        rules.rename_report_trio.output
+    output:
+        res_path + "/{runid}/runtime_trio.{id}.{lin}.txt"
+    run:
+        import time
+        from datetime import timedelta
+
+        with open(input[0], "r") as inp:
+            start = float(inp.read())
+
+        # Whole seconds, so the value really is h:mm:ss as the label says.
+        td = timedelta(seconds=round(time.time() - start))
+
+        # No manual cleanup: input[0] is temp(), Snakemake removes it itself.
+        with open(output[0], "w") as out:
+            out.write("Runtime (hh:mm:ss): " + str(td) + "\n")
\ No newline at end of file
-- 
GitLab


From 6ab65b8d0a39f7632f9121914219000fd0b35c57 Mon Sep 17 00:00:00 2001
From: Sukanya Denni <sukanya.denni@inrae.fr>
Date: Thu, 24 Nov 2022 09:02:30 +0100
Subject: [PATCH 2/2] updated config with template

---
 .config/masterconfig.yaml | 48 ++++++++-------------------------------
 1 file changed, 9 insertions(+), 39 deletions(-)

diff --git a/.config/masterconfig.yaml b/.config/masterconfig.yaml
index 324bec9..60969e5 100644
--- a/.config/masterconfig.yaml
+++ b/.config/masterconfig.yaml
@@ -1,44 +1,6 @@
 # absolute path to your desired output path
 root: /gpfs/scratch/sdenni/wf/GenomAsm4pg
 
-### TEMPLATE
-IDS: ["E742-Rouge_de_Roussillon_ccs-20", "E742-CH-240_1-20"]
-
-# IDS_1:
-  # path: path
-  # bam: true
-#   run: name
-#   ploidy: 2
-#   busco_lineage: eudi
-#   mode: default
-
-# IDS_2:
-  # path: path
-  # bam: false
-#   run: name
-#   ploidy: 2
-#   busco_lineage: eudi
-#   mode: trio
-#   p1: path
-#   p2: path
-
-### TRIAL
-E742-Rouge_de_Roussillon_ccs-20:
-  bam: False
-  run: test_runid
-  ploidy: 2
-  busco_lineage: eudicots_odb10
-  mode: default
-
-E742-CH-240_1-20:
-  bam: False
-  run: test_trio
-  ploidy: 2
-  busco_lineage: eudicots_odb10
-  mode: trio
-  p1: /scratch/sdenni/wf/GenomAsm4pg/workflow_results/00_raw_data/fastx_files/E742-CH-240_1.fasta.gz
-  p2: /scratch/sdenni/wf/GenomAsm4pg/workflow_results/00_raw_data/fastx_files/E742-CH-240_1-20.fasta.gz
-
 ####################### prejob - data preparation #######################
 # path to tar data
 data: /path
@@ -48,8 +10,16 @@ get_all_tar_filename: True
 tarIDS: "tar_filename"
 
 ####################### job - workflow #######################
-# list of fasta.gz names
+#### FILES YOU WANT TO RUN THE WORKFLOW ON
+IDS: ["sample_1"]
 
+### PARAMETERS
+## default assembly mode
+sample_1:
+  run: name
+  ploidy: 2
+  busco_lineage: eudicots_odb10
+  mode: default
 
 ####################### workflow output directories #######################
 # results directory
-- 
GitLab