From 663d919427bb13e0e256218322fc078dc5ac30e2 Mon Sep 17 00:00:00 2001
From: Sukanya Denni <sukanya.denni@inrae.fr>
Date: Wed, 23 Nov 2022 13:59:33 +0100
Subject: [PATCH 1/2] added runtime log

---
Review notes (free-form text placed after the three-dash line; it is not
part of the commit message or of the diff payload):

* NOTE(review): line breaks, blank context lines and indentation in this
  series were restored from a whitespace-collapsed copy. Blank context
  lines were positioned so that every hunk matches its "@@" line counts;
  indentation widths are conventional guesses. Verify against the original
  commits before applying.
* workflow/Snakefile: the new module-level variable "time" (a list of
  target paths) has the same name as the stdlib module that the run blocks
  in 00_runtime.smk import. A distinct name would be less confusing.
* 00_runtime.smk: the rule names "elasped_time" and "elasped_time_trio"
  look like a misspelling of "elapsed" - confirm before anything starts
  referring to them.
* 00_runtime.smk: the run blocks of the two elapsed-time rules are
  identical; only their input and output declarations differ.
* 00_runtime.smk: "# os.remove(input[0])" is commented-out code, and the
  start-time file is already declared with temp().
* 00_runtime.smk: the value written after "Runtime (hh:mm:ss): " is
  str(timedelta), which also prints fractional seconds and, past 24 hours,
  a leading day count.
* 00_runtime.smk: the reported duration runs from the moment rule
  start_time executes to the moment the elapsed-time rule executes.
* 00_runtime.smk is added without a newline at end of file.

 workflow/Snakefile            | 20 ++++++++-----
 workflow/rules/00_runtime.smk | 54 +++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 7 deletions(-)
 create mode 100644 workflow/rules/00_runtime.smk

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 10ff558..036b09a 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -26,6 +26,9 @@ include: "rules/06_sym_link_hap.smk"
 ## AUTOMATIC REPORT
 include: "rules/07_report.smk"
 
+## runtime
+include: "rules/00_runtime.smk"
+
 ###### get filenames for workflow ######
 IDS=config["IDS"]
 bamIDS=check_bam(config["root"] + "/" + config["resdir"] + "/" + config["bamdir"], IDS)
@@ -45,8 +48,6 @@ longqc_output = expand(res_path + "/{Bid}/{run}/01_raw_data_QC/02_longQC", zip,
 fastqc_output = expand(res_path + "/{Fid}/{run}/01_raw_data_QC/01_fastQC/{Fid}_fastqc.{ext}", zip,
     run=FID_RUN, Fid=fastqIDS, ext=["html", "zip"])
 
-
-
 ### REPORT
 REP_ID = for_report(IDS)
 RUNID_REG = run_id(REP_ID)
@@ -64,9 +65,6 @@ BUSCO_LIN_TRIO = busco_lin(REP_TRIO_ID)
 report_trio_output = expand(res_path + "/{runid}/report_trio_{id}.{lin}.html", zip,
     runid=RUNID_TRIO, id=REP_TRIO_ID, lin = BUSCO_LIN_TRIO)
 
-
-
-
 ### SYM LINK
 ## symbolic link to final assembly
 symb_link1 = expand(res_path + "/{runid}/{id}_hap{n}.fa", zip,
@@ -80,7 +78,7 @@ cut_eval1 = expand(res_path + "/{runid}/02_genome_assembly/02_after_purge_dups_a
 cut_eval2 = expand(res_path + "/{runid}/02_genome_assembly/02_after_purge_dups_assembly/00_assembly/{id}_hap{n}/cutoffs_graph_hap{n}.png", zip,
     runid=RUNID_TRIO, id=REP_TRIO_ID, n=["1", "2"])
 
-# BUSCO
+## BUSCO
 busco_reg = expand(res_path + "/{runid}/02_genome_assembly/01_raw_assembly/01_assembly_QC/busco/{id}_hap{n}/short_summary.specific.{lin}.{id}_hap{n}.txt", zip,
     runid=RUNID_REG, id=REP_ID, n=["1", "2"], lin = BUSCO_LIN)
 busco_purged_reg = expand(res_path + "/{runid}/02_genome_assembly/02_after_purge_dups_assembly/01_assembly_QC/busco/{id}_purged_hap{n}/short_summary.specific.{lin}.{id}_purged_hap{n}.txt", zip,
@@ -91,6 +89,12 @@ busco_trio = expand(res_path + "/{runid}/02_genome_assembly/01_raw_assembly/01_a
 busco_purged_trio = expand(res_path + "/{runid}/02_genome_assembly/02_after_purge_dups_assembly/01_assembly_QC/busco/{id}_purged_hap{n}/short_summary.specific.{lin}.{id}_purged_hap{n}.txt", zip,
     runid=RUNID_TRIO, id=REP_TRIO_ID, n=["1", "2"], lin = BUSCO_LIN_TRIO)
 
+## RUNTIME
+time = expand(res_path + "/{runid}/runtime.{id}.{lin}.txt", zip,
+    runid = RUNID_REG, id=REP_ID, lin=BUSCO_LIN)
+time_trio = expand(res_path + "/{runid}/runtime_trio.{id}.{lin}.txt", zip,
+    runid = RUNID_TRIO, id=REP_TRIO_ID, lin=BUSCO_LIN_TRIO)
+
 rule_all_input_list = [
     longqc_output,
     fastqc_output,
@@ -103,7 +107,9 @@ rule_all_input_list = [
     busco_reg,
     busco_purged_reg,
     busco_trio,
-    busco_purged_trio
+    busco_purged_trio,
+    time,
+    time_trio
 ]
 
 #### target files
diff --git a/workflow/rules/00_runtime.smk b/workflow/rules/00_runtime.smk
new file mode 100644
index 0000000..f53da68
--- /dev/null
+++ b/workflow/rules/00_runtime.smk
@@ -0,0 +1,54 @@
+rule start_time:
+    output:
+        temp(res_path + "/{runid}/runtime.txt")
+    priority: 20
+    run:
+        import time
+        start = time.time()
+        with open(output[0], "w") as out:
+            out.write(str(start))
+
+rule elasped_time:
+    input:
+        rules.start_time.output,
+        rules.rename_report.output
+    output:
+        res_path + "/{runid}/runtime.{id}.{lin}.txt"
+    run:
+        import time
+        from datetime import timedelta
+
+        with open(input[0], "r") as inp:
+            start = inp.read()
+
+        end = time.time()
+        elapsed_time = end - float(start)
+        td = timedelta(seconds=elapsed_time)
+
+        with open(output[0], "w") as out:
+            out.write("Runtime (hh:mm:ss): " + str(td))
+
+        # os.remove(input[0])
+
+
+rule elasped_time_trio:
+    input:
+        rules.start_time.output,
+        rules.rename_report_trio.output
+    output:
+        res_path + "/{runid}/runtime_trio.{id}.{lin}.txt"
+    run:
+        import time
+        from datetime import timedelta
+
+        with open(input[0], "r") as inp:
+            start = inp.read()
+
+        end = time.time()
+        elapsed_time = end - float(start)
+        td = timedelta(seconds=elapsed_time)
+
+        with open(output[0], "w") as out:
+            out.write("Runtime (hh:mm:ss): " + str(td))
+
+        # os.remove(input[0])
\ No newline at end of file
-- 
GitLab

From 6ab65b8d0a39f7632f9121914219000fd0b35c57 Mon Sep 17 00:00:00 2001
From: Sukanya Denni <sukanya.denni@inrae.fr>
Date: Thu, 24 Nov 2022 09:02:30 +0100
Subject: [PATCH 2/2] updated config with template

---
Review notes (free-form text placed after the three-dash line; it is not
part of the commit message or of the diff payload):

* NOTE(review): line breaks, blank context lines and indentation were
  restored from a whitespace-collapsed copy (see the note on patch 1/2).
  The nesting depth of the commented template keys and of the per-sample
  keys is a guess - verify against the original commit.
* The retained "root:" value is a user-specific absolute path, while the
  rest of the file is turned into a generic template - confirm whether it
  should become a placeholder as well.
* The removed examples were the only place showing the "bam" key and the
  trio-mode keys ("mode: trio", "p1", "p2"); the new template documents
  the default assembly mode only. Presumably trio mode still needs those
  keys - TODO confirm and document them elsewhere if so.
* "IDS" moves from the top of the file to the "job - workflow" section,
  replacing the comment "# list of fasta.gz names".

 .config/masterconfig.yaml | 48 ++++++++-------------------------------
 1 file changed, 9 insertions(+), 39 deletions(-)

diff --git a/.config/masterconfig.yaml b/.config/masterconfig.yaml
index 324bec9..60969e5 100644
--- a/.config/masterconfig.yaml
+++ b/.config/masterconfig.yaml
@@ -1,44 +1,6 @@
 # absolute path to your desired output path
 root: /gpfs/scratch/sdenni/wf/GenomAsm4pg
 
-### TEMPLATE
-IDS: ["E742-Rouge_de_Roussillon_ccs-20", "E742-CH-240_1-20"]
-
-# IDS_1:
-  # path: path
-  # bam: true
-# run: name
-# ploidy: 2
-# busco_lineage: eudi
-# mode: default
-
-# IDS_2:
-  # path: path
-  # bam: false
-# run: name
-# ploidy: 2
-# busco_lineage: eudi
-# mode: trio
-# p1: path
-# p2: path
-
-### TRIAL
-E742-Rouge_de_Roussillon_ccs-20:
-  bam: False
-  run: test_runid
-  ploidy: 2
-  busco_lineage: eudicots_odb10
-  mode: default
-
-E742-CH-240_1-20:
-  bam: False
-  run: test_trio
-  ploidy: 2
-  busco_lineage: eudicots_odb10
-  mode: trio
-  p1: /scratch/sdenni/wf/GenomAsm4pg/workflow_results/00_raw_data/fastx_files/E742-CH-240_1.fasta.gz
-  p2: /scratch/sdenni/wf/GenomAsm4pg/workflow_results/00_raw_data/fastx_files/E742-CH-240_1-20.fasta.gz
-
 ####################### prejob - data preparation #######################
 # path to tar data
 data: /path
@@ -48,8 +10,16 @@ get_all_tar_filename: True
 tarIDS: "tar_filename"
 
 ####################### job - workflow #######################
-# list of fasta.gz names
+#### FILES YOU WANT TO RUN THE WORKFLOW ON
+IDS: ["sample_1"]
 
+### PARAMETERS
+## default assembly mode
+sample_1:
+  run: name
+  ploidy: 2
+  busco_lineage: eudicots_odb10
+  mode: default
 
 ####################### workflow output directories #######################
 # results directory
-- 
GitLab