-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Trims .bam from cram files; Adds crai; Trick for G-Actions disk limit #3
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,11 @@ | ||
docker.enabled = true | ||
|
||
params { | ||
|
||
input = 'testdata/test_input_cloudos.csv' | ||
reference = 's3://eu-west-1-example-data/nihr/testdata/Homo_sapiens_assembly38.fasta' | ||
report_dir = "/opt/bin" | ||
// delete the actual files to save space in Github Actions | ||
pre_script = "df -h; ls -lh" | ||
post_script = "df -h; ls -lh > metadata.cram.txt; rm *.cram; rm *.crai" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
echo = true | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,18 +4,18 @@ def helpMessage() { | |
log.info """ | ||
Usage: | ||
nextflow run main.nf --input input.csv --reference reference.fasta [Options] | ||
|
||
Inputs Options: | ||
--input Input csv file with bam paths | ||
--reference Reference fasta file | ||
|
||
Resource Options: | ||
--cpus Number of CPUs (int) | ||
(default: $params.cpus) | ||
(default: $params.cpus) | ||
--max_cpus Maximum number of CPUs (int) | ||
(default: $params.max_cpus) | ||
--memory Memory (memory unit) | ||
(default: $params.memory) | ||
(default: $params.memory) | ||
--max_memory Maximum memory (memory unit) | ||
(default: $params.max_memory) | ||
--time Time limit (time unit) | ||
|
@@ -81,13 +81,16 @@ process samtools_default_30 { | |
input: | ||
file(bam_file) from ch_input_0 | ||
each file(reference) from ch_reference_0 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here I am changing the output pattern so that it also captures the .crai index files (we need them if we want to use the CRAMs for variant calling). |
||
|
||
script: | ||
""" | ||
samtools view -T $reference -o ${bam_file}.cram -O cram,version=3.0 $bam_file | ||
${params.pre_script} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Adding handy pre- and post-script sections so that we are able to debug. We can use them, for example, to check whether we have enough disk space, to see whether we are wasting too much disk, to list the files, and much more. |
||
samtools view -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.0 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -99,13 +102,16 @@ process samtools_default_31 { | |
input: | ||
file(bam_file) from ch_input_1 | ||
each file(reference) from ch_reference_1 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.1 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.1 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -117,13 +123,16 @@ process samtools_normal_30 { | |
input: | ||
file(bam_file) from ch_input_2 | ||
each file(reference) from ch_reference_2 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.0 --output-fmt-option seqs_per_slice=10000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.0 --output-fmt-option seqs_per_slice=10000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -135,13 +144,16 @@ process samtools_normal_31 { | |
input: | ||
file(bam_file) from ch_input_3 | ||
each file(reference) from ch_reference_3 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.1 --output-fmt-option seqs_per_slice=10000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.1 --output-fmt-option seqs_per_slice=10000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -153,13 +165,16 @@ process samtools_fast_30 { | |
input: | ||
file(bam_file) from ch_input_4 | ||
each file(reference) from ch_reference_4 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.0,level=1 --output-fmt-option seqs_per_slice=1000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.0,level=1 --output-fmt-option seqs_per_slice=1000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -171,13 +186,16 @@ process samtools_fast_31 { | |
input: | ||
file(bam_file) from ch_input_5 | ||
each file(reference) from ch_reference_5 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.1,level=1 --output-fmt-option seqs_per_slice=1000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.1,level=1 --output-fmt-option seqs_per_slice=1000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -189,13 +207,16 @@ process samtools_small_30 { | |
input: | ||
file(bam_file) from ch_input_6 | ||
each file(reference) from ch_reference_6 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.0,level=6,use_bzip2=1 --output-fmt-option seqs_per_slice=25000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.0,level=6,use_bzip2=1 --output-fmt-option seqs_per_slice=25000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -207,13 +228,16 @@ process samtools_small_31 { | |
input: | ||
file(bam_file) from ch_input_7 | ||
each file(reference) from ch_reference_7 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.1,level=6,use_bzip2=1,use_fqz=1 --output-fmt-option seqs_per_slice=25000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.1,level=6,use_bzip2=1,use_fqz=1 --output-fmt-option seqs_per_slice=25000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -225,13 +249,16 @@ process samtools_archive_30 { | |
input: | ||
file(bam_file) from ch_input_8 | ||
each file(reference) from ch_reference_8 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.0,level=7,use_bzip2=1 --output-fmt-option seqs_per_slice=100000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.0,level=7,use_bzip2=1 --output-fmt-option seqs_per_slice=100000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -243,13 +270,16 @@ process samtools_archive_31 { | |
input: | ||
file(bam_file) from ch_input_9 | ||
each file(reference) from ch_reference_9 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.1,level=7,use_bzip2=1,use_fqz=1,use_arith=1 --output-fmt-option seqs_per_slice=100000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.1,level=7,use_bzip2=1,use_fqz=1,use_arith=1 --output-fmt-option seqs_per_slice=100000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -261,13 +291,16 @@ process samtools_archive_lzma_30 { | |
input: | ||
file(bam_file) from ch_input_10 | ||
each file(reference) from ch_reference_10 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.0,level=7,use_bzip2=1,use_lzma=1 --output-fmt-option seqs_per_slice=100000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.0,level=7,use_bzip2=1,use_lzma=1 --output-fmt-option seqs_per_slice=100000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} | ||
|
||
|
@@ -279,12 +312,15 @@ process samtools_archive_lzma_31 { | |
input: | ||
file(bam_file) from ch_input_11 | ||
each file(reference) from ch_reference_11 | ||
|
||
output: | ||
file "*.cram" | ||
file "*.cra*" | ||
|
||
script: | ||
""" | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file}.cram -O cram,version=3.1,level=7,use_bzip2=1,use_fqz=1,use_arith=1,use_lzma=1 --output-fmt-option seqs_per_slice=100000 $bam_file | ||
${params.pre_script} | ||
samtools view --threads $task.cpus -T $reference -o ${bam_file.simpleName}.cram -O cram,version=3.1,level=7,use_bzip2=1,use_fqz=1,use_arith=1,use_lzma=1 --output-fmt-option seqs_per_slice=100000 $bam_file | ||
samtools index ${bam_file.simpleName}.cram | ||
${params.post_script} | ||
""" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,2 @@ | ||
bam | ||
s3://eu-west-1-example-data/nihr/testdata/pb_normal.bam | ||
s3://eu-west-1-example-data/nihr/testdata/pb_tumor.bam | ||
https://eu-west-1-example-data.s3-eu-west-1.amazonaws.com/nihr/testdata/pb_normal.bam |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here I add printing of the results and head, because I want to see the CRAM sizes. We cannot inspect them otherwise, since we chose not to store artifacts from the CI.
Additionally, we need to delete the generated data because we hit the disk size limit and the CI fails as a result; see an example here:
https://github.com/lifebit-ai/bam2cram/runs/4233568183?check_suite_focus=true#step:4:201