Merge pull request #111 from TomHarrop/gpu_tools_test

Test wrappers for tool dev on NVIDIA
usegalaxy-au · Jun 3, 2024 · cc471f7 · cc471f7
2 parents bc08097 + 998d5d2
commit cc471f7
Show file tree

Hide file tree

Showing 6 changed files with 247 additions and 0 deletions.
diff --git a/tools/gpu_tools_test/README.md b/tools/gpu_tools_test/README.md
@@ -0,0 +1,60 @@
+# GPU-tools-test
+
+Some test wrappers to see if we can talk to the GPU from our tool development
+environment.
+
+## Setup
+
+The main thing is to add a Singularity (Apptainer) destination to your `job_conf` file and to pass `--nv` in `singularity_run_extra_arguments`.
+
+```xml
+<?xml version="1.0"?>
+<!-- Example job configuration: one local runner whose default destination runs tools in Singularity. -->
+<job_conf>
+    <plugins>
+        <plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner" workers="1"/>
+    </plugins>
+    <!-- The "singularity" destination is the default, so tools use it unless told otherwise. -->
+    <destinations default="singularity">
+        <destination id="local" runner="local"/>
+        <destination id="singularity" runner="local">
+            <param id="singularity_enabled">true</param>
+            <!-- Extra arguments for the container run; the nv flag is the one this README asks for so the tools can reach the GPU. -->
+            <param id="singularity_run_extra_arguments">--nv --writable-tmpfs</param>
+            <env id="LC_ALL">C</env>
+            <!-- Cache and temporary directories are placed under /tmp. -->
+            <env id="SINGULARITY_CACHEDIR">/tmp/singularity/cache</env>
+            <env id="SINGULARITY_TMPDIR">/tmp</env>
+        </destination>
+    </destinations>
+</job_conf>
+```
+
+Then run planemo like this: 
+
+```bash
+planemo test \
+    --job_config_file /path/to/job_conf.xml \
+    nvidia-container-cli-info.xml
+```
+
+It should see your GPU and return some information about it.
+
+```
+   NVRM version:   525.147.05
+   CUDA version:   12.0
+
+   Device Index:   0
+   Device Minor:   0
+   Model:          NVIDIA A100-PCIE-40GB
+   Brand:          Nvidia
+   GPU UUID:       GPU-3a29f0dc-490e-1e8d-abf3-a5cfa02adcde
+   Bus Location:   00000000:00:08.0
+   Architecture:   8.0
+```
+
+If that works, you can try `dorado-test-basecaller.xml`.
+
+## `nvidia-container-cli-info.xml`
+
+Pulls the NVIDIA `container-toolkit` image and runs `nvidia-container-cli info`.
+
+## `dorado-test-basecaller.xml`
+
+Pulls the ONT `dorado` container and basecalls a small .pod5 file.
diff --git a/tools/gpu_tools_test/dorado-basecaller-help.xml b/tools/gpu_tools_test/dorado-basecaller-help.xml
@@ -0,0 +1,81 @@
+<tool id="dorado-basecaller-help" name="dorado basecaller help" version="@CONTAINER_HASH@+galaxy0" python_template_version="3.5" profile="21.05">
+    <!-- @CONTAINER_HASH@ is substituted into both the tool version and the container tag. -->
+    <macros>
+        <token name="@CONTAINER_HASH@">c2d8bc91ca2d043fed84d06cca92aaeb62bcc1cd</token>
+    </macros>
+    <requirements>
+        <container type="docker">nanoporetech/dorado:sha@CONTAINER_HASH@</container>
+    </requirements>
+    <!--
+        The help text (stdout and stderr) is written straight to out.txt rather
+        than piped through tee: a pipeline reports the exit status of its last
+        command, so with detect_errors="exit_code" a failing dorado would be
+        hidden behind a successful tee. The captured text is then echoed to
+        stdout and the job exits with the status dorado returned.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+    dorado basecaller --help > out.txt 2>&1;
+    status=\$?;
+    cat out.txt;
+    exit \$status
+    ]]></command>
+    <!-- This tool takes no inputs. -->
+    <inputs/>
+    <outputs>
+        <!-- Collects out.txt from the working directory; the label names the command that is actually run. -->
+        <data format="txt" name="out_txt" label="Output from dorado basecaller --help" from_work_dir="out.txt" />
+    </outputs>
+    <tests>
+        <!-- No parameters are set: the test only checks that recognisable help text was captured. -->
+        <test expect_num_outputs="1">
+            <output name="out_txt">
+                <assert_contents>
+                    <has_text text="Positional arguments"/>
+                    <has_text text="model selection"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Just runs ``dorado basecaller --help``. If you have planemo configured
+properly you will see output like this:
+
+.. code:: bash
+
+   [2024-05-29 12:43:38.990] [info] Running: "basecaller" "--help"
+   Usage: dorado [-h] [--device VAR] [--read-ids VAR] [--resume-from VAR] [--max-reads VAR] [--min-qscore VAR] [--batchsize VAR] [--chunksize VAR] [--overlap VAR] [--recursive] [--modified-bases VAR...] [--modified-bases-models VAR] [--modified-bases-threshold VAR] [--emit-fastq] [--emit-sam] [--emit-moves] [--reference VAR] [--kit-name VAR] [--barcode-both-ends] [--no-trim] [--trim VAR] [--sample-sheet VAR] [--barcode-arrangement VAR] [--barcode-sequences VAR] [--primer-sequences VAR] [--estimate-poly-a] [--poly-a-config VAR] [-k VAR] [-w VAR] [-I VAR] [--secondary VAR] [-N VAR] [-Y] [--bandwidth VAR] [--junc-bed VAR] [--mm2-preset VAR] model data
+
+   Positional arguments:
+     model                         model selection {fast,hac,sup}@v{version} for automatic model selection including modbases, or path to existing model directory 
+     data                          the data directory or file (POD5/FAST5 format). 
+
+   Optional arguments:
+     -h, --help                    shows help message and exits 
+     -v, --verbose             
+     -x, --device                  device string in format "cuda:0,...,N", "cuda:all", "metal", "cpu" etc.. [default: "cuda:all"]
+     -l, --read-ids                A file with a newline-delimited list of reads to basecall. If not provided, all reads will be basecalled [default: ""]
+     --resume-from                 Resume basecalling from the given HTS file. Fully written read records are not processed again. [default: ""]
+     -n, --max-reads               [default: 0]
+     --min-qscore                  Discard reads with mean Q-score below this threshold. [default: 0]
+     -b, --batchsize               if 0 an optimal batchsize will be selected. batchsizes are rounded to the closest multiple of 64. [default: 0]
+     -c, --chunksize               [default: 10000]
+     -o, --overlap                 [default: 500]
+     -r, --recursive               Recursively scan through directories to load FAST5 and POD5 files 
+     --modified-bases              [nargs: 1 or more] 
+     --modified-bases-models       a comma separated list of modified base models [default: ""]
+     --modified-bases-threshold    the minimum predicted methylation probability for a modified base to be emitted in an all-context model, [0, 1] [default: 0.05]
+     --emit-fastq                  Output in fastq format. 
+     --emit-sam                    Output in SAM format. 
+     --emit-moves              
+     --reference                   Path to reference for alignment. [default: ""]
+     --kit-name                    Enable barcoding with the provided kit name. Choose from: EXP-NBD103 EXP-NBD104 EXP-NBD114 EXP-NBD196 EXP-PBC001 EXP-PBC096 SQK-16S024 SQK-16S114-24 SQK-LWB001 SQK-MLK111-96-XL SQK-MLK114-96-XL SQK-NBD111-24 SQK-NBD111-96 SQK-NBD114-24 SQK-NBD114-96 SQK-PBK004 SQK-PCB109 SQK-PCB110 SQK-PCB111-24 SQK-PCB114-24 SQK-RAB201 SQK-RAB204 SQK-RBK001 SQK-RBK004 SQK-RBK110-96 SQK-RBK111-24 SQK-RBK111-96 SQK-RBK114-24 SQK-RBK114-96 SQK-RLB001 SQK-RPB004 SQK-RPB114-24 TWIST-16-UDI TWIST-96A-UDI VSK-PTC001 VSK-VMK001 VSK-VMK004 VSK-VPS001. [default: ""]
+     --barcode-both-ends           Require both ends of a read to be barcoded for a double ended barcode. 
+     --no-trim                     Skip trimming of barcodes, adapters, and primers. If option is not chosen, trimming of all three is enabled. 
+     --trim                        Specify what to trim. Options are 'none', 'all', 'adapters', and 'primers'. Default behaviour is to trim all detected adapters, primers, or barcodes. Choose 'adapters' to just trim adapters. The 'primers' choice will trim adapters and primers, but not barcodes. The 'none' choice is equivelent to using --no-trim. Note that this only applies to DNA. RNA adapters are always trimmed. [default: ""]
+     --sample-sheet                Path to the sample sheet to use. [default: ""]
+     --barcode-arrangement         Path to file with custom barcode arrangement. 
+     --barcode-sequences           Path to file with custom barcode sequences. 
+     --primer-sequences            Path to file with custom primer sequences. [default: <not representable>]
+     --estimate-poly-a             Estimate poly-A/T tail lengths (beta feature). Primarily meant for cDNA and dRNA use cases. 
+     --poly-a-config               Configuration file for PolyA estimation to change default behaviours [default: ""]
+     -k                            minimap2 k-mer size for alignment (maximum 28). 
+     -w                            minimap2 minimizer window size for alignment. 
+     -I                            minimap2 index batch size. 
+     --secondary                   minimap2 outputs secondary alignments 
+     -N                            minimap2 retains at most INT secondary alignments 
+     -Y                            minimap2 uses soft clipping for supplementary alignments 
+     --bandwidth                   minimap2 chaining/alignment bandwidth and optionally long-join bandwidth specified as NUM,[NUM] 
+     --junc-bed                    Optional file with gene annotations in the BED12 format (aka 12-column BED), or intron positions in 5-column BED. With this option, minimap2 prefers splicing in annotations. 
+     --mm2-preset                  minimap2 preset for indexing and mapping. Alias for the -x option in minimap2. [default: "lr:hq"]
+
+    ]]></help>
+</tool>
diff --git a/tools/gpu_tools_test/dorado-test-basecaller.xml b/tools/gpu_tools_test/dorado-test-basecaller.xml
@@ -0,0 +1,47 @@
+<tool id="dorado_test_basecaller" name="dorado test basecaller" version="@CONTAINER_HASH@+galaxy0" python_template_version="3.5" profile="21.05">
+    <!-- @CONTAINER_HASH@ is substituted into both the tool version and the container tag. -->
+    <macros>
+        <token name="@CONTAINER_HASH@">c2d8bc91ca2d043fed84d06cca92aaeb62bcc1cd</token>
+    </macros>
+    <requirements>
+        <container type="docker">nanoporetech/dorado:sha@CONTAINER_HASH@</container>
+    </requirements>
+    <!--
+        Links the input dataset to reads.pod5, basecalls at most 10 reads and
+        redirects the SAM records to output.sam.
+        NOTE(review): the quoted model path below contains "[email protected]", which
+        looks like e-mail obfuscation residue rather than a real model directory.
+        Restore the original model name (it cannot be recovered from this text)
+        before relying on this wrapper.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+ln -s '$pod5_file' ./reads.pod5 &&
+
+dorado basecaller
+--emit-sam
+--chunksize 1000
+--max-reads 10
+"/models/[email protected]"
+reads.pod5 
+> output.sam
+    ]]></command>
+    <inputs>
+        <!-- NOTE(review): the label now matches the pod5 data this tool consumes; format is left as "fast5", so confirm which datatype the target Galaxy provides for pod5 files. -->
+        <param type="data" name="pod5_file" label="Raw pod5 file" format="fast5"/>
+    </inputs>
+    <outputs>
+        <!-- Collects output.sam from the working directory; the label names the command that is actually run. -->
+        <data format="sam" name="out_sam" label="Output from dorado basecaller" from_work_dir="output.sam"/>
+    </outputs>
+    <tests>
+        <!-- Basecalls the bundled ten-read pod5 file, then checks for one known read id and an approximate file size. -->
+        <test expect_num_outputs="1">
+            <param name="pod5_file" value="FAL00375_473bf0ed_0.ten_reads.pod5"/>
+            <output name="out_sam">
+                <assert_contents>
+                    <has_text text="00777c4b-cbd6-4a79-8647-bbe5f5f3f3bf"/>
+                    <has_size value="132024" delta="10000" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Tests basecalling a small pod5 file using dorado.
+
+If it works you will get a SAM file with some reads in it.
+
+::
+
+   00777c4b-cbd6-4a79-8647-bbe5f5f3f3bf    4   *   0   0   *   *   0   0   TGCTTCATTCGGTCCACGTATTGCTGGTTAAAATTTCTGAAAATATTATGATTATTAATCATATTCTGTGAATGAGATTTTAGCTATTATGTTGTTGAATATAATGTTGGCTATAATGTTGATTTGAATAATCATTTTAAACAATTTATAACAATATATTTTATTCTGACACTTTGAAATAAATTCATATCAAAACTTCTATAATAACTTCCGTGATATTATAAATTTTCAAAATATCTGTTCTTCTCTCTCGTAATTCCTTCTTTGCCGCATATTTATTGTTTCAATAAATTTAGAAATTAAATTAATTTCTTAATTTTTACTAATGCTGATTTAAAACATTCAACCATCTCTTTAATACGTAGCAGAACGTGAATAGTCTAATCTAATCTGACTATCAGTACAGATTACTGTGGATATTAATTGACATTTGCAGCAAAGGAAATGTTTAATTAGTATCAGACTCATTTTGATATTTTTAAATTATTGTGAAGGTAATATGTAAAAAAGGAAAAAATTCTTTTGTGTGATATAATCTAATGTAGTATTGATAAAAAAGTGAGATTTTTAAAGATTTTTATGTAATAAGAATTCTTGCAAAAAAAAGATAAACAATATTTTTATATTAATAAAATAAAATTATTAATTAATATATTTTATATATTATAATATATGCAATATATCATATATTTTTTTAAAATAAATTAGATTATTTTATATTTTTTTTAACAGAAAATGTAATAAAATGAAATATATATTTTTATTGTTTTTGTCTTTTAATTAATTTTATTAGATTTTAAAAATTTTTAATAAAAAATTTAGAAATTTTTCAAAAAATTGCTTTCCTATATATTGGTATATTATATAACTTTAAAAACATTTCTAAATAAAATATTTATATTTGATATTTTATTTTAGTTTTACTTGGTGTGTTTTTAGGTCTTAAAGGCTTCAGTATCATATGTATCAATATTATATGCAAATGTGTATACGAAAATCATTATATTCTAGAAATGAATATGTATGAATATGTATTGAAAGCAGCCGTAACT    
%'**21,+,*&#$#(),,.'&++)**01;>955??7<())>=AD667888BACA<;<::;58577.&&$&(*)((1*26><667>::>77525,*+67F;87-*,+,-4-/0/536<999;0//7;8=<::=<333<820)(((===<;:?88:::9456;;9992,,.1.5783((8:7489;==,++(*(*,*)))&&$%2'('&&),)*)-16346,2<<98::@@@3=B?=<>:46:>>8<:97==5=333577702>@<AD?73248;667?2224<BBA;>B@55=A9;3935:7><>A?>>BDH@8G4333211,&%%-966689:7.--121056.%%%%%'(((),0.-514,+,-/1(5660/0/=,2226&75<9*6+)'""$').36*22;<:?@>>::60)+-36../.001221+)*730////10;&'90;98;5($$%(''';=/.1/03326A>53368;>=8(&&&'&%%$'&),**&3:@<230134:5/761;CCABBGA<BFA877;9<;:<<=B<323:22-+.6><>?ADAESS33++38779ASAS@AA<8AB>=;;>?9770'+(0,;887311;=BLSS=C:9<AA:889///<;:9;;;;=@@:<:55567:>=::;><FA@=>;</0/B=:7.-&'(24:;<43&'%)*/89<;<743./69=???<<4=ICE;=@@::9:9;:;C>>=:88<>=SCC>12/--4=ALA443322>@DI88BC@<;;6;<>BSK65678=;S;65479B>??B@><??EA4235697;=81-3/)&&&&&&'+)')+*+244.))05@DEC=B5DAA-,11000656,.54+,((()-.491219<<=>=46>>;?B=C732322.45;J:DAB==>?<>?+,+;=88100A==:@D@434::1-'('$#&%$$&',((()53=??@<<>=;=?<+++9;;9<:9767>@=>?;<<98.-*)*-.1-*)(&&$$$%,4.97***'%$&*1&0&-;<=118:;85:;==8;82379;;;?=74<41002-/,**)    qs:f:12.5814    du:f:2.5165 ns:i:10066  ts:i:130    mx:i:3  ch:i:300    st:Z:2020-08-14T01:23:39.733+00:00  rn:i:314    fn:Z:reads.pod5 sm:f:80.9692    sd:f:15.1339sv:Z:med_mad    dx:i:0  RG:Z:[email protected]
+   ...
+
+    ]]></help>
+</tool>
diff --git a/tools/gpu_tools_test/nvidia-container-cli-info.xml b/tools/gpu_tools_test/nvidia-container-cli-info.xml
@@ -0,0 +1,46 @@
+<tool id="nvidia-container-cli-info" name="nvidia-container-cli info" version="@TOOL_VERSION@+galaxy0" python_template_version="3.5" profile="21.05">
+    <!-- @TOOL_VERSION@ is substituted into both the tool version and the container tag. -->
+    <macros>
+        <token name="@TOOL_VERSION@">1.15.0</token>
+    </macros>
+    <requirements>
+        <!--
+            NOTE(review): the tag suffix was unreadable in the source, where
+            e-mail obfuscation had replaced it with "[email protected]". The
+            "-ubuntu20.04" suffix is a reconstruction; confirm that this tag
+            exists in the registry before merging.
+        -->
+        <container type="docker">nvcr.io/nvidia/k8s/container-toolkit:v@TOOL_VERSION@-ubuntu20.04</container>
+    </requirements>
+    <!--
+        The report (stdout and stderr) is written straight to out.txt rather
+        than piped through tee: a pipeline reports the exit status of its last
+        command, so with detect_errors="exit_code" a failing
+        nvidia-container-cli would be hidden behind a successful tee. The
+        captured text is then echoed to stdout and the job exits with the
+        status nvidia-container-cli returned.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+    nvidia-container-cli info > out.txt 2>&1;
+    status=\$?;
+    cat out.txt;
+    exit \$status
+    ]]></command>
+    <!-- This tool takes no inputs. -->
+    <inputs/>
+    <outputs>
+        <!-- Collects out.txt from the working directory. -->
+        <data format="txt" name="out_txt" label="Output from nvidia-container-cli info" from_work_dir="out.txt" />
+    </outputs>
+    <tests>
+        <!-- No parameters are set: the test passes when the report mentions both the driver and the CUDA version. -->
+        <test expect_num_outputs="1">
+            <output name="out_txt">
+                <assert_contents>
+                    <has_text text="NVRM version"/>
+                    <has_text text="CUDA version"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Just runs ``nvidia-container-cli info``. If you have planemo configured
+properly you will see output like this:
+
+.. code:: bash
+
+   NVRM version:   525.147.05
+   CUDA version:   12.0
+
+   Device Index:   0
+   Device Minor:   0
+   Model:          NVIDIA A100-PCIE-40GB
+   Brand:          Nvidia
+   GPU UUID:       GPU-3a29f0dc-490e-1e8d-abf3-a5cfa02adcde
+   Bus Location:   00000000:00:08.0
+   Architecture:   8.0
+
+
+    ]]></help>
+</tool>
diff --git a/tools/gpu_tools_test/test-data/FAL00375_473bf0ed_0.ten_reads.pod5 b/tools/gpu_tools_test/test-data/FAL00375_473bf0ed_0.ten_reads.pod5
diff --git a/tools/gpu_tools_test/test-data/output.sam b/tools/gpu_tools_test/test-data/output.sam