Skip to content
This repository has been archived by the owner on Aug 20, 2024. It is now read-only.

Add json samplesheet support #125

Merged
merged 5 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/fromSamplesheetMeta/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/paramsHelp/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/paramsSummaryLog/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/paramsSummaryMap/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/validateParameters/log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ ERROR ~ ERROR: Validation of pipeline parameters failed!
-- Check '.nextflow.log' file for details
The following invalid input values have been detected:

* --input: string [samplesheet.txt] does not match pattern ^\S+\.(csv|tsv|yaml)$ (samplesheet.txt)
* --input: string [samplesheet.txt] does not match pattern ^\S+\.(csv|tsv|yaml|json)$ (samplesheet.txt)
* --input: the file 'samplesheet.txt' does not exist (samplesheet.txt)
2 changes: 1 addition & 1 deletion examples/validateParameters/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ class SamplesheetConverter {
return ["empty": it] as Map
}
}
else if(fileType == "json"){
samplesheetList = new JsonSlurper().parseText(samplesheetFile.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else {
Path fileSamplesheet = Nextflow.file(samplesheetFile) as Path
samplesheetList = fileSamplesheet.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
Expand Down Expand Up @@ -215,7 +223,7 @@ class SamplesheetConverter {
Path samplesheetFile
) {
def String extension = samplesheetFile.getExtension()
if (extension in ["csv", "tsv", "yml", "yaml"]) {
if (extension in ["csv", "tsv", "yml", "yaml", "json"]) {
return extension == "yml" ? "yaml" : extension
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,14 @@ class SchemaValidator extends PluginExtensionPoint {
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(samplesheetFile.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else {
fileContent = samplesheetFile.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
}
Expand Down Expand Up @@ -437,6 +445,14 @@ class SchemaValidator extends PluginExtensionPoint {
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(file_path.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else {
fileContent = file_path.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
schema_dest.delete()
}

def 'should validate a schema csv' () {
def 'should validate a schema - CSV' () {
given:
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
Expand All @@ -148,7 +148,7 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
!stdout
}

def 'should validate a schema tsv' () {
def 'should validate a schema - TSV' () {
given:
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
Expand All @@ -171,7 +171,7 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
!stdout
}

def 'should validate a schema yaml' () {
def 'should validate a schema - YAML' () {
given:
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
Expand All @@ -194,7 +194,88 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
!stdout
}

def 'should validate a schema yaml with failures' () {
// Feature: running validateParameters with params.input pointing at a JSON file
// ('src/testResources/correct.json') should neither throw nor produce validation output.
def 'should validate a schema - JSON' () {
given:
// Absolute path of the parameters schema; interpolated into the script text via '$schema'.
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
// Script under test. The literal's content is runtime text, so it is left untouched.
def SCRIPT_TEXT = """
params.input = 'src/testResources/correct.json'
params.outdir = 'src/testResources/testDir'
include { validateParameters } from 'plugin/nf-validation'

validateParameters(parameters_schema: '$schema')
"""

when:
// Evaluate the script (dsl_eval and capture are defined outside this view), then keep only
// the captured lines that contain a SchemaValidator WARN or start with '* --'.
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
// Passes when nothing was thrown and the filtered list is empty (no warnings, no '* --' lines).
noExceptionThrown()
!stdout
}

// Feature: running validateParameters against 'src/testResources/wrong.csv' should fail with
// a SchemaValidationException whose message lists the expected per-entry errors.
def 'should validate a schema with failures - CSV' () {
given:
// Absolute path of the parameters schema; interpolated into the script text via '$schema'.
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString()
// Script under test. The literal's content is runtime text, so it is left untouched.
def SCRIPT_TEXT = """
params.input = 'src/testResources/wrong.csv'
params.outdir = 'src/testResources/testDir'
include { validateParameters } from 'plugin/nf-validation'

validateParameters(parameters_schema: '$schema')
"""

when:
// Evaluate the script (dsl_eval and capture are defined outside this view), then keep only
// the captured lines that contain a SchemaValidator WARN or start with '* --'.
// NOTE(review): if dsl_eval throws, the assignment below is presumably never reached, so the
// final `!stdout` condition would hold trivially — confirm.
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
// The exception message is compared line by line; index 1 is not checked
// (presumably a blank separator line — TODO confirm).
def error = thrown(SchemaValidationException)
def errorMessages = error.message.readLines()
errorMessages[0] == "\033[0;31mThe following errors have been detected:"
errorMessages[2] == "* -- Entry 1: Missing required value: sample"
errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
!stdout
}

// Feature: running validateParameters against 'src/testResources/wrong.tsv' should fail with
// a SchemaValidationException whose message lists the expected per-entry errors.
def 'should validate a schema with failures - TSV' () {
given:
// Absolute path of the parameters schema; interpolated into the script text via '$schema'.
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString()
// Script under test. The literal's content is runtime text, so it is left untouched.
def SCRIPT_TEXT = """
params.input = 'src/testResources/wrong.tsv'
params.outdir = 'src/testResources/testDir'
include { validateParameters } from 'plugin/nf-validation'

validateParameters(parameters_schema: '$schema')
"""

when:
// Evaluate the script (dsl_eval and capture are defined outside this view), then keep only
// the captured lines that contain a SchemaValidator WARN or start with '* --'.
// NOTE(review): if dsl_eval throws, the assignment below is presumably never reached, so the
// final `!stdout` condition would hold trivially — confirm.
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
// The exception message is compared line by line; index 1 is not checked
// (presumably a blank separator line — TODO confirm).
def error = thrown(SchemaValidationException)
def errorMessages = error.message.readLines()
errorMessages[0] == "\033[0;31mThe following errors have been detected:"
errorMessages[2] == "* -- Entry 1: Missing required value: sample"
errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
!stdout
}

def 'should validate a schema with failures - YAML' () {
given:
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
Expand Down Expand Up @@ -223,6 +304,35 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
!stdout
}

// Feature: running validateParameters against 'src/testResources/wrong.json' should fail with
// a SchemaValidationException carrying the same per-entry messages asserted for the CSV and
// TSV variants of this test.
def 'should validate a schema with failures - JSON' () {
given:
// Absolute path of the parameters schema; interpolated into the script text via '$schema'.
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString()
// Script under test. The literal's content is runtime text, so it is left untouched.
def SCRIPT_TEXT = """
params.input = 'src/testResources/wrong.json'
params.outdir = 'src/testResources/testDir'
include { validateParameters } from 'plugin/nf-validation'

validateParameters(parameters_schema: '$schema')
"""

when:
// Evaluate the script (dsl_eval and capture are defined outside this view), then keep only
// the captured lines that contain a SchemaValidator WARN or start with '* --'.
// NOTE(review): if dsl_eval throws, the assignment below is presumably never reached, so the
// final `!stdout` condition would hold trivially — confirm.
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
// The exception message is compared line by line; index 1 is not checked
// (presumably a blank separator line — TODO confirm).
def error = thrown(SchemaValidationException)
def errorMessages = error.message.readLines()
errorMessages[0] == "\033[0;31mThe following errors have been detected:"
errorMessages[2] == "* -- Entry 1: Missing required value: sample"
errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
!stdout
}

def 'should find unexpected params' () {
given:
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,33 @@ class SamplesheetConverterTest extends Dsl2Spec{
stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String)
}

// Feature: Channel.fromSamplesheet should read 'src/testResources/correct.json' and emit the
// four expected entries, checked here through the lines printed by .view().
def 'should work fine - JSON' () {
given:
// Script under test (single-quoted literal, so no interpolation). Its content is runtime
// text and is left untouched.
def SCRIPT_TEXT = '''
include { fromSamplesheet } from 'plugin/nf-validation'

params.input = 'src/testResources/correct.json'

workflow {
Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view()
}
'''

when:
// Evaluate the script (dsl_eval and capture are defined outside this view), then keep only
// the captured lines that start with '[['.
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.startsWith('[[') ? it : null }

then:
// Each expected line must be present among the filtered lines. The interpolated ones are
// cast with `as String`, presumably because a GString would not equal the captured String
// entries — confirm.
noExceptionThrown()
stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String)
stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]")
stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]")
stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String)
}

def 'no header - CSV' () {
given:
def SCRIPT_TEXT = '''
Expand Down Expand Up @@ -214,6 +241,32 @@ class SamplesheetConverterTest extends Dsl2Spec{
stdout.contains("[test_2]")
}

// Feature: Channel.fromSamplesheet should read 'src/testResources/no_header.json' with the
// "no header" schema and emit one single-element entry per value.
def 'no header - JSON' () {
given:
// Script under test (single-quoted literal, so no interpolation). Its content is runtime
// text and is left untouched.
def SCRIPT_TEXT = '''
include { fromSamplesheet } from 'plugin/nf-validation'

params.input = 'src/testResources/no_header.json'

workflow {
Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_no_header.json").view()
}
'''

when:
// Evaluate the script (dsl_eval and capture are defined outside this view), then keep only
// the captured lines that start with '['.
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.startsWith('[') ? it : null }

then:
// Both expected entries must appear among the filtered lines.
noExceptionThrown()
stdout.contains("[test_1]")
stdout.contains("[test_2]")
}


def 'extra field' () {
given:
def SCRIPT_TEXT = '''
Expand Down
Loading