From f5e4693bd88005d53135b3479d36a1b9f97710b6 Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Wed, 9 Aug 2023 03:40:52 -0700 Subject: [PATCH] New API (#6) * :sparkles: Cache pipeline execution * :art: Don't run pipeline from NB * :art: Run pipeline from NB * :art: Remove non-NB run again * :art: Add late tree * :art: Cache different folder * :art: Half timeout * :art: Cache attempt * :art: Timeout * :art: Refactor * :art: Remove storage root Signed-off-by: zethson --------- Signed-off-by: zethson --- .github/workflows/build.yml | 6 ++--- docs/guide/bulk_rna_seq.ipynb | 49 ++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d4f2924..a96077b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: python-version: ["3.9"] - timeout-minutes: 90 + timeout-minutes: 50 steps: - name: Checkout main @@ -46,8 +46,8 @@ jobs: - name: Cache Nextflow output uses: actions/cache@v2 with: - path: rna-seq-results - key: ${{ runner.os }}-nextflow + path: docs/guide/nextflow_rna_seq/rna-seq-results + key: ${{ runner.os }}-nextflow-rna-seq-results - uses: actions/cache@v3 with: diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb index 82ebb91..79d249d 100644 --- a/docs/guide/bulk_rna_seq.ipynb +++ b/docs/guide/bulk_rna_seq.ipynb @@ -79,7 +79,7 @@ "metadata": {}, "outputs": [], "source": [ - "lb.settings.species = \"human\"" + "lb.settings.species = \"saccharomyces cerevisiae\"" ] }, { @@ -150,9 +150,7 @@ "metadata": {}, "outputs": [], "source": [ - "input_fastqs_file = ln.File.from_dir(\n", - " \"test-datasets/testdata/GSE110004/\", storage_root=Path(\".\")\n", - ")\n", + "input_fastqs_file = ln.File.from_dir(\"test-datasets/testdata/GSE110004/\")\n", "sample_sheet_file = ln.File(\"test-datasets/samplesheet/v3.10/samplesheet_test.csv\")\n", "ln.save(input_fastqs_file)\n", "ln.save(sample_sheet_file)" @@ -173,7 +171,7 @@ "metadata": {}, "outputs": [], "source": [ - "run = ln.Run.select(created_by_id=\"DzTjkKse\").one()\n", + "run = ln.Run.filter(created_by_id=\"DzTjkKse\").one()\n", "run" ] }, @@ -218,7 +216,11 @@ "cell_type": "code", "execution_count": null, "id": "2219c55e", - "metadata": {}, + "metadata": { + "jupyter": { + "outputs_hidden": true + } + }, "outputs": [], "source": [ "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name $LAMINDB_RUN_ID -resume" @@ -239,9 +241,7 @@ "metadata": {}, "outputs": [], "source": [ - "multiqc_results = ln.File.from_dir(\n", - " \"rna-seq-results/multiqc/\", storage_root=Path(\".\"), run=run\n", - ")\n", + "multiqc_results = ln.File.from_dir(\"rna-seq-results/multiqc/\", run=run)\n", "ln.save(multiqc_results)" ] }, @@ -290,24 +290,24 @@ { "cell_type": "code", "execution_count": null, - "id": "c1a58047-0c25-4632-b355-69610c6176f3", + "id": "b8ed9118-1fd1-47a7-a825-736f78a7d111", "metadata": {}, "outputs": [], "source": [ - "salmon_gene_counts_table = ln.File(\n", - " \"rna-seq-results/salmon/salmon.merged.gene_counts.tsv\", run=run\n", - ")\n", - "ln.save(salmon_gene_counts_table)" + "salmon_gene_counts_table_df = pd.read_csv(\n", + " \"rna-seq-results/salmon/salmon.merged.gene_counts.tsv\", sep=\"\\t\"\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "4f8680e2-7c58-4822-94dd-f3ee0caeb5bc", + "id": "c1a58047-0c25-4632-b355-69610c6176f3", "metadata": {}, "outputs": [], "source": [ - "gene_counts_df = pd.read_csv(salmon_gene_counts_table.stage(), sep=\"\\t\")" + "salmon_gene_counts_table = ln.File.from_df(salmon_gene_counts_table_df, run=run)\n", + "ln.save(salmon_gene_counts_table)" ] }, { @@ -321,23 +321,24 @@ { "cell_type": "code", "execution_count": null, - "id": "0dad135a-4b32-4902-95ff-4585c2ab99dc", + "id": "e655b46d-2bee-404e-9ecc-0d219e97d976", "metadata": {}, "outputs": [], "source": [ - "feature_set_genes = ln.FeatureSet.from_values(\n", - " gene_counts_df[\"gene_name\"], lb.Gene.symbol\n", + "genes = ln.FeatureSet.from_values(\n", + " salmon_gene_counts_table_df[\"gene_name\"], lb.Gene.symbol\n", ")\n", - "feature_set_genes.save()\n", - "salmon_gene_counts_table.feature_sets.add(feature_set_genes)" + "salmon_gene_counts_table.features.add_feature_set(genes, slot=\"rna\")" ] }, { - "cell_type": "markdown", - "id": "b0bfe4f5-5566-423d-9a1e-6796c8f98a77", + "cell_type": "code", + "execution_count": null, + "id": "ba83bfee-5cb8-4520-9a32-0deccd47ff34", "metadata": {}, + "outputs": [], "source": [ - "The dataset contains yeast samples and our species is set to human. Hence, a lot of gene records are being created. Bionty will soon also support yeast genes." + "salmon_gene_counts_table.describe()" ] }, {