|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import os |
| 3 | +import sys |
| 4 | +import glob |
| 5 | +import argparse |
| 6 | + |
class CustomParser(argparse.ArgumentParser):
    """ArgumentParser that shows a hand-written help page.

    Only ``print_help`` is overridden; usage and error output still come
    from the standard ``argparse`` machinery.
    """

    def print_help(self, file=None):
        """Print the custom help page.

        Args:
            file: Writable text stream to print to. ``None`` (the default)
                means ``sys.stdout``, matching ``ArgumentParser.print_help``.
        """
        # NOTE(review): the column alignment inside this text was
        # reconstructed; the wording is unchanged. Confirm spacing if the
        # exact layout matters.
        help_text = """

Description:
Generate an RNA-seq sample sheet for a given project before running the nf-core/rnaseq analysis pipeline.
The resulting CSV is printed to the screen (stdout). To save the CSV to a file, use shell redirection.

USAGE:
    create_rnaseq_samplesheet.py <ProjectID> <Strandedness> [-d <data_path>] > <output.csv>

Examples:
    create_rnaseq_samplesheet.py P001 auto > P001.csv
    create_rnaseq_samplesheet.py P001 auto -d /my/data/path > P001.csv

Arguments:
    ProjectID       Identifier for your RNA-seq project (e.g., P001)
    Strandedness    Library strandedness (forward/reverse/unstranded/auto, use 'auto' to auto-detect)

Optional arguments:
    -d, --data      Path to your RNA-seq data folder. Default: /proj/ngi2016003/nobackup/NGI/DATA
    -h, --help      Show this help message and exit
"""
        # Honour the caller-supplied stream instead of always using stdout;
        # print() itself falls back to sys.stdout when file is None.
        print(help_text, file=file)
| 31 | + |
def main():
    """Print an nf-core/rnaseq sample sheet for one project to stdout.

    Command-line arguments:
        ProjectID: name of the project directory below the data path.
        Strandedness: value written to the ``strandedness`` column.
        -d/--data: root data directory (defaults to the NGI data path).

    For every entry in ``<data>/<ProjectID>`` (sorted by name) the function
    searches recursively for gzipped files whose name contains ``_R1_`` and
    writes one ``sample,fastq_1,fastq_2,strandedness`` row per match. The
    mate path is obtained by swapping the last ``_R1_`` in the file name for
    ``_R2_``; if that file does not exist, ``fastq_2`` is left empty so the
    row describes a single-end library.

    Raises:
        SystemExit: if the project directory does not exist.
    """
    parser = CustomParser(add_help=False)
    parser.add_argument("ProjectID", help="Identifier for your RNA-seq project (e.g., P001)")
    parser.add_argument("Strandedness", help="Library strandedness (use 'auto' to auto-detect)")
    parser.add_argument("-d", "--data", default="/proj/ngi2016003/nobackup/NGI/DATA",
                        help="Path to RNA-seq data (default: %(default)s)")
    parser.add_argument("-h", "--help", action="help", help="Show this help message and exit")

    args = parser.parse_args()

    # Build full path to project data and validate it before emitting
    # anything, so a failed run does not leave a partial file behind the
    # shell redirection.
    data_path = os.path.join(args.data, args.ProjectID)
    if not os.path.exists(data_path):
        sys.exit(f"Error: data path does not exist: {data_path}")

    # Informational lines go to stderr: stdout is redirected into the CSV,
    # and the pipeline expects the header row to be the first line.
    print(f"# Sample sheet for project {args.ProjectID}", file=sys.stderr)
    print(f"Strandedness,{args.Strandedness}", file=sys.stderr)

    print("sample,fastq_1,fastq_2,strandedness")

    for sample in sorted(os.listdir(data_path)):
        # Escape the fixed part of the path so characters such as '[' in a
        # directory name are not interpreted as glob syntax.
        sample_dir = glob.escape(os.path.join(data_path, sample))
        # Match on "_R1_" (not just "R1") so the pattern agrees with the
        # R1 -> R2 substitution below and does not pick up R2 files whose
        # sample name merely contains "R1".
        pattern = os.path.join(sample_dir, "**", "*_R1_*.gz")

        # glob returns matches in arbitrary order; sort for stable output.
        for r1 in sorted(glob.glob(pattern, recursive=True)):
            directory, filename = os.path.split(r1)
            # Rewrite only the read tag in the file name, never a directory
            # component that happens to contain "_R1_".
            prefix, _, suffix = filename.rpartition("_R1_")
            r2 = os.path.join(directory, f"{prefix}_R2_{suffix}")
            if not os.path.isfile(r2):
                # No mate file on disk: emit a single-end row.
                r2 = ""
            print(f"{sample},{r1},{r2},{args.Strandedness}")
| 65 | + |
| 66 | + |
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
| 69 | + |
0 commit comments