Skip to content

Commit 1ca8a48

Browse files
authored
Merge pull request #114 from jun-wan/master
Add new script for create rnaseq samplesheet
2 parents 81173f5 + 6e1d5b7 commit 1ca8a48

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

create_rnaseq_samplesheet.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import sys
4+
import glob
5+
import argparse
6+
7+
class CustomParser(argparse.ArgumentParser):
    """Argument parser that shows a hand-written help page.

    The page replaces the help text argparse would otherwise generate from
    the registered arguments.
    """

    # Kept as individual lines so the page layout does not depend on the
    # indentation of the surrounding code.
    _HELP_LINES = (
        "",
        "",
        "Description:",
        "    Generate an RNA-seq sample sheet for a given project before running the nf-core/rnaseq analysis pipeline.",
        "    The resulting CSV is printed to the screen (stdout). To save the CSV to a file, use shell redirection.",
        "",
        "USAGE:",
        "    create_rnaseq_samplesheet.py <ProjectID> <Strandedness> [-d <data_path>] > <output.csv>",
        "",
        "Examples:",
        "    create_rnaseq_samplesheet.py P001 auto > P001.csv",
        "    create_rnaseq_samplesheet.py P001 auto -d /my/data/path > P001.csv",
        "",
        "Arguments:",
        "    ProjectID       Identifier for your RNA-seq project (e.g., P001)",
        "    Strandedness    Library strandedness (forward/reverse/unstranded/auto, use 'auto' to auto-detect)",
        "",
        "Optional arguments:",
        "    -d, --data      Path to your RNA-seq data folder. Default: /proj/ngi2016003/nobackup/NGI/DATA",
        "    -h, --help      Show this help message and exit",
        "",
    )

    def print_help(self, file=None):
        """Write the help page to *file*.

        Args:
            file: Writable text stream. When ``None`` the page goes to
                ``sys.stdout``, looked up at call time so that stdout
                redirection is respected.
        """
        if file is None:
            file = sys.stdout
        print("\n".join(self._HELP_LINES), file=file)
31+
32+
def main():
    """Print an nf-core/rnaseq sample sheet for one project to stdout.

    Command-line arguments (read from ``sys.argv``):
        ProjectID: name of the project directory below the data root.
        Strandedness: one of forward, reverse, unstranded, auto; copied
            into every row.
        -d/--data: data root directory.

    Every entry of ``<data>/<ProjectID>`` that is a directory is treated as
    one sample. Each ``*R1*.gz`` file found anywhere below it produces one
    row ``sample,fastq_1,fastq_2,strandedness``.

    Only the CSV itself is written to stdout, so redirecting stdout gives a
    file whose first line is the column header. Informational lines and
    warnings go to stderr.

    Raises:
        SystemExit: on invalid arguments, or when the project directory
            does not exist.
    """
    # Imported here so the function does not rely on the file's import header.
    import csv

    parser = CustomParser(add_help=False)
    parser.add_argument("ProjectID", help="Identifier for your RNA-seq project (e.g., P001)")
    parser.add_argument("Strandedness",
                        choices=("forward", "reverse", "unstranded", "auto"),
                        help="Library strandedness (use 'auto' to auto-detect)")
    parser.add_argument("-d", "--data", default="/proj/ngi2016003/nobackup/NGI/DATA",
                        help="Path to RNA-seq data (default: %(default)s)")
    parser.add_argument("-h", "--help", action="help", help="Show this help message and exit")

    args = parser.parse_args()

    # Validate before emitting anything, so a failed run leaves no partial
    # sample sheet in a redirected output file.
    data_path = os.path.join(args.data, args.ProjectID)
    if not os.path.isdir(data_path):
        sys.exit(f"Error: data path does not exist or is not a directory: {data_path}")

    # Run information is not part of the CSV; keep it off stdout.
    print(f"# Sample sheet for project {args.ProjectID}", file=sys.stderr)
    print(f"Strandedness,{args.Strandedness}", file=sys.stderr)

    # csv.writer quotes fields that contain commas or quotes.
    writer = csv.writer(sys.stdout, lineterminator="\n")
    writer.writerow(("sample", "fastq_1", "fastq_2", "strandedness"))

    row_count = 0
    for sample in sorted(os.listdir(data_path)):
        sample_dir = os.path.join(data_path, sample)
        if not os.path.isdir(sample_dir):
            continue
        for read1, read2 in _find_read_pairs(sample_dir):
            writer.writerow((sample, read1, read2, args.Strandedness))
            row_count += 1

    if not row_count:
        print(f"Warning: no '*R1*.gz' files found under {data_path}", file=sys.stderr)


def _find_read_pairs(sample_dir):
    """Return sorted ``(read1, read2)`` path pairs for the FASTQ files below *sample_dir*.

    ``read1`` is every file matching ``*R1*.gz`` at any depth. ``read2`` is
    the same path with ``_R1_`` replaced by ``_R2_`` in the file name only;
    it is ``""`` when that file does not exist or the name has no ``_R1_``
    token.
    """
    # Escape the directory part so characters such as '[' are taken literally.
    pattern = os.path.join(glob.escape(sample_dir), "**", "*R1*.gz")
    pairs = []
    # glob returns matches in arbitrary order; sort for reproducible output.
    for read1 in sorted(glob.glob(pattern, recursive=True)):
        folder, filename = os.path.split(read1)
        # Swap the token in the file name only, never in a directory name.
        mate = os.path.join(folder, filename.replace("_R1_", "_R2_"))
        has_mate = mate != read1 and os.path.isfile(mate)
        pairs.append((read1, mate if has_mate else ""))
    return pairs
65+
66+
67+
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
69+

0 commit comments

Comments
 (0)