我开始使用Snakemake,但我觉得我缺少一些关键概念...
我拥有的是fastq文件的目录,我想在所有读取对上运行STAR。
我写了以下蛇文件:
import os
from glob import glob
experiment_name = 'swo-406'
scratch_data_base_dir="/rst1/2017-0205_illuminaseq/scratch"
scratch_data_dir = os.path.join(scratch_data_base_dir, experiment_name)
seqrun = '180413_NB501997_0054_AHTFJ3BGX3'
fastq_dir = os.path.join(scratch_data_dir, 'fastq', seqrun)
preprocessing_dir = os.path.join(scratch_data_dir, 'preprocessing', seqrun)
if not os.path.isdir(preprocessing_dir):
os.makedirs(preprocessing_dir)
ref_base_dir = '/rst1/2017-0205_illuminaseq/data/references/Reference_Genomes/GRCh38.87'
ref_genome = os.path.join(ref_base_dir, 'Homo_sapiens.GRCh38.dna.primary_assembly.fa')
star_ref_dir = '/rst1/2017-0205_illuminaseq/scratch/swo-390/STAR_references/human'
log = os.path.join(preprocessing_dir, 'log.txt')
SAMPLES = set([os.path.basename(fastq_file.replace('_R1_001.fastq.gz', '').replace('_R2_001.fastq.gz', ''))
for fastq_file in glob(os.path.join(fastq_dir, '*_R*_001.fastq.gz'))
if not 'Undetermined' in fastq_file])
print(expand(os.path.join(fastq_dir, '{sample}_R1_001.fastq.gz'), sample=SAMPLES))
# Rule all is a pseudo-rule that tells snakemake what final files to generate.
rule all:
input:
expand(os.path.join(preprocessing_dir, '{sample}.Aligned.out.bam'), sample=SAMPLES)
rule star_map:
input:
read1 = expand(os.path.join(fastq_dir, '{sample}_R1_001.fastq.gz'), sample=SAMPLES),
read2 = expand(os.path.join(fastq_dir, '{sample}_R2_001.fastq.gz'), sample=SAMPLES)
output:
os.path.join(preprocessing_dir, '{sample}.Aligned.out.bam')
log:
log
shell:
"""
STAR \
--runThreadN 8 \
--genomeDir {star_ref_dir} \
--readFilesIn {input.read1} {input.read2} \
--outSAMtype BAM Unsorted \
--outFileNamePrefix {output} \
--outStd Log \
{log}
"""
这将输出:
['/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S4_S14_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S06_S6_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S5_S15_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S03_S3_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S10_S10_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S2_S12_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S05_S5_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S02_S2_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S3_S13_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S04_S4_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S07_S7_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S08_S8_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S6_S16_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S09_S9_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S1_S11_R1_001.fastq.gz', '/rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S01_S1_R1_001.fastq.gz']
Provided cores: 1
Rules claiming more threads will be scaled down.
Job counts:
THERE IS TEXT HERE BUT IT REFUSED TO BE PASTED??
rule star_map:
input: /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S4_S14_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S06_S6_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S5_S15_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S03_S3_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S10_S10_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S2_S12_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S05_S5_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S02_S2_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S3_S13_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S04_S4_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S07_S7_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S08_S8_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S6_S16_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S09_S9_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S1_S11_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S01_S1_R1_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S4_S14_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S06_S6_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S5_S15_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S03_S3_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S10_S10_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S2_S12_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S05_S5_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S02_S2_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S3_S13_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S04_S4_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S07_S7_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S08_S8_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S6_S16_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S09_S9_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S1_S11_R2_001.fastq.gz, /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S01_S1_R2_001.fastq.gz
output: /rst1/2017-0205_illuminaseq/scratch/swo-406/preprocessing/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S06_S6.Aligned.out.bam
log: /rst1/2017-0205_illuminaseq/scratch/swo-406/preprocessing/180413_NB501997_0054_AHTFJ3BGX3/log.txt
jobid: 7
wildcards: sample=0054_P2017SEQE83S06_S6
Error in job star_map while creating output file /rst1/2017-0205_illuminaseq/scratch/swo-406/preprocessing/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S06_S6.Aligned.out.bam.
RuleException:
CalledProcessError in line 40 of /home/nlv24077/experiments/experiments/swo-406/scripts/Snakefile.snakefile:
Command '
STAR --runThreadN 8 --genomeDir /rst1/2017-0205_illuminaseq/scratch/swo-390/STAR_references/human --readFilesIn /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S4_S14_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S06_S6_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S5_S15_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S03_S3_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S10_S10_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S2_S12_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S05_S5_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S02_S2_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S3_S13_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S04_S4_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S07_S7_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S08_S8_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S6_S16_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S09_S9_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S1_S11_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S01_S1_R1_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S4_S14_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S06_S6_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S5_S15_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S03_S3_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S10_S10_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S2_S12_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S05_S5_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S02_S2_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S3_S13_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S04_S4_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S07_S7_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S08_S8_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S6_S16_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S09_S9_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2018SEQE15S1_S11_R2_001.fastq.gz /rst1/2017-0205_illuminaseq/scratch/swo-406/fastq/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S01_S1_R2_001.fastq.gz --outSAMtype BAM Unsorted --outFileNamePrefix /rst1/2017-0205_illuminaseq/scratch/swo-406/preprocessing/180413_NB501997_0054_AHTFJ3BGX3/0054_P2017SEQE83S06_S6.Aligned.out.bam --outStd Log /rst1/2017-0205_illuminaseq/scratch/swo-406/preprocessing/180413_NB501997_0054_AHTFJ3BGX3/log.txt
' died with <Signals.SIGSEGV: 11>.
File "/home/nlv24077/experiments/experiments/swo-406/scripts/Snakefile.snakefile", line 40, in __rule_star_map
File "/rst1/2017-0205_illuminaseq/scratch/swo-406/snakemake/lib/python3.6/concurrent/futures/thread.py", line 56, in run
Will exit after finishing currently running jobs.
Exiting because a job execution failed. Look above for error message
如您所见,我获得了所有fastq文件作为STAR的输入,它没有像我期望的那样在样本中循环。我该怎么办?
最诚挚的问候
Freek。
答案 0 :(得分:1)
您需要定义{sample}
中的rule all
通配符。在下面的示例中,snakemake将针对示例A和B运行。
rule all:
input:
expand(os.path.join(preprocessing_dir, '{sample}.Aligned.out.bam'), sample=['A', 'B'])
Here是我过去发现有用的教程。