#!/bin/sh
#PBS -N DL_FQ
#PBS -l nodes=1:ppn=16
#PBS -o /home/omics2017/hx_du/test1.log
#PBS -e /home/omics2017/hx_du/test1.err
#PBS -q batch
#PBS -l walltime=120:00:00
wkdir=/bios-store2/hx_du/fq_td/HBB300K_200501/200501/PE
src=/home/omics2017/hx_du/program
ref=/bios-store2/hx_du/NIPT/reference
bedfile=/bios-store2/hx_du/HBB300K_200327/test20200331/all_target_segments_covered_by_probes_HBB_v1_hg19_191108105530.bed
cd ${wkdir}
samples=$(for i in $(ls *_R1*.fastq.gz);do echo "${i%%_*}";done)
for ID in $samples
do
echo $ID
if [ -f ${ID}*R2*.fastq.gz ];then
echo ${ID} "R2 exist"
java -jar $src/Trimmomatic-0.38/trimmomatic-0.38.jar PE -threads 16 $wkdir/${ID}*R1*.fastq.gz $wkdir/${ID}*R2*.fastq.gz ${ID}_1_paired.fq.gz ${ID}_1_unpaired.fq.gz ${ID}_2_paired.fq.gz ${ID}_2_unpaired.fq.gz LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
# PE ===========================
$src/bwa-0.7.17/bwa mem -t 16 -M $ref/genome.fa ${ID}_1_paired.fq.gz ${ID}_2_paired.fq.gz > ${ID}_pe.sam
$src/bwa-0.7.17/bwa mem -t 16 -M $ref/genome.fa ${ID}_1_unpaired.fq.gz > ${ID}_s1.sam
$src/bwa-0.7.17/bwa mem -t 16 -M $ref/genome.fa ${ID}_2_unpaired.fq.gz > ${ID}_s2.sam
picard MergeSamFiles \
TMP_DIR=${wkdir}/tmp \
INPUT=${ID}_pe.sam \
INPUT=${ID}_s1.sam \
INPUT=${ID}_s2.sam \
OUTPUT=${ID}.bam
rm ${ID}*.sam
# #===========================
else
echo ${ID} "R2 noexist"
java -jar $src/Trimmomatic-0.38/trimmomatic-0.38.jar SE -threads 16 $wkdir/${ID}*R1*.fastq.gz ${ID}.trimmed.fq.gz LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
$src/bwa-0.7.17/bwa mem -t 16 -M $ref/genome.fa ${ID}.trimmed.fq.gz > ${ID}_s1.sam
picard MergeSamFiles \
INPUT=${ID}_s1.sam \
OUTPUT=${ID}.bam
rm ${ID}*.sam
fi
picard AddOrReplaceReadGroups \
TMP_DIR=${wkdir}/tmp \
INPUT=${ID}.bam \
OUTPUT=${ID}.add.bam \
RGID=group1 RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM=${ID}
picard SortSam \
TMP_DIR=${wkdir}/tmp \
INPUT=${ID}.add.bam \
OUTPUT=${ID}.add.sort.bam \
SORT_ORDER=coordinate
picard MarkDuplicates \
INPUT=${ID}.add.sort.bam \
OUTPUT=${ID}.add.dedup.bam \
METRICS_FILE=metrics.txt \
REMOVE_DUPLICATES=true
picard BuildBamIndex \
TMP_DIR=${wkdir}/tmp \
INPUT=${ID}.add.dedup.bam
# # Not essential ===========================
gatk BaseRecalibrator \
-R $ref/genome.fa \
-I ${ID}.add.dedup.bam \
--known-sites $ref/hg19/dbsnp_138.hg19.vcf \
--known-sites $ref/hg19/1000G_phase1.snps.high_confidence.hg19.sites.vcf \
-O ${ID}.recal.grp \
-L $bedfile
gatk ApplyBQSR \
-R $ref/genome.fa \
-I ${ID}.add.dedup.bam \
-bqsr ${ID}.recal.grp \
-O ${ID}.recal.bam \
-L $bedfile
# # ===========================
# # Singel Sample ===========================
gatk HaplotypeCaller \
-R $ref/genome.fa \
-I ${ID}.recal.bam \
-O ${ID}.raw.g.vcf \
-ERC GVCF \
-L $bedfile
# # =========================================
# # Multiple Sample ===========================
gatk GenotypeGVCFs \
-R $ref/genome.fa \
-V ${ID}.raw.g.vcf \
--include-non-variant-sites true \
-O ${ID}_genotype.vcf \
-L $bedfile
gatk SelectVariants \
-R $ref/genome.fa \
-V ${ID}_genotype.vcf \
--select-type-to-include SNP \
--select-type-to-include NO_VARIATION \
-O ${ID}_raw_snps.vcf \
-L $bedfile
#===========================
gatk VariantFiltration \
-V ${ID}_raw_snps.vcf \
--filter-expression "QD < 2.0 || FS > 200.0 || SOR > 10.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0 || DP < 10" \
--filter-name "Filter" \
-O ${ID}.filter.vcf
cat ${ID}.filter.vcf | grep -v '\./\.' > ${ID}.filter.gt.vcf
done
gatk 分析流程 2020-06-02
?著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事?!?“怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...