质控 过滤 比对 去除宿主
# 云硬盘挂载(100G)
virtio-disk-bxgzkldx
/dev/disk/by-id/virtio-disk-bxgzkldx /data ext4 defaults 0 0
# Download the UniProt reference files: the selected ID-mapping table
# (idmapping_selected.tab.gz) and the UniRef90 FASTA (uniref90.fasta.gz).
uniprot_base=https://ftp.uniprot.org/pub/databases/uniprot
for uniprot_path in \
  current_release/knowledgebase/idmapping/idmapping_selected.tab.gz \
  uniref/uniref90/uniref90.fasta.gz; do
  wget "${uniprot_base}/${uniprot_path}"
done
# Download the gtdbtk_data.tar.gz archive from the latest GTDB release.
# ("auxillary_files" is the directory name used by the server; keep it as is.)
gtdbtk_data_url=https://data.gtdb.ecogenomic.org/releases/latest/auxillary_files/gtdbtk_data.tar.gz
wget "${gtdbtk_data_url}"
***fastqc 质控
# Run FastQC on the raw paired-end reads of samples A1 and A2 with 4 threads;
# the reports are written to ./qc.
# -p keeps this step re-runnable: a plain mkdir fails once ./qc exists.
mkdir -p ./qc
singularity exec ../../software/MetaGenome.sif fastqc --outdir ./qc --threads 4 \
  ../../dataFQ/A1_1.fq.gz ../../dataFQ/A1_2.fq.gz \
  ../../dataFQ/A2_1.fq.gz ../../dataFQ/A2_2.fq.gz
输出:./qc 目录下的 .html 质控报告;重点查看 Per base sequence quality(Q30)
***fastp 质量过滤
# Quality-filter each sample's paired reads with fastp (4 threads).
# Per sample, the cleaned R1/R2 FASTQ files and the JSON/HTML reports are
# written under clean_data/.
mkdir -p clean_data
for sample in A1 A2; do
  singularity exec ../../software/MetaGenome.sif fastp --thread 4 \
    -i "../../dataFQ/${sample}_1.fq.gz" -I "../../dataFQ/${sample}_2.fq.gz" \
    -o "clean_data/${sample}_1.fq.gz" -O "clean_data/${sample}_2.fq.gz" \
    -j "clean_data/${sample}.fastp.json" -h "clean_data/${sample}.fastp.html"
done
*** multiqc 质控结果汇总
# Summarise the reports found under ./clean_data/ with MultiQC.
# The image is referenced by the same relative path as the fastqc/fastp steps
# run from this directory; a bare "MetaGenome.sif" would have to exist in the
# current directory.
# NOTE(review): assumes the image lives only in ../../software/ - confirm.
singularity exec ../../software/MetaGenome.sif multiqc ./clean_data/
---去除宿主序列---
****** bowtie2 -构建index
# Build the bowtie2 index of the host genome.
# Link the genome FASTA into the current directory as ./genome.fa.
ln -s ../../data/genome.fa
# Arguments of bowtie2-build:
#   genome.fa : genome sequence to index
#   genome.db : prefix of the index files that are written
# The comments are kept above the command on purpose: a "\ # comment" tail
# escapes the space instead of the newline, which ends the command early and
# makes the next line run as a separate command.
singularity exec ../../software/MetaGenome.sif bowtie2-build \
  genome.fa \
  genome.db
****** 比对
# Align sample A1's cleaned read pairs against the ./genome.db index using
# 4 threads. The alignments are written to A1.sam and bowtie2's stderr is
# captured in A1.map.log.
singularity exec ../../software/MetaGenome.sif bowtie2 -p 4 -x ./genome.db \
  -1 ../01.quality/clean_data/A1_1.fq.gz -2 ../01.quality/clean_data/A1_2.fq.gz \
  -S A1.sam 2> A1.map.log
****** 去除宿主数据
# Keep only the read pairs in which neither mate aligned to the host genome.
#   -b       : write BAM; without it samtools view emits SAM text, which would
#              end up in a file named .bam
#   -f 12    : require flags 0x4 (read unmapped) and 0x8 (mate unmapped)
#   A1.sam   : input alignments
#   > A1.unmap.bam : output file
# The comments are kept above the command on purpose: a "\ # comment" tail
# breaks the line continuation, so the input file and the redirection would
# never reach samtools.
singularity exec ../../software/MetaGenome.sif samtools view \
  -b \
  -f 12 \
  A1.sam \
  > A1.unmap.bam
****** bam转换回fq格式
# Convert the host-free alignments back to FASTQ.
#   -1 A1_1.clean.fq.gz          : output, read 1
#   -2 A1_2.clean.fq.gz          : output, read 2
#   -s A1_singleton.clean.fq.gz  : output, singleton reads
#   A1.unmap.bam                 : input BAM
# The comments are kept above the command on purpose: a "\ # comment" tail
# breaks the line continuation, so the remaining options and the input file
# would be run as separate commands instead of being passed to samtools.
singularity exec ../../software/MetaGenome.sif samtools fastq \
  -1 A1_1.clean.fq.gz \
  -2 A1_2.clean.fq.gz \
  -s A1_singleton.clean.fq.gz \
  A1.unmap.bam
------------------
比对&去除宿主数据&bam转换回fq格式
# One-pass host removal for sample A1: align, keep the pairs where both mates
# are unmapped, and convert straight to FASTQ without intermediate SAM/BAM
# files.
#   stage 1: bowtie2 writes SAM to stdout; its stderr goes to A1.map.log
#   stage 2: samtools view -f 12 keeps reads with flags 0x4 and 0x8 set;
#            -u emits uncompressed BAM (header included) so the next stage
#            receives a complete stream rather than headerless SAM text
#   stage 3: samtools fastq splits the stream into R1 / R2 / singleton files
# "-" names stdin explicitly as the input of stages 2 and 3.
singularity exec ../../software/MetaGenome.sif bowtie2 --threads 4 -x ./genome.db \
    -1 ../01.quality/clean_data/A1_1.fq.gz -2 ../01.quality/clean_data/A1_2.fq.gz \
    2> A1.map.log \
  | singularity exec ../../software/MetaGenome.sif samtools view -u -f 12 - \
  | singularity exec ../../software/MetaGenome.sif samtools fastq \
      -1 A1_1.clean.fq.gz -2 A1_2.clean.fq.gz -s A1_singleton.clean.fq.gz -