所有的命令粘貼于此，用于快速完成分析任務。具體軟件參數，見
#!/bin/bash
# Copy the sequencer output into a separate working folder (./data) so the
# original files under ~/disk/lyb/ are never operated on directly.
# The target directory is created first: if ./data does not exist, cp would
# write each match to a regular file named "data" instead of into a folder.
mkdir -p ./data
# find -exec replaces the deprecated `xargs -i` form and also copes with
# file names that contain whitespace.
find ./Cleandata -name '*fq.gz' -exec cp {} ./data/ \;
# Everything below is run from ~/disk/lyb/data/
# 1. Quality control
fastqc *.fq.gz -t 8

# File-name suffixes of the raw read pairs, and the sample prefixes they
# are appended to.
bg1='RNA_R1.fq.gz'
bg2='RNA_R2.fq.gz'
bef=(NS-1 NS-2 NS-3 WT-1 WT-2 WT-3)

# Trim every sample's read pair with Trimmomatic in paired-end mode, writing
# paired and unpaired outputs for each mate, then echo the file names used.
trimmomatic_jar=/home/guo/tool/Trimmomatic-0.38/trimmomatic-0.38.jar
for sample in "${bef[@]}"; do
    raw_r1="${sample}${bg1}"
    raw_r2="${sample}${bg2}"
    paired_r1="${sample}paired-R1.fq.gz"
    paired_r2="${sample}paired-R2.fq.gz"
    orphan_r1="${sample}unpaired-R1.fq.gz"
    orphan_r2="${sample}unpaired-R2.fq.gz"
    java -jar "$trimmomatic_jar" PE -threads 12 -phred33 \
        "$raw_r1" "$raw_r2" \
        "$paired_r1" "$orphan_r1" "$paired_r2" "$orphan_r2" \
        ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
    echo "$raw_r1,$raw_r2,$paired_r1,$paired_r2,$orphan_r1,$orphan_r2"
done
# Working directory for this section: /disks/backup/chaim/maize/
# Extract exon and splice-site tables from the GTF annotation; both are fed
# to hisat2-build below.
/home/chaim/disk/soft/hisat2/extract_exons.py Zea_mays.B73_RefGen_v4.42.gtf > genome.exon
/home/chaim/disk/soft/hisat2/extract_splice_sites.py Zea_mays.B73_RefGen_v4.42.gtf > genome.ss
# NOTE(review): genome.snp is not passed to hisat2-build below, so this output
# is currently unused. The HISAT2 manual appears to document this script as
# taking <genome.fa> <vcf> <output base name> -- confirm the invocation.
/home/chaim/disk/soft/hisat2/hisat2_extract_snps_haplotypes_VCF.py zea_mays.vcf > genome.snp
# 2.1 Build the index
# Run in the foreground: the alignment step needs the finished genome_tran
# index, so the build must not be left running in the background.
# Options are placed before the reference FASTA and the index base name.
hisat2-build -p 8 --ss genome.ss --exon genome.exon Zea_mays.B73_RefGen_v4.42.fa genome_tran
# 2.2 Alignment
# Map each sample's trimmed read pair against the genome_tran index, writing
# one SAM file and one novel-splice-site file per sample.
for sample in "${bef[@]}"; do
    hisat2 -x /disks/backup/chaim/maize/genome_tran -p 16 \
        -1 "${sample}paired-R1.fq.gz" \
        -2 "${sample}paired-R2.fq.gz" \
        -S "${sample}.map.sam" \
        --dta-cufflinks \
        --novel-splicesite-outfile "${sample}.nsplice"
done
# Step 3: format conversion with samtools -- sort each SAM file and write it
# out as BAM (6 files in total). stderr goes to a per-sample log file.
for sample in "${bef[@]}"; do
    samtools sort -@ 8 \
        -o "${sample}.map.bam" \
        "${sample}.map.sam" \
        2>"${sample}samtool_out"
done
# Step 4: assembly with StringTie (three rounds)
# Round 1: assemble transcripts for each of the 6 samples against the genome.
# The per-sample jobs run in parallel in the background.
for sample in "${bef[@]}"; do
    stringtie "${sample}.map.bam" -G /disks/backup/chaim/maize/Zea_mays.B73_RefGen_v4.42.gtf -p 8 -o "${sample}.gtf" &
done
# The merge reads every per-sample GTF, so block until all of them exist.
wait
# Round 2: merge the 6 per-sample assemblies into one annotation.
# Run in the foreground because round 3 reads merged.gtf.
stringtie --merge -G /disks/backup/chaim/maize/Zea_mays.B73_RefGen_v4.42.gtf -p 8 -o merged.gtf NS-1.gtf NS-2.gtf NS-3.gtf WT-1.gtf WT-2.gtf WT-3.gtf 2>stringtie_merge
# Round 3: estimate expression abundance for each of the 6 samples, using
# the merged annotation from round 2 as the reference.
# This has to be a loop: outside one, ${bef[$i]} expands to an empty string
# and no sample is processed.
for sample in "${bef[@]}"; do
    stringtie "${sample}.map.bam" -G merged.gtf -p 8 -b "${sample}_out" -e -o "${sample}-st.gtf" &
done
# prepDE.py needs the finished abundance estimates.
wait
# Step 5: generate the CSV count matrices
# Path to the python script
# NOTE(review): "gtf2" is not created anywhere in this file; confirm it
# points at the *-st.gtf outputs produced above.
python2.7 /disks/backup/chaim/soft/prepDE.py -i gtf2
# Step 6: differential expression analysis with DESeq2
# Install DESeq2 through BiocManager, the installer this file already uses
# further down, instead of the retired biocLite.R script. The install only
# runs when the package is missing.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("DESeq2", quietly = TRUE)) {
  BiocManager::install("DESeq2")
}
# Load packages
library(tidyverse)
library(DESeq2)
library(ggplot2)
# Import data
# setwd("/home/chaim/disk/lyb/data/")
# setwd("/mnt/d/RNA-seq/")
setwd("D:/RNA-seq/")
countData <- as.matrix(read.csv("gene_count_matrix.csv", row.names = "gene_id"))
# Three NS samples followed by three WT samples; NS is the reference level.
# NOTE(review): this assumes the count-matrix columns are in that same order.
condition <- factor(c(rep("NS", 3), rep("WT", 3)), levels = c("NS", "WT"))
# Fail early with a clear message if the matrix does not have one column per
# entry in `condition`.
stopifnot(ncol(countData) == length(condition))
colData <- data.frame(row.names = colnames(countData), condition)
dds <- DESeqDataSetFromMatrix(
  countData = countData,
  colData = colData,
  design = ~ condition
)
dds <- DESeq(dds)
# Overview of the overall results
res <- results(dds)
# Rank the genes by raw p-value before summarising and exporting.
by_pvalue <- order(res$pvalue)
res <- res[by_pvalue, ]
summary(res)
write.csv(res, file = "All_results.csv")
table(res$padj < 0.05)
# Extract the differentially expressed genes (DEGs) ahead of gene-symbol
# annotation: adjusted p-value below 0.05 and absolute log2 fold change
# above 1.
diff_gene_deseq2 <- subset(
  res,
  padj < 0.05 & abs(log2FoldChange) > 1
)
dim(diff_gene_deseq2)
write.csv(diff_gene_deseq2, file = "DEG_treat_vs_control.csv")
# resdata <- res
# threshold <- as.factor(ifelse(resdata$padj < 0.001 & abs(resdata$log2FoldChange) >= 2 ,ifelse(resdata$log2FoldChange >= 2 ,'Up','Down'),'Not'))
# ggplot(resdata,aes(x=log2FoldChange,y=-log10(padj),colour=threshold)) + xlab("log2(Fold Change)")+ylab("-log10(qvalue)") + geom_point(size = 0.5,alpha=1) + ylim(0,200) + xlim(-12,12) + scale_color_manual(values=c("green","grey", "red"))
# #安裝biomaRt包
# source("http://bioconductor.org/biocLite.R")
# biocLite("biomaRt")
# install.packages('DT')
# #用bioMart對差異表達基因進行注釋
# library("biomaRt")
# listMarts()
#
# ensembl=useMart("ENSEMBL_MART_ENSEMBL")
# all_datasets <- listDatasets(ensembl)
# library(DT)
# datatable(all_datasets,options = list(searching=FALSE,pageLength=5,lengthMenu=c(5,10,15,20)))
# Install clusterProfiler (with DOSE) for GO/KEGG enrichment analysis and GSEA.
# BiocManager replaces the retired biocLite.R installer; the guard is written
# once instead of being repeated before every install.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(c("clusterProfiler", "DOSE"))
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script carry on.
library(DOSE)
library(DO.db)
library(clusterProfiler)
# NOTE(review): these two installs pin Bioconductor release 3.8 -- confirm
# that matches the R version in use.
BiocManager::install("S4Vectors", version = "3.8")
# Install AnnotationHub
BiocManager::install("AnnotationHub", version = "3.8")
library(AnnotationHub)
hub <- AnnotationHub()
query(hub, "zea mays")
# Fetch the AnnotationHub record AH66225 and use it as the maize annotation
# database for the steps below.
maize <- hub[["AH66225"]]
length(keys(maize))
columns(maize)
bitr(keys(maize)[1], "GID", c("ACCNUM", "ENTREZID", "UNIGENE"), maize)
# Further column names kept here as a note. As a bare line of strings this
# was not valid R and stopped the script from parsing:
# "ALIAS","EVIDENCE","EVIDENCELL",
# GO enrichment analysis
# Using enrichGO
sample_genes <- keys(maize)
# Enrichment over every key in the database, with both cutoffs set to 1.
# Stored under its own name so the DESeq2 results object `res` is not
# overwritten.
ego_all <- enrichGO(
  sample_genes,
  OrgDb = maize,
  pvalueCutoff = 1,
  qvalueCutoff = 1
)
# Molecular-function enrichment of the differentially expressed genes.
ego <- enrichGO(
  gene = row.names(diff_gene_deseq2),
  OrgDb = maize,
  keyType = "GENENAME",
  ont = "MF"
)
ensids <- c("Zm00001d011037", "Zm00001d035600", "Zm00001d035599")
cols <- c("SYMBOL", "GO")
# Namespace-qualified because tidyverse (dplyr) also exports select(), and
# which one a bare call reaches depends on package load order.
AnnotationDbi::select(maize, keys = ensids, columns = cols, keytype = "GENENAME")
# Bubble plot
dotplot(ego, font.size = 5)
# Network plot
# NOTE(review): newer clusterProfiler/DOSE releases appear to have replaced
# enrichMap() with enrichplot::emapplot(); confirm against the installed
# version.
enrichMap(ego, vertex.label.cex = 1.2, layout = igraph::layout.kamada.kawai)
# Extra packages needed for the GO graph, installed through BiocManager
# instead of the retired biocLite().
BiocManager::install(c("topGO", "Rgraphviz"))
plotGOgraph(ego)
#gseGO進行GSEA分析
# Quickly scan genome_table.txt for the lines whose first field matches
# "gene" and write their 3rd and 12th columns to the file gene_id.
# NOTE(review): the pattern is unanchored, so it matches "gene" anywhere in
# the first field, not only at the start.
awk '$1 ~/gene/ {print $3,$12}' genome_table.txt >gene_id