本文接bismark上游分析:http://08643.cn/p/9f6939d1588a
下文主要包括两部分:1、找出差异甲基化位点(DML)和差异甲基化区域(DMR);2、对差异甲基化位点和区域进行注释
library(methylKit)
library(genomation)
library(edmr)
library(GenomicRanges)
library(IRanges)
library(mixtools)
library(data.table)
library(TxDb.Mmusculus.UCSC.mm10.ensGene)
library(GenomicFeatures)
# Work from the directory that holds the per-sample methylation call files.
setwd("/Users/apple/zhouyh56")

# The *.filter.txt inputs were prepared beforehand on the shell, e.g.
#   cat M4.txt | grep chr > M4.filter.txt
# which keeps only lines containing "chr" (drops the odd chromosomes).
# M4.txt and M7.txt hold methylation-site data; no differential analysis
# has been run on them yet.
file.list.M <- list("M7.filter.txt", "M4.filter.txt")

# Read both samples into one object; M7 is labelled 0 and M4 is labelled 1
# in the `treatment` vector.
myobj.M <- methRead(
  file.list.M,
  sample.id = list("M7.filter", "M4.filter"),
  assembly  = "mm10",
  treatment = c(0, 1),
  context   = "CpG"
)
# Filter by read coverage to reduce the influence of PCR bias:
# lo.count = 10 sets the low-coverage cutoff and hi.perc = 99.9 the
# high-coverage percentile cutoff; the other two cutoffs are disabled.
filtered.myobj.M <- filterByCoverage(
  myobj.M,
  lo.count = 10,
  lo.perc  = NULL,
  hi.count = NULL,
  hi.perc  = 99.9
)

# Differentially methylated loci (DML).
# Merge the two samples into a single object (strands are kept separate).
meth.M <- unite(filtered.myobj.M, destrand = FALSE)

# Test every site for differential methylation between the two samples.
myDiff.M <- calculateDiffMeth(meth.M)

# Keep sites with difference >= 25 and q-value <= 0.01:
# all significant sites, then the hyper- and hypo-methylated subsets.
myDiff25p.M <- getMethylDiff(
  myDiff.M,
  difference = 25, qvalue = 0.01, type = "all"
)
myDiff25p.hyper.M <- getMethylDiff(
  myDiff.M,
  difference = 25, qvalue = 0.01, type = "hyper"
)
myDiff25p.hypo.M <- getMethylDiff(
  myDiff.M,
  difference = 25, qvalue = 0.01, type = "hypo"
)
# Differentially methylated regions (DMR) computed with methylKit.
# Summarise the filtered per-site counts over 1000 bp windows; step.size
# equals win.size, so consecutive windows do not overlap.
tiles.M <- tileMethylCounts(
  filtered.myobj.M,
  win.size  = 1000,
  step.size = 1000
)

# Merge the tiled samples and test each window for differential methylation.
meth.tiles.M <- unite(tiles.M, destrand = FALSE)
myDiff.tiles.M <- calculateDiffMeth(meth.tiles.M)

# Same cutoffs as for single sites (difference 25, q-value 0.01):
# all significant windows, then the hyper- and hypo-methylated subsets.
myDiff25p.tiles.M <- getMethylDiff(
  myDiff.tiles.M,
  difference = 25, qvalue = 0.01, type = "all"
)
myDiff25p.hyper.tiles.M <- getMethylDiff(
  myDiff.tiles.M,
  difference = 25, qvalue = 0.01, type = "hyper"
)
myDiff25p.hypo.tiles.M <- getMethylDiff(
  myDiff.tiles.M,
  difference = 25, qvalue = 0.01, type = "hypo"
)
## Annotate the DMRs (author's note: this finally worked).
library(genoset)
library(RSQLite)
library(methyAnalysis)

# annotateDMRInfo() only accepts GRanges input, so the methylKit result is
# converted with as() first.
myDiff25p.tiles.M.R <- as(myDiff25p.tiles.M, "GRanges")

# The annotation database is passed by package name.
# NOTE(review): presumably the knownGene TxDb package must be installed,
# since only the ensGene TxDb is attached at the top of the script - confirm.
DMRInfo.ann <- annotateDMRInfo(
  myDiff25p.tiles.M.R,
  'TxDb.Mmusculus.UCSC.mm10.knownGene'
)

# Export the annotated DMR table. The export statement from the official
# documentation raised an error, so the table is written with write.csv().
# write.csv() always uses "," as the separator and ignores any attempt to
# set `sep` (with a warning), so `sep` is not passed.
write.csv(DMRInfo.ann$sigDMRInfo, "zhou.csv")
对差异甲基化结果的功能注释(启动子、外显子、内含子等):先用annotateWithGeneParts函数注释其所在的基因结构,再用regionCounts函数按启动子、外显子、内含子等区域汇总甲基化信息
# Annotation.
# Load the transcript features (the result is indexed below by $promoters,
# $exons, $introns and $TSSes) from the mm10 BED annotation file.
gene.obj.M <- readTranscriptFeatures(
  "/Users/apple/zhouyh56/mm10_anno.bed.txt"
)

# Annotate the differentially methylated sites with gene parts:
# all significant sites, then the hyper- and hypo-methylated subsets.
diffAnn.M <- annotateWithGeneParts(
  as(myDiff25p.M, "GRanges"),
  gene.obj.M
)
diffAnn.hyper.M <- annotateWithGeneParts(
  as(myDiff25p.hyper.M, "GRanges"),
  gene.obj.M
)
diffAnn.hypo.M <- annotateWithGeneParts(
  as(myDiff25p.hypo.M, "GRanges"),
  gene.obj.M
)
# Functional annotation: summarise the united methylation data over each
# class of gene feature taken from gene.obj.M.
# NOTE: the variable `promoters` shares its name with the promoters()
# function; calls to promoters() still resolve to the function.
promoters <- regionCounts(meth.M, gene.obj.M$promoters)
exons     <- regionCounts(meth.M, gene.obj.M$exons)
introns   <- regionCounts(meth.M, gene.obj.M$introns)
TSSes     <- regionCounts(meth.M, gene.obj.M$TSSes)
也可以自己设置启动子区间范围
# Define the promoter window yourself instead of using the BED annotation.
txdb <- TxDb.Mmusculus.UCSC.mm10.ensGene

# Promoter ranges built with upstream = 1000 and downstream = 1000.
promoters.GR <- promoters(txdb, upstream = 1000, downstream = 1000)

# Replace the metadata with a single `name` column taken from the second
# metadata column of the promoter ranges.
# NOTE(review): presumably that column is the transcript name - confirm.
elementMetadata(promoters.GR) <- data.frame(
  name = elementMetadata(promoters.GR)[, 2],
  stringsAsFactors = FALSE
)
也可以用edmr包计算DMR(比methylKit包本身的方法更好),但由于其注释文件在官网无法下载,这里暂且留坑