2020.11.10更新代码
emmm, 稍微想了下把那个辣眼睛的代码改的稍微不辣眼睛了一点...
果然需求是生产力驱动的第一要素.....以前没想过要整,现在又需求了,其实TBtools
就可以很方便的绘制,不过还是手痒想用R实现一下。就用ggplot2撸出来了。国际惯例,上图
方法和原理
其实就是把gff文件可视化一下,看GFF文件的介绍:陈连福大佬的博客-GFF3格式介绍
我们发现GFF3包含了基因整体信息,所以就想**根据gff文件信息,提取UTR和cds信息用ggplot2
的geom_segment
小片段一段一段的画出来。其实很简单。注意以下几个信息:
- strand,方向性。
- exon是否包含UTR。
所以鉴于这两点也是写了两个判断。直来直去的代码有点辣眼睛,不过没时间了,先解决问题,以后在慢慢美化?_?。
Script
更新后, 之前的不删除了,看最后能优化成什么样
drawGeneStructure = function(gff,size,lcolor){
names(gff) = c("chr","source","type","start","end","score","strand","phase","attributes")
chr = unique(gff$chr)
strand = unique(gff$strand)
a = filter(gff,type == "gene")$attributes
desc = strsplit(a,split = ";")%>%unlist(.)
tpm1 = filter(gff,type == "exon")
tpm3 = filter(gff,type == "five_prime_UTR"| type == "three_prime_UTR")
if (strand == "-"){
tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
end = tpm1$start,
start = tpm1$end,
y = 1)
xend = tpm2$end[1]-100
} else {
tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
start = tpm1$start,
end = tpm1$end,
y = 1)
xend = tpm2$end[1]+100
}
p = ggplot(tpm2,aes(x = start,y = y))+
geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
color = lcolor)+
geom_segment(data = tpm2,
mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
size = size)+
geom_segment(aes(x = tpm2$end[1],y = 0, xend = xend, yend = 0),
arrow = arrow(length = unit(0.2,"cm")),
color = "red")+
xlab(chr)+
theme(axis.ticks.y.left = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
axis.line.x.top = element_blank(),
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.line = element_line(colour = "black",size = 1),
legend.position = "none")
if(nrow(tpm3) != 0){
p = p +
geom_segment(data = tpm3,
mapping = aes(x = start,xend = end,y = 0, yend = 0),
size = size,
color = "grey")
} else {
p = p
}
return(p)
}
原始辣眼睛代码
drawGeneStructure = function(gff,size,lcolor){
names(gff) = c("chr","source","type","start","end","score","strand","phase","attributes")
chr = unique(gff$chr)
strand = unique(gff$strand)
a = filter(gff,type == "gene")$attributes
desc = strsplit(a,split = ";")%>%unlist(.)
tpm1 = filter(gff,type == "exon")
tpm3 = filter(gff,type == "five_prime_UTR"| type == "three_prime_UTR")
if (nrow(tpm3) != 0) {
if (strand == "-") {
tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
end = tpm1$start,
start = tpm1$end,
y = 1)
tpm3 = data.frame(type = paste(tpm3$type,seq(1:nrow(tpm3)),sep = ""),
end = tpm3$start,
start = tpm3$end)
p = ggplot(tpm2,aes(x = start,y = y))+
geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
color = lcolor)+
geom_segment(data = tpm2,
mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
size = size)+
geom_segment(data = tpm3,
mapping = aes(x = start,xend = end,y = 0, yend = 0),
size = size,
color = "grey")+
geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]-100, yend = 0),
arrow = arrow(length = unit(0.2,"cm")),
color = "red")+
xlab(chr)+
theme(axis.ticks.y.left = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
axis.line.x.top = element_blank(),
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.line = element_line(colour = "black",size = 1),
legend.position = "none")
} else {
tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
start = tpm1$start,
end = tpm1$end,
y = 1)
p = ggplot(tpm2,aes(x = start,y = y))+
geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
color = lcolor)+
geom_segment(data = tpm2,
mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
size = size)+
geom_segment(data = tpm3,
mapping = aes(x = start,xend = end,y = 0, yend = 0),
size = size,
color = "grey")+
geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]+100, yend = 0),
arrow = arrow(length = unit(0.2,"cm")),
color = "red")+
xlab(chr)+
theme(axis.ticks.y.left = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
axis.line.x.top = element_blank(),
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.line = element_line(colour = "black",size = 1),
legend.position = "none")
}
} else{
if (strand == "-") {
tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
end = tpm1$start,
start = tpm1$end,
y = 1)
p = ggplot(tpm2,aes(x = start,y = y))+
geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
color = lcolor)+
geom_segment(data = tpm2,
mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
size = size)+
geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]-100, yend = 0),
arrow = arrow(length = unit(0.2,"cm")),
color = "red")+
xlab(chr)+
theme(axis.ticks.y.left = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
axis.line.x.top = element_blank(),
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.line = element_line(colour = "black",size = 1),
legend.position = "none")
} else {
tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
start = tpm1$start,
end = tpm1$end,
y = 1)
p = ggplot(tpm2,aes(x = start,y = y))+
geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
color = lcolor)+
geom_segment(data = tpm2,
mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
size = size)+
geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]+100, yend = 0),
arrow = arrow(length = unit(0.2,"cm")),
color = "red")+
xlab(chr)+
theme(axis.ticks.y.left = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
axis.line.x.top = element_blank(),
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.background = element_blank(),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.line = element_line(colour = "black",size = 1),
legend.position = "none")
}
}
return(p)
}
Usage
- 从
gff3
文件提取你要画的基因的结构,如果没有gff3,按照下面的自己写一个df也行:
# 提取
grep "Gh_A08G039000" gh.criv2.gff3
# 提取结果:
A08 EVM gene 4148620 4154669 . - . ID=Gh_A08G039000;Name=RH8;description=DEAD-box ATP-dependent RNA helicase 8 ;
A08 EVM mRNA 4148620 4154669 . - . ID=Gh_A08G039000.1;Parent=Gh_A08G039000
A08 EVM exon 4148620 4148965 . - . Parent=Gh_A08G039000.1
A08 EVM three_prime_UTR 4148620 4148965 . - . Parent=Gh_A08G039000.1
A08 EVM three_prime_UTR 4149091 4149130 . - . Parent=Gh_A08G039000.1
A08 EVM exon 4149091 4149209 . - . Parent=Gh_A08G039000.1
A08 EVM CDS 4149131 4149209 . - 1 Parent=Gh_A08G039000.1
A08 EVM exon 4149308 4149381 . - . Parent=Gh_A08G039000.1
A08 EVM CDS 4149308 4149381 . - 0 Parent=Gh_A08G039000.1
A08 EVM exon 4149465 4149553 . - . Parent=Gh_A08G039000.1
A08 EVM CDS 4149465 4149553 . - 2 Parent=Gh_A08G039000.1
A08 EVM exon 4150111 4150291 . - . Parent=Gh_A08G039000.1
A08 EVM CDS 4150111 4150291 . - 0 Parent=Gh_A08G039000.1
A08 EVM exon 4150367 4150618 . - . Parent=Gh_A08G039000.1
A08 EVM CDS 4150367 4150618 . - 0 Parent=Gh_A08G039000.1
A08 EVM exon 4151917 4152155 . - . Parent=Gh_A08G039000.1
A08 EVM CDS 4151917 4152155 . - 2 Parent=Gh_A08G039000.1
A08 EVM exon 4152683 4152917 . - . Parent=Gh_A08G039000.1
A08 EVM CDS 4152683 4152917 . - 0 Parent=Gh_A08G039000.1
A08 EVM exon 4153404 4153464 . - . Parent=Gh_A08G039000.1
A08 EVM CDS 4153404 4153464 . - 1 Parent=Gh_A08G039000.1
A08 EVM CDS 4154015 4154283 . - 0 Parent=Gh_A08G039000.1
A08 EVM exon 4154015 4154669 . - . Parent=Gh_A08G039000.1
A08 EVM five_prime_UTR 4154284 4154669 . - . Parent=Gh_A08G039000.1
- 画图
参数解释:- gff: 就是上面你提取出来的基因gff信息
- size: 就是图中exon的高度,size越大高度越大,建议
size = 4
- lcolor: 中间那条黑线的颜色,这个看你爱好
drawGeneStructure(gff = gff, lcolor = "black", size = 4)
完工...
为了保住小徽章我也是醉了....