# STEP1:了解数据特征
# Deliberately no `rm(list = ls())` here: a script should not wipe the
# caller's workspace as a side effect.
data_test <- datasets::attitude

# A percentage stacked bar chart needs every sample (row) to add up to the
# same total, so inspect the per-sample sums first.
statistics <- rowSums(data_test)
plot(statistics)

# The row totals differ, so rescale each row to proportions of its own total.
# Dividing a data frame by a vector of length nrow() recycles the vector down
# every column, i.e. row i is divided by statistics[i]. This avoids growing a
# data frame with rbind() inside a loop over a hard-coded row count.
# (Skip this step if your data is already expressed as proportions.)
data_percent <- data_test / statistics

# Sanity check: every sample now sums to 1.
statistics <- rowSums(data_percent)
plot(statistics)

# Attach sample labels so the plots are readable (skip if the data already
# has names): 15 upper-case letters followed by 15 lower-case letters.
data_percent$names <- c(LETTERS[1:15], letters[1:15])
# STEP2:整理数据(变成长数据)
# Important step before plotting: reshape the wide table (one column per
# attitude item) into long format (one row per sample/item pair).
# (Wide data is convenient for people to read; long data is what the plotting
# code below maps onto aesthetics.)
library(reshape2)

# Name the id column and the output columns explicitly instead of relying on
# melt() guessing the id variable and on a positional rename afterwards.
data_plot <- melt(
  data_percent,
  id.vars = "names",
  variable.name = "attitude",
  value.name = "percent"
)
colnames(data_plot)[colnames(data_plot) == "names"] <- "name"

# Derive the group from the label itself (upper- vs lower-case letter) so it
# stays correct regardless of row order or the number of measured items.
data_plot$group <- ifelse(data_plot$name %in% LETTERS, "Upper", "Lower")
# STEP3:画图
# STEP3.1:基本图形
library(ggplot2)

# Stacked bars: one bar per sample; `weight = percent` makes each segment's
# height the percent value rather than a row count.
p <- ggplot(
  data = data_plot,
  mapping = aes(x = name, fill = attitude, weight = percent)
) +
  geom_bar(position = "stack")
p

# Switching the position from "stack" to "dodge" gives a grouped bar chart,
# with the segments placed side by side instead of on top of each other.
p <- ggplot(
  data = data_plot,
  mapping = aes(x = name, fill = attitude, weight = percent)
) +
  geom_bar(position = "dodge")
p
# STEP3.2:改颜色
# Change the fill colours.
library(ggsci)
# Palette supplied by ggsci.
p + scale_fill_nejm()
# Seven rainbow colours, one per attitude item.
p + scale_fill_manual(values = rainbow(7))
# A fully custom palette also works. Every colour must be distinct: listing
# the same colour twice would make two attitude items indistinguishable.
p + scale_fill_manual(
  values = c('yellow', 'green', 'red', 'blue', 'brown', 'black', 'purple')
)
# STEP3.3:改标签
# While we are at it, relabel both axes (set in a single labs() call).
p + labs(x = 'people', y = 'percent') + scale_fill_nejm()
# STEP3.4:数值排序
# Ordering the stacked segments: to sort the attitude items by size, turn the
# column into a factor whose levels are listed in the desired order.

# Total share of each attitude item across all samples. Only numeric columns
# are summed, so the label column is skipped without hard-coding positions.
numeric_cols <- vapply(data_percent, is.numeric, logical(1))
order_x <- colSums(data_percent[, numeric_cols, drop = FALSE])
order_x <- sort(order_x, decreasing = TRUE) # largest total first
order_x # inspect: values run from largest to smallest

# Re-level `attitude` so that it follows the order computed above.
data_plot$attitude <- factor(
  data_plot$attitude,
  levels = names(order_x),
  ordered = TRUE
)
p2 <- ggplot(data_plot, aes(x = name, weight = percent, fill = attitude)) +
  geom_bar(position = "stack")
p2
# STEP3.5:样本排序
# Samples can be ordered the same way: make `name` a factor with explicit
# levels. The order vector is called `sample_order` rather than `names` so it
# does not shadow base::names().
sample_order <- c(LETTERS[1:15], letters[1:15]) # intended sample order
data_plot$name <- factor(
  data_plot$name,
  levels = sample_order,
  ordered = TRUE
)
p3 <- ggplot(data_plot, aes(x = name, weight = percent, fill = attitude)) +
  geom_bar(position = "stack")
p3
# STEP3.6:分组展示
# Optionally show the groups in separate panels: one panel per `group` value,
# laid out in two rows, each panel with its own free axis scales.
p3 + facet_wrap(facets = ~group, nrow = 2, scales = 'free')
# STEP3.7:用箱图查看整体分布情况
# Box plot of `percent` per attitude item, to see how each item is
# distributed across all samples.
ggplot(
  data = data_plot,
  mapping = aes(attitude, percent, fill = attitude)
) +
  geom_boxplot()
# STEP3.8:线图
# The same data as a line chart, one line per attitude item (it does not look
# great for this particular data set).
ggplot(
  data = data_plot,
  mapping = aes(x = name, y = percent, color = attitude, group = attitude)
) +
  geom_line() +
  scale_color_nejm()
# STEP3.9:在堆叠图上标数字(大概就是这个意思,图太丑了)
# Stacked bars with a numeric label centred in every segment.
# The label shows the value as a percentage with one decimal place; printing
# the raw proportion yields long unrounded decimals that overlap and make the
# chart unreadable.
p <- ggplot(data_plot, aes(x = name, weight = percent, fill = attitude)) +
  geom_bar(position = "stack") +
  geom_text(
    aes(label = sprintf("%.1f", 100 * percent), y = percent),
    # Stack the labels like the bars; vjust = 0.5 centres each one vertically
    # inside its segment.
    position = position_stack(vjust = 0.5),
    size = 1.5
  )
p
# 整理自
# https://cloud.tencent.com/developer/article/1819219
# https://www.javaroad.cn/questions/124111
# https://cloud.tencent.com/developer/ask/109270
# http://08643.cn/p/ac615ba65ab6