exampledata%>% filter(!is (A_REF,(n =,n,..., )))%>% select(A_REF,labels) plot_data< - exampledata%>% filter(!is.na(V101), !is.na(V43))%>% left_join(标签,by =A_REF) plot_data< - plot_data%>% group_by )%>% summary('5 Sehr unzufrieden`​​ = sum(ifelse(V43 ==5 Sehr unzufrieden,1,0))/ n(),`4` = sum ifelse(V43 ==4,1,0))/ n(),`3` = sum(ifelse(V43 ==3,1,0))/ n(),`2` = sum(ifelse(V43 ==2,1,0))/ n(),`1 Sehr zufrieden`​​ = sum(ifelse(V43 ==1 Sehr zufrieden, (key = Rating,value = prop,-labels) plot_data $ labels< - factor(plot_data $标签) ) plot_da ta $ Rating< - factor(plot_data $ Rating)%>%fct_rev() #剧情===================== ============ ggplot(plot_data,aes(x = labels,y = prop,fill = Rating))+ geom_col()+ scale_y_continuous( labels = percent :: percent,breaks = c(0,0.2,0.4,0.6,0.8,1))+ labs(y = NULL,x = NULL,fill = NULL)+ ggtitle( paste(attr(exampledata,variable.labels)[77]))+ theme_classic()+ geom_text(aes(label = if_else(prop> 0.02,scales :: percent(round(prop,2)),NULL)),position = position_fill(vjust = 0.5))+ coord_flip() Data 结构(list(exampledata.V101 = structure( c(2L,NA,2L,2L, 2L,2L,1L,1L,1L,2L,1L,2L,2L,NA,2L,2L,2L,1L,2L,NA, NA,1L,1L,2L,NA,2L,2L,2L,NA,2L,2L,2N,1L,2L,2L,2L, 1L,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,1L, 1L,2L,1L,2L,2L,2L,2L,1L,2L,2L,2L,1L,2L,2L,1L,2L,2L,2L, 1L,2L,1L,NA,2L,2L,2L,2L,NA,2L,1L,2L,2L ),。标签= c(Weiblich,Männlich ),exampledata.A_REF = structure(c(18L, 18L,18L,18L,18L,17L,18L,18L,18L,18L,18L,18L,16L,18L, 18L,18L, 18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L, 18L, 16L,18L,18L,17L,18L, 18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L, 16L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L,18L, 18L,16L,18L, 16L,16L,18L,18L,18L,17L,16L,18L),.Label = C( 德语证书EINES Aufbau-奥德Ergänzungsstudiums, 的LA Berufliche Schulen , LA Sonderschule, LA体育馆, LA Haupt- UND实科中学, LA Grundschule, Künstlerischer/ musischer Abschluss, Kirchlicher Abschluss,的国家考试(指数ohne Lehramt) , 
的Diplom专业院校,的Diplom我的Gesamthochschulen, 的DiplomUniversität大学,的Diplom II的Gesamthochschulen, Sonstiges, 推广, 国家考试, 魔导师, 的Diplom 主,学士),class =factor),exampledata.V43 = structure(c(3L, 5L,4L,4L,4L,4L,4L,5L,5L, 4L,4L,3L,2L,3L,3L,2L,4L,4L,4L,4L,4L,4L,4L,5L,5L,4L,4L,4L, NA, NA ,3L,5L,2L,4L,5L,4L,4L,5L,5L,4L,NA,NA,4L,NA,3L, 4L,5L,5L,2L,4L,4L,3L, 4L,4L,3L,5L,4L,5L,NA,4L, NA,4L,NA,4L,5L,4L,NA,5L,NA,4L,4L,4L, ,5L, 4L,4L,4L,4L,4L,3L,3L,4L,2L,4L,4L,4L,3L,4L,NA,4L, 5L,5L,4L), .Label = c(5 Sehr unzufrieden,4,3,2,1 Sehr zufrieden),class =factor)),.Names = c(exampledata。 V101,exampledata.A_REF,exampledata.V43),row.names = c(NA,100L ),class =data.frame) ggplot2 为你进行摘要要么是有限的,要么是很难以你想要的方式显示。 库(tidyverse)库(forcats) 因为在绘制 ggplot2 之前最好总结一下你的数据,以下代码将计算每组 label 选择了一个特定的答案。在最后一步中,我将数据从宽变为长,这样所有要绘制的比例都在同一个变量中(我称之为 prop )。 pre $ plot_data< - plot_data%>%group_by(labels)%>%汇总('5 Sehr unzufrieden`​​ = sum (V43 ==5 Sehr unzufrieden,1,0))/ n(),`4` = sum(ifelse(V43 ==4,1,0))/ n ,`3` = sum(ifelse(V43 ==3,1,0))/ n(),`2` = sum(ifelse(V43 ==2,1 ,0))/ n(),`1 Sehr zufrieden`​​ = sum(ifelse(V43 ==1 Sehr zufrieden,1,0))/ n())%>% collect(key = Rating,value = prop,-labels) 最好将分类变量设置为操纵因素,比如说顺序和颜色,所以这就是以下内容。最初,我的代码在上面的 gather 函数中有缩放标签(我称之为 Rating >)以相反的顺序因此我使用 forcats 程序包中的 fct_rev 将其撤消。 plot_data $ labels< - factor(plot_data $标签) plot_data $ Rating< - factor(plot_data $ Rating)%> ;%fct_rev() 对于下面的图表,我只做了一些更改。最值得注意的是我使用 geom_col 而不是 geom_bar 。在后台, geom_col 与 geom_bar(stat =identity)相同 - 键入更快。我们主要是告诉 ggplot2 来按原样绘制数据图表,而不是将其视为原始数据。但是,我确实需要指定 y 唯美性以指示我想要绘制的数据,所以我指定使用 prop ggplot 调用中的$ c>变量。 #Plot = ================================ ggplot(plot_data,aes(x = labels,y = prop, fill = rating))+ geom_col()+ scale_y_continuous(labels = scales :: percent,breaks = c(0,0.2,0.4,0.6,0.8,1))+ labs (y = NULL,x = NULL,fill = NULL)+ ggtitle(paste(attr(exampledata,variable.labels)[77]))+ theme_classic()+ geom_text(aes(label = if_else(prop> 0.01,scales :: percent(round(prop,2)),NULL)),position = position_fill(vjust = 0.5))+ 
coord_flip() 我改变的唯一其他行是上面的 geom_text 调用。我添加了一个 if_else 函数,以便它显示标签(如果大于1%)或不大于(1%或更小)。另外,我四舍五入百分比,以便使用 round 函数没有任何小数。请记住,您需要舍入到2个小数点。
Problem
The current percentages in the bars are calculated from the total amount of data. I want each stack to add up to a full 100%. (Solved)
Also, the percentages should be rounded to the nearest integer. (Solved)
Edit: Remove all percentages below or equal to 1%. (Solved)
Edit2: Make sure no labels are overlapping.
I've been googling for a while now. It seems like there isn't a proper way to prevent labels from overlapping.
Possible solutions I discovered:
- Flip the plot
- Add angle() to rotate the labels
- "Manually" calculate each position
- Make use of check_overlap = TRUE
Current State
My code so far:
# Load libraries & packages =================================
library("ggplot2")
library("scales")
library("dplyr")
library("foreign")
library("tidyverse")
library("forcats")
# Data setup =================================
spss_file_path <- "D:\\Programming\\Testing\\2017-03-15_data_import&ggplot2\\Beispieldatensatz(fiktiv).sav"
exampledata <- read.spss(spss_file_path, use.value.labels = TRUE, to.data.frame = TRUE, reencode = TRUE)
exampledata$V43 <- factor(exampledata$V43, levels = c(1,2,3,4,5), labels = c("1 Sehr zufrieden","2","3","4", "5 Sehr unzufrieden"))
exampledata$V43 <- factor(exampledata$V43, levels = rev(unique(levels(exampledata$V43))))
exampledata$A_REF <- factor(exampledata$A_REF, levels = rev(unique(levels(exampledata$A_REF))))
exampledata$V101 <- factor(exampledata$V101, levels = rev(unique(levels(exampledata$V101))))
labels <- exampledata %>% filter(!is.na(V101), !is.na(V43)) %>% count(A_REF) %>% mutate(labels = paste(A_REF,"(n=", n, ")")) %>% select(A_REF, labels)
plot_data <- exampledata %>% filter(!is.na(V101), !is.na(V43)) %>% left_join(labels, by = "A_REF")
plot_data <- plot_data %>% group_by(labels) %>% summarize(`5 Sehr unzufrieden` = sum(ifelse(V43 == "5 Sehr unzufrieden", 1, 0)) / n(), `4` = sum(ifelse(V43 == "4", 1, 0)) / n(), `3` = sum(ifelse(V43 
== "3", 1, 0)) / n(), `2` = sum(ifelse(V43 == "2", 1, 0)) / n(), `1 Sehr zufrieden` = sum(ifelse(V43 == "1 Sehr zufrieden", 1, 0)) / n()) %>% gather(key = Rating, value = prop, -labels)plot_data$labels <- factor(plot_data$labels)plot_data$Rating <- factor(plot_data$Rating) %>% fct_rev()# Plot =================================ggplot(plot_data, aes(x = labels, y = prop, fill = Rating)) + geom_col() + scale_y_continuous(labels = scales::percent, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) + labs(y=NULL, x=NULL, fill=NULL) + ggtitle(paste(attr(exampledata, "variable.labels")[77])) + theme_classic() + geom_text(aes(label = if_else(prop > 0.02, scales::percent(round(prop, 2)), NULL)), position = position_fill(vjust=0.5)) + coord_flip()Datastructure(list(exampledata.V101 = structure(c(2L, NA, 2L, 2L,2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, NA,NA, NA, 1L, 1L, 2L, NA, 2L, 2L, 2L, NA, 2L, 2L, NA, NA, 1L, NA,2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, NA, 1L, NA, 1L, NA,1L, 2L, NA, NA, 2L, NA, 1L, 2L, 2L, NA, 2L, NA, 2L, 2L, 1L, 2L,1L, 2L, 1L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, NA, 2L, 1L, 2L, 2L), .Label = c("Weiblich", "Männlich"), class = "factor"), exampledata.A_REF = structure(c(18L,18L, 18L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L,18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 18L, 16L, 18L,16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 16L, 18L,18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 18L, 18L,16L, 18L, 16L, 18L, 18L, 16L, 16L, 18L, 18L, 18L, 18L, 18L, 18L,18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L,16L, 16L, 18L, 18L, 18L, 17L, 16L, 18L), .Label = c("Zertifikat eines Aufbau- oder Ergänzungsstudiums","LA Berufliche Schulen", "LA Sonderschule", "LA Gymnasium", "LA Haupt- und Realschule","LA Grundschule", "Künstlerischer/musischer Abschluss", "Kirchlicher 
Abschluss","Staatsexamen (ohne Lehramt)", "Diplom Fachhochschule, Diplom I an Gesamthochschulen","Diplom Universität, Diplom II an Gesamthochschulen", "Sonstiges","Promotion", "Staatsexamen", "Magister", "Diplom", "Master","Bachelor"), class = "factor"), exampledata.V43 = structure(c(3L,5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 3L, 2L, NA, 4L, 5L, 5L,4L, 4L, 4L, 4L, NA, 2L, 4L, 3L, 5L, 4L, 4L, 4L, NA, 4L, 4L, NA,NA, 3L, 5L, 2L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, NA, NA, 4L, NA, 3L,4L, 5L, 5L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 5L, 4L, 5L, NA, 4L,NA, 4L, NA, 4L, 5L, 4L, NA, 5L, NA, 4L, 4L, 4L, NA, 4L, NA, 5L,4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 4L, 4L, 3L, 4L, NA, 4L,5L, 5L, 4L), .Label = c("5 Sehr unzufrieden", "4", "3", "2","1 Sehr zufrieden"), class = "factor")), .Names = c("exampledata.V101","exampledata.A_REF", "exampledata.V43"), row.names = c(NA, 100L), class = "data.frame")
解决方案 (Solution)
It's usually preferable to summarize your data before charting it. I find that having ggplot2 do the summarization for you is either limited or makes it difficult to display the result the way you want.
library(tidyverse)
library(forcats)
Because it's best to summarize your data before plotting it in ggplot2, the following bit of code calculates, within each labels group, the proportion of respondents that selected a particular answer on the scale.
In the final step I turned the data from wide to long, so that all the proportions to be charted are in the same variable (which I call prop).
plot_data <- plot_data %>% group_by(labels) %>% summarize(`5 Sehr unzufrieden` = sum(ifelse(V43 == "5 Sehr unzufrieden", 1, 0)) / n(), `4` = sum(ifelse(V43 == "4", 1, 0)) / n(), `3` = sum(ifelse(V43 == "3", 1, 0)) / n(), `2` = sum(ifelse(V43 == "2", 1, 0)) / n(), `1 Sehr zufrieden` = sum(ifelse(V43 == "1 Sehr zufrieden", 1, 0)) / n()) %>% gather(key = Rating, value = prop, -labels)
It's preferable to store categorical variables as factors so that you can control, say, their order and colours, which is what the following code does. Initially, my code had the scale labels (which I called Rating in the gather function above) come out in the reverse order from what you had, so I'm using fct_rev from the forcats package to reverse them back.
plot_data$labels <- factor(plot_data$labels)
plot_data$Rating <- factor(plot_data$Rating) %>% fct_rev()
For the chart below, I just made a couple of changes. The most notable is that I'm using geom_col instead of geom_bar. In the background, geom_col is the same as geom_bar(stat = "identity") - it's just quicker to type. We're essentially telling ggplot2 to chart the data as is instead of treating it like raw data.