本文介绍了ggplot2:如何定制点的颜色和形状?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧! 问题描述 29岁程序员,3月因学历无情被辞! 我正在尝试使用 ggplot2 创建一个带状图。以下是 tbl 的子集,它包含我正在使用的相关列,以及 dput 。 > tbl [,c('Study_ID','Probe_ID','Group1','Group2','LogFC','adj_P_Value','P_Value','CI_L','CI_R','Disease')] Study_ID Probe_ID Group1 Group2 LogFC adj_P_Value P_Value CI_L CI_R 1 GSE2461 220307_at男女-0.09017596 1.000000e + 00 5.662047e-01 -0.43955752 0.25920561 2 GSE2461 220307_at溃疡性结肠炎过敏性肠综合征0.08704844 1.000000e + 00 5.784053e -01 -0.26134341 0.43544028 3 GSE27887 220307_at nonlesional skin lesional skin -0.03501474 1.000000e + 00 4.409881e-01 -0.12677636 0.05674688 4 GSE27887 220307_at pretreatment posttreatment 0.01096914 1.000000e + 00 8.080366e-01 -0.08064105 0.10257932 5 GSE42296 7921677 Infliximab治疗前-0.03707265 1.000000e + 00 3.979403e-01 -0.12407201 0.04992672 6 GSE42296 7921677 Re sponder无反应者0.07644834 1.000000e + 00 1.505444e-01 -0.02849309 0.18138977 7 GSE42296 7921677类风湿性关节炎克罗恩病0.42318863 3.960125e-06 1.989713e-10 0.31076269 0.53561457 8 GSE58558 220307_at MF -0.11881801 1.000000e + 00 1.130180e-01 -0.26629675 0.02866072 9 GSE58558 220307_at非皮损皮损-0.18914128 1.000000e + 00 3.696739e-03 -0.31525660 -0.06302596 10 GSE58558 220307_at无响应者-0.14470319 1.000000e + 00 2.328062e -01 -0.38396386 0.09455748 11 GSE58558 220307_at本周12天1 -0.39619004 4.311942e-01 2.215798e-05 -0.57226227 -0.22011781 12 GSE58558 220307_本周2天1 -0.28765455 1.000000e + 00 8.753977e- 04 -0.45375957 -0.12154953 13 GSE59294 220307_at C Dupilumab 300 mg B Dupilumab 150 mg 0.16853309 1.000000 e + 00 1.140155e-01 -0.04273877 0.37980494 14 GSE59294 220307_at D安慰剂B Dupilumab 150 mg -0.18995566 1.000000e + 00 2.264691e-01 -0.50367856 0.12376724 15 GSE59294 220307_at NL皮肤LS皮肤0.01376129 1.000000e +00 9.041383e-01 -0.21711706 0.24463964 16 GSE59294 220307_at Pre Post 0.02234607 1.000000e + 00 8.069367e-01 -0.16235054 0.20704268 疾病 1肠易激综合征;溃疡性结肠炎 2肠易激综合征;溃疡性结肠炎 3特应性皮炎 4特应性皮炎 5克罗恩氏病;类风湿性关节炎 6克罗恩病;类风湿性关节炎 7克罗恩病;类风湿性关节炎 8特应性皮炎 9特应性皮炎 10特应性皮炎 11特应性皮炎 12特应性皮炎 
13特应性皮炎 14特应性皮炎 15特应性皮炎 16特应性皮炎 以下是 dput : >输入(小滴(tbl [,c('Study_ID','Probe_ID','Group1','Group2','LogFC','adj_P_Value','P_Value','CI_L','CI_R','疾病')] ))结构(列表(Study_ID = c(GSE2461,GSE2461,GSE27887,GSE27887,GSE42296,GSE42296,GSE42296,GSE58558 GSE58558,GSE58558,GSE58558,GSE58558,GSE59294,GSE59294,GSE59294,GSE59294),Probe_ID = c(220307_at, 220307_at,220307_at,220307_at,7921677,7921677,7921677,220307_at,220307_at,220307_at,220307_at220307_at ,220307_at,220307_at,220307_at,220307_at),Group1 = c(男性,溃疡性结肠炎,非病变性皮肤,预处理 ,响应者,类风湿性关节炎,M,非皮损皮肤,响应者,第12周,第2周,C Dupilumab 300mg, D安慰剂,NL皮肤,Pre),组2 = c(女性,肠易激综合征,损伤皮肤,后处理治疗前,无应答者,克罗恩病,F,损伤皮肤,无反应者,第1天,第1天,B Dupilumab 150 mg Logford = c(-0.0901759558643281, 0.0870484364429408,-0.0350147376937934,0.0109691380052655, -0.0370726462749328,0.0764483363743359,0.423188628619509,-0.118818013184408,-0.118818013184408, -0.189141277685995,-0.144703191279992,-0.396190039768736,-0.28765454670704, 0.168533085440721,-0.189955660434197,0.0137612879743023,0.0223460675171673 ),adj_P_Value = c(1,1,1,1,1,1,1, 1,1,1,1,1),P_Value = c(0.566204678925109, 0.578405275354266,0.440988072013756,0.808036622723435,0.397940346528484, 0.150544373610059,1.98971262936634e-10,0.11301796668591,0.00369673863311212, 0.232806229179741,2.21579776371792e-05,0.000875397680320129, 0.114015475901252,0.226469133014055,0.904138332714553,0.806936684043 586 ),CI_L = c(-0.439557521861354,-0.261343410788222,-0.12677635951562, -0.0806410486876688,-0.124072011981945,-0.0284930943795223, 0.310762687356251,-0.26629674914578,-0.315256597358499,-0.383963864121397, -0.57226227039893,-0.453759565458485,-0.0427387734415052,-0.503678563834605, -0.217117064412363,-0.162350541147386),CI_R = C(0.259205610132698, 0.435440283674103,0.0567468841280329,0.1025793246982,0.0499267194320791, 0.181389767128194,0.535614569882768 ,0.0286607227769647,-0.0630259580134921, 0.0945574815614131,-0.220117809138542,-0.121549527955595,0.379804944322947, 0.12376724296621,0.244639640360967,0.207042676181721),疾病= 
c(过敏性肠综合征;溃疡性结肠炎,过敏性肠综合征;溃疡性结肠炎,特应性皮炎,特应性皮炎,克罗恩病;类风湿性关节炎,克罗恩氏病;类风湿性关节炎,克罗恩氏病;特应性皮炎,特应性皮炎,特应性皮炎,特应性皮炎,特应性皮炎,特应性皮炎 ,特应性皮炎,特应性皮炎)),.Names = c(Study_ID,Probe_ID,Group1,Group2,LogFC,adj_P_Value, P_Value,CI_L,CI_R,疾病),row.names = c(NA, -16L),class =data.frame) $ b $ b code> #test使用ggplot2 maxFC = max(as.numeric(as.character(tbl $ LogFC))) minFC = min(as.numeric(as.character(tbl $ LogFC ))) datasetList = tbl $ Study_ID hLines =(which(duplicated(datasetList)== FALSE) - 0.5) tbl $ ylab ,tbl $ Group1,sep =) p geom_vline(xintercept = log(0.5,2),size = 0.2)+ geom_vline(xintercept = log(2 / 3,2),size = 0.2)+ geom_vline(xintercept = log(1.5,2),size = 0.2)+ geom_vline(xintercept = log(2,2),size = 0.2)+ labs(title = tbl $ gene,y =Contrasts,x = bquote(〜Log [2]〜'(Fold Change)'))+ geom_errorbarh(aes(x = LogFC,xmin = CI_L,xmax = CI_R),height = .1)+ geom_point(aes(color = cut(adj_P_Value,c(-Inf,0.01,0.05,Inf )))+ scale_color_manual(name =P Value, values = c(( - Inf,0.01)=red,(0.01,0.05)) =橙,(0.05,Inf)=黑),标签= c( scale_shape_manual(values = c(4,15,19))+ coord_cartesian(xlim = c(min(-2,minFC),max(2,maxFC)))+ theme(axis.text.y = element_blank(),strip.text.y = element_text(angle = 180),#panel.grid.major = element_blank(),#panel.grid.minor = element_blank(), axis.line.y = element_blank(), axis.line.x = element_blank(),#panel.background = element_rect(fill ='white',color ='white'),#panel.grid = element_blank(), panel.spacing.y =单位(0.5,'lines'), axis.ticks.y = element_blank())+ facet_grid(Study_ID + ylab〜。,scales ='free',space ='free',switch ='both') p 基本上,点的实际位置是通过它们的 LogFC 值,但具有 adj_P_Value 的点应显示为红色圆圈,介于0.01和0.0之间5作为橙色方块,并且> = 0.05 作为黑色十字(即,我提供的数据不应该显示任何方块)。我的尝试是在 geom_point 中使用 cut ,但这似乎不起作用。颜色显示正确,但形状不正确。这一直在困扰我一段时间。如果有任何我打破的约定或标准(我可能是),请让我知道并提出一些可以实现我已经完成的事情。谢谢! 
更新: #test使用ggplot2 maxFC = max(as.numeric(as.character(tbl $ LogFC))) minFC = min(as.numeric(as.character(tbl $ LogFC))) datasetList = tbl $ Study_ID hLines =(which(duplicated(datasetList)== FALSE) - 0.5) tbl $ ylab ,tbl $ Group1,sep =) tbl % mutate(colourgroup = case_when (。$adj_P_Value≤0.01〜1,。$ adj_P_Value> 0.01&。$ adj_P_Value 。$ adj_P_Value> = 0.05〜3 $ bp #geom_point + geom_vline(xintercept = log(0.5,2),size = 0.2)+ geom_vline(xintercept = log(2 / 3,2),size = 0.2)+ geom_vline = log(1.5,2),size = 0.2)+ geom_vline(xintercept = log(2,2),size = 0.2)+ labs (标题= tbl $基因,y =对比度,x = bquote(〜Log [2]〜'(倍数变化)'))+ geom_errorbarh(aes(x = LogFC,xmin = CI_L,xmax = CI_R),height = .1)+ geom_point()+ scale_color_manual(c('red','orange','black'))+ scale_shape_manual(c ,4))+ coord_cartesian(xlim = c(min(-2,minFC),max(2,maxFC)))+ theme(axis.text.y = element_blank(),strip。 text.y = element_text(angle = 180),#panel.grid.major = element_blank(),#panel.grid.minor = element_blank(), axis.line.y = element_blank(), axis.line.x = element_blank(),#panel.background = element_rect(fill ='white',color ='white'), #panel。 grid = element_blank(), panel.spacing.y = unit(0.5,'lines'), axis.ticks.y = element_blank())+ facet_grid(Study_ID + ylab〜 。,scale ='free',space ='free',switch ='both') p 解决方案添加你想要的因子列 library (dplyr) tbl< - tbl%>% mutate(colourgroup = case_when( adj_P_Value< = 0.01〜1, adj_P_Value> 0.01& adj_P_Value< 0.05〜2, adj_P_Value> = 0.05〜3)) p> aes(x = LogFC,y = Probe_ID,group = Study_ID) $ b $ p $ b $ $ $ $ $ $ $ $ $ aes(x = LogFC,y = Probe_ID, color = factor(colourgroup),shape = factor(colourgroup)) 和 scale_color_manual(values = c(red,orange,black))+ scale_shape_manual(values = c(1, 2,3)) 最小示例 这个最简单的 ggplot 命令适用于我。注意我有意地切换了 x 和 y 值, red 和 orange 可能难以区分 ggplot(df2, aes(x = Probe_ID,y = LogFC,color = factor(colourgroup),shape = factor(colourgroup)))+ geom_point()+ scale_color_manual(values = c(red,orange ,black))+ scale_shape_manual(values = c(1,2,3)) I am trying to create a stripchart 
using ggplot2. Below is a subset of tbl which has the relevant columns I am using, along with a dput.> tbl[,c('Study_ID', 'Probe_ID', 'Group1','Group2','LogFC', 'adj_P_Value', 'P_Value', 'CI_L','CI_R','Disease')] Study_ID Probe_ID Group1 Group2 LogFC adj_P_Value P_Value CI_L CI_R1 GSE2461 220307_at Male Female -0.09017596 1.000000e+00 5.662047e-01 -0.43955752 0.259205612 GSE2461 220307_at ulcerative colitis irritable bowel syndrome 0.08704844 1.000000e+00 5.784053e-01 -0.26134341 0.435440283 GSE27887 220307_at nonlesional skin lesional skin -0.03501474 1.000000e+00 4.409881e-01 -0.12677636 0.056746884 GSE27887 220307_at pretreatment posttreatment 0.01096914 1.000000e+00 8.080366e-01 -0.08064105 0.102579325 GSE42296 7921677 Infliximab Before treatment -0.03707265 1.000000e+00 3.979403e-01 -0.12407201 0.049926726 GSE42296 7921677 Responder Nonresponder 0.07644834 1.000000e+00 1.505444e-01 -0.02849309 0.181389777 GSE42296 7921677 Rheumatoid Arthritis Crohn's Disease 0.42318863 3.960125e-06 1.989713e-10 0.31076269 0.535614578 GSE58558 220307_at M F -0.11881801 1.000000e+00 1.130180e-01 -0.26629675 0.028660729 GSE58558 220307_at non lesional skin lesional skin -0.18914128 1.000000e+00 3.696739e-03 -0.31525660 -0.0630259610 GSE58558 220307_at responder nonresponder -0.14470319 1.000000e+00 2.328062e-01 -0.38396386 0.0945574811 GSE58558 220307_at week 12 day 1 -0.39619004 4.311942e-01 2.215798e-05 -0.57226227 -0.2201178112 GSE58558 220307_at week 2 day 1 -0.28765455 1.000000e+00 8.753977e-04 -0.45375957 -0.1215495313 GSE59294 220307_at C Dupilumab 300 mg B Dupilumab 150 mg 0.16853309 1.000000e+00 1.140155e-01 -0.04273877 0.3798049414 GSE59294 220307_at D Placebo B Dupilumab 150 mg -0.18995566 1.000000e+00 2.264691e-01 -0.50367856 0.1237672415 GSE59294 220307_at NL skin LS skin 0.01376129 1.000000e+00 9.041383e-01 -0.21711706 0.2446396416 GSE59294 220307_at Pre Post 0.02234607 1.000000e+00 8.069367e-01 -0.16235054 0.20704268 Disease1 irritable bowel syndrome; ulcerative 
colitis2 irritable bowel syndrome; ulcerative colitis3 atopic Dermatitis4 atopic Dermatitis5 Crohn's Disease; Rheumatoid Arthritis6 Crohn's Disease; Rheumatoid Arthritis7 Crohn's Disease; Rheumatoid Arthritis8 Atopic Dermatitis9 Atopic Dermatitis10 Atopic Dermatitis11 Atopic Dermatitis12 Atopic Dermatitis13 atopic Dermatitis14 atopic Dermatitis15 atopic Dermatitis16 atopic DermatitisHere is the dput :> dput(droplevels(tbl[,c('Study_ID', 'Probe_ID', 'Group1','Group2','LogFC', 'adj_P_Value', 'P_Value', 'CI_L','CI_R','Disease')]))structure(list(Study_ID = c("GSE2461", "GSE2461", "GSE27887","GSE27887", "GSE42296", "GSE42296", "GSE42296", "GSE58558", "GSE58558","GSE58558", "GSE58558", "GSE58558", "GSE59294", "GSE59294", "GSE59294","GSE59294"), Probe_ID = c("220307_at", "220307_at", "220307_at","220307_at", "7921677", "7921677", "7921677", "220307_at", "220307_at","220307_at", "220307_at", "220307_at", "220307_at", "220307_at","220307_at", "220307_at"), Group1 = c("Male", "ulcerative colitis","nonlesional skin", "pretreatment", "Infliximab", "Responder","Rheumatoid Arthritis", "M", "non lesional skin", "responder","week 12", "week 2", "C Dupilumab 300 mg", "D Placebo", "NL skin","Pre"), Group2 = c("Female", "irritable bowel syndrome", "lesional skin","posttreatment", "Before treatment", "Nonresponder", "Crohn's Disease","F", "lesional skin", "nonresponder", "day 1", "day 1", "B Dupilumab 150 mg","B Dupilumab 150 mg", "LS skin", "Post"), LogFC = c(-0.0901759558643281,0.0870484364429408, -0.0350147376937934, 0.0109691380052655,-0.0370726462749328, 0.0764483363743359, 0.423188628619509, -0.118818013184408,-0.189141277685995, -0.144703191279992, -0.396190039768736, -0.28765454670704,0.168533085440721, -0.189955660434197, 0.0137612879743023, 0.0223460675171673), adj_P_Value = c(1, 1, 1, 1, 1, 1, 3.96012504622782e-06, 1,1, 1, 0.431194244819507, 1, 1, 1, 1, 1), P_Value = c(0.566204678925109,0.578405275354266, 0.440988072013756, 0.808036622723435, 
0.397940346528484,0.150544373610059, 1.98971262936634e-10, 0.11301796668591, 0.00369673863311212,0.232806229179741, 2.21579776371792e-05, 0.000875397680320129,0.114015475901252, 0.226469133014055, 0.904138332714553, 0.806936684043586), CI_L = c(-0.439557521861354, -0.261343410788222, -0.12677635951562,-0.0806410486876688, -0.124072011981945, -0.0284930943795223,0.310762687356251, -0.26629674914578, -0.315256597358499, -0.383963864121397,-0.57226227039893, -0.453759565458485, -0.0427387734415052, -0.503678563834605,-0.217117064412363, -0.162350541147386), CI_R = c(0.259205610132698,0.435440283674103, 0.0567468841280329, 0.1025793246982, 0.0499267194320791,0.181389767128194, 0.535614569882768, 0.0286607227769647, -0.0630259580134921,0.0945574815614131, -0.220117809138542, -0.121549527955595, 0.379804944322947,0.12376724296621, 0.244639640360967, 0.207042676181721), Disease = c("irritable bowel syndrome; ulcerative colitis","irritable bowel syndrome; ulcerative colitis", "atopic Dermatitis","atopic Dermatitis", "Crohn's Disease; Rheumatoid Arthritis","Crohn's Disease; Rheumatoid Arthritis", "Crohn's Disease; Rheumatoid Arthritis","Atopic Dermatitis", "Atopic Dermatitis", "Atopic Dermatitis","Atopic Dermatitis", "Atopic Dermatitis", "atopic Dermatitis","atopic Dermatitis", "atopic Dermatitis", "atopic Dermatitis")), .Names = c("Study_ID", "Probe_ID", "Group1", "Group2", "LogFC","adj_P_Value", "P_Value", "CI_L", "CI_R", "Disease"), row.names = c(NA,-16L), class = "data.frame")Finally, here the code that I have so far.#test using ggplot2maxFC = max(as.numeric(as.character(tbl$LogFC)))minFC = min(as.numeric(as.character(tbl$LogFC)))datasetList = tbl$Study_IDhLines =(which(duplicated(datasetList) == FALSE) - 0.5)tbl$ylab <- paste(tbl$Group2," \U2192 ","\n", tbl$Group1, sep = "")p <- ggplot(data = tbl, aes(x = LogFC, y = Probe_ID, group = Study_ID)) + geom_vline(xintercept = log(0.5,2), size = 0.2) + geom_vline(xintercept = log(2/3,2), size = 0.2) + geom_vline(xintercept = 
log(1.5,2), size = 0.2) + geom_vline(xintercept = log(2,2), size = 0.2) + labs(title = tbl$gene, y = "Contrasts", x = bquote(~Log[2]~'(Fold Change)')) + geom_errorbarh(aes(x = LogFC, xmin = CI_L, xmax = CI_R), height = .1) + geom_point(aes(colour = cut(adj_P_Value, c(-Inf, 0.01, 0.05, Inf)))) + scale_color_manual(name = "P Value", values = c("(-Inf,0.01]" = "red", "(0.01,0.05)" = "orange", "(0.05, Inf]" = "black"), labels = c("<= 0.01", "0.01 < P Value <= 0.05", "> 0.05")) + scale_shape_manual(values = c( 4,15,19)) + coord_cartesian(xlim = c(min(-2,minFC),max(2,maxFC))) + theme(axis.text.y = element_blank(), strip.text.y = element_text(angle = 180), #panel.grid.major = element_blank(), #panel.grid.minor = element_blank(), axis.line.y = element_blank(), axis.line.x = element_blank(), #panel.background = element_rect(fill = 'white', colour = 'white'), #panel.grid = element_blank(), panel.spacing.y = unit(0.5,'lines'), axis.ticks.y = element_blank()) + facet_grid(Study_ID+ylab~ ., scales = 'free', space = 'free', switch = 'both') p Basically, the points' actual position is determined via their LogFC value, but the points that have an adj_P_Value <= 0.01 should be displayed as red circles, in between 0.01 and 0.05 as orange squares, and >= 0.05 as black crosses (i.e. the data that I've provided shouldn't show any squares). My attempt at this was using cut in geom_point, but this doesn't seem to work. The colors are showing up correctly, but the shapes are not. This has been bugging me for a while. If there are any conventions or standards that I'm breaking (which I probably am), please let me know and suggest something that would achieve exactly what I have already. 
Thanks! Update: #test using ggplot2 maxFC = max(as.numeric(as.character(tbl$LogFC))) minFC = min(as.numeric(as.character(tbl$LogFC))) datasetList = tbl$Study_ID hLines =(which(duplicated(datasetList) == FALSE) - 0.5) tbl$ylab <- paste(tbl$Group2," \U2192 ","\n", tbl$Group1, sep = "") tbl <- tbl %>% mutate(colourgroup = case_when( .$adj_P_Value <= 0.01 ~ 1, .$adj_P_Value > 0.01 & .$adj_P_Value < 0.05 ~ 2, .$adj_P_Value >= 0.05 ~ 3 ) ) p <- ggplot(data = tbl, aes(x = LogFC, y = Probe_ID, colour = colourgroup, shape = colourgroup)) + #geom_point() + geom_vline(xintercept = log(0.5,2), size = 0.2) + geom_vline(xintercept = log(2/3,2), size = 0.2) + geom_vline(xintercept = log(1.5,2), size = 0.2) + geom_vline(xintercept = log(2,2), size = 0.2) + labs(title = tbl$gene, y = "Contrasts", x = bquote(~Log[2]~'(Fold Change)')) + geom_errorbarh(aes(x = LogFC, xmin = CI_L, xmax = CI_R), height = .1) + geom_point() + scale_color_manual(c('red','orange','black')) + scale_shape_manual(c(19,15,4))+ coord_cartesian(xlim = c(min(-2,minFC),max(2,maxFC))) + theme(axis.text.y = element_blank(), strip.text.y = element_text(angle = 180), #panel.grid.major = element_blank(), #panel.grid.minor = element_blank(), axis.line.y = element_blank(), axis.line.x = element_blank(), #panel.background = element_rect(fill = 'white', colour = 'white'), #panel.grid = element_blank(), panel.spacing.y = unit(0.5,'lines'), axis.ticks.y = element_blank()) + facet_grid(Study_ID+ylab~ ., scales = 'free', space = 'free', switch = 'both') p 解决方案 Add a column with the factor you want: library(dplyr) tbl <- tbl %>% mutate(colourgroup = case_when( adj_P_Value <= 0.01 ~ 1, adj_P_Value > 0.01 & adj_P_Value < 0.05 ~ 2, adj_P_Value >= 0.05 ~ 3 )) Then change aes(x = LogFC, y = Probe_ID, group = Study_ID) to aes(x = LogFC, y = Probe_ID, colour = factor(colourgroup), shape = factor(colourgroup)) and add scale_color_manual(values=c("red","orange","black")) + scale_shape_manual(values=c(1,2,3)) MINIMAL EXAMPLE: This minimal ggplot command worked 
for me. Note that I switched the x and y values on purpose, and that red and orange can be difficult to distinguish: ggplot(df2, aes(x = Probe_ID, y=LogFC, colour=factor(colourgroup), shape=factor(colourgroup))) + geom_point() + scale_color_manual(values=c("red","orange","black")) + scale_shape_manual(values=c(1,2,3)) 这篇关于ggplot2:如何定制点的颜色和形状?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持! 上岸,阿里云!
06-13 06:28