我正在尝试用 ggplot 绘制 ROC 曲线。我知道 ggplot 需要“FPR”和“TPR”,但我该如何从 performance 函数或 prediction 函数中获取这些值?

我需要在一个图中组合两个分类器的roc曲线。

这是我要使用的代码:

# One ROC line per classifier; `df` is expected to hold the columns
# FPR, TPR and classifier (one row per point on a curve).
ggplot(data = df, mapping = aes(x = FPR, y = TPR, colour = classifier)) +
  geom_line(size = 2, alpha = 0.7) +
  labs(
    title = "ROC curve",
    x = "False Positive Rate (1-Specificity)",
    y = "True Positive Rate (Sensitivity)"
  )


这是数据:

> dput(xgboost_prediction[1:100,])
structure(list(no = c(0.990442335605621, 0.534177422523499, 0.991996705532074,
0.84391313791275, 0.965542376041412, 0.691059589385986, 0.718063056468964,
0.995023250579834, 0.956590950489044, 0.94465434551239, 0.775800347328186,
0.725375294685364, 0.8955979347229, 0.519426822662354, 0.262101024389267,
0.920495390892029, 0.961367130279541, 0.529997289180756, 0.753180921077728,
0.374252378940582, 0.649519801139832, 0.873721957206726, 0.938673436641693,
0.98504501581192, 0.990583181381226, 0.991510570049286, 0.981655895709991,
0.903484642505646, 0.965107679367065, 0.953721821308136, 0.554582417011261,
0.919193744659424, 0.965399146080017, 0.574111104011536, 0.898433864116669,
0.901707828044891, 0.951646208763123, 0.637083053588867, 0.947289109230042,
0.920474767684937, 0.995938301086426, 0.973612129688263, 0.964727997779846,
0.884541928768158, 0.95257955789566, 0.412161201238632, 0.990020036697388,
0.976162195205688, 0.939770936965942, 0.968810081481934, 0.983040153980255,
0.89262580871582, 0.970016121864319, 0.923628032207489, 0.886549472808838,
0.522226393222809, 0.960890293121338, 0.975551664829254, 0.710578739643097,
0.973441541194916, 0.388043284416199, 0.944134533405304, 0.962944269180298,
0.958814978599548, 0.948761761188507, 0.973393976688385, 0.971923232078552,
0.883057355880737, 0.974324226379395, 0.886299729347229, 0.464900106191635,
0.746802806854248, 0.735462188720703, 0.663913428783417, 0.965535283088684,
0.990276217460632, 0.201064333319664, 0.98608660697937, 0.870771527290344,
0.99166601896286, 0.974869549274445, 0.990644574165344, 0.991726994514465,
0.827968239784241, 0.988746464252472, 0.783728361129761, 0.221792578697205,
0.98324054479599, 0.617589056491852, 0.986640393733978, 0.932099580764771,
0.967139899730682, 0.956782400608063, 0.90983521938324, 0.990782082080841,
0.995104193687439, 0.809846758842468, 0.698821008205414, 0.996101856231689,
0.528671264648438), yes = c(0.00955766439437866, 0.465822577476501,
0.00800329446792603, 0.15608686208725, 0.0344576239585876, 0.308940410614014,
0.281936943531036, 0.00497674942016602, 0.0434090495109558, 0.0553456544876099,
0.224199652671814, 0.274624705314636, 0.1044020652771, 0.480573177337646,
0.737898975610733, 0.0795046091079712, 0.038632869720459, 0.470002710819244,
0.246819078922272, 0.625747621059418, 0.350480198860168, 0.126278042793274,
0.0613265633583069, 0.0149549841880798, 0.00941681861877441,
0.00848942995071411, 0.0183441042900085, 0.0965153574943542,
0.0348923206329346, 0.046278178691864, 0.445417582988739, 0.0808062553405762,
0.0346008539199829, 0.425888895988464, 0.101566135883331, 0.0982921719551086,
0.0483537912368774, 0.362916946411133, 0.0527108907699585, 0.0795252323150635,
0.00406169891357422, 0.0263878703117371, 0.0352720022201538,
0.115458071231842, 0.0474204421043396, 0.587838798761368, 0.0099799633026123,
0.0238378047943115, 0.0602290630340576, 0.0311899185180664, 0.0169598460197449,
0.10737419128418, 0.0299838781356812, 0.076371967792511, 0.113450527191162,
0.477773606777191, 0.0391097068786621, 0.0244483351707458, 0.289421260356903,
0.0265584588050842, 0.611956715583801, 0.055865466594696, 0.0370557308197021,
0.0411850214004517, 0.0512382388114929, 0.026606023311615, 0.0280767679214478,
0.116942644119263, 0.0256757736206055, 0.113700270652771, 0.535099893808365,
0.253197193145752, 0.264537811279297, 0.336086571216583, 0.0344647169113159,
0.00972378253936768, 0.798935666680336, 0.0139133930206299, 0.129228472709656,
0.00833398103713989, 0.0251304507255554, 0.00935542583465576,
0.00827300548553467, 0.172031760215759, 0.0112535357475281, 0.216271638870239,
0.778207421302795, 0.01675945520401, 0.382410943508148, 0.0133596062660217,
0.0679004192352295, 0.0328601002693176, 0.0432175993919373, 0.0901647806167603,
0.00921791791915894, 0.00489580631256104, 0.190153241157532,
0.301178991794586, 0.00389814376831055, 0.471328735351562)), .Names = c("no",
"yes"), row.names = c(NA, 100L), class = "data.frame")

> dput(randomforest_prediction[1:100,])
structure(list(no = c(0.694, 0.606, 0.778, 0.498, 0.748, 0.604,
0.446, 0.586, 0.686, 0.748, 0.708, 0.574, 0.662, 0.614, 0.65,
0.616, 0.618, 0.664, 0.562, 0.496, 0.628, 0.77, 0.652, 0.738,
0.674, 0.73, 0.684, 0.7, 0.678, 0.672, 0.616, 0.71, 0.93, 0.774,
0.668, 0.682, 0.752, 0.926, 0.776, 0.796, 0.85, 0.728, 0.57,
0.648, 0.804, 0.64, 0.766, 0.722, 0.73, 0.706, 0.748, 0.608,
0.684, 0.708, 0.746, 0.554, 0.732, 0.816, 0.888, 0.656, 0.808,
0.908, 0.69, 0.722, 0.764, 0.746, 0.728, 0.57, 0.95, 0.656, 0.066,
0.692, 0.794, 0.638, 0.63, 0.8, 0.572, 0.776, 0.776, 0.702, 0.848,
0.77, 0.864, 0.682, 0.784, 0.582, 0.028, 0.694, 0.642, 0.428,
0.636, 0.654, 0.798, 0.576, 0.674, 0.756, 0.606, 0.648, 0.676,
0.498), yes = c(0.306, 0.394, 0.222, 0.502, 0.252, 0.396, 0.554,
0.414, 0.314, 0.252, 0.292, 0.426, 0.338, 0.386, 0.35, 0.384,
0.382, 0.336, 0.438, 0.504, 0.372, 0.23, 0.348, 0.262, 0.326,
0.27, 0.316, 0.3, 0.322, 0.328, 0.384, 0.29, 0.07, 0.226, 0.332,
0.318, 0.248, 0.074, 0.224, 0.204, 0.15, 0.272, 0.43, 0.352,
0.196, 0.36, 0.234, 0.278, 0.27, 0.294, 0.252, 0.392, 0.316,
0.292, 0.254, 0.446, 0.268, 0.184, 0.112, 0.344, 0.192, 0.092,
0.31, 0.278, 0.236, 0.254, 0.272, 0.43, 0.05, 0.344, 0.934, 0.308,
0.206, 0.362, 0.37, 0.2, 0.428, 0.224, 0.224, 0.298, 0.152, 0.23,
0.136, 0.318, 0.216, 0.418, 0.972, 0.306, 0.358, 0.572, 0.364,
0.346, 0.202, 0.424, 0.326, 0.244, 0.394, 0.352, 0.324, 0.502
)), .Names = c("no", "yes"), row.names = c("1", "2", "3", "4",
"7", "8", "9", "10", "13", "14", "15", "16", "18", "19", "22",
"23", "24", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "38", "39", "40", "41", "45", "46", "47", "48", "50", "51",
"55", "56", "57", "58", "60", "61", "62", "64", "65", "66", "68",
"70", "71", "73", "75", "76", "77", "78", "79", "80", "82", "83",
"84", "85", "86", "87", "88", "89", "90", "91", "92", "93", "95",
"96", "99", "100", "101", "102", "103", "105", "106", "107",
"108", "109", "110", "112", "114", "115", "116", "118", "120",
"123", "124", "125", "126", "127", "128", "129", "130", "131",
"132", "133", "135"), class = "data.frame")

> dput(test_set$y[1:100])
structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("no", "yes"), class = "factor")


有人可以帮忙吗?我已经尝试了几个小时!

最佳答案

以下代码可以解决您的问题。
希望它能对您有所帮助。

# Build ROC coordinates for both classifiers with ROCR and draw them in a
# single ggplot. Column 2 of each prediction data frame is the "yes"
# probability, matching the second level of test_set$y.
library(ROCR)
library(ggplot2)

# performance() puts `measure` in the y.values slot and `x.measure` in the
# x.values slot, so TPR is read from y.values and FPR from x.values.
pred.xgb <- prediction(xgboost_prediction[, 2], test_set$y)
perf.xgb <- performance(pred.xgb, measure = "tpr", x.measure = "fpr")
fpr.xgb <- perf.xgb@x.values[[1]]
tpr.xgb <- perf.xgb@y.values[[1]]

pred.rf <- prediction(randomforest_prediction[, 2], test_set$y)
perf.rf <- performance(pred.rf, measure = "tpr", x.measure = "fpr")
fpr.rf <- perf.rf@x.values[[1]]
tpr.rf <- perf.rf@y.values[[1]]

# Stack both curves in long format; Method labels the rows of each curve.
df <- data.frame(
  tpr = c(tpr.xgb, tpr.rf),
  fpr = c(fpr.xgb, fpr.rf),
  Method = rep(c("XGB", "RF"), times = c(length(tpr.xgb), length(tpr.rf)))
)
ggplot(df, aes(x = fpr, y = tpr, color = Method)) +
  geom_line()


r - 在R中的ggplot中访问roc曲线的TPR和FPR-LMLPHP

可以使用以下代码计算AUC:

# AUC per classifier, read from the first element of the y.values slot of
# the "auc" performance object.
slot(performance(pred.xgb, measure = "auc"), "y.values")[[1]]
# 0.6766304

slot(performance(pred.rf, measure = "auc"), "y.values")[[1]]
# 0.6650815

10-04 23:22