# Gower distances between every row of cent and every row of df, computed
# with two different functions so the resulting matrices can be compared.

# Calculate distance using daisy's gower.
# daisy() takes a single data set, so df and cent are stacked and the full
# pairwise matrix is computed first.
daisyDist <- daisy(rbind(df, cent), metric = "gower")
daisyDist <- as.matrix(daisyDist)
# Keep only the part where rows of cent (matrix rows) are compared to rows of
# df (matrix columns). seq_len() avoids the descending sequences that 1:n and
# (n + 1):m produce when df or cent has zero rows; drop = FALSE keeps the
# result a matrix even when a single row or column remains.
daisyDist <- daisyDist[
  nrow(df) + seq_len(nrow(cent)),
  seq_len(nrow(df)),
  drop = FALSE
]

# Calculate distance using gower.dist, which takes the two data sets directly.
# NOTE(review): df contains NA in numeric columns; the outputs further down
# suggest gower.dist() needs NA-safe ranges passed via `rngs` to agree with
# daisy() in that case -- confirm against the installed StatMatch version.
gowerDist <- gower.dist(cent, df)
# Example data: a data frame with 5 rows and 11 columns (a mix of factor and
# numeric columns), written out literally (looks like dput() output).
# Several cells are NA: searchType, priceMin and objectType in the first row,
# roomMax in the third row, longitude and latitude in the fourth row.
df <- structure(list(searchType = structure(c(NA, 1L, 1L, 1L, 1L), .Label = c("1", "2"), class = "factor"), roomMin = structure(c(4L, 1L, 1L, 6L, 6L), .Label = c("10", "100", "150", "20", "255", "30", "40", "50", "60", "70", "Missing[NoInput]"), class = "factor"), roomMax = structure(c(8L, 8L, NA, 10L, 9L), .Label = c("10", "100", "120", "150", "160", "20", "255", "30", "40", "50", "60", "70", "80", "90", "Missing[NoInput]"), class = "factor"), priceMin = c(NA, 73, 60, 29, 11), priceMax = c(35, 11, 1, 62, 23), sizeMin = structure(c(5L, 5L, 5L, 6L, 6L), .Label = c("100", "125", "150", "250", "50", "75", "Missing[NoInput]"), class = "factor"), sizeMax = structure(c(1L, 6L, 5L, 3L, 1L), .Label = c("100", "125", "150", "250", "50", "75", "Missing[NoInput]"), class = "factor"), longitude = c(6.6306, 7.47195, 8.5562, NA, 8.569), latitude = c(46.52425, 46.9512, 47.37515, NA, 47.3929), specificSearch = structure(c(1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"), objectType = structure(c(NA, 2L, 2L, 2L, 2L), .Label = c("1", "2", "3", "Missing[]"), class = "factor")), .Names = c("searchType", "roomMin", "roomMax", "priceMin", "priceMax", "sizeMin", "sizeMax", "longitude", "latitude", "specificSearch", "objectType"), row.names = c(112457L, 94601L, 78273L, 59172L, 117425L), class = "data.frame")
# Second data set: 3 rows with the same 11 columns and the same factor levels
# as df; it contains no NA values.
# NOTE(review): the name suggests these are cluster centers -- confirm.
cent <- structure(list(searchType = structure(c(1L, 1L, 1L), .Label = c("1", "2"), class = "factor"), roomMin = structure(c(1L, 4L, 4L), .Label = c("10", "100", "150", "20", "255", "30", "40", "50", "60", "70", "Missing[NoInput]"), class = "factor"), roomMax = structure(c(6L, 9L, 8L), .Label = c("10", "100", "120", "150", "160", "20", "255", "30", "40", "50", "60", "70", "80", "90", "Missing[NoInput]"), class = "factor"), priceMin = c(60, 33, 73), priceMax = c(103, 46, 23), sizeMin = structure(c(1L, 5L, 5L), .Label = c("100", "125", "150", "250", "50", "75", "Missing[NoInput]"), class = "factor"), sizeMax = structure(c(1L, 2L, 1L), .Label = c("100", "125", "150", "250", "50", "75", "Missing[NoInput]"), class = "factor"), longitude = c(8.3015, 7.42765, 7.6104), latitude = c(47.05485, 46.9469, 46.75125), specificSearch = structure(c(1L, 1L, 1L), .Label = c("0", "1"), class = "factor"), objectType = structure(c(2L, 2L, 2L), .Label = c("1", "2", "3", "Missing[]"), class = "factor")), .Names = c("searchType", "roomMin", "roomMax", "priceMin", "priceMax", "sizeMin", "sizeMax", "longitude", "latitude", "specificSearch", "objectType"), row.names = c(60656L, 66897L, 130650L), class = "data.frame")
# Stack both data sets row-wise: the 5 rows of df first, then the 3 rows of
# cent, giving an 8-row data frame.
df1 <- rbind(df,cent)
searchType roomMin roomMax priceMin priceMax sizeMin sizeMax longitude latitude specificSearch objectType
112457 <NA> 20 30 NA 35 50 100 6.63060 46.52425 0 <NA>
94601 1 10 30 73 11 50 75 7.47195 46.95120 0 2
78273 1 10 <NA> 60 1 50 50 8.55620 47.37515 0 2
59172 1 30 50 29 62 75 150 NA NA 0 2
117425 1 30 40 11 23 75 100 8.56900 47.39290 0 2
60656 1 10 20 60 103 100 100 8.30150 47.05485 0 2
# Narrow the comparison down to a single numeric variable: priceMin, which is
# the 4th column of df1. Single-bracket selection keeps df2 a one-column data
# frame (with the original row names) rather than a bare vector.
df2 <- df1["priceMin"]
# daisy output
112457 94601 78273 59172 117425 60656 66897 130650
112457 0 NA NA NA NA NA NA NA
94601 NA 0.0000000 0.2096774 0.70967742 1.0000000 0.2096774 0.64516129 0.0000000
78273 NA 0.2096774 0.0000000 0.50000000 0.7903226 0.0000000 0.43548387 0.2096774
59172 NA 0.7096774 0.5000000 0.00000000 0.2903226 0.5000000 0.06451613 0.7096774
117425 NA 1.0000000 0.7903226 0.29032258 0.0000000 0.7903226 0.35483871 1.0000000
60656 NA 0.2096774 0.0000000 0.50000000 0.7903226 0.0000000 0.43548387 0.2096774
66897 NA 0.6451613 0.4354839 0.06451613 0.3548387 0.4354839 0.00000000 0.6451613
130650 NA 0.0000000 0.2096774 0.70967742 1.0000000 0.2096774 0.64516129 0.0000000
# gower.dist output
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] NaN NaN NaN NaN NaN NaN NaN NaN
[2,] NaN 0 0 0 0 0 0 0
[3,] NaN 0 0 0 0 0 0 0
[4,] NaN 0 0 0 0 0 0 0
[5,] NaN 0 0 0 0 0 0 0
[6,] NaN 0 0 0 0 0 0 0
[7,] NaN 0 0 0 0 0 0 0
[8,] NaN 0 0 0 0 0 0 0
# Supply the range of the single numeric column explicitly, ignoring NA, so
# that the scaling does not depend on how gower.dist() derives it by default.
gower.dist(df2, rngs = diff(range(df2, na.rm = TRUE)))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] NaN NaN NaN NaN NaN NaN NaN NaN
[2,] NaN 0.0000000 0.2096774 0.70967742 1.0000000 0.2096774 0.64516129 0.0000000
[3,] NaN 0.2096774 0.0000000 0.50000000 0.7903226 0.0000000 0.43548387 0.2096774
[4,] NaN 0.7096774 0.5000000 0.00000000 0.2903226 0.5000000 0.06451613 0.7096774
[5,] NaN 1.0000000 0.7903226 0.29032258 0.0000000 0.7903226 0.35483871 1.0000000
[6,] NaN 0.2096774 0.0000000 0.50000000 0.7903226 0.0000000 0.43548387 0.2096774
[7,] NaN 0.6451613 0.4354839 0.06451613 0.3548387 0.4354839 0.00000000 0.6451613
[8,] NaN 0.0000000 0.2096774 0.70967742 1.0000000 0.2096774 0.64516129 0.0000000
# Compare daisy() and gower.dist() on the stacked data, handing gower.dist()
# explicit, NA-safe ranges for the numeric variables.
df1 <- rbind(df, cent)

# compute the ranges of the numeric variables correctly
# vapply() pins the result type, unlike sapply().
cols <- which(vapply(df1, is.numeric, logical(1)))
# One entry per column of df1; non-numeric columns keep the placeholder 1.
rngs <- rep(1, ncol(df1))
rngs[cols] <- vapply(
  df1[cols],
  function(x) max(x, na.rm = TRUE) - min(x, na.rm = TRUE),
  numeric(1)
)

daisyDist <- as.matrix(daisy(df1, metric = "gower"))
# Pass the ranges computed above. They were previously computed but never
# handed to gower.dist(), so the NA-safe scaling had no effect.
gowerDist <- gower.dist(df1, rngs = rngs)
112457 94601 78273 59172 117425 60656 66897 130650
112457 0.0000000 0.3951059 0.6151851 0.7107843 0.6397059 0.6424374 0.3756990 0.1105551
94601 0.3951059 0.0000000 0.2355126 0.5788530 0.5629176 0.4235379 0.3651002 0.2199324
78273 0.6151851 0.2355126 0.0000000 0.5122549 0.4033046 0.3500130 0.3951874 0.3631533
59172 0.7107843 0.5788530 0.5122549 0.0000000 0.2969639 0.5446623 0.4690421 0.5657812
117425 0.6397059 0.5629176 0.4033046 0.2969639 0.0000000 0.4638003 0.4256891 0.4757460
60656 0.6424374 0.4235379 0.3500130 0.5446623 0.4638003 0.0000000 0.5063082 0.4272755
66897 0.3756990 0.3651002 0.3951874 0.4690421 0.4256891 0.5063082 0.0000000 0.2900150
130650 0.1105551 0.2199324 0.3631533 0.5657812 0.4757460 0.4272755 0.2900150 0.0000000
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 0.0000000 0.3951059 0.6151851 0.7107843 0.6397059 0.6424374 0.3756990 0.1105551
[2,] 0.3951059 0.0000000 0.2355126 0.5788530 0.5629176 0.4235379 0.3651002 0.2199324
[3,] 0.6151851 0.2355126 0.0000000 0.5122549 0.4033046 0.3500130 0.3951874 0.3631533
[4,] 0.7107843 0.5788530 0.5122549 0.0000000 0.2969639 0.5446623 0.4690421 0.5657812
[5,] 0.6397059 0.5629176 0.4033046 0.2969639 0.0000000 0.4638003 0.4256891 0.4757460
[6,] 0.6424374 0.4235379 0.3500130 0.5446623 0.4638003 0.0000000 0.5063082 0.4272755
[7,] 0.3756990 0.3651002 0.3951874 0.4690421 0.4256891 0.5063082 0.0000000 0.2900150
[8,] 0.1105551 0.2199324 0.3631533 0.5657812 0.4757460 0.4272755 0.2900150 0.0000000
关于 R 中 gower.dist 与 daisy(…, metric = "gower") 结果不同的问题,我们在 Stack Overflow 上找到了一个类似的问题:https://stackoverflow.com/questions/40264815/