我有兴趣找到将 matrix 转换为长格式 data.frame 的最快方法。

我在这里比较了三种实现方案,但想知道是否存在更快的方法,例如使用 data.table。

这是我比较的三种方法的可重现代码:

# Generate matrix -------------------------------------------------------------
# Build a reproducible 1000 x 100 numeric matrix of uniform draws rounded to
# one decimal place, with dimnames "row1".."row1000" and "col1".."col100".
set.seed(1)
ex <- matrix(data = round(runif(100000), 1), nrow = 1000, ncol = 100)
# seq_len() rather than 1:n, so a zero-length dimension can never expand to
# c(1, 0) and produce bogus labels.
rownames(ex) <- paste0("row", seq_len(nrow(ex)))
colnames(ex) <- paste0("col", seq_len(ncol(ex)))
# Print the top-left 5 x 5 corner as a quick sanity check.
ex[1:5, 1:5]

     col1 col2 col3 col4 col5
row1  0.3  0.5  0.9  0.8  0.2
row2  0.4  0.7  1.0  0.5  0.5
row3  0.6  0.4  0.9  0.2  0.0
row4  0.9  1.0  0.4  0.4  0.5
row5  0.2  0.1  0.2  0.8  0.9

# table solution --------------------------------------------------------------
# Base R only: view the matrix as a table, then unroll it into a data frame.
df1 <- as.data.frame(x = as.table(x = ex))

# reshape solution ------------------------------------------------------------
# reshape2 is called through its namespace, so it is not attached.
df2 <- reshape2::melt(data = ex)

# dplyr solution --------------------------------------------------------------
library(dplyr)
library(tidyr)
# Same steps as a pipeline, written inside-out: matrix -> data frame ->
# row names moved into a "Var1" column -> all other columns stacked into
# "Var2" (former column name) / "value" (cell content).
# NOTE(review): gather() is superseded by tidyr::pivot_longer(); it is kept
# here so the result stays comparable with df1/df2 - confirm before switching.
df3 <- gather(
  tibble::rownames_to_column(as.data.frame(ex), var = "Var1"),
  key = "Var2",
  value = "value",
  -Var1
)

# check for equality ----------------------------------------------------------
# Rename the "Freq" column of df1 to "value", then preview the first rows.
names(df1)[names(df1) == "Freq"] <- "value"
head(df1, n = 6L)
  Var1 Var2 value
1 row1 col1   0.3
2 row2 col1   0.4
3 row3 col1   0.6
4 row4 col1   0.9
5 row5 col1   0.2
6 row6 col1   0.9

# Coerce the two label columns of df1 and df2 to character so they can be
# compared with each other and with df3.
df1[["Var1"]] <- as.character(df1[["Var1"]])
df1[["Var2"]] <- as.character(df1[["Var2"]])
df2[["Var1"]] <- as.character(df2[["Var1"]])
df2[["Var2"]] <- as.character(df2[["Var2"]])

# Exact, attribute-sensitive comparison of the three results.
identical(df1, df2)
identical(df1, df3)
TRUE

# Microbenchmark --------------------------------------------------------------
library(microbenchmark)
# Time the three matrix -> long data frame conversions against each other.
comp <- microbenchmark(
  # Base R: table view of the matrix unrolled into a data frame.
  table = {
    df1 <- as.data.frame(x = as.table(x = ex))
  },

  # reshape2::melt() applied directly to the matrix.
  reshape = {
    df2 <- reshape2::melt(data = ex)
  },

  # tidyverse: row names become "Var1", remaining columns are stacked.
  dplyr = {
    df3 <- gather(
      tibble::rownames_to_column(as.data.frame(ex), var = "Var1"),
      key = "Var2",
      value = "value",
      -Var1
    )
  }
)

# Plot the timing results.
library(ggplot2)
autoplot(comp)

(图:上述三种方法的 microbenchmark 耗时对比,由 autoplot(comp) 绘制)
reshape 方法是迄今为止最快的。

最佳答案

这么多选择:

library(dplyr)
library(tidyr)
library(data.table)

library(microbenchmark)
library(ggplot2)

# Build a reproducible 1000 x 100 numeric matrix of uniform draws rounded to
# one decimal place, with dimnames "row1".."row1000" and "col1".."col100".
set.seed(1)
ex <- matrix(data = round(runif(100000), 1), nrow = 1000, ncol = 100)
# seq_len() rather than 1:n, so a zero-length dimension can never expand to
# c(1, 0) and produce bogus labels.
rownames(ex) <- paste0("row", seq_len(nrow(ex)))
colnames(ex) <- paste0("col", seq_len(ncol(ex)))


# Benchmark six ways of turning the matrix `ex` into a long-format table
# (one row per matrix cell).
comp <- microbenchmark(
  # Base R: table view of the matrix unrolled into a data frame.
  table = {
    df1 <- as.data.frame(as.table(ex))
  },

  # reshape2::melt() applied directly to the matrix.
  reshape = {
    df2 <- reshape2::melt(ex)
  },

  # tidyverse: row names become "Var1", remaining columns are stacked.
  dplyr = {
    df3 <- ex %>%
      as.data.frame() %>%
      tibble::rownames_to_column("Var1") %>%
      gather("Var2", "value", -Var1)
  },

  # data.table: keep the row names (column "rn") and melt around them.
  data.table = {
    dt <- melt(data.table(ex, keep.rownames = TRUE), id.vars = "rn")
  },

  # data.table without row names: "rn" is the integer row position here,
  # so this variant does not return the same labels as the others.
  # `id.vars` is spelled out instead of relying on partial matching.
  data.table2 = {
    melt(as.data.table(ex)[, rn := seq_len(.N)], id.vars = "rn")
  },

  # Direct construction. c(ex) unrolls the matrix column by column, so the
  # row labels must cycle fastest (times = ncol) and each column label must
  # be held for a whole column (each = nrow). Passing the bare dimnames and
  # relying on recycling would pair every value with the wrong column name.
  data.table3 = {
    data.table(
      Var1 = rep(rownames(ex), times = ncol(ex)),
      Var2 = rep(colnames(ex), each = nrow(ex)),
      value = c(ex)
    )
  }
)

# Plot the timing results.
autoplot(comp)

(图:上述六种方法的 microbenchmark 耗时对比,由 autoplot(comp) 绘制)

关于r - R中矩阵到长格式数据帧的最快转换,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/60101919/

10-12 23:25