我正在使用speedglm来估计一些数据的逻辑回归模型。我创建了一个可重现的示例,该示例产生的错误与使用原始数据时发生的错误相同。

library(speedglm)
n <- 10000
dtf <- data.frame( y = sample(c(0,1), n, 1),
                  x1 = as.factor(sample(c("a","b"), n, 1)),
                  x2 = rnorm(n, 30, 10))
m <- speedglm(y ~ x1 + x2, dtf, family=binomial())
summary(m)

输出如下:
Generalized Linear Model of class 'speedglm':

Call:  speedglm(formula = y ~ x1 + x2, data = dtf, family = binomial())

Coefficients:
 ------------------------------------------------------------------
Error in data.frame(..., check.names = FALSE) :
  arguments imply differing number of rows: 3, 0

我已经通过执行summary.speedglm检查了getS3method("summary", "speedglm")的源代码,并找到了产生错误的代码行,但这无助于解决问题。

PS:代表1500+的人应该创建speedglm标签。

更新
speedglm的维护者Marco Enea,要求发布summary.speedglmprint.summary.speedglm的以下临时修补程序。
summary.speedglm <- function (object, correlation = FALSE, ...)
{
  if (!inherits(object, "speedglm"))
    stop("object is not of class speedglm")
  z <- object
  var_res <- as.numeric(z$RSS/z$df)
  dispersion <- if (z$family$family %in% c("poisson", "binomial")) 1 else var_res
  if (z$method == "qr") {
    z$XTX <- z$XTX[z$ok, z$ok]
  }
  inv <- solve(z$XTX, tol = z$tol.solve)
  covmat <- diag(inv)
  se_coef <- rep(NA, length(z$coefficients))
  se_coef[z$ok] <- sqrt(dispersion * covmat)
  if (z$family$family %in% c("binomial", "poisson")) {
    z1 <- z$coefficients/se_coef
    p <- 2 * pnorm(abs(z1), lower.tail = FALSE)
  } else {
    t1 <- z$coefficients/se_coef
    p <- 2 * pt(abs(t1), df = z$df, lower.tail = FALSE)
  }
  ip <- !is.na(p)
  p[ip] <- as.numeric(format(p[ip], digits = 3))
  dn <- c("Estimate", "Std. Error")
  if (z$family$family %in% c("binomial", "poisson")) {
    format.coef <- if (any(na.omit(abs(z$coef)) < 1e-04))
      format(z$coefficients, scientific = TRUE, digits = 4) else
        round(z$coefficients, digits = 7)
    format.se <- if (any(na.omit(se_coef) < 1e-04))
      format(se_coef, scientific = TRUE, digits = 4) else round(se_coef, digits = 7)
    format.pv <- if (any(na.omit(p) < 1e-04))
      format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
    param <- data.frame(format.coef, format.se, round(z1,
                                                      digits = 4), format.pv)
    dimnames(param) <- list(names(z$coefficients), c(dn,
                                                     "z value", "Pr(>|z|)"))
  } else {
    format.coef <- if (any(abs(na.omit(z$coefficients)) <
                             1e-04))
      format(z$coefficients, scientific = TRUE, digits = 4) else
        round(z$coefficients, digits = 7)
    format.se <- if (any(na.omit(se_coef) < 1e-04))
      format(se_coef, scientific = TRUE, digits = 4) else
        round(se_coef, digits = 7)
    format.pv <- if (any(na.omit(p) < 1e-04))
      format(p, scientific = TRUE, digits = 4) else round(p, digits = 4)
    param <- data.frame(format.coef, format.se, round(t1,
                                                      digits = 4), format.pv)
    dimnames(param) <- list(names(z$coefficients), c(dn,
                                                     "t value", "Pr(>|t|)"))
  }
  eps <- 10 * .Machine$double.eps
  if (z$family$family == "binomial") {
    if (any(z$mu > 1 - eps) || any(z$mu < eps))
      warning("fitted probabilities numerically 0 or 1 occurred")
  }
  if (z$family$family == "poisson") {
    if (any(z$mu < eps))
      warning("fitted rates numerically 0 occurred")
  }
  keep <- match(c("call", "terms", "family", "deviance", "aic",
                  "df", "nulldev", "nulldf", "iter", "tol", "n", "convergence",
                  "ngoodobs", "logLik", "RSS", "rank"), names(object),
                0)
  ans <- c(object[keep], list(coefficients = param, dispersion = dispersion,
                              correlation = correlation, cov.unscaled = inv, cov.scaled = inv *
                                var_res))
  if (correlation) {
    ans$correl <- (inv * var_res)/outer(na.omit(se_coef),
                                        na.omit(se_coef))
  }
  class(ans) <- "summary.speedglm"
  return(ans)
}

print.summary.speedglm <- function (x, digits = max(3, getOption("digits") - 3), ...)
{
  cat("Generalized Linear Model of class 'speedglm':\n")
  if (!is.null(x$call))
    cat("\nCall: ", deparse(x$call), "\n\n")
  if (length(x$coef)) {
    cat("Coefficients:\n")
    cat(" ------------------------------------------------------------------",
        "\n")
    sig <- function(z){
      if (!is.na(z)){
        if (z < 0.001)
          "***"
        else if (z < 0.01)
          "** "
        else if (z < 0.05)
          "*  "
        else if (z < 0.1)
          ".  "
        else "   "
      } else "   "
    }
    options(warn=-1)
    sig.1 <- sapply(as.numeric(as.character(x$coefficients[,4])),
                    sig)
    options(warn=0)
    est.1 <- cbind(format(x$coefficients, digits = digits),
                   sig.1)
    colnames(est.1)[ncol(est.1)] <- ""
    print(est.1)
    cat("\n")
    cat("-------------------------------------------------------------------",
        "\n")
    cat("Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1",
        "\n")
    cat("\n")
  }
  else cat("No coefficients\n")
  cat("---\n")
  cat("null df: ", x$nulldf, "; null deviance: ", round(x$nulldev,
                                                        digits = 2), ";\n", "residuals df: ", x$df, "; residuals deviance: ",
      round(x$deviance, digits = 2), ";\n", "# obs.: ", x$n,
      "; # non-zero weighted obs.: ", x$ngoodobs, ";\n", "AIC: ",
      x$aic, "; log Likelihood: ", x$logLik, ";\n", "RSS: ",
      round(x$RSS, digits = 1), "; dispersion: ", x$dispersion,
      "; iterations: ", x$iter, ";\n", "rank: ", round(x$rank,
                                                       digits = 1), "; max tolerance: ", format(x$tol, scientific = TRUE,
                                                                                                digits = 3), "; convergence: ", x$convergence, ".\n",
      sep = "")
  invisible(x)
  if (x$correlation) {
    cat("---\n")
    cat("Correlation of Coefficients:\n")
    x$correl[upper.tri(x$correl, diag = TRUE)] <- NA
    print(x$correl[-1, -nrow(x$correl)], na.print = "", digits = 2)
  }
}

根据42'的建议,我还要添加以下内容:
environment(summary.speedglm) <- environment(speedglm)
environment(print.summary.speedglm) <- environment(speedglm)

最佳答案

print.summary.speedglm函数中有一个小错误。如果您更改此行:

sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|t|)")), sig))

到这行:
 sig.1 <- cbind(sapply(as.numeric(as.character(x$coefficients$"Pr(>|z|)")), sig))

并运行:
environment(print.summary.speedglm) <- environment(speedglm)

您将不再看到错误消息。

报告错误的正确方法是与维护人员联系(我会给他发送电子邮件):
maintainer('speedglm')
[1] "Marco Enea <[email protected]>"

关于r - R-使用speedglm包中的summary()时出错,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/34887295/

10-12 22:07