我想为我的数据库调用(使用C#和用于访问数据库的Microsoft技术)编写一些包装器代码,并在“ transient ”异常时自动重试。暂时的,我的意思是说,很有可能最终解决(相对于永远不会起作用的逻辑错误)。我能想到的例子包括:

  • 死锁
  • 连接超时
  • 命令超时

  • 我曾计划使用SqlException的错误号来发现这些错误。因此,例如:
    List<RunStoredProcedureResultType> resultSet = null;
    int limit = 3;
    for (int i = 0; i < limit; ++i)
    {
        bool isLast = i == limit - 1;
        try
        {
            using (var db = /* ... */)
            {
                resultSet = db.RunStoredProcedure(param1, param2).ToList();
            }
            //if it gets here it was successful
            break;
        }
        catch (SqlException ex)
        {
            if (isLast)
            {
                //3 transient errors in a row. So just kill it
                throw;
            }
            switch (ex.Number)
            {
                case 1205: //deadlock
                case -2:   //timeout (command timeout?)
                case 11:   //timeout (connection timeout?)
                    // do nothing - continue the loop
                    break;
                default:
                    //a non-transient error. Just throw the exception on
                    throw;
            }
        }
        Thread.Sleep(TimeSpan.FromSeconds(1)); //some kind of delay - might not use Sleep
    }
    return resultSet;
    

    (对我来说,如果有任何错误,请原谅,我只是即时编写的。我也意识到我可以很好地将其包装起来...)

    因此,关键问题是:我应该将哪些数字视为“ transient ”(我意识到我认为 transient 的数字可能与其他人认为 transient 的数字不同)。我在这里找到了一个不错的 list :

    https://msdn.microsoft.com/en-us/library/cc645603.aspx

    但是它的庞大和注释非常有用。 是否有其他人建立过类似的 list ?

    更新

    最后,如果错误是已知的“非暂时性错误”列表中的一个,则我们选择了“错误列表”,通常是程序员错误。我列出了我们用作答案的数字列表。

    最佳答案

    sql Azure中有一个[SqlDatabaseTransientErrorDetectionStrategy.cs]类,用于瞬时故障处理。它涵盖了几乎所有类型的可以视为 transient 的异常代码。这也是Retry strategy的完整实现。

    在此处添加摘要以供将来引用:

    /// <summary>
    /// Error codes reported by the DBNETLIB module.
    /// </summary>
    private enum ProcessNetLibErrorCode
    {
        ZeroBytes = -3,
    
        Timeout = -2,
        /* Timeout expired. The timeout period elapsed prior to completion of the operation or the server is not responding. */
    
        Unknown = -1,
    
        InsufficientMemory = 1,
    
        AccessDenied = 2,
    
        ConnectionBusy = 3,
    
        ConnectionBroken = 4,
    
        ConnectionLimit = 5,
    
        ServerNotFound = 6,
    
        NetworkNotFound = 7,
    
        InsufficientResources = 8,
    
        NetworkBusy = 9,
    
        NetworkAccessDenied = 10,
    
        GeneralError = 11,
    
        IncorrectMode = 12,
    
        NameNotFound = 13,
    
        InvalidConnection = 14,
    
        ReadWriteError = 15,
    
        TooManyHandles = 16,
    
        ServerError = 17,
    
        SSLError = 18,
    
        EncryptionError = 19,
    
        EncryptionNotSupported = 20
    }
    

    进一步检查情况,以检查错误代码是否在sql异常中返回:
    switch (err.Number)
    {
        // SQL Error Code: 40501
        // The service is currently busy. Retry the request after 10 seconds. Code: (reason code to be decoded).
        case ThrottlingCondition.ThrottlingErrorNumber:
            // Decode the reason code from the error message to determine the grounds for throttling.
            var condition = ThrottlingCondition.FromError(err);
    
            // Attach the decoded values as additional attributes to the original SQL exception.
            sqlException.Data[condition.ThrottlingMode.GetType().Name] =
                condition.ThrottlingMode.ToString();
            sqlException.Data[condition.GetType().Name] = condition;
    
            return true;
    
        // SQL Error Code: 10928
        // Resource ID: %d. The %s limit for the database is %d and has been reached.
        case 10928:
        // SQL Error Code: 10929
        // Resource ID: %d. The %s minimum guarantee is %d, maximum limit is %d and the current usage for the database is %d.
        // However, the server is currently too busy to support requests greater than %d for this database.
        case 10929:
        // SQL Error Code: 10053
        // A transport-level error has occurred when receiving results from the server.
        // An established connection was aborted by the software in your host machine.
        case 10053:
        // SQL Error Code: 10054
        // A transport-level error has occurred when sending the request to the server.
        // (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
        case 10054:
        // SQL Error Code: 10060
        // A network-related or instance-specific error occurred while establishing a connection to SQL Server.
        // The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server
        // is configured to allow remote connections. (provider: TCP Provider, error: 0 - A connection attempt failed
        // because the connected party did not properly respond after a period of time, or established connection failed
        // because connected host has failed to respond.)"}
        case 10060:
        // SQL Error Code: 40197
        // The service has encountered an error processing your request. Please try again.
        case 40197:
        // SQL Error Code: 40540
        // The service has encountered an error processing your request. Please try again.
        case 40540:
        // SQL Error Code: 40613
        // Database XXXX on server YYYY is not currently available. Please retry the connection later. If the problem persists, contact customer
        // support, and provide them the session tracing ID of ZZZZZ.
        case 40613:
        // SQL Error Code: 40143
        // The service has encountered an error processing your request. Please try again.
        case 40143:
        // SQL Error Code: 233
        // The client was unable to establish a connection because of an error during connection initialization process before login.
        // Possible causes include the following: the client tried to connect to an unsupported version of SQL Server; the server was too busy
        // to accept new connections; or there was a resource limitation (insufficient memory or maximum allowed connections) on the server.
        // (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
        case 233:
        // SQL Error Code: 64
        // A connection was successfully established with the server, but then an error occurred during the login process.
        // (provider: TCP Provider, error: 0 - The specified network name is no longer available.)
        case 64:
        // DBNETLIB Error Code: 20
        // The instance of SQL Server you attempted to connect to does not support encryption.
        case (int)ProcessNetLibErrorCode.EncryptionNotSupported:
            return true;
    }
    

    参见完整的source here

    关于c# - SQL Server transient 异常号,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/35630829/

    10-12 13:49