我需要估计先决条件的数据库大小,因此我试图了解SQL Server如何在下面的示例中存储数据。

在我的SQL Server数据库中,我有一个名为InfoComp的表,该表包含4行:

IdInfoComp : Integer Not Null (PK)
IdDefinition : Integer Not Null (FK)
IdObject : Integer Not Null (FK)
Value : NVarChar(Max) Not Null


我想估计表的大小。在实际使用中,我可以使用以下SQL查询获取存储在Value中的平均长度:

SELECT AVG(Value) FROM InfoComp
Result : 8


因此,我的计算似乎是(以字节为单位):

(Size(IdInfoComp) + Size(IdDefinition) + Size(IdObject) + AVG Size(Value)) * Rows count

( 4 + 4 + 4 + ((8 * 2) + 2)) * NbRows


但是,当我尝试在实际情况下应用此计算时,这是错误的。就我而言,我有3,250,273行,因此结果应为92 MB,但是MS SQL报告说:


(数据)147888 KB(索引)113072 KB和(保留)261160 KB。


我哪里错了?

最佳答案

试试这个...让我靠近。我用msdn文章来创建。您可以设置行数。这将处理数据库中包含索引的每个表。暂不做列存储,也不会处理关系。它将行数估计值应用于每个表。

/*Do NOT change this section*/
GO
CREATE TABLE RowSizes (TypeName VARCHAR(30), TableName VARCHAR(255), IndexName VARCHAR(255), Null_Bitmap SMALLINT, VariableFieldSize BIGINT, FixedFieldSize BIGINT, Row_Size BIGINT, LOBFieldSize BIGINT);
CREATE TABLE LeafSizes (TypeName VARCHAR(30), TableName VARCHAR(255), IndexName VARCHAR(255), Row_Size BIGINT, Rows_Per_Page BIGINT, Free_Rows_Per_Page BIGINT, Non_Leaf_Levels BIGINT, Num_Leaf_Pages BIGINT, Num_Index_Pages BIGINT, Leaf_space_used_bytes BIGINT);
GO
CREATE PROCEDURE dbo.cp_CalcIndexPages
    @IndexType VARCHAR(20)
AS
BEGIN
    DECLARE @IndexName VARCHAR(255)
        , @TableName varchar(255)
        , @Non_Leaf_Levels bigint = 127
        , @Rows_Per_Page bigint = 476
        , @Num_Leaf_Pages bigint =10000;

    WHILE EXISTS(SELECT TOP 1 1 FROM dbo.LeafSizes WHERE TypeName = @IndexType AND Num_Index_Pages = 0)-- AND IndexName = 'PK_ProcessingMessages')
    BEGIN
        SELECT TOP 1 @IndexName = IndexName
            , @TableName = TableName
            , @Non_Leaf_Levels = Non_Leaf_Levels
            , @Rows_Per_Page = Rows_Per_Page
            , @Num_Leaf_Pages = Num_Leaf_Pages
        FROM dbo.LeafSizes
        WHERE TypeName = @IndexType
            AND Num_Index_Pages = 0;

        DECLARE @Counter INT = 1
            , @Num_Index_Pages INT = 0;

        WHILE @Counter <= @Non_Leaf_Levels
        BEGIN
            BEGIN TRY

            SELECT @Num_Index_Pages += ROUND(CASE WHEN @Num_Leaf_Pages/POWER(@Rows_Per_Page, @Counter) < CONVERT(FLOAT, 1) THEN 1 ELSE @Num_Leaf_Pages/POWER(@Rows_Per_Page, @Counter) END, 0)
            END TRY

            BEGIN CATCH
                SET @Num_Index_Pages += 1
            END CATCH

            SET @Counter += 1
        END

        IF @Num_Index_Pages = 0
            SET @Num_Index_Pages =  1;

        UPDATE dbo.LeafSizes
        SET Num_Index_Pages = @Num_Index_Pages
            , Leaf_space_used_bytes = 8192 * @Num_Index_Pages
        WHERE TableName = @TableName
            AND IndexName = @IndexName;

    END
END
GO
/*Do NOT change above here*/

--Set parameters here
DECLARE @NumRows INT = 1000000 --Number of rows for estimate
    ,@VarPercentFill money = .6; --Percentage of variable field space used to estimate.  1 will provide estimate as if all variable columns are 100% full.


/*Do not change*/
WITH cte_Tables AS (--Get Tables
    SELECT o.object_id, s.name+'.'+o.name AS ObjectName
    FROM sys.objects o
    INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
    WHERE type = 'U'
), cte_TableData AS (--Calculate Field Sizes
    SELECT o.ObjectName AS TableName
        , SUM(CASE WHEN t.name IN ('int', 'bigint', 'tinyint', 'char', 'datetime', 'smallint', 'date') THEN 1 ELSE 0 END) AS FixedFields
        , SUM(CASE WHEN t.name IN ('int', 'bigint', 'tinyint', 'char', 'datetime', 'smallint', 'date') THEN c.max_length ELSE 0 END) AS FixedFieldSize
        , SUM(CASE WHEN t.name IN ('varchar') THEN 1 ELSE 0 END) AS VariableFields
        , SUM(CASE WHEN t.name IN ('varchar') THEN c.max_length ELSE 0 END)*@VarPercentFill AS VariableFieldSize
        , SUM(CASE WHEN t.name IN ('xml') THEN 1 ELSE 0 END) AS LOBFields
        , SUM(CASE WHEN t.name IN ('xml') THEN 10000 ELSE 0 END) AS LOBFieldSize
        , COUNT(1) AS TotalColumns
    FROM sys.columns c
    INNER JOIN cte_Tables o ON o.object_id = c.object_id
    INNER JOIN sys.types t ON c.system_type_id = t.system_type_id
    GROUP BY o.ObjectName
), cte_Indexes AS (--Get Indexes and size
    SELECT s.name+'.'+o.name AS TableName
        , ISNULL(i.name, '') AS IndexName
        , i.type_desc
        , i.index_id
        , SUM(CASE WHEN t.name IN ('tinyint','smallint', 'int', 'bigint', 'char', 'datetime', 'date') AND c.key_ordinal > 0 THEN 1 ELSE 0 END) AS FixedFields
        , SUM(CASE WHEN t.name IN ('tinyint','smallint', 'int', 'bigint', 'char', 'datetime', 'date') AND c.key_ordinal > 0 THEN tc.max_length ELSE 0 END) AS FixedFieldSize
        , SUM(CASE WHEN t.name IN ('varchar') AND c.key_ordinal > 0 THEN 1 ELSE 0 END) AS VariableFields
        , SUM(CASE WHEN t.name IN ('varchar') AND c.key_ordinal > 0 THEN tc.max_length ELSE 0 END)*@VarPercentFill AS VariableFieldSize
        , SUM(CASE WHEN t.name IN ('xml') AND c.key_ordinal > 0 THEN 1 ELSE 0 END) AS LOBFields
        , SUM(CASE WHEN t.name IN ('xml') AND c.key_ordinal > 0 THEN 10000 ELSE 0 END) AS LOBFieldSize
        , SUM(CASE WHEN t.name IN ('tinyint','smallint', 'int', 'bigint', 'char', 'datetime', 'date') AND c.is_included_column > 0 THEN 1 ELSE 0 END) AS FixedIncludes
        , SUM(CASE WHEN t.name IN ('tinyint','smallint', 'int', 'bigint', 'char', 'datetime', 'date') AND c.is_included_column > 0 THEN 1 ELSE 0 END) AS FixedIncludesSize
        , SUM(CASE WHEN t.name IN ('varchar') AND c.is_included_column > 0 THEN 1 ELSE 0 END)*@VarPercentFill AS VariableIncludes
        , SUM(CASE WHEN t.name IN ('varchar') AND c.is_included_column > 0 THEN tc.max_length ELSE 0 END) AS VariableIncludesSize
        , COUNT(1) AS TotalColumns
    FROM sys.indexes i
    INNER JOIN sys.columns tc ON i.object_id = tc.object_id
    INNER JOIN sys.index_columns c ON i.index_id = c.index_id
        AND c.column_id = tc.column_id
        AND c.object_id = i.object_id
    INNER JOIN sys.objects o ON o.object_id = i.object_id AND o.is_ms_shipped = 0
    INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
    INNER JOIN sys.types t ON tc.system_type_id = t.system_type_id
    GROUP BY s.name+'.'+o.name, ISNULL(i.name, ''), i.type_desc, i.index_id
)
INSERT RowSizes
SELECT 'Table' AS TypeName
    , n.TableName
    , '' AS IndexName
    , 2 + ((n.FixedFields+n.VariableFields+7)/8) AS Null_Bitmap
    , 2 + (n.VariableFields * 2) + n.VariableFieldSize AS Variable_Data_Size
    , n.FixedFieldSize
    /*FixedFieldSize + Variable_Data_Size + Null_Bitmap*/
    , n.FixedFieldSize + (2 + (n.VariableFields * 2) + (n.VariableFieldSize)) + (2 + ((n.FixedFields+n.VariableFields+7)/8)) + 4 AS Row_Size
    , n.LOBFieldSize
FROM cte_TableData n
UNION
SELECT i.type_desc
    , i.TableName
    , i.IndexName
    , 0 AS Null_Bitmap
    , CASE WHEN i.VariableFields > 0 THEN 2 + (i.VariableFields * 2) + i.VariableFieldSize + 4 ELSE 0 END AS Variable_Data_Size
    , i.FixedFieldSize
    /*FixedFieldSize + Variable_Data_Size + Null_Bitmap if not clustered*/
    , i.FixedFieldSize + CASE WHEN i.VariableFields > 0 THEN 2 + (i.VariableFields * 2) + i.VariableFieldSize + 4 ELSE 0 END + 7 AS Row_Size
    , i.LOBFieldSize
FROM cte_Indexes i
WHERE i.index_id IN(0,1)
UNION
SELECT i.type_desc
    , i.TableName
    , i.IndexName
    , CASE WHEN si.TotalColumns IS NULL THEN 2 + ((i.FixedFields+i.VariableFields+i.VariableIncludes+i.FixedIncludes+8)/8)
            ELSE 2 + ((i.FixedFields+i.VariableFields+i.VariableIncludes+i.FixedIncludes+7)/8)
        END AS Null_Bitmap
    , CASE WHEN si.TotalColumns IS NULL THEN 2 + ((i.VariableFields + 1) * 2) + (i.VariableFieldSize + 8)
            ELSE 2 + (i.VariableFields * 2) + i.VariableFieldSize
        END AS Variable_Data_Size
    , CASE WHEN si.TotalColumns IS NULL THEN si.FixedFieldSize
            ELSE i.FixedFieldSize + si.FixedFieldSize
        END AS FixedFieldSize
    /*FixedFieldSize + Variable_Data_Size + Null_Bitmap if not clustered*/
    , CASE WHEN si.TotalColumns IS NULL THEN i.FixedFieldSize + (2 + ((i.VariableFields + 1) * 2) + (i.VariableFieldSize + 8)) + (2 + ((i.TotalColumns+8)/8)) + 7
            ELSE i.FixedFieldSize + (2 + (i.VariableFields * 2) + i.VariableFieldSize) + (2 + ((i.TotalColumns+7)/8)) + 4
        END AS Row_Size
    , i.LOBFieldSize
FROM cte_Indexes i
LEFT OUTER JOIN cte_Indexes si ON i.TableName = si.TableName AND si.type_desc = 'CLUSTERED'
WHERE i.index_id NOT IN(0,1) AND i.type_desc = 'NONCLUSTERED';

--SELECT * FROM RowSizes

/*Calculate leaf sizes for tables and HEAPs*/
INSERT LeafSizes
SELECT r.TypeName
    , r.TableName
    ,'' AS IndexName
    , r.Row_Size
    , 8096 / (r.Row_Size + 2) AS Rows_Per_Page
    , 8096 * ((100 - 90)/100) / (r.Row_Size + 2) AS Free_Rows_Per_Page
    , 0 AS Non_Leaf_Levels
    /*Num_Leaf_Pages = Number of Rows / (Rows_Per_Page - Free_Rows_Per_Page) OR 1 if less than 1*/
    , CASE WHEN @NumRows / ((8096 / (r.Row_Size + 2)) - (8096 * ((100 - 90)/100) / (r.Row_Size + 2))) < 1
            THEN 1
            ELSE @NumRows / ((8096 / (r.Row_Size + 2)) - (8096 * ((100 - 90)/100) / (r.Row_Size + 2)))
        END AS Num_Leaf_Pages
    , 0 AS Num_Index_Pages
    /*Leaf_space_used = 8192 * Num_Leaf_Pages*/
    , 8192 * CASE WHEN @NumRows / ((8096 / (r.Row_Size + 2)) - (8096 * ((100 - 90)/100) / (r.Row_Size + 2))) < 1
                THEN 1
                ELSE @NumRows / ((8096 / (r.Row_Size + 2)) - (8096 * ((100 - 90)/100) / (r.Row_Size + 2)))
            END + (@NumRows * LOBFieldSize) AS Leaf_space_used_bytes
FROM RowSizes r
WHERE r.TypeName = 'Table'
ORDER BY TypeName, TableName;

/*Calculate leaf sizes for CLUSTERED indexes*/
INSERT LeafSizes
SELECT r.TypeName
    , r.TableName
    , r.IndexName
    , r.Row_Size
    , 8096 / (r.Row_Size + 2) AS Rows_Per_Page
    , 0 AS Free_Rows_Per_Page
    , 1 + ROUND(LOG(8096 / (r.Row_Size + 2)), 0)*(l.Num_Leaf_Pages/(8096 / (r.Row_Size + 2))) AS Non_Leaf_Levels
    , l.Num_Leaf_Pages
    , 0 AS Num_Index_Pages
    , 0 AS Leaf_space_used_bytes
FROM RowSizes r
INNER JOIN LeafSizes l ON r.TableName = l.TableName AND l.TypeName = 'Table'
WHERE r.TypeName = 'CLUSTERED';

PRINT 'CLUSTERED'
EXEC dbo.cp_CalcIndexPages @IndexType = 'CLUSTERED'

/*Calculate leaf sizes for NONCLUSTERED indexes*/
INSERT LeafSizes
SELECT r.TypeName
    , r.TableName
    , r.IndexName
    , r.Row_Size
    , 8096 / (r.Row_Size + 2) AS Rows_Per_Page
    , 0 AS Free_Rows_Per_Page
    , 1 + ROUND(LOG(8096 / (r.Row_Size + 2)), 0)*(l.Num_Leaf_Pages/(8096 / (r.Row_Size + 2))) AS Non_Leaf_Levels
    , l.Num_Leaf_Pages
    , 0 AS Num_Index_Pages
    , 0 AS Leaf_space_used_bytes
FROM RowSizes r
INNER JOIN LeafSizes l ON r.TableName = l.TableName AND l.TypeName = 'Table'
WHERE r.TypeName = 'NONCLUSTERED';

PRINT 'NONCLUSTERED'
EXEC dbo.cp_CalcIndexPages @IndexType = 'NONCLUSTERED'

SELECT *
FROM dbo.LeafSizes
--WHERE TableName = 'eligibility.clientrequest'

SELECT TableName
    , @NumRows AS RowsPerTable
    , @VarPercentFill*100 AS VariableFieldFillFactor
    , SUM(CASE WHEN TypeName = 'Table' THEN Leaf_space_used_bytes ELSE 0 END)/1024/1024 AS TableSizeMB
    , SUM(Leaf_space_used_bytes)/1024/1024 AS SizeWithIndexesMB
FROM LeafSizes
--WHERE TableName = 'eligibility.clientrequest'
GROUP BY TableName
ORDER BY TableName;


GO
/*Cleanup when done*/
DROP PROCEDURE dbo.cp_CalcIndexPages;
DROP TABLE dbo.RowSizes;
DROP TABLE dbo.LeafSizes;

10-05 21:18
查看更多