SSB(Star Schema Benchmark)是麻省州立大学波士顿校区的研究人员定义的基于现实商业应用的数据模型,业界公认用来模拟决策支持类应用,比较公正和中立。
学术界和工业界普遍采用它来评价决策支持技术方面应用的性能。
全方位评测系统的整体商业计算综合能力,对厂商的要求更高。
在银行信贷分析和信用卡分析、电信运营分析、税收分析、烟草行业决策分析中都有广泛的应用。
SSB基准测试包括:
1个事实表:lineorder
4个维度表:customer,part,dwdate,supplier
13条标准SQL查询测试语句:统计查询、多表关联、sum、复杂条件、group by、order by等组合方式。
SSB数据生成器
SSB数据自动生成器下载地址:点击这里
实验证明,在Linux上操作比较方便,步骤如下:
- 将下载的文件上传至Linux主机上的目录
- 执行如下命令,生成dbgen文件:make
- 查看 dbgen 命令帮助: ./dbgen -h
- 生成数据文件,命令如下:
(customer.tbl)
dbgen -s 1 -T c (part.tbl)
dbgen -s 1 -T p (supplier.tbl)
dbgen -s 1 -T s (date.tbl)
dbgen -s 1 -T d (fact table lineorder.tbl)
dbgen -s 1 -T l (for all SSBM tables)
dbgen -s 1 -T a- 生成文件如下: ll *tbl -l
- 连接mysql数据库,创建数据库,数据表
- 创建数据库: create database ssb;
- 创建数据库表:
CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER,
C_NAME VARCHAR(25) NOT NULL,
C_ADDRESS VARCHAR(40) NOT NULL,
C_CITY VARCHAR(10) NOT NULL,
C_NATION VARCHAR(15) NOT NULL,
C_REGION VARCHAR(12) NOT NULL,
C_PHONE VARCHAR(15) NOT NULL,
C_MKTSEGMENT VARCHAR(10) NOT NULL); CREATE TABLE DATES ( D_DATEKEY INTEGER,
D_DATE VARCHAR(18) NOT NULL,
D_DAYOFWEEK VARCHAR(18) NOT NULL,
D_MONTH VARCHAR(9) NOT NULL,
D_YEAR INTEGER NOT NULL,
D_YEARMONTHNUM INTEGER,
D_YEARMONTH VARCHAR(7) NOT NULL,
D_DAYNUMINWEEK INTEGER,
D_DAYNUMINMONTH INTEGER,
D_DAYNUMINYEAR INTEGER,
D_MONTHNUMINYEAR INTEGER,
D_WEEKNUMINYEAR INTEGER,
D_SELLINGSEASON VARCHAR(12) NOT NULL,
D_LASTDAYINWEEKFL INTEGER,
D_LASTDAYINMONTHFL INTEGER,
D_HOLIDAYFL INTEGER,
D_WEEKDAYFL INTEGER); CREATE TABLE PART ( P_PARTKEY INTEGER,
P_NAME VARCHAR(22) NOT NULL,
P_MFGR VARCHAR(6) NOT NULL,
P_CATEGORY VARCHAR(7) NOT NULL,
P_BRAND VARCHAR(9) NOT NULL,
P_COLOR VARCHAR(11) NOT NULL,
P_TYPE VARCHAR(25) NOT NULL,
P_SIZE INTEGER NOT NULL,
P_CONTAINER VARCHAR(10) NOT NULL); CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER,
S_NAME VARCHAR(25) NOT NULL,
S_ADDRESS VARCHAR(25) NOT NULL,
S_CITY VARCHAR(10) NOT NULL,
S_NATION VARCHAR(15) NOT NULL,
S_REGION VARCHAR(12) NOT NULL,
S_PHONE VARCHAR(15) NOT NULL); CREATE TABLE LINEORDER ( LO_ORDERKEY BIGINT,
LO_LINENUMBER BIGINT,
LO_CUSTKEY INTEGER NOT NULL,
LO_PARTKEY INTEGER NOT NULL,
LO_SUPPKEY INTEGER NOT NULL,
LO_ORDERDATE INTEGER NOT NULL,
LO_ORDERPRIOTITY VARCHAR(15) NOT NULL,
LO_SHIPPRIOTITY INTEGER,
LO_QUANTITY BIGINT,
LO_EXTENDEDPRICE BIGINT,
LO_ORDTOTALPRICE BIGINT,
LO_DISCOUNT BIGINT,
LO_REVENUE BIGINT,
LO_SUPPLYCOST BIGINT,
LO_TAX BIGINT,
LO_COMMITDATE INTEGER NOT NULL,
LO_SHIPMODE VARCHAR(10) NOT NULL);- 执行完成后,结果如下:
- 执行如下SQL、导入数据
set autocommit=off;
load data infile '/dbgen/lineorder.tbl' into table LINEORDER fields terminated by '|' lines terminated by '|\n';
commit; load data infile '/dbgen/supplier.tbl' into table SUPPLIER fields terminated by '|' lines terminated by '|\n';
commit; load data infile '/dbgen/customer.tbl' into table CUSTOMER fields terminated by '|' lines terminated by '|\n';
commit; load data infile '/dbgen/part.tbl' into table PART fields terminated by '|' lines terminated by '|\n';
commit; load data infile '/dbgen/date.tbl' into table DATES fields terminated by '|' lines terminated by '|\n';
commit;- 查看数据是否正确
标准SQL查询测试语句
PROMPT Q1.1
SELECT SUM(LO_EXTENDEDPRICE*LO_DISCOUNT) AS
REVENUE
FROM LINEORDER, DATES
WHERE LO_ORDERDATE = D_DATEKEY
AND D_YEAR = 1993
AND LO_DISCOUNT BETWEEN 1 AND 3
AND LO_QUANTITY < 25; PROMPT Q1.2
SELECT SUM(LO_EXTENDEDPRICE*LO_DISCOUNT) AS
REVENUE
FROM LINEORDER, DATES
WHERE LO_ORDERDATE = D_DATEKEY
AND D_YEARMONTH = 'Jan1994'
AND LO_DISCOUNT BETWEEN 4 AND 6
AND LO_QUANTITY BETWEEN 26 AND 35; PROMPT Q1.3
SELECT SUM(LO_EXTENDEDPRICE*LO_DISCOUNT) AS
REVENUE
FROM LINEORDER, DATES
WHERE LO_ORDERDATE = D_DATEKEY
AND D_WEEKNUMINYEAR = 6
AND D_YEAR = 1994
AND LO_DISCOUNT BETWEEN 5 AND 7
AND LO_QUANTITY BETWEEN 26 AND 35; PROMPT Q2.1
SELECT SUM(LO_REVENUE), D_YEAR, P_BRAND
FROM LINEORDER, DATES, PART, SUPPLIER
WHERE LO_ORDERDATE = D_DATEKEY
AND LO_PARTKEY = P_PARTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND P_CATEGORY = 'MFGR#12'
AND S_REGION = 'AMERICA'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND; PROMPT Q2.2
SELECT SUM(LO_REVENUE), D_YEAR, P_BRAND
FROM LINEORDER, DATES, PART, SUPPLIER
WHERE LO_ORDERDATE = D_DATEKEY
AND LO_PARTKEY = P_PARTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND P_BRAND BETWEEN 'MFGR#2221'
AND 'MFGR#2228'
AND S_REGION = 'ASIA'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND; PROMPT Q2.3
SELECT SUM(LO_REVENUE), D_YEAR, P_BRAND
FROM LINEORDER, DATES, PART, SUPPLIER
WHERE LO_ORDERDATE = D_DATEKEY
AND LO_PARTKEY = P_PARTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND P_BRAND= 'MFGR#2239'
AND S_REGION = 'EUROPE'
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND; PROMPT Q3.1
SELECT C_NATION, S_NATION, D_YEAR,
SUM(LO_REVENUE) AS REVENUE
FROM CUSTOMER, LINEORDER, SUPPLIER, DATES
WHERE LO_CUSTKEY = C_CUSTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND LO_ORDERDATE = D_DATEKEY
AND C_REGION = 'ASIA'
AND S_REGION = 'ASIA'
AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_NATION, S_NATION, D_YEAR
ORDER BY D_YEAR ASC, REVENUE DESC; PROMPT Q3.2
SELECT C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE)
AS REVENUE
FROM CUSTOMER, LINEORDER, SUPPLIER, DATES
WHERE LO_CUSTKEY = C_CUSTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND LO_ORDERDATE = D_DATEKEY
AND C_NATION = 'UNITED STATES'
AND S_NATION = 'UNITED STATES'
AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, REVENUE DESC; PROMPT Q3.3
SELECT C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE)
AS REVENUE
FROM CUSTOMER, LINEORDER, SUPPLIER, DATES
WHERE LO_CUSTKEY = C_CUSTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND LO_ORDERDATE = D_DATEKEY
AND (C_CITY='UNITED KI1'
OR C_CITY='UNITED KI5')
AND (S_CITY='UNITED KI1'
OR S_CITY='UNITED KI5')
AND D_YEAR >= 1992 AND D_YEAR <= 1997
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, REVENUE DESC; PROMPT Q3.4
SELECT C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE)
AS REVENUE
FROM CUSTOMER, LINEORDER, SUPPLIER, DATES
WHERE LO_CUSTKEY = C_CUSTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND LO_ORDERDATE = D_DATEKEY
AND (C_CITY='UNITED KI1'
OR C_CITY='UNITED KI5')
AND (S_CITY='UNITED KI1'
OR S_CITY='UNITED KI5')
AND D_YEARMONTH = 'Dec1997'
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, REVENUE DESC; PROMPT Q4.1
SELECT D_YEAR, C_NATION,
SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
FROM DATES, CUSTOMER, SUPPLIER, PART, LINEORDER
WHERE LO_CUSTKEY = C_CUSTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND LO_PARTKEY = P_PARTKEY
AND LO_ORDERDATE = D_DATEKEY
AND C_REGION = 'AMERICA'
AND S_REGION = 'AMERICA'
AND (P_MFGR = 'MFGR#1'
OR P_MFGR = 'MFGR#2')
GROUP BY D_YEAR, C_NATION
ORDER BY D_YEAR, C_NATION; PROMPT Q4.2
SELECT D_YEAR, S_NATION, P_CATEGORY,
SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
FROM DATES, CUSTOMER, SUPPLIER, PART, LINEORDER
WHERE LO_CUSTKEY = C_CUSTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND LO_PARTKEY = P_PARTKEY
AND LO_ORDERDATE = D_DATEKEY
AND C_REGION = 'AMERICA'
AND S_REGION = 'AMERICA'
AND (D_YEAR = 1997 OR D_YEAR = 1998)
AND (P_MFGR = 'MFGR#1'
OR P_MFGR = 'MFGR#2')
GROUP BY D_YEAR, S_NATION, P_CATEGORY
ORDER BY D_YEAR, S_NATION, P_CATEGORY; PROMPT Q4.3
SELECT D_YEAR, S_CITY, P_BRAND,
SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
FROM DATES, CUSTOMER, SUPPLIER, PART, LINEORDER
WHERE LO_CUSTKEY = C_CUSTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND LO_PARTKEY = P_PARTKEY
AND LO_ORDERDATE = D_DATEKEY
AND S_NATION = 'UNITED STATES'
AND (D_YEAR = 1997 OR D_YEAR = 1998)
AND P_CATEGORY = 'MFGR#14'
GROUP BY D_YEAR, S_CITY, P_BRAND
ORDER BY D_YEAR, S_CITY, P_BRAND;- nfobright 342 on linux 64