非规范化关系结构
第二部分我们讲了复合主键,这可以灵活的解决主从关系,也即是一对多关系,那么多对多关系呢?
多对多关系的数据模型应该回答两个问题:
- 我跟着谁?
- 谁跟着我?
-- 建表,我们发现这里有个不寻常的事情,表都有2个列,都是主键。2个表,一个出站,一个入站
CREATE TABLE "user_outbound_follows" (
"follower_username" text,
"followed_username" text,
PRIMARY KEY ("follower_username", "followed_username")
); CREATE TABLE "user_inbound_follows" (
"followed_username" text,
"follower_username" text,
PRIMARY KEY ("followed_username", "follower_username")
);
-- 新建数据
INSERT INTO "user_outbound_follows"
("follower_username", "followed_username")
VALUES ('alice', 'bob');
INSERT INTO "user_inbound_follows"
("followed_username", "follower_username")
VALUES ('bob', 'alice');
INSERT INTO "user_outbound_follows"
("follower_username", "followed_username")
VALUES ('alice', 'carol');
INSERT INTO "user_inbound_follows"
("followed_username", "follower_username")
VALUES ('carol', 'alice'); SELECT "followed_username"
FROM "user_outbound_follows"
WHERE "follower_username" = 'alice';
-- 将会报错,followed_username不是分区列,簇列精确查询将报错
SELECT "follower_username"
FROM "user_outbound_follows"
WHERE "followed_username" = 'bob';
-- 正常,在入站表中followed_username是分区列
SELECT "follower_username"
FROM "user_inbound_follows"
WHERE "followed_username" = 'bob'; DELETE FROM "user_outbound_follows"
WHERE "follower_username" = 'alice'
AND "followed_username" = 'bob'; DELETE FROM "user_inbound_follows"
WHERE "followed_username" = 'bob'
AND "follower_username" = 'alice'; SELECT "followed_username"
FROM "user_outbound_follows"
WHERE "follower_username" = 'alice';
非规范化数据结构
上面我们看到的第一个非规范化的例子,这是在多个地方存储相同数据的实践。在关系数据库模式中,非规范化通常是不受欢迎的,尽管从实际角度来看,即使在这种情况下,非规范化也常常是一种有用的优化。在非关系数据库中,非规范化通常是查询驱动设计中的一个关键工具。
非规范化的缺点体现在前面的插入模式上:我们必须做两个插入语句才能完全表示一个基本事实(多对多跟随)。从性能的角度来看,这是可以接受的:Cassandra针对高效的写操作进行了优化,因此我们很高兴进行详细的写操作,以便允许高效的读取。当然,这会在应用程序级别上增加更多的复杂度
CREATE TABLE "user_follows" (
"followed_username" text,
"follower_username" text,
PRIMARY KEY ("followed_username", "follower_username")
);
-- 将会报错
SELECT * FROM "user_follows"
WHERE "follower_username" = 'alice';
-- 建立索引
CREATE INDEX ON "user_follows" ("follower_username"); INSERT INTO "user_follows"
("followed_username", "follower_username")
VALUES ('alice', 'bob'); INSERT INTO "user_follows"
("followed_username", "follower_username")
VALUES ('alice', 'carol'); INSERT INTO "user_follows"
("followed_username", "follower_username")
VALUES ('carol', 'bob'); INSERT INTO "user_follows"
("followed_username", "follower_username")
VALUES ('dave', 'bob'); SELECT * FROM "user_follows"
WHERE "follower_username" = 'bob';
-- 将会报错
CREATE INDEX ON "users" ("email", "encrypted_password"); SELECT * FROM "user_follows"
WHERE "follower_username" > 'alice';
视图
在建模关系时,我们在其中一列上创建了辅助索引。
但是,在非分区键列上添加辅助索引会对读取产生性能影响。如果需要高性能,需要在查询中精准指定分区键,这样查询就不会跨多个分区扩展。
为了避免客户端非规范化对数列使用二级索引,Cassandra 3.0引入了视图。视图处理服务器端的非规范化,确保基本数据和视图数据之间的最终一致性。
CREATE TABLE "user_follows_base" (
"followed_username" text,
"follower_username" text,
PRIMARY KEY ("followed_username", "follower_username")
); CREATE MATERIALIZED VIEW "user_follows_view"
AS SELECT followed_username, follower_username
FROM "user_follows_base"
WHERE follower_username IS NOT NULL AND followed_username IS NOT NULL
PRIMARY KEY (follower_username, followed_username); INSERT INTO "user_follows_base"
("followed_username", "follower_username")
VALUES ('alice', 'bob'); INSERT INTO "user_follows_base"
("followed_username", "follower_username")
VALUES ('alice', 'carol'); INSERT INTO "user_follows_base"
("followed_username", "follower_username")
VALUES ('carol', 'bob'); INSERT INTO "user_follows_base"
("followed_username", "follower_username")
VALUES ('dave', 'bob'); SELECT * FROM "user_follows_base"
WHERE "followed_username" = 'alice'; SELECT * FROM "user_follows_base"
WHERE "follower_username" = 'bob'; SELECT * FROM "user_follows_view"
WHERE "follower_username" = 'bob'; DELETE FROM "user_follows_base"
WHERE "followed_username" = 'dave'
AND "follower_username" = 'bob'; INSERT INTO "user_outbound_follows"
("follower_username", "followed_username")
VALUES ('alice', 'dave'); INSERT INTO "user_inbound_follows"
("followed_username", "follower_username")
VALUES ('dave', 'alice'); INSERT INTO "user_status_updates" ("username", "id", "body")
VALUES ('dave', NOW(), 'dave update one'); INSERT INTO "user_status_updates" ("username", "id", "body")
VALUES ('carol', NOW(), 'carol update one'); INSERT INTO "user_status_updates" ("username", "id", "body")
VALUES ('dave', NOW(), 'dave update two'); INSERT INTO "user_status_updates" ("username", "id", "body")
VALUES ('carol', NOW(), 'carol update two'); SELECT "followed_username"
FROM "user_outbound_follows"
WHERE "follower_username" = 'alice'; SELECT "id", UNIXTIMESTAMPOF("id"), "body"
FROM "user_status_updates"
WHERE "username" IN ('carol', 'dave');
-- 将会报错
SELECT "username", "id", UNIXTIMESTAMPOF("id"), "body"
FROM "user_status_updates"
WHERE "username" IN ('carol', 'dave')
ORDER BY "id" DESC
LIMIT 2; SELECT "username", "id", UNIXTIMESTAMPOF("id"), "body"
FROM "user_status_updates"
WHERE "username" IN ('carol', 'dave')
AND "id" < 3a59c320-28cf-11e4-8069-5f98e903bf02
ORDER BY "id" DESC
LIMIT 2; CREATE TABLE "home_status_update_ids" (
"timeline_username" text,
"status_update_id" timeuuid,
"status_update_username" text,
PRIMARY KEY ("timeline_username", "status_update_id")
)
WITH CLUSTERING ORDER BY ("status_update_id" DESC); SELECT "follower_username"
FROM "user_inbound_follows"
WHERE "followed_username" = 'carol'; INSERT INTO "user_status_updates"
("username", "id", "body")
VALUES(
'carol', 65cd8320-2ad7-11e4-8069-5f98e903bf02,
'carol update 3'); INSERT INTO "home_status_update_ids"
("timeline_username", "status_update_id",
"status_update_username")
VALUES
('alice', 65cd8320-2ad7-11e4-8069-5f98e903bf02, 'carol'); SELECT NOW() FROM "user_status_updates" LIMIT 1; INSERT INTO "user_status_updates"
("username", "id", "body")
VALUES
('dave', a05b90b0-2ada-11e4-8069-5f98e903bf02, 'dave update 3'); SELECT * FROM "user_status_updates"
WHERE "username" IN ('dave', 'carol')
AND "id" IN (
a05b90b0-2ada-11e4-8069-5f98e903bf02,
65cd8320-2ad7-11e4-8069-5f98e903bf02
); DELETE FROM "user_status_updates"
WHERE username = 'dave'
AND id = a05b90b0-2ada-11e4-8069-5f98e903bf02; DELETE FROM "home_status_update_ids"
WHERE timeline_username IN ('alice')
AND status_update_id = a05b90b0-2ada-11e4-8069-5f98e903bf02; CREATE TABLE "home_status_updates" (
"timeline_username" text,
"status_update_id" timeuuid,
"status_update_username" text,
"body" text,
PRIMARY KEY ("timeline_username", "status_update_id")
) WITH CLUSTERING ORDER BY ("status_update_id" DESC); SELECT "followed_username"
FROM "user_inbound_follows"
WHERE "followed_username" = 'carol'; INSERT INTO "user_status_updates"
("username", "id", "body")
VALUES
('carol',
cacc7de0-2af9-11e4-8069-5f98e903bf02,
'carol update 4'
); INSERT INTO "home_status_updates"
("timeline_username", "status_update_id", "status_update_username", "body")
VALUES (
'alice',
cacc7de0-2af9-11e4-8069-5f98e903bf02,
'carol',
'carol update 4'
); SELECT "followed_username"
FROM "user_inbound_follows"
WHERE "followed_username" = 'dave'; INSERT INTO "user_status_updates"
("username", "id", "body")
VALUES
('dave', 16e2f240-2afa-11e4-8069-5f98e903bf02, 'dave update 4'); INSERT INTO "home_status_updates"
("timeline_username", "status_update_id", "status_update_username", "body")
VALUES (
'alice',
16e2f240-2afa-11e4-8069-5f98e903bf02,
'dave',
'dave update 4'
); SELECT "status_update_username", "status_update_id", "body"
FROM "home_status_updates"
WHERE "timeline_username" = 'alice';
批处理
-- 批处理 BEGIN BATCH
INSERT INTO "user_status_updates"
("username", "id", "body")
VALUES(
'dave',
16e2f240-2afa-11e4-8069-5f98e903bf02,
'dave update 4'
);
INSERT INTO "home_status_updates" (
"timeline_username",
"status_update_id",
"status_update_username",
"body")
VALUES (
'alice',
16e2f240-2afa-11e4-8069-5f98e903bf02,
'dave',
'dave update 4'
);
APPLY BATCH; CREATE TABLE batchlog (
id uuid PRIMARY KEY,
written_at timestamp,
data blob
); BEGIN UNLOGGED BATCH
INSERT INTO "user_status_updates"
("username", "id", "body")
VALUES(
'dave',
16e2f240-2afa-11e4-8069-5f98e903bf02,
'dave update 4'
);
INSERT INTO "home_status_updates" (
"timeline_username",
"status_update_id",
"status_update_username",
"body")
VALUES (
'alice',
16e2f240-2afa-11e4-8069-5f98e903bf02,
'dave',
'dave update 4'
);
APPLY BATCH; BEGIN UNLOGGED BATCH
INSERT INTO "status_update_replies" ("status_update_username",
"status_update_id", "id", "author_username", "body")
VALUES ('alice', 76e7a4d0-e796-11e3-90ce-5f98e903bf02, NOW(), 'bob',
'Bob Reply1');
INSERT INTO "status_update_replies" ("status_update_username",
"status_update_id", "id", "author_username", "body")
VALUES ('alice', 76e7a4d0-e796-11e3-90ce-5f98e903bf02, NOW(), 'carol',
'Carol Reply1');
APPLY BATCH; BEGIN UNLOGGED BATCH
INSERT INTO "user_status_updates"
("username", "id", "body")
VALUES(
'dave',
NOW(),
'dave update 5'
);
INSERT INTO "user_status_updates"
("username", "id", "body")
VALUES(
'ellen',
NOW(),
'ellen update 6'
);
APPLY BATCH;