如何在Cassandra中存储嵌套数据

时间:2015-05-29 22:38:45

标签: database cassandra denormalization

考虑以下两个"文档"(document):这两个文档该如何存储在同一个集合中?

// collection posts:
{
  id: 1,
  name: "kingsbounty",
  fields: {
    "title": {
      "title": "Game Title",
      "value": "Kings Bounty"
    },
    "body": {
      "title": "Game Description",
      "value": "Kings Bounty is a turn-based fantasy..."
    }
  }
}

// collection posts:
{
  id: 2,
  name: "outrun",
  fields: { 
    "vehicle": {
      "title": "Vehicle",
      "value": "Ferrari Testarossa"
    },
    "color": {
      "title": "Vehicle Color",
      "value": "Red"
    },
    "driver": {
      "title": "Driver",
      "value": "David Hasselhoff"
    }
  }
}

注意 fields 是大小不固定的映射(map)。

Cassandra 不允许定义 fields <map <map, text>> 这样的嵌套类型。

我想学习用"Cassandra 的方式"(即非规范化的方式)来实现这一点。下面这种做法并不是非规范化的,但可以存储和检索任意长度的嵌套数据。

-- One row per post, keyed by id. `fields` holds the names of the post's
-- fields; the per-field title/value pairs are stored in post_fields.
CREATE TABLE posts (
  id uuid,
  name text,
  fields list<text>,
  PRIMARY KEY (id)
);
-- name is not part of the primary key, so index it to allow
-- WHERE name = ... lookups on this table.
CREATE INDEX post_name_key ON posts (name);

-- One row per (post, field) pair. post_name is the partition key and
-- field_name is the clustering column within that partition.
CREATE TABLE post_fields (
  post_name   text,
  field_name  text,
  title       text,
  value       text,
  PRIMARY KEY ((post_name), field_name)
);

-- Seed the two posts; each row records the names of the fields the post has.
INSERT INTO posts (id, name, fields)
VALUES (uuid(), 'kingsbounty', ['title', 'body']);
INSERT INTO posts (id, name, fields)
VALUES (uuid(), 'outrun', ['vehicle', 'color', 'driver']);

-- Field rows for the 'kingsbounty' post.
INSERT INTO post_fields (post_name, field_name, title, value)
VALUES ('kingsbounty', 'title', 'Game Title', 'Kings Bounty');
INSERT INTO post_fields (post_name, field_name, title, value)
VALUES ('kingsbounty', 'body', 'Game Description', 'Kings Bounty is a turn-based fantasy...');

-- Field rows for the 'outrun' post.
INSERT INTO post_fields (post_name, field_name, title, value)
VALUES ('outrun', 'vehicle', 'Vehicle', 'Ferrari Testarossa');
INSERT INTO post_fields (post_name, field_name, title, value)
VALUES ('outrun', 'color', 'Vehicle Color', 'Red');
INSERT INTO post_fields (post_name, field_name, title, value)
VALUES ('outrun', 'driver', 'Driver', 'David Hasselhoff');

-- name is not part of the posts primary key (id); this filter depends on the post_name_key index on posts (name).
SELECT fields FROM posts WHERE name = 'kingsbounty';

     fields
    -------------------
     ['title', 'body']

-- post_name is the first primary-key component of post_fields, so this returns every field row stored for the post.
SELECT * FROM post_fields WHERE post_name = 'kingsbounty';

     post_name   | field_name | title            | value
    -------------+------------+------------------+-----------------------------------------
     kingsbounty |       body | Game Description | Kings Bounty is a turn-based fantasy...
     kingsbounty |      title |       Game Title |                            Kings Bounty

-- Field-name list of the 'outrun' post, again filtered on the indexed name column.
SELECT fields FROM posts WHERE name = 'outrun';

     fields
    --------------------------------
     ['vehicle', 'color', 'driver']

-- All post_fields rows whose post_name is 'outrun'.
SELECT * FROM post_fields WHERE post_name = 'outrun';

     post_name | field_name | title         | value
    -----------+------------+---------------+--------------------
        outrun |      color | Vehicle Color |                Red
        outrun |     driver |        Driver |   David Hasselhoff
        outrun |    vehicle |       Vehicle | Ferrari Testarossa

有没有更好的、非规范化的方式来存储此类数据?

2 个答案:

答案 0 :(得分:6)

IRC 频道 #cassandra 上的 jeffj 向我建议,其实根本不需要第一张表。

我现在开始明白了。

-- Single-table layout: one row per (post name, field name) pair.
-- name is the partition key; field is the clustering column.
CREATE TABLE posts (
  name   text,
  field  text,
  title  text,
  value  text,
  PRIMARY KEY ((name), field)
);

-- Field rows for the 'kingsbounty' post.
INSERT INTO posts (name, field, title, value)
VALUES ('kingsbounty', 'title', 'Game Title', 'Kings Bounty');
INSERT INTO posts (name, field, title, value)
VALUES ('kingsbounty', 'body', 'Game Description', 'Kings Bounty is a turn-based fantasy...');

-- Field rows for the 'outrun' post.
INSERT INTO posts (name, field, title, value)
VALUES ('outrun', 'vehicle', 'Vehicle', 'Ferrari Testarossa');
INSERT INTO posts (name, field, title, value)
VALUES ('outrun', 'color', 'Vehicle Color', 'Red');
INSERT INTO posts (name, field, title, value)
VALUES ('outrun', 'driver', 'Driver', 'David Hasselhoff');

-- name is the first component of PRIMARY KEY (name, field); selecting only field lists the post's field names.
SELECT field FROM posts WHERE name = 'kingsbounty';

 field
-------
  body
 title

-- Every column of every field row stored under name = 'kingsbounty'.
SELECT * FROM posts WHERE name = 'kingsbounty';

 name        | field | title            | value
-------------+-------+------------------+-----------------------------------------
 kingsbounty |  body | Game Description | Kings Bounty is a turn-based fantasy...
 kingsbounty | title |       Game Title |                            Kings Bounty

-- List the field names of the 'outrun' post. The column in this table is `field` (singular).
SELECT field FROM posts WHERE name = 'outrun';

 field
---------
   color
  driver
 vehicle


-- Every column of every field row stored under name = 'outrun'.
SELECT * FROM posts WHERE name = 'outrun';

 name   | field   | title         | value
--------+---------+---------------+--------------------
 outrun |   color | Vehicle Color |                Red
 outrun |  driver |        Driver |   David Hasselhoff
 outrun | vehicle |       Vehicle | Ferrari Testarossa

答案 1 :(得分:0)

根据您希望查询返回的信息来设计表。假设您需要返回所有信息,可以像下面这样把它们存储在单个表中,并在客户端进行必要的处理。

-- One row per post, keyed by id; all of a post's fields are kept in a
-- single map column keyed by field name.
CREATE TABLE posts (
  id      uuid PRIMARY KEY,
  name    text,
  fields  map<text, text>
);

-- Each map value is a text fragment holding the field's "title"/"value"
-- pairs; it is stored as plain text.
INSERT INTO posts (id, name, fields)
VALUES (
  uuid(),
  'kingsbounty',
  {
    'title': '"title": "Game Title","value": "Kings Bounty"',
    'body': '"title": "Game Description","value": "Kings Bounty is a turn-based fantasy..."'
  }
);
INSERT INTO posts (id, name, fields)
VALUES (
  uuid(),
  'outrun',
  {
    'vehicle': '"title": "Vehicle","value": "Ferrari Testarossa"',
    'color': '"title": "Vehicle Color","value": "Red"',
    'driver': '"title": "Driver","value": "David Hasselhoff"'
  }
);

   cqlsh> select id,name,fields from posts;

 id                                   | name        | fields
--------------------------------------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 dd31393d-2654-42ec-a5fb-73ab13c12932 | kingsbounty | {'body': '"title": "Game Description","value": "Kings Bounty is a turn-based fantasy..."', 'title': '"title": "Game Title","value": "Kings Bounty"'}
 a1e2b512-7177-4a2d-8da3-528b9d5097c0 |      outrun | {'color': '"title": "Vehicle Color","value": "Red"', 'driver': '"title": "Driver","value": "David Hasselhoff"', 'vehicle': '"title": "Vehicle","value": "Ferrari Testarossa"'}