Cassandra排序不起作用

时间:2015-04-25 09:00:34

标签: cassandra cql

我正在使用Cassandra表,我想根据action_time字段对数据进行排序。

请查看以下脚本详情。

    cassandra@cqlsh:activitylog> desc keyspace;

    CREATE KEYSPACE activitylog WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '3'}  AND durable_writes = true;

    CREATE TABLE activitylog.activity_log (
        id timeuuid,
        action_time timestamp,
        vendor_id bigint,
        PRIMARY KEY (id, action_time)
    ) WITH CLUSTERING ORDER BY (action_time ASC)
        AND bloom_filter_fp_chance = 0.01
        AND caching = '{"keys":"ALL", "rows_per_partition":"NONE"}'
        AND comment = ''
        AND compaction = {'min_threshold': '4', 'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32'}
        AND compression = {'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'}
        AND dclocal_read_repair_chance = 0.1
        AND default_time_to_live = 0
        AND gc_grace_seconds = 864000
        AND max_index_interval = 2048
        AND memtable_flush_period_in_ms = 0
        AND min_index_interval = 128
        AND read_repair_chance = 0.0
        AND speculative_retry = '99.0PERCENTILE';

    cassandra@cqlsh:activitylog> select id,action_time,vendor_id from activity_log;

     id                                   | action_time              | vendor_id
    --------------------------------------+--------------------------+-----------
     4ce8ea50-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:11:18+0530 |      1234
     4ce8ea53-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:11:18+0530 |      1234
     86aedb31-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:13:34+0530 |      1234
     09ec5340-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:09:25+0530 |      1234
     65e85a71-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:12:39+0530 |      1234
     65e85a72-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:12:39+0530 |      1234
     86aedb33-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:13:34+0530 |      1234
     4ce8ea52-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:11:18+0530 |      1234
     86aedb32-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:13:34+0530 |      1234
     09ec5341-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:09:25+0530 |      1234
     20935240-ea70-11e4-8fd7-4943c7dd5536 | 2015-04-24 16:23:20+0530 |      1234
     65e85a73-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:12:39+0530 |      1234
     20935243-ea70-11e4-8fd7-4943c7dd5536 | 2015-04-24 16:23:20+0530 |      1234
     86aedb30-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:13:34+0530 |      1234
     09ec5343-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:09:25+0530 |      1234
     20935241-ea70-11e4-8fd7-4943c7dd5536 | 2015-04-24 16:23:20+0530 |      1234
     65e85a70-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:12:39+0530 |      1234
     20935242-ea70-11e4-8fd7-4943c7dd5536 | 2015-04-24 16:23:20+0530 |      1234
     09ec5342-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:09:25+0530 |      1234
     4ce8ea51-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:11:18+0530 |      1234


    CREATE KEYSPACE activitylog WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '3'}  AND durable_writes = true;

    CREATE TABLE activitylog.activity_log (
        id timeuuid,
        action_time timestamp,
        vendor_id bigint,
        PRIMARY KEY (id, action_time)
    ) WITH CLUSTERING ORDER BY (action_time ASC)
        AND bloom_filter_fp_chance = 0.01
        AND caching = '{"keys":"ALL", "rows_per_partition":"NONE"}'
        AND comment = ''
        AND compaction = {'min_threshold': '4', 'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32'}
        AND compression = {'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'}
        AND dclocal_read_repair_chance = 0.1
        AND default_time_to_live = 0
        AND gc_grace_seconds = 864000
        AND max_index_interval = 2048
        AND memtable_flush_period_in_ms = 0
        AND min_index_interval = 128
        AND read_repair_chance = 0.0
        AND speculative_retry = '99.0PERCENTILE';

    cassandra@cqlsh:activitylog> select id,action_time,vendor_id from activity_log;

     id                                   | action_time              | vendor_id
    --------------------------------------+--------------------------+-----------
     4ce8ea50-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:11:18+0530 |      1234
     4ce8ea53-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:11:18+0530 |      1234
     86aedb31-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:13:34+0530 |      1234
     09ec5340-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:09:25+0530 |      1234
     65e85a71-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:12:39+0530 |      1234
     65e85a72-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:12:39+0530 |      1234
     86aedb33-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:13:34+0530 |      1234
     4ce8ea52-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:11:18+0530 |      1234
     86aedb32-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:13:34+0530 |      1234
     09ec5341-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:09:25+0530 |      1234
     20935240-ea70-11e4-8fd7-4943c7dd5536 | 2015-04-24 16:23:20+0530 |      1234
     65e85a73-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:12:39+0530 |      1234
     20935243-ea70-11e4-8fd7-4943c7dd5536 | 2015-04-24 16:23:20+0530 |      1234
     86aedb30-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:13:34+0530 |      1234
     09ec5343-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:09:25+0530 |      1234
     20935241-ea70-11e4-8fd7-4943c7dd5536 | 2015-04-24 16:23:20+0530 |      1234
     65e85a70-ea7f-11e4-aa45-4943c7dd5536 | 2015-04-24 18:12:39+0530 |      1234
     20935242-ea70-11e4-8fd7-4943c7dd5536 | 2015-04-24 16:23:20+0530 |      1234
     09ec5342-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:09:25+0530 |      1234
     4ce8ea51-ea55-11e4-a220-4943c7dd5536 | 2015-04-24 13:11:18+0530 |      1234

正如我们所看到的,select语句的结果并未按action_time列排序。但是表的描述却表明,数据应当按action_time升序(ASC)排序。

请帮帮我。

1 个答案:

答案 0 :(得分:3)

值得注意的是,Cassandra只会在同一个分区键内部强制执行排序顺序。在您的情况下,分区键是id,而您的表定义表明:所有具有相同分区键的行将按聚类键(clustering key)action_time排序。

您的问题并不是排序顺序不起作用,而是您的分区键基数太高(每个id都是唯一的,每个分区只有一行),以至于Cassandra在分区内实际上没有什么可排序的。

使用基数较低的分区键对数据建模时,这一点很清楚。观察当我使用重新排序的PRIMARY KEY重新创建表activity_log_by_vendor时会发生什么:

CREATE TABLE activity_log_by_vendor (
    id timeuuid,
    action_time timestamp,
    vendor_id bigint,
    PRIMARY KEY (vendor_id, action_time, id));

aploetz@cqlsh:stackoverflow2> SELECT * FROm activity_log_by_vendor WHERE vendor_id = 1234;

 vendor_id | action_time              | id
-----------+--------------------------+--------------------------------------
      1234 | 2015-04-24 02:39:25-0500 | 09ec5340-ea55-11e4-a220-4943c7dd5536
      1234 | 2015-04-24 02:39:25-0500 | 09ec5341-ea55-11e4-a220-4943c7dd5536
      1234 | 2015-04-24 02:39:25-0500 | 09ec5342-ea55-11e4-a220-4943c7dd5536
      1234 | 2015-04-24 02:39:25-0500 | 09ec5343-ea55-11e4-a220-4943c7dd5536
      1234 | 2015-04-24 02:41:18-0500 | 4ce8ea50-ea55-11e4-a220-4943c7dd5536
      1234 | 2015-04-24 02:41:18-0500 | 4ce8ea51-ea55-11e4-a220-4943c7dd5536
      1234 | 2015-04-24 02:41:18-0500 | 4ce8ea52-ea55-11e4-a220-4943c7dd5536
      1234 | 2015-04-24 02:41:18-0500 | 4ce8ea53-ea55-11e4-a220-4943c7dd5536
      1234 | 2015-04-24 05:53:20-0500 | 20935240-ea70-11e4-8fd7-4943c7dd5536
      1234 | 2015-04-24 05:53:20-0500 | 20935241-ea70-11e4-8fd7-4943c7dd5536
      1234 | 2015-04-24 05:53:20-0500 | 20935242-ea70-11e4-8fd7-4943c7dd5536
      1234 | 2015-04-24 05:53:20-0500 | 20935243-ea70-11e4-8fd7-4943c7dd5536
      1234 | 2015-04-24 07:42:39-0500 | 65e85a70-ea7f-11e4-aa45-4943c7dd5536
      1234 | 2015-04-24 07:42:39-0500 | 65e85a71-ea7f-11e4-aa45-4943c7dd5536
      1234 | 2015-04-24 07:42:39-0500 | 65e85a72-ea7f-11e4-aa45-4943c7dd5536
      1234 | 2015-04-24 07:42:39-0500 | 65e85a73-ea7f-11e4-aa45-4943c7dd5536
      1234 | 2015-04-24 07:43:34-0500 | 86aedb30-ea7f-11e4-aa45-4943c7dd5536
      1234 | 2015-04-24 07:43:34-0500 | 86aedb31-ea7f-11e4-aa45-4943c7dd5536
      1234 | 2015-04-24 07:43:34-0500 | 86aedb32-ea7f-11e4-aa45-4943c7dd5536
      1234 | 2015-04-24 07:43:34-0500 | 86aedb33-ea7f-11e4-aa45-4943c7dd5536

(20 rows)

通过在vendor_id上进行分区,Cassandra实际上有20个CQL行要排序。由于它们具有相同的vendor_id,因此Cassandra将依次按action_time和id强制执行排序顺序,默认为升序(注意,我将id添加到PRIMARY KEY以确保唯一性)。

上个月,我为PlanetCassandra撰写了一篇描述此行为的文章:We Shall Have Order!

DataStax的Patrick McFadin也有一篇文章,讨论如何模拟这些类型的问题:Getting Started With Time Series Data Modeling

阅读这两篇文章,它们应该可以帮助您更好地理解如何正确利用Cassandra的聚类顺序(clustering order)。