从所关注的用户获取最新帖子的可扩展性问题:两种不同的实现

时间:2016-04-17 01:21:15

标签: mysql design-patterns database-design relational-database innodb

我有两种不同的实现方法,用于检索某个用户所关注的人发布的民意调查(poll),我想知道哪一种对数据库而言更具可扩展性。首先,我将向您展示表结构,然后是两种实现。

民意调查表

#include <iostream>
using namespace std;

// File-scope state shared between the functions below: the dimensions that
// CalculateBasicVolume passes to the volume helpers, and the value that
// main stores and PrintResult prints. All start at zero (static storage).
double height;
double width;
double length;
double radius;
double base_area;
double result;

 //Function prototypes

// Prints the shape menu and returns the number read from standard input.
int ReadInputShapeChoice();
// Prompts for the dimensions that belong to the shape selected by `choice`.
void readshapedimension(int choice);
// Calls the volume helper matching `choice` with the file-scope dimensions.
float CalculateBasicVolume(int choice);
// Prints the file-scope `result` with a label matching `choice`.
void PrintResult(int choice);

// Volume helpers, one per supported shape (menu options 1 to 5, in order).
double rectangular_solid(double length1, double width1, double height1);
double cylinder(double radius2, double height2);
double cone(double radius3, double height3);
double sphere(double radius4);
double square_based_pyramid(double height5, double base_area5);

//function definitions
// Volume of a rectangular solid: the product of its three edge lengths.
double rectangular_solid(double length1, double width1, double height1)
{
    return length1 * width1 * height1;
}
// Volume of a cylinder: circular base area (pi taken as 3.14159) times height.
double cylinder(double radius2, double height2)
{
    const double base = 3.14159 * (radius2 * radius2);
    return base * height2;
}
// Volume of a cone: one third of the cylinder with the same radius and
// height (pi taken as 3.14159).
double cone(double radius3, double height3)
{
    const double enclosing_cylinder = 3.14159 * (radius3 * radius3) * height3;
    return enclosing_cylinder / 3;
}
// Volume of a sphere: (4/3) * pi * r^3, with pi taken as 3.14159.
double sphere(double radius4)
{
    // The factor must be computed in floating point: the integer expression
    // 4 / 3 evaluates to 1 and silently drops the 4/3 scaling.
    const double cube = radius4 * radius4 * radius4;
    return (3.14159 * cube) * (4.0 / 3.0);
}
// Volume of a pyramid: one third of height times base area.
double square_based_pyramid(double height5, double base_area5)
{
    // Divide by 3.0 rather than multiplying by the integer expression 1 / 3,
    // which evaluates to 0 and would make every volume zero.
    return (height5 * base_area5) / 3.0;
}


// Writes the shape menu to standard output, one entry per line, then
// extracts an integer from standard input and returns it.
int ReadInputShapeChoice()
{
    static const char* const kMenuLines[] = {
        "Choose what shape you want to calculate",
        "1 = Rectangular solid",
        "2 = Cylinder",
        "3 = Cone",
        "4 = Sphere",
        "5 = Square based pyramid",
    };
    for (const char* line : kMenuLines)
    {
        std::cout << line << std::endl;
    }

    int selected;
    std::cin >> selected;
    return selected;
}

void readshapedimension(int choice)
{
    switch (choice)
    {
    case 1:
    {
        int length, width, height;
        cout << "You have chosen rectuangular solid" << endl;
        cout << "Enter the values for length width and height" << endl;
        cin >> length >> width >> height;
        break;
    }
    case 2:
    {
        int radius, height;
        cout << "You have chosen cylinder" << endl;
        cout << "Enter the values for radius and height" << endl;
        cin >> radius >> height;
        break;
    }
    case 3:
    {
        int radius, height;
        cout << "You have chosen cone" << endl;
        cout << "Enter the values for radius and height" << endl;
        cin >> radius >> height;
        break;
    }
    case 4:
    {
        int radius;
        cout << "You have chosen sphere" << endl;
        cout << "Enter the radius" << endl;
        cin >> radius;
        break;
    }
    case 5:
    {
        int height, base_area;
        cout << "You have chosen square based pyramid" << endl;
        cout << "Enter height and area of the base" << endl;
        cin >> height >> base_area;
        break;
    }
    }
}

// Computes the volume of the shape selected by `choice` (1 to 5) from the
// file-scope dimension variables and returns it.
//
// The volume is accumulated in a double and returned after the switch, so
// every path returns a value and the fractional part is kept until the
// final conversion to the declared float return type.
// Any other `choice` yields 0.
float CalculateBasicVolume(int choice)
{
    double volume = 0.0;
    switch (choice)
    {
    case 1:
        volume = rectangular_solid(length, width, height);
        break;
    case 2:
        volume = cylinder(radius, height);
        break;
    case 3:
        volume = cone(radius, height);
        break;
    case 4:
        volume = sphere(radius);
        break;
    case 5:
        volume = square_based_pyramid(height, base_area);
        break;
    default:
        break;
    }
    return static_cast<float>(volume);
}
// Prints the file-scope `result` preceded by the label for the shape
// selected by `choice` (1 to 5). Any other `choice` prints nothing.
void PrintResult(int choice)
{
    // Labels indexed by choice - 1; capitalisation is kept exactly as is.
    static const char* const kLabels[] = {
        "The volume of the rectangular solid is ",
        "the volume of the cylinder is ",
        "The volume of the cone is ",
        "The volume of the sphere is ",
        "the volume of the square based pyramid is ",
    };

    if (choice < 1 || choice > 5)
    {
        return;
    }
    std::cout << kLabels[choice - 1] << result << std::endl;
}



// Program entry point: ask which shape to use, read its dimensions, compute
// the volume into the file-scope `result`, and print it.
int main() {
    const int choice = ReadInputShapeChoice();
    readshapedimension(choice);
    result = CalculateBasicVolume(choice);
    PrintResult(choice);

    return 0;
}

repoll table - 两个实现都必需

CREATE TABLE `poll` (
`id` int(1) unsigned NOT NULL AUTO_INCREMENT,
`creator_id` int(1) unsigned NOT NULL,
`date_created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`question` varchar(255) NOT NULL,
`num_of_responses` int(1) unsigned DEFAULT NULL,
`num_of_answers` enum('2','3','4','5') NOT NULL,
PRIMARY KEY (`id`),
KEY `creator_id` (`creator_id`),
KEY `date_created` (`date_created`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

下表

CREATE TABLE `repoll` (
`repoller_id` int(1) unsigned NOT NULL,
`poll_id` int(1) unsigned NOT NULL,
`date_created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
KEY `repoller_id` (`repoller_id`),
KEY `poll_id` (`poll_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

user_feed表 - 仅用于第二次实现

CREATE TABLE `following` (
`follower` int(1) unsigned NOT NULL,
`followee` int(1) unsigned NOT NULL,
KEY `follower` (`follower`),
KEY `followee` (`followee`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

第一个实现:不需要user_feed表,但查询似乎比实现二中的查询计算成本更高。

CREATE TABLE `user_feed` (
`user_id` int(1) unsigned NOT NULL,
`poll_id` int(1) unsigned NOT NULL,
`repoller_id` int(1) unsigned DEFAULT NULL,
`date_created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
KEY `user_id` (`user_id`),
KEY `date_created` (`date_created`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

第二种实现:需要user_feed表和repoll表。每次有人发布/重新发布内容时,我都会向user_feed表添加记录:为发帖者的每个关注者各添加一条记录。我只保留user_feed表中任何特定用户的120条记录。如果发布了帖子且用户在user_feed表中已有120条记录,则删除该用户的最旧记录并将其添加到repoll表中;新的取而代之。如果用户请求的记录多于user_feed表中存在的记录,则使用第一种实现来检索多出的记录。

SELECT P.id, P.creator_id, P.date_created
FROM
following f JOIN
(
    SELECT id, creator_id, date_created
    FROM poll
    UNION ALL
    SELECT poll_id, repoller_id, date_created
    FROM repoll
) AS P(id, creator_id, date_created)
ON f.followee=P.creator_id
AND f.follower=23
ORDER BY P.date_created DESC
LIMIT 120;

2 个答案:

答案 0 :(得分:0)

展开原始查询,看起来像查询正在寻找:

  • 由 23 创建的民意调查(poll)
  • 由 23 创建的 repoll
  • 由 23 所关注的人创建的民意调查
  • 由 23 所关注的人创建的 repoll

假设 `following` 表中的行存在约束,使得 23 不能关注他自己(即不允许出现 follower = followee 的行)

并且假设同一用户无法在同一时刻对同一个民意调查重复 repoll,也就是说,(poll_id, creator_id, created_on) 元组在 repoll 表中是唯一的

(可能还有其他一些我尚未确定的情况......)

它看起来像四个不同的集合:

1)由 23 创建的民意调查

 SELECT p.id
      , p.creator_id
      , p.created_on
      , NULL AS repoller_id
   FROM poll p
  WHERE p.creator_id = 23
  ORDER BY p.created_on DESC LIMIT 80

2)由 23 创建的 repoll

 SELECT p.id
      , p.creator_id
      , r.created_on
      , r.repoller_id
   FROM poll p
   JOIN repoll r
     ON r.poll_id = p.id
  WHERE r.repoller_id = 23
  ORDER BY r.created_on DESC LIMIT 80

3)由 23 所关注的人创建的民意调查

 SELECT p.id
      , p.creator_id
      , p.created_on
      , NULL AS repoller_id
   FROM poll p
   JOIN following f
     ON f.followee = p.creator_id
    AND f.follower = 23
    AND f.follower <> f.followee  -- only needed if we don't disallow 23 to follow 23
  ORDER BY p.created_on DESC LIMIT 80

4)由 23 所关注的人创建的 repoll

 -- Repolls made by users that 23 follows.
 -- creator_id is taken from poll (p): repoll has no creator_id column.
 SELECT p.id
      , p.creator_id
      , r.created_on
      , r.repoller_id
   FROM poll p
   JOIN repoll r
     ON r.poll_id = p.id
   JOIN following f
     ON f.followee = r.repoller_id
    AND f.follower = 23
    AND f.follower <> f.followee   -- only needed if we don't disallow 23 to follow 23
  ORDER BY r.created_on DESC LIMIT 80

如果需要消除重复的可能性(例如因为我们没有对表有适当的UNIQUE约束),我们可以将GROUP BY子句添加到所需的查询中。

我们可以逐个调优每个查询,并使用 EXPLAIN 确认有合适的索引可用且确实被使用。

然后我们可以将查询与UNION ALL集合运算符组合在一起。我的偏好是不在查询中重用任何表别名,即使它对MySQL明确,它使语句更容易阅读,特别是,当每个表引用都有唯一的别名时,使EXPLAIN更容易破译。

由于原始查询按 created_on 降序排序,并指定最多返回 80 行,因此我们可以对每个子查询应用相同的排序和限制。这样在对整个结果集排序时,最多只有 320 行(= 4 x 80 行)。为了使结果更具确定性,我们在 ORDER BY 中加入第二个表达式。

(
  SELECT p1.id
       , p1.creator_id
       , p1.created_on
       , NULL AS repoller_id
    FROM poll p1
   WHERE p1.creator_id = 23         -- query parameter
   ORDER BY p1.created_on DESC, p1.id DESC LIMIT 80
)
UNION ALL
(
  SELECT p2.id
       , p2.creator_id
       , r2.created_on
       , r2.repoller_id
    FROM poll p2
    JOIN repoll r2
      ON r2.poll_id = p2.id
   WHERE r2.repoller_id = 23         -- query parameter
   ORDER BY r2.created_on DESC, r2.poll_id DESC LIMIT 80
)
UNION ALL
(
  SELECT p3.id
       , p3.creator_id
       , p3.created_on
       , NULL AS repoller_id
    FROM poll p3
    JOIN following f3
      ON f3.followee = p3.creator_id
     AND f3.follower = 23            -- query parameter
     AND f3.follower <> f3.followee  -- only needed if we allow 23 to follow 23
   ORDER BY p3.created_on DESC, p3.id DESC LIMIT 80
)
UNION ALL
(
  SELECT p4.id
       , p4.creator_id
       , r4.created_on
       , r4.repoller_id
    FROM poll p4
    JOIN repoll r4
      ON r4.poll_id = p4.id
    JOIN following f4
      ON f4.followee = r4.repoller_id
     AND f4.follower = 23            -- query parameter
     AND f4.follower <> f4.followee  -- only needed if we allow 23 to follow 23
   ORDER BY r4.created_on DESC, r4.poll_id DESC LIMIT 80
)
ORDER BY created_on DESC, id DESC LIMIT 80

即使此查询中的SQL文本比您发布的两个选项中的任何一个都要长,但我认为在给定合适的索引的情况下,我们可以更好地获得可预测的性能。

代替单列 `creator_id` 上的索引:

-- Composite indexes matching the WHERE / ORDER BY of the queries above.
CREATE INDEX poll_IX1 ON poll (creator_id, created_on, id) ;
-- repoll is filtered by repoller_id and ordered by (created_on, poll_id);
-- it has no creator_id or id column.
CREATE INDEX repoll_IX1 ON repoll (repoller_id, created_on, poll_id) ;

在 `following` 表上,创建一个唯一约束,例如

ALTER TABLE `following` ADD PRIMARY KEY (follower, followee) 

不是针对此查询,而是针对可能在系统中使用的其他查询......

-- Reverse lookup (who follows a given user); column names match the
-- `following` table definition (follower, followee).
CREATE UNIQUE INDEX following_UX1 ON following (followee, follower)

并删除 `following` 表中单列上(现在已冗余)的索引。

还要考虑添加适当的外键约束。

答案 1 :(得分:0)

可以改进实施1:

SELECT  P.id, P.creator_id, P.date_created
    FROM  following f
    JOIN (
          ( SELECT  id, creator_id, date_created
                FROM  poll
                ORDER BY  date_created DESC
                LIMIT  120
          ) UNION  ALL 
          (  SELECT  poll_id, repoller_id, date_created
                FROM  repoll
                ORDER BY  date_created DESC
                LIMIT  120
          )
         ) AS P   ON f.followee=P.creator_id
      AND  f.follower=23
    ORDER BY  P.date_created DESC
    LIMIT  120;

poll and repoll need:  INDEX(creator_id, date_created)

说明:在大多数情况下,看似冗余的ORDER BY .. LIMIT ..子句实际上是一种优化。在SELECTs中的UNION中,它最小化了UNION将创建的临时表中存储的行数。临时表不超过2 * 120行;如果表有数百万行,这一点很重要。外部查询还需要子句,以便将子列表混合在一起,并将结果减少到只需要的120个。

改进索引,并使“覆盖索引”成为可能:

CREATE TABLE `following` (
  `follower` int unsigned NOT NULL,
  `followee` int unsigned NOT NULL,
  PRIMARY KEY(`follower`, followee),
  INDEX      (`followee`, follower)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

说明:我假设 (follower, followee) 组合是唯一的,因此可以作为 PRIMARY KEY。这样做的性能优势是,给定 follower 的所有 followee 都可以在相邻行中找到,反之亦然。你原来的做法要慢得多,因为它必须先查索引,然后再读取数据行以获取两个字段。我给你的方案既是“覆盖”的,也是“聚集”的。

你的第二种实现不也应该有 LIMIT 吗?

user_feed(uf)需要 INDEX(user_id, date_created)

至于您最初的问题“哪一个更好”,……我看不出第二个查询能得到正确的结果。