Question

我有两种不同的实现方式，用来检索某个用户所关注的人发布的民意调查（poll），我想知道哪一种在数据库层面更具可扩展性。下面先给出各个表的结构，然后是两种实现。

民意调查表

#include <iostream>
using namespace std;

// Program-wide state. CalculateBasicVolume() takes its dimensions from these
// variables, and PrintResult() prints `result`.
double height;
double width;
double length;
double radius;
double base_area;
double result;

// Interactive driver steps, called in this order by main().
int ReadInputShapeChoice();
void readshapedimension(int choice);
float CalculateBasicVolume(int choice);
void PrintResult(int choice);

// Volume formulas, one per supported shape.
double rectangular_solid(double length1, double width1, double height1);
double cylinder(double radius2, double height2);
double cone(double radius3, double height3);
double sphere(double radius4);
double square_based_pyramid(double height5, double base_area5);

//function definitions
// Volume of a rectangular solid: length * width * height.
double rectangular_solid(double length1, double width1, double height1)
{
    return length1 * width1 * height1;
}
// Volume of a cylinder: pi * radius^2 * height, with pi taken as 3.14159.
double cylinder(double radius2, double height2)
{
    const double radius_squared = radius2 * radius2;
    return 3.14159 * radius_squared * height2;
}
// Volume of a cone: (pi * radius^2 * height) / 3, with pi taken as 3.14159.
double cone(double radius3, double height3)
{
    const double radius_squared = radius3 * radius3;
    return (3.14159 * radius_squared * height3) / 3;
}
// Volume of a sphere: (4/3) * pi * radius^3, with pi taken as 3.14159.
double sphere(double radius4)
{
   const double radius_cubed = radius4 * radius4 * radius4;
   // The factor must be computed in floating point: the integer expression
   // 4 / 3 evaluates to 1 and would silently drop the 4/3 scaling.
   return 3.14159 * radius_cubed * (4.0 / 3.0);
}
// Volume of a pyramid from its height and base area: (height * base_area) / 3.
double square_based_pyramid(double height5, double base_area5)
   {
   // Divide by 3.0 rather than multiplying by (1 / 3): the integer expression
   // 1 / 3 evaluates to 0, which made every result zero.
   return (height5 * base_area5) / 3.0;
   }


// Prints the shape menu and reads the user's selection from standard input.
// Returns the number that was entered, or 0 when no number could be read
// (end of input, stream already failed, or non-numeric text). 0 matches no
// menu entry.
int ReadInputShapeChoice()
{
    std::cout << "Choose what shape you want to calculate" << std::endl;
    std::cout << "1 = Rectangular solid" << std::endl;
    std::cout << "2 = Cylinder" << std::endl;
    std::cout << "3 = Cone" << std::endl;
    std::cout << "4 = Sphere" << std::endl;
    std::cout << "5 = Square based pyramid" << std::endl;

    // Initialise before reading: when the stream is already at EOF or in a
    // failed state the extraction does not write to its argument at all, and
    // returning an indeterminate int is undefined behaviour.
    int choice = 0;
    if (!(std::cin >> choice))
    {
        return 0;
    }
    return choice;
}

void readshapedimension(int choice)
{
    switch (choice)
    {
    case 1:
    {
        int length, width, height;
        cout << "You have chosen rectuangular solid" << endl;
        cout << "Enter the values for length width and height" << endl;
        cin >> length >> width >> height;
        break;
    }
    case 2:
    {
        int radius, height;
        cout << "You have chosen cylinder" << endl;
        cout << "Enter the values for radius and height" << endl;
        cin >> radius >> height;
        break;
    }
    case 3:
    {
        int radius, height;
        cout << "You have chosen cone" << endl;
        cout << "Enter the values for radius and height" << endl;
        cin >> radius >> height;
        break;
    }
    case 4:
    {
        int radius;
        cout << "You have chosen sphere" << endl;
        cout << "Enter the radius" << endl;
        cin >> radius;
        break;
    }
    case 5:
    {
        int height, base_area;
        cout << "You have chosen square based pyramid" << endl;
        cout << "Enter height and area of the base" << endl;
        cin >> height >> base_area;
        break;
    }
    }
}

// Computes the volume of the shape selected by `choice` (1-5) from the
// file-level dimension variables and returns it. Returns 0 for any other
// value of `choice`.
float CalculateBasicVolume(int choice)
{
    // Accumulate in double so the fractional part survives until the final
    // conversion to the declared float return type.
    double volume = 0.0;

    switch (choice)
    {
    case 1:
        volume = rectangular_solid(length, width, height);
        break;
    case 2:
        volume = cylinder(radius, height);
        break;
    case 3:
        volume = cone(radius, height);
        break;
    case 4:
        volume = sphere(radius);
        break;
    case 5:
        volume = square_based_pyramid(height, base_area);
        break;
    default:
        break;
    }

    // The return statement sits after the switch so every path reaches it.
    return static_cast<float>(volume);
}
// Writes the file-level `result` to standard output, prefixed by the sentence
// for the shape selected by `choice` (1-5). Prints nothing for other values.
void PrintResult(int choice)
{
    // Sentence prefixes indexed by (choice - 1).
    static const char* const kPrefixes[] = {
        "The volume of the rectangular solid is ",
        "the volume of the cylinder is ",
        "The volume of the cone is ",
        "The volume of the sphere is ",
        "the volume of the square based pyramid is ",
    };

    if (choice < 1 || choice > 5)
    {
        return;
    }

    std::cout << kPrefixes[choice - 1] << result << std::endl;
}



// Entry point: ask which shape to use, read its dimensions, compute the
// volume into the file-level `result`, and print it.
int main() {
    const int choice = ReadInputShapeChoice();
    readshapedimension(choice);
    result = CalculateBasicVolume(choice);
    PrintResult(choice);

    return 0;
}

repoll table - 两个实现都必需

-- Table `poll`: one row per poll, identified by an auto-increment `id`.
-- NOTE(review): `creator_id` presumably refers to a user row; no FOREIGN KEY is declared here.
CREATE TABLE `poll` (
`id` int(1) unsigned NOT NULL AUTO_INCREMENT,
`creator_id` int(1) unsigned NOT NULL,
`date_created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`question` varchar(255) NOT NULL,
`num_of_responses` int(1) unsigned DEFAULT NULL,
`num_of_answers` enum('2','3','4','5') NOT NULL,
PRIMARY KEY (`id`),
-- Two separate single-column secondary indexes.
KEY `creator_id` (`creator_id`),
KEY `date_created` (`date_created`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

下表

-- Table `repoll`: one row per (repoller_id, poll_id, date_created) entry.
-- No PRIMARY KEY or UNIQUE constraint is declared, so this definition does not
-- prevent duplicate rows.
CREATE TABLE `repoll` (
`repoller_id` int(1) unsigned NOT NULL,
`poll_id` int(1) unsigned NOT NULL,
`date_created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- Two separate single-column secondary indexes.
KEY `repoller_id` (`repoller_id`),
KEY `poll_id` (`poll_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

user_feed表 - 仅用于第二次实现

-- Table `following`: (follower, followee) pairs.
-- No PRIMARY KEY or UNIQUE constraint is declared, so this definition does not
-- prevent duplicate pairs or rows where follower = followee.
CREATE TABLE `following` (
`follower` int(1) unsigned NOT NULL,
`followee` int(1) unsigned NOT NULL,
-- Two separate single-column secondary indexes.
KEY `follower` (`follower`),
KEY `followee` (`followee`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

第一个实现：不需要user_feed表，但查询似乎比实现二中的查询计算成本更高。

-- Table `user_feed`: feed entries per user; `repoller_id` is nullable.
-- NOTE(review): presumably a NULL `repoller_id` marks an original post rather than a repoll - confirm.
-- No PRIMARY KEY or UNIQUE constraint is declared.
CREATE TABLE `user_feed` (
`user_id` int(1) unsigned NOT NULL,
`poll_id` int(1) unsigned NOT NULL,
`repoller_id` int(1) unsigned DEFAULT NULL,
`date_created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- Two separate single-column secondary indexes.
KEY `user_id` (`user_id`),
KEY `date_created` (`date_created`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

第二种实现：需要 user_feed 表和 repoll 表。每当有人发布或转发（repoll）内容时，我都会向 user_feed 表中为该发布者的每个粉丝各添加一条记录。对任何一个用户，我在 user_feed 表中只保留 120 条记录：如果有新帖子发布而该用户在 user_feed 表中已有 120 条记录，则删除该用户最旧的一条记录并将其添加到 repoll 表中，由新记录取而代之。如果用户请求的记录数多于 user_feed 表中现有的记录数，则改用第一种实现来检索多出的部分。

-- Feed for follower 23: the 120 newest entries posted by users that 23 follows.
-- The derived table P stacks every poll row and every repoll row (poll_id and
-- repoller_id are exposed as id and creator_id) before it is matched against
-- `following`.
SELECT P.id, P.creator_id, P.date_created
FROM
following f JOIN
(
    SELECT id, creator_id, date_created
    FROM poll
    UNION ALL
    SELECT poll_id, repoller_id, date_created
    FROM repoll
) AS P(id, creator_id, date_created)
ON f.followee=P.creator_id
AND f.follower=23
ORDER BY P.date_created DESC
LIMIT 120;

Answer 1

展开原始查询后，看起来该查询要查找的是：

由 23 创建的民意调查
由 23 转发（repoll）的民意调查
由 23 所关注的人创建的民意调查
由 23 所关注的人转发（repoll）的民意调查

假设 `following` 表中的行有约束，使得 23 不能关注他自己（即不允许出现 follower = followee 的行）

并且假设同一用户无法在同一时刻重复转发（repoll）同一个民意调查，也就是说，(poll_id, creator_id, created_on) 元组在 repoll 表中是唯一的

（可能还有其他一些我尚未确定的情况......）

它看起来像四个不同的集合：

1）由 23 创建的民意调查

 -- Set 1: polls whose creator_id is 23, newest first, at most 80 rows.
 -- repoller_id is returned as NULL so the column list lines up with the repoll sets.
 SELECT p.id
      , p.creator_id
      , p.created_on
      , NULL AS repoller_id
   FROM poll p
  WHERE p.creator_id = 23
  ORDER BY p.created_on DESC LIMIT 80

2）由 23 转发（repoll）的民意调查

 -- Set 2: repoll rows whose repoller_id is 23, joined to the poll they refer to.
 -- The timestamp and the sort order come from the repoll row, not from the poll.
 SELECT p.id
      , p.creator_id
      , r.created_on
      , r.repoller_id
   FROM poll p
   JOIN repoll r
     ON r.poll_id = p.id
  WHERE r.repoller_id = 23
  ORDER BY r.created_on DESC LIMIT 80

3）由 23 所关注的人创建的民意调查

 -- Set 3: polls created by any followee of follower 23, newest first, at most 80 rows.
 SELECT p.id
      , p.creator_id
      , p.created_on
      , NULL AS repoller_id
   FROM poll p
   JOIN following f
     ON f.followee = p.creator_id
    AND f.follower = 23
    AND f.follower <> f.followee  -- only needed if we don't disallow 23 to follow 23
  ORDER BY p.created_on DESC LIMIT 80

4）由 23 所关注的人转发（repoll）的民意调查

 -- Set 4: repoll rows made by any followee of follower 23, joined to the poll
 -- they refer to; newest repoll first, at most 80 rows.
 -- creator_id is read from poll (alias p): the repoll table has no creator_id column.
 SELECT p.id
      , p.creator_id
      , r.created_on
      , r.repoller_id
   FROM poll p
   JOIN repoll r
     ON r.poll_id = p.id
   JOIN following f
     ON f.followee = r.repoller_id
    AND f.follower = 23
    AND f.follower <> f.followee   -- only needed if we don't disallow 23 to follow 23
  ORDER BY r.created_on DESC LIMIT 80

如果需要消除重复的可能性（例如因为我们没有对表有适当的UNIQUE约束），我们可以将GROUP BY子句添加到所需的查询中。

我们可以对每个查询单独调优，用 EXPLAIN 确认有合适的索引可用并且确实被使用。

然后我们可以用 UNION ALL 集合运算符把这些查询组合在一起。我的偏好是不在一个查询中重复使用任何表别名：即使这对 MySQL 来说没有歧义，给每个表引用一个唯一的别名也能让语句更容易阅读，尤其是让 EXPLAIN 的输出更容易解读。

由于原始查询按 created_on 降序排序，并限制最多返回 80 行，因此我们可以对每个子查询应用相同的排序和限制。这样在对整个结果集排序时，最多只有 320 行（= 4 × 80 行）。为了使结果更具确定性，我们在 ORDER BY 中加入第二个表达式。

-- Combined feed for user 23: four independently sorted and limited branches are
-- concatenated with UNION ALL, then the outer ORDER BY ... LIMIT picks the 80
-- newest rows overall. Each branch uses its own table aliases (p1/r2/f3/...).
(
  -- Branch 1: polls created by 23.
  SELECT p1.id
       , p1.creator_id
       , p1.created_on
       , NULL AS repoller_id
    FROM poll p1
   WHERE p1.creator_id = 23         -- query parameter
   ORDER BY p1.created_on DESC, p1.id DESC LIMIT 80
)
UNION ALL
(
  -- Branch 2: repolls made by 23.
  SELECT p2.id
       , p2.creator_id
       , r2.created_on
       , r2.repoller_id
    FROM poll p2
    JOIN repoll r2
      ON r2.poll_id = p2.id
   WHERE r2.repoller_id = 23         -- query parameter
   ORDER BY r2.created_on DESC, r2.poll_id DESC LIMIT 80
)
UNION ALL
(
  -- Branch 3: polls created by followees of 23.
  SELECT p3.id
       , p3.creator_id
       , p3.created_on
       , NULL AS repoller_id
    FROM poll p3
    JOIN following f3
      ON f3.followee = p3.creator_id
     AND f3.follower = 23            -- query parameter
     AND f3.follower <> f3.followee  -- only needed if we allow 23 to follow 23
   ORDER BY p3.created_on DESC, p3.id DESC LIMIT 80
)
UNION ALL
(
  -- Branch 4: repolls made by followees of 23.
  SELECT p4.id
       , p4.creator_id
       , r4.created_on
       , r4.repoller_id
    FROM poll p4
    JOIN repoll r4
      ON r4.poll_id = p4.id
    JOIN following f4
      ON f4.followee = r4.repoller_id
     AND f4.follower = 23            -- query parameter
     AND f4.follower <> f4.followee  -- only needed if we allow 23 to follow 23
   ORDER BY r4.created_on DESC, r4.poll_id DESC LIMIT 80
)
-- Final merge of at most 4 x 80 rows; id is the tie-breaker for equal timestamps.
ORDER BY created_on DESC, id DESC LIMIT 80

即使此查询中的SQL文本比您发布的两个选项中的任何一个都要长，但我认为在给定合适的索引的情况下，我们可以更好地获得可预测的性能。

用以下索引代替单列 `creator_id` 上的索引：

-- Composite indexes whose column order follows each branch's equality filter
-- and then its ORDER BY columns.
-- repoll has no creator_id or id column; the queries filter it on repoller_id
-- and break ties on poll_id, so those are the columns indexed here.
-- NOTE: the question's DDL names the timestamp column date_created; use that
-- name in place of created_on when applying these statements to that schema.
CREATE INDEX poll_IX1 ON poll (creator_id, created_on, id) ;
CREATE INDEX repoll_IX1 ON repoll (repoller_id, created_on, poll_id) ;

在 `following` 表上添加一个唯一约束，例如

-- Composite primary key: each (follower, followee) pair can appear only once.
ALTER TABLE `following` ADD PRIMARY KEY (follower, followee)

不是针对此查询，而是针对可能在系统中使用的其他查询......

-- Reverse-direction unique index (followee first) for lookups by followee.
-- The columns of `following` are named followee and follower, without an _id suffix.
CREATE UNIQUE INDEX following_UX1 ON following (followee, follower)

并删除 `following` 表上（现在已冗余的）单列索引。

还要考虑添加适当的外键约束。

Answer 2

可以改进实施1：

-- Variant of implementation 1 in which each UNION branch is sorted and capped
-- at 120 rows before the derived table P is joined to `following`.
-- NOTE(review): the inner LIMITs run before the join, so they keep the 120
-- newest rows of poll/repoll overall, not the 120 newest from users that 23
-- follows - confirm this matches the intended result.
SELECT  P.id, P.creator_id, P.date_created
    FROM  following f
    JOIN (
          ( SELECT  id, creator_id, date_created
                FROM  poll
                ORDER BY  date_created DESC
                LIMIT  120
          ) UNION  ALL 
          (  SELECT  poll_id, repoller_id, date_created
                FROM  repoll
                ORDER BY  date_created DESC
                LIMIT  120
          )
         ) AS P   ON f.followee=P.creator_id
      AND  f.follower=23
    ORDER BY  P.date_created DESC
    LIMIT  120;

poll needs:  INDEX(creator_id, date_created); repoll needs:  INDEX(repoller_id, date_created)

说明：在大多数情况下，看似冗余的ORDER BY .. LIMIT ..子句实际上是一种优化。在SELECTs中的UNION中，它最小化了UNION将创建的临时表中存储的行数。临时表不超过2 * 120行;如果表有数百万行，这一点很重要。外部查询还需要子句，以便将子列表混合在一起，并将结果减少到只需要的120个。

改进索引，使其成为“覆盖索引”：

-- `following` redefined with a composite primary key on (follower, followee)
-- and a second index in the opposite column order, (followee, follower).
CREATE TABLE `following` (
  `follower` int unsigned NOT NULL,
  `followee` int unsigned NOT NULL,
  PRIMARY KEY(`follower`, followee),
  INDEX      (`followee`, follower)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

说明：我假设 follower 和 followee 的组合是“唯一的”，因此可以作为 PRIMARY KEY。这样做的性能优势在于，某个 follower 的所有 followee 都能在相邻的行中找到，反之亦然。你原来的做法要慢得多，因为它必须先查索引，再读取数据行才能取得两个字段。我给出的索引既是“覆盖”的，也是“聚集”的。

你的第二种实现不是也应该有 LIMIT 吗？

uf需要INDEX(user_id, date_created)

至于你原来的问题“哪一个更好”……我看不出第二个查询做的是正确的事情。

关于从某些用户获取最新帖子的可扩展性的问题是：两种不同的实现

2 个答案: