我如何在 MySQL 中创建两个视图,将数据按 70:30 的比例拆分:一个用于训练数据,另一个用于测试数据?
-- View: training_data
-- Exposes each post's content together with a binary `tag` label:
--   tag = 1 when the title or the content mentions "covid" or "corona",
--   tag = 0 otherwise (including when both columns are NULL).
--
-- A single pattern such as '%covid%corona%covid19%' only matches text that
-- contains all three words in that exact order, so each keyword is tested
-- with its own LIKE. '%covid%' already covers "covid19".
--
-- NOTE: this view returns every row of Posts; no row filter is applied here,
-- so it is not yet restricted to a 70% share of the data.
CREATE VIEW training_data
AS
SELECT
    Posts.post_content AS post_content,
    CASE
        WHEN Posts.post_title LIKE '%covid%'
          OR Posts.post_title LIKE '%corona%'
          OR Posts.post_content LIKE '%covid%'
          OR Posts.post_content LIKE '%corona%'
        THEN 1
        ELSE 0
    END AS tag
FROM Posts;
-- View: test_data
-- Exposes each post's content together with a binary `tag` label:
--   tag = 1 when the title or the content mentions "covid" or "corona",
--   tag = 0 otherwise (including when both columns are NULL).
--
-- A single pattern such as '%covid%corona%covid19%' only matches text that
-- contains all three words in that exact order, so each keyword is tested
-- with its own LIKE. '%covid%' already covers "covid19".
--
-- NOTE: this view returns every row of Posts; no row filter is applied here,
-- so it is not yet restricted to a 30% share of the data.
CREATE VIEW test_data
AS
SELECT
    Posts.post_content AS post_content,
    CASE
        WHEN Posts.post_title LIKE '%covid%'
          OR Posts.post_title LIKE '%corona%'
          OR Posts.post_content LIKE '%covid%'
          OR Posts.post_content LIKE '%corona%'
        THEN 1
        ELSE 0
    END AS tag
FROM Posts;
答案 0 :(得分:0)
-- View: training_data (about 70% of Posts)
-- Numbers the rows of Posts by id and keeps the rows whose row number,
-- modulo 10, falls in 7 of the 10 possible residues.
--
-- Columns:
--   post_content  the post body, unchanged
--   tag           1 when the title or the content mentions "covid" or
--                 "corona", 0 otherwise (including when both are NULL)
--
-- A single pattern such as '%covid%corona%covid19%' only matches text that
-- contains all three words in that exact order, so each keyword is tested
-- with its own LIKE. '%covid%' already covers "covid19".
--
-- NOTE(review): the split is deterministic only if Posts.id is unique --
-- confirm against the table definition. Row numbers shift when rows are
-- inserted or deleted, so a given post may move between splits over time.
CREATE VIEW training_data
AS
WITH numbered_posts AS (
    SELECT
        Posts.post_content AS post_content,
        CASE
            WHEN Posts.post_title LIKE '%covid%'
              OR Posts.post_title LIKE '%corona%'
              OR Posts.post_content LIKE '%covid%'
              OR Posts.post_content LIKE '%corona%'
            THEN 1
            ELSE 0
        END AS tag,
        ROW_NUMBER() OVER (ORDER BY id) AS rn
    FROM Posts
)
SELECT
    post_content,
    tag
FROM numbered_posts
-- 7 of every 10 consecutive row numbers; the remaining residues are 2, 5, 8.
WHERE rn MOD 10 IN (0, 1, 3, 4, 6, 7, 9);
和
-- View: test_data (about 30% of Posts)
-- Numbers the rows of Posts by id and keeps the rows whose row number,
-- modulo 10, falls in 3 of the 10 possible residues.
--
-- Columns:
--   post_content  the post body, unchanged
--   tag           1 when the title or the content mentions "covid" or
--                 "corona", 0 otherwise (including when both are NULL)
--
-- A single pattern such as '%covid%corona%covid19%' only matches text that
-- contains all three words in that exact order, so each keyword is tested
-- with its own LIKE. '%covid%' already covers "covid19".
--
-- NOTE(review): the split is deterministic only if Posts.id is unique --
-- confirm against the table definition. Row numbers shift when rows are
-- inserted or deleted, so a given post may move between splits over time.
CREATE VIEW test_data
AS
WITH numbered_posts AS (
    SELECT
        Posts.post_content AS post_content,
        CASE
            WHEN Posts.post_title LIKE '%covid%'
              OR Posts.post_title LIKE '%corona%'
              OR Posts.post_content LIKE '%covid%'
              OR Posts.post_content LIKE '%corona%'
            THEN 1
            ELSE 0
        END AS tag,
        ROW_NUMBER() OVER (ORDER BY id) AS rn
    FROM Posts
)
SELECT
    post_content,
    tag
FROM numbered_posts
-- 3 of every 10 consecutive row numbers; the remaining residues are
-- 0, 1, 3, 4, 6, 7, 9.
WHERE rn MOD 10 IN (2, 5, 8);
请注意:由于需要全表扫描,这两个查询都会很慢。我建议您在表结构中添加一个基于 tag
表达式的生成列,以改善性能。