我的数据库存储在一个SQLite文件中,其中某些SELECT查询会返回数百万行。
我想编写一个并行应用程序,其中每个进程将:
我的问题在于算法的第二部分:如何修改我的SELECT语句,使其只返回一部分行?
我考虑过OFFSET / LIMIT,但这需要我事先知道最后一行的行号(即总行数),而我并不知道。再加上查询的运行时间很长,我不想为了获取总行数而把同一个查询运行两次。
答案 0 :(得分:0)
考虑到问题中的这句话:
我考虑过OFFSET / LIMIT,但这意味着我需要知道最后一行的行号,而我并不知道。
您可以用一个小表来维护计数:每个被统计的表在其中占一行,该小表包含两列——表名(如果只有一个表则不需要)和行数。用两个触发器(TRIGGER)即可自动维护该计数。
如果使用了AUTOINCREMENT,另一种替代方法(不那么精确,只能得到行数的大致概念)是读取sqlite_sequence表中的seq列。
也许考虑以下两个示例:-
-- Reset the demo schema so the script can be re-run from scratch.
-- The triggers are dropped before the tables they are attached to.
DROP TRIGGER IF EXISTS newmain1row;
DROP TRIGGER IF EXISTS deletedmain1row;
DROP TABLE IF EXISTS rowcount;
DROP TABLE IF EXISTS main1;
DROP TABLE IF EXISTS main2;

-- main1: plain INTEGER PRIMARY KEY table; its row count is kept in the
-- rowcount table (maintained by the newmain1row / deletedmain1row triggers).
CREATE TABLE IF NOT EXISTS main1 (id INTEGER PRIMARY KEY, mycolumn TEXT);

-- main2: same shape but with AUTOINCREMENT, so SQLite records the highest id
-- it has allocated for this table in sqlite_sequence.
CREATE TABLE IF NOT EXISTS main2 (id INTEGER PRIMARY KEY AUTOINCREMENT, mycolumn TEXT);

-- rowcount: one row per counted table, keyed by table name.
CREATE TABLE IF NOT EXISTS rowcount (tablename TEXT PRIMARY KEY, rowcount INTEGER) WITHOUT ROWID;

-- Seed the counter for main1 at zero. The column list is explicit so the
-- statement does not depend on the physical column order of rowcount.
INSERT INTO rowcount (tablename, rowcount) VALUES ('main1', 0);
-- Insert-side counter maintenance: every row added to main1 bumps the
-- 'main1' entry in rowcount by one.
CREATE TRIGGER newmain1row
    AFTER INSERT ON main1
    FOR EACH ROW
BEGIN
    UPDATE rowcount
       SET rowcount = rowcount + 1
     WHERE tablename = 'main1';
END;
-- Delete-side counter maintenance: every row removed from main1 lowers the
-- 'main1' entry in rowcount by one.
CREATE TRIGGER deletedmain1row
    AFTER DELETE ON main1
    FOR EACH ROW
BEGIN
    UPDATE rowcount
       SET rowcount = rowcount - 1
     WHERE tablename = 'main1';
END;
-- Load 2,000,000 rows into each demo table, one million at a time.
-- Each statement generates the integers 1..1000000 with a recursive CTE and
-- stores them in mycolumn. Target order: main1, main2, then main2, main1.

-- first million into main1
WITH RECURSIVE numbers(n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM numbers
    WHERE n < 1000000
)
INSERT INTO main1 (mycolumn)
SELECT n
FROM numbers;

-- first million into main2
WITH RECURSIVE numbers(n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM numbers
    WHERE n < 1000000
)
INSERT INTO main2 (mycolumn)
SELECT n
FROM numbers;

-- second million into main2
WITH RECURSIVE numbers(n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM numbers
    WHERE n < 1000000
)
INSERT INTO main2 (mycolumn)
SELECT n
FROM numbers;

-- second million into main1
WITH RECURSIVE numbers(n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM numbers
    WHERE n < 1000000
)
INSERT INTO main1 (mycolumn)
SELECT n
FROM numbers;
-- Remove every row whose stored number is a multiple of 50:
-- 40,000 of the 2,000,000 rows in each table.
DELETE FROM main1
WHERE CAST(mycolumn AS INTEGER) % 50 = 0;

DELETE FROM main2
WHERE CAST(mycolumn AS INTEGER) % 50 = 0;
-- Read the two counters back without scanning the large tables.

-- main1: the trigger-maintained value, i.e. the number of rows currently present.
SELECT rowcount
FROM rowcount
WHERE tablename = 'main1';

-- main2: seq is the highest id AUTOINCREMENT has allocated for the table.
-- Deleted rows are not subtracted, so in this script it is an upper bound on
-- the row count rather than the exact figure.
SELECT seq
FROM sqlite_sequence
WHERE name = 'main2';
运行上述脚本得到的日志如下:-
DROP TRIGGER IF EXISTS newmain1row
> OK
> Time: 0.187s
DROP TRIGGER IF EXISTS deletedmain1row
> OK
> Time: 0.084s
DROP TABLE IF EXISTS rowcount
> OK
> Time: 0.153s
DROP TABLE IF EXISTS main1
> OK
> Time: 2.534s
DROP TABLE IF EXISTS main2
> OK
> Time: 2.547s
CREATE TABLE IF NOT EXISTS main1 (id INTEGER PRIMARY KEY, mycolumn TEXT)
> OK
> Time: 0.158s
CREATE TABLE IF NOT EXISTS main2 (id INTEGER PRIMARY KEY AUTOINCREMENT, mycolumn TEXT)
> OK
> Time: 0.167s
-- is it slower/faster than
CREATE TABLE IF NOT EXISTS rowcount (tablename TEXT PRIMARY KEY, rowcount INTEGER) WITHOUT ROWID
> OK
> Time: 0.167s
INSERT INTO rowcount VALUES('main1',0)
> Affected rows: 1
> Time: 0.165s
-- initialise rowcount table
-- Trigger for when a new row is added to the table
CREATE TRIGGER newmain1row
AFTER INSERT ON main1 BEGIN
UPDATE rowcount SET rowcount = rowcount + 1 WHERE tablename = 'main1';
END
> Affected rows: 1
> Time: 0.086s
-- Trigger for when a row is deleted from the table
CREATE TRIGGER deletedmain1row
AFTER DELETE ON main1 BEGIN
UPDATE rowcount SET rowcount = rowcount - 1 WHERE tablename = 'main1';
END
> Affected rows: 1
> Time: 0.096s
-- populate the two example main tables twice so in reverse order 2nd time
-- with a million rows so 2 million per table
WITH RECURSIVE cte1(counter) AS
(
SELECT 1
UNION ALL SELECT counter+1 FROM cte1 WHERE counter < 1000000
)
INSERT INTO main1 (mycolumn) SELECT counter FROM cte1
> Affected rows: 1000000
> Time: 1.199s
WITH RECURSIVE cte1(counter) AS
(
SELECT 1
UNION ALL SELECT counter+1 FROM cte1 WHERE counter < 1000000
)
INSERT INTO main2 (mycolumn) SELECT counter FROM cte1
> Affected rows: 1000000
> Time: 0.811s
WITH RECURSIVE cte1(counter) AS
(
SELECT 1
UNION ALL SELECT counter+1 FROM cte1 WHERE counter < 1000000
)
INSERT INTO main2 (mycolumn) SELECT counter FROM cte1
> Affected rows: 1000000
> Time: 1.058s
WITH RECURSIVE cte1(counter) AS
(
SELECT 1
UNION ALL SELECT counter+1 FROM cte1 WHERE counter < 1000000
)
INSERT INTO main1 (mycolumn) SELECT counter FROM cte1
> Affected rows: 1000000
> Time: 1.177s
DELETE FROM main1 WHERE CAST(mycolumn AS INTEGER) % 50 = 0
> Affected rows: 40000
> Time: 2.82s
DELETE FROM main2 WHERE CAST(mycolumn AS INTEGER) % 50 = 0
> Affected rows: 40000
> Time: 2.649s
-- get the number of rows
SELECT rowcount FROM rowcount WHERE tablename = 'main1'
> OK
> Time: 0s
SELECT seq FROM sqlite_sequence WHERE name = 'main2'
> OK
> Time: 0s
总时间超过16秒。
两个查询的结果分别是1,960,000和2,000,000。