好,
我正在使用PowerShell中的程序来操纵我创建的SQLite数据库。我从来没有编写过认真的应用程序来利用数据库,所以现在我对优化查询非常感兴趣,因此我对反馈非常感兴趣。我的主要问题是我有很多数据要包含在一个单独的表中,该表可能已经存在或不存在。我的所有研究似乎真的导致执行INSERT并让UNIQUE约束对其进行分类,而不是对新记录进行选择,这似乎是两次表扫描并且效率很低。因此,我的解决方案是创建一个临时表,将包含所需数据的表从该表插入到临时表中,如果数据不在临时表中,则执行INSERT。今晚我喝了几杯酒,还没有测试代码,所以请不要批评小错别字,我只是想知道我的方法是否适合午餐,如果可以,请提供更好的指导。
我的表如下所示:
CREATE TABLE Processes (
pk INTEGER PRIMARY KEY AUTOINCREMENT
UNIQUE,
hostname INTEGER NOT NULL,
artifacttype INTEGER REFERENCES ArtifactType (pk),
processname INTEGER REFERENCES ProcessesName (pk),
filelocation INTEGER NOT NULL
REFERENCES files (pk),
pid INTEGER,
ppid INTEGER,
starttime INTEGER,
stoptime INTEGER,
token STRING,
logonid INTEGER,
exitstatus INTEGER,
threadcount INTEGER,
commandline INTEGER REFERENCES ProcessesCommandline (pk),
user INTEGER REFERENCES users (pk),
PeakVirtualSize INTEGER,
VirtualSize INTEGER,
PeakWorkingSetSize INTEGER,
suspicious BOOLEAN,
malicious BOOLEAN
);
交易:
@"
CREATE TEMPORARY TABLE IF NOT EXISTS Results(pk INTEGER, data TEXT);
INSERT INTO Results(pk, data) VALUES ((SELECT pk, name FROM ProcessesName WHERE name = @processname));
INSERT INTO ProcessesName(name) VALUES (SELECT @processname WHERE NOT EXISTS (SELECT pk FROM Results WHERE data = @processname));
INSERT INTO Results (pk, data) VALUES ((SELECT last_insert_row_id, @processname WHERE NOT EXISTS (SELECT pk FROM Results WHERE data =@processname)));
INSERT INTO Results(pk, data) VALUES ((SELECT pk, file FROM Files WHERE file = @filelocation));
INSERT INTO Files(file) VALUES (SELECT @filelocation WHERE NOT EXISTS (SELECT pk FROM Results WHERE data = @filelocation));
INSERT INTO Results (pk, data) VALUES ((SELECT last_insert_row_id, @filelocation WHERE NOT EXISTS (SELECT pk FROM Results WHERE data =@filelocation)));
INSERT INTO Results(pk, data) VALUES ((SELECT pk, commandline FROM ProcessesCommandline WHERE commandline = @commandline));
INSERT INTO ProcesseCommandline(commandline) VALUES (SELECT @commandline WHERE NOT EXISTS (SELECT pk FROM Results WHERE data = @commandline));
INSERT INTO Results (pk, data) VALUES ((SELECT last_insert_row_id, @filelocation WHERE NOT EXISTS (SELECT pk FROM Results WHERE data =@commandline)));
INSERT INTO Results(pk, data) VALUES ((SELECT pk, SID FROM Users WHERE SID = @SID));
INSERT INTO Users(SID) VALUES (SELECT @SID WHERE NOT EXISTS (SELECT pk FROM Results WHERE data = @filelocation));
INSERT INTO Results (pk, data) VALUES ((SELECT last_insert_row_id, @SID WHERE NOT EXISTS (SELECT pk FROM Results WHERE data =@SID)));
INSERT INTO processes(hostname, artifacttype, processname, filelocation, pid, ppid, starttime, threadcount, commandline, user, PeakVirtualSize, VirtualSize, PeakWorkingSetSize)
VALUES (@hostname, @artifacttype, (SELECT pk FROM Results WHERE data = @processname), (SELECT pk FROM Results WHERE data @filelocation), @pid, @ppid, @starttime, @threadcount, (SELECT pk FROM Results where data = @commandline), SELECT pk FROM Results WHERE data = @SID, @PeakVirtualSize, @VirtualSize, @PeakWorkingSetSize);
DROP TABLE Results;
"@
*在应用程序端跟踪数据的外键很少,因此不需要复杂的查询。
所以我的核心问题是,有没有更有效的方法?
谢谢大家!