我将收到一堆目录和文件的完整路径,需要据此建立一个目录结构。此结构将以邻接列表的形式存储在SQL Server中,表定义如下:
CREATE TABLE [dbo].[DirTreeEntry]
(
[id] [int] IDENTITY(1,1) NOT NULL,
[full_path] [nvarchar](2048) NOT NULL,
[name] [nvarchar](255) NOT NULL,
[is_file] [bit] NOT NULL,
[is_root] [bit] NOT NULL,
[parent_id] [int] NULL,
[source_id] [int] NULL
)
以id
为主键,name
在这种情况下是直接名称,而不是完整路径,source_id
引用源表条目(如果它明确存在)。
源数据将在此表中:
-- Source table holding the incoming directory and file entries.
CREATE TABLE [dbo].[dir_path]
(
[dir_path_id] [int] IDENTITY(1,1) NOT NULL,
-- Path of the containing directory; an entry's full path is directory_path combined with name.
[directory_path] [nvarchar](2048) NOT NULL,
-- Immediate name of the entry (last path component).
[name] [nvarchar](255) NOT NULL,
-- 1 for file rows and 0 for directory rows in the sample data.
[file_flag] [bit] NOT NULL,
-- NULL for the directory row in the sample data.
[filesize] [bigint] NULL,
[create_date] [datetime] NOT NULL
)
完整路径实际上是directory_path
和name
的组合。
鉴于[dbo].[dir_path]
中的以下条目:
1, '/root/subdir1/subdir2', 'subdir3', 0, NULL, '9/9/2014'
2, '/root/subdir1/subdir2/subdir3', 'somefile.txt', 1, 25, '9/9/2014'
3, '/etc/rc.d', 'rc.local', 1, 10, '9/9/2014'
我需要在[dbo].[DirTreeEntry]
中得到以下条目:
1,'/','/',0,1, NULL,NULL
2,'/root','root',0,0,1,NULL
3,'/root/subdir1','subdir1',0,0,2,NULL
4,'/root/subdir1/subdir2','subdir2',0,0,3,NULL
5,'/root/subdir1/subdir2/subdir3','subdir3',0,0,4,1
6,'/root/subdir1/subdir2/subdir3/somefile.txt','somefile.txt',1,0,5,2
7,'/etc','etc',0,0,1,NULL
8,'/etc/rc.d','rc.d',0,0,7,NULL
9,'/etc/rc.d/rc.local','rc.local',1,0,8,3
以下C#代码构建的层次结构正是我想要的:
/// <summary>
/// A node in a tree of path components. Each node owns its direct children,
/// keyed by the child's path component.
/// </summary>
public class Node
{
    // Direct children of this node, keyed by path component.
    private readonly IDictionary<string, Node> _nodes =
        new Dictionary<string, Node>();

    /// <summary>The single path component this node represents.</summary>
    public string Path { get; set; }

    /// <summary>
    /// Adds a backslash-separated path beneath this node, creating a child
    /// node for every component that does not exist yet.
    /// </summary>
    /// <param name="path">The path to add; empty components are ignored.</param>
    public void AddPath(string path)
    {
        char[] charSeparators = new char[] {'\\'};

        // Parse into a sequence of parts.
        string[] parts = path.Split(charSeparators,
            StringSplitOptions.RemoveEmptyEntries);

        // The current node. Start with this.
        Node current = this;

        // Iterate through the parts.
        foreach (string part in parts)
        {
            // The child node.
            Node child;

            // Does the part exist in the current node? If
            // not, then add.
            if (!current._nodes.TryGetValue(part, out child))
            {
                // Add the child.
                child = new Node {
                    Path = part
                };

                // Add to the dictionary.
                current._nodes[part] = child;
            }

            // Set the current to the child.
            current = child;
        }
    }
}
但是,我可能会获得包含100,000多个条目的源数据,而且我不想在C#端的内存中构建该结构,然后必须将所有这些数据发送到SQL。我已经有了一种快速的方法将源数据导入数据库,我现在需要一个存储过程来根据源数据构建[dbo].[DirTreeEntry]
表。
任何指导都将不胜感激!
答案 0 :(得分:1)
这应该可以解决问题。它可以运行一次以上,因为它不会添加任何已经存在的内容,但它不会处理并发更新。
-- Builds DirTreeEntry from dir_path by walking every source path one
-- component at a time. Components that already exist are skipped, so the
-- script can be run more than once; it does not handle concurrent updates.
DECLARE @ParentID AS int;
DECLARE @Path AS nvarchar(2048);
DECLARE @Name AS nvarchar(255);
DECLARE @LastPart AS nvarchar(255);
DECLARE @PartialPath AS nvarchar(2048);

-- Loop through the input table.
-- LOCAL keeps the cursor name scoped to this batch; FAST_FORWARD because the
-- cursor is only read forward and never updated through.
DECLARE paths CURSOR LOCAL FAST_FORWARD FOR
    SELECT p.directory_path, p.name
    FROM dir_path AS p;
OPEN paths;
FETCH NEXT FROM paths INTO @Path, @LastPart;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Reset loop variables
    SET @ParentID = NULL;
    SET @PartialPath = '';

    -- Split the full path into parts and loop through those.
    -- NOTE(review): relies on dbo.splitstring (not shown here) returning the
    -- parts in path order and returning an empty first part for a leading
    -- '/' -- confirm against the function definition.
    DECLARE parts CURSOR LOCAL FAST_FORWARD FOR
        SELECT Value
        FROM dbo.splitstring(@Path + '/' + @LastPart, '/');
    OPEN parts;
    FETCH NEXT FROM parts INTO @Name;
    WHILE @@FETCH_STATUS = 0
    BEGIN
        -- Build the path for this iteration; avoid a double slash directly
        -- below the root.
        SET @PartialPath = (CASE
                                WHEN @PartialPath = '/' THEN '/' + @Name
                                ELSE @PartialPath + '/' + @Name
                            END);

        -- An empty part stands for the root directory itself.
        IF @Name = ''
        BEGIN
            SET @Name = '/';
        END

        -- Insert the new path when it doesn't exist yet
        INSERT INTO DirTreeEntry (name, full_path, parent_id, is_file, is_root)
        SELECT @Name, @PartialPath, @ParentID, 0, 0
        WHERE NOT EXISTS (SELECT ID FROM DirTreeEntry WHERE full_path = @PartialPath);

        -- Store the id for the next level
        SELECT @ParentID = ID FROM DirTreeEntry WHERE full_path = @PartialPath;

        FETCH NEXT FROM parts INTO @Name;
    END
    CLOSE parts;
    DEALLOCATE parts;

    FETCH NEXT FROM paths INTO @Path, @LastPart;
END
CLOSE paths;
DEALLOCATE paths;

-- Update missing values in target table.
-- Entries directly below the root have directory_path = '/', so their full
-- path is '/' + name; plain concatenation would give '//name' and never match.
UPDATE T
SET source_id = S.dir_path_id,
    is_file = S.file_flag
FROM DirTreeEntry AS T
INNER JOIN dir_path AS S
    ON T.full_path = CASE
                         WHEN S.directory_path = '/' THEN '/' + S.name
                         ELSE S.directory_path + '/' + S.name
                     END;

UPDATE DirTreeEntry SET is_root = 1 WHERE full_path = '/';
编辑:还可以更快地完成。使用下面这个由SplitString函数改编而来的版本:
-- Inline table-valued function that breaks a '/'-delimited path into one row
-- per component:
--   [Path]  the prefix of @List that ends with that component
--   [Name]  the component itself
-- A leading '/' produces the row ('/', '/'). Both columns are passed through
-- LTRIM/RTRIM.
-- Candidate positions come from ROW_NUMBER() over sys.all_objects, so
-- positions beyond that view's row count are never examined.
CREATE FUNCTION [dbo].[SplitPath]
(
    @List NVARCHAR(MAX)
)
RETURNS TABLE
AS
RETURN
(
    SELECT parts.[Path], parts.[Name]
    FROM
    (
        SELECT
            -- SUBSTRING from position 0 with length next_slash returns the
            -- characters before the delimiter; the ELSE 2 branch keeps the
            -- bare root '/'.
            LTRIM(RTRIM(SUBSTRING(@List, 0,
                (CASE WHEN sep.next_slash > 1 THEN sep.next_slash ELSE 2 END)))) AS [Path],
            -- Characters from the component start up to the next delimiter;
            -- the ELSE 1 branch returns a single character when the
            -- component is empty.
            LTRIM(RTRIM(SUBSTRING(@List, positions.[Number],
                (CASE WHEN sep.next_slash - positions.[Number] > 0
                      THEN sep.next_slash - positions.[Number]
                      ELSE 1 END)))) AS [Name]
        FROM
        (
            SELECT ROW_NUMBER() OVER (ORDER BY name) AS [Number]
            FROM sys.all_objects
        ) AS positions
        CROSS APPLY
        (
            -- Position of the delimiter that ends the component starting at
            -- [Number]; the appended '/' ensures the last component has one.
            SELECT CHARINDEX('/', @List + '/', positions.[Number]) AS next_slash
        ) AS sep
        WHERE positions.[Number] <= LEN(@List)
          -- Keep only positions that immediately follow a delimiter.
          AND SUBSTRING('/' + @List, positions.[Number], LEN('/')) = '/'
    ) AS parts
);
这会吐出部分路径和名称,并修复'/'特殊情况。此输出可以与dir_path表交叉应用,在DirTreeEntry中创建基本条目。然后可以使用缺少的信息来丰富DirTreeEntry。
-- Set-based build of DirTreeEntry from dir_path using dbo.SplitPath.

-- Step 1: one row per distinct path prefix of every source entry.
-- Entries directly below the root have directory_path = '/', so their full
-- path is '/' + name; plain concatenation would hand '//name' to SplitPath.
-- The NOT EXISTS guard skips prefixes that are already present, so the
-- statement can be re-run without creating duplicates.
INSERT INTO DirTreeEntry (full_path, name, is_file, is_root)
SELECT DISTINCT d.[Path], d.[Name], 0, 0
FROM dir_path AS p
CROSS APPLY dbo.SplitPath(
    CASE
        WHEN p.directory_path = '/' THEN '/' + p.name
        ELSE p.directory_path + '/' + p.name
    END
) AS d
WHERE NOT EXISTS (SELECT 1 FROM DirTreeEntry AS e WHERE e.full_path = d.[Path])
ORDER BY d.[Path];

-- Step 2: link rows that correspond to an explicit source entry and copy
-- its file flag.
UPDATE T
SET source_id = S.dir_path_id,
    is_file = S.file_flag
FROM DirTreeEntry AS T
INNER JOIN dir_path AS S
    ON T.full_path = CASE
                         WHEN S.directory_path = '/' THEN '/' + S.name
                         ELSE S.directory_path + '/' + S.name
                     END;

-- Step 3: flag the root directory.
UPDATE DirTreeEntry SET is_root = 1 WHERE full_path = '/';

-- Step 4: resolve parents. A row's parent is the row whose full_path plus
-- '/' plus the child's name equals the child's full_path (no extra '/' when
-- the parent is the root).
UPDATE A
SET parent_id = B.id
FROM DirTreeEntry AS A
INNER JOIN DirTreeEntry AS B
    ON A.full_path = CASE
                         WHEN B.full_path = '/' THEN B.full_path + A.name
                         ELSE B.full_path + '/' + A.name
                     END;
可能还有进一步优化的空间,但这应该明显加快。