我有一段可以正常运行的代码,它将类列表中某个属性的字符串拆分开;该类为 Dataframe,由 string、string、string 三个属性组成。
现在我声明了一个空的 Dataframe2(string、string[]、string)集合,并使用 Add 向其中添加元素:
/// <summary>
/// Sample program: turns a list of <see cref="Dataframe"/> records into
/// <see cref="Dataframe2"/> records whose text has been split into pieces.
/// </summary>
class Program
{
    /// <summary>
    /// Splits <paramref name="text"/> at every single space character.
    /// </summary>
    /// <param name="text">The string to split.</param>
    /// <returns>The substrings of <paramref name="text"/> delimited by ' '.</returns>
    public static string[] SPString(string text)
    {
        return text.Split(' ');
    }

    // Record types

    /// <summary>Input record: the text is held as one string.</summary>
    public class Dataframe
    {
        public string Name { get; set; }

        public string Text { get; set; }

        public string Cat { get; set; }
    }

    /// <summary>Output record: same as <see cref="Dataframe"/>, but the text is an array of pieces.</summary>
    public class Dataframe2
    {
        public string Name { get; set; }

        public string[] Text { get; set; }

        public string Cat { get; set; }
    }

    /// <summary>
    /// Builds the sample input and converts every entry to a <see cref="Dataframe2"/> in parallel.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        List<Dataframe> doc = new List<Dataframe>
        {
            new Dataframe { Name = "Doc1", Text = "The quick brown cat", Cat = "" },
            new Dataframe { Name = "Doc2", Text = "The big fat cat", Cat = "Two" },
            new Dataframe { Name = "Doc4", Text = "The quick brown rat", Cat = "One" },
            new Dataframe { Name = "Doc3", Text = "Its the cat in the hat", Cat = "Two" },
            new Dataframe { Name = "Doc5", Text = "Mice and rats eat seeds", Cat = "One" },
        };

        // Open question from the author: can this conversion be made more efficient?
        // Results are gathered in a ConcurrentBag because the loop body runs on several threads.
        ConcurrentBag<Dataframe2> doc2 = new ConcurrentBag<Dataframe2>();
        Parallel.ForEach(doc, source =>
        {
            // Name and Cat are copied as-is; only Text is transformed.
            Dataframe2 converted = new Dataframe2
            {
                Name = source.Name,
                Text = SPString(source.Text),
                Cat = source.Cat,
            };
            doc2.Add(converted);
        });
    }
}
有没有更高效的方法,使用并行 LINQ 向列表中添加内容,并让 Dataframe2 沿用那些我没有修改的属性?
答案 0 :(得分:5)
您可以尝试使用 PLINQ 来实现并行处理,同时仍然得到一个 List<T>:
// Do NOT create a List<T> and then fill it from several threads yourself:
// List<T> is not thread-safe. Let PLINQ run the projection in parallel and
// build the list for you.
// Note: without AsOrdered() after AsParallel(), the order of doc2 is not
// guaranteed to match the order of doc.
List<Dataframe2> doc2 = doc
    .AsParallel()
    .Select(entry =>
    {
        // Build a Dataframe2 from the given Dataframe: only Text is transformed
        // (split via SPString); Name and Cat are carried over unchanged.
        string[] splitter = SPString(entry.Text);
        return new Dataframe2 { Name = entry.Name, Text = splitter, Cat = entry.Cat };
    })
    .ToList();