即使使用了Parallel.ForEach,这段代码仍然需要7分钟以上才能运行完。我遍历的“final_products”列表包含大约7000个产品。
/// <summary>
/// Loads every unique product, fetches its stored SURF descriptors, and adds them
/// to the FLANN index in parallel. Prints the total elapsed time to the console.
/// </summary>
public void GenerateTreeFromAllFinalProducts()
{
    XmlSerializer serializer = new XmlSerializer(typeof(ImageFeature<float>[]));
    DSTableAdapters.Products_UniqueTableAdapter pft = new DSTableAdapters.Products_UniqueTableAdapter();
    DS.Products_UniqueDataTable final_products = pft.GetData();

    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    // Hand the table to Parallel.ForEach directly: it partitions the source itself,
    // so wrapping it in AsParallel() only adds a redundant PLINQ layer.
    Parallel.ForEach(final_products, row =>
    {
        // Get SURF data for all images found similar to this image.
        // This is one database round trip per product.
        Types.Products_Unique_SURFRow surfData = GetDataByUniqueProductID(row.id);

        ImageFeature<float>[] row_features;
        using (StringReader xmlReader = new StringReader(Decompress(surfData.SURF)))
        {
            row_features = (ImageFeature<float>[])serializer.Deserialize(xmlReader);
        }

        if (row_features != null)
        {
            // NOTE(review): this runs concurrently on several threads; confirm that
            // flann.AddSurfDescriptors is safe to call in parallel.
            flann.AddSurfDescriptors(row_features, row.id);
        }
    });

    stopwatch.Stop();
    Console.WriteLine("Time elapsed: {0}", stopwatch.Elapsed);
}
花这么长时间正常吗?如果不正常,我该如何优化代码?
GetDataByUniqueProductID(row.id)是对我的数据库的一次调用,返回1行。
/// <summary>
/// Reads the SURF column of at most one Products_Unique_SURF row whose
/// unique_product_id equals <paramref name="rowid"/>.
/// </summary>
/// <param name="rowid">Value matched against the unique_product_id column.</param>
/// <returns>
/// A row whose SURF property holds the value read from the database. If no row
/// matches, or the query fails, a freshly constructed row is returned with SURF unset.
/// </returns>
private static Types.Products_Unique_SURFRow GetDataByUniqueProductID(int rowid)
{
    Types.Products_Unique_SURFRow ret = new Types.Products_Unique_SURFRow();

    // TOP (1) limits the result to one row inside the statement itself, and the id
    // travels as a typed parameter instead of being concatenated into the SQL text.
    const string sqlText = "SELECT TOP (1) SURF FROM Products_Unique_SURF WHERE unique_product_id = @id";

    using (SqlConnection myConn = new SqlConnection(global::SCBot.Properties.Settings.Default.DataConnectionString))
    {
        myConn.Open();
        using (SqlCommand cmd = new SqlCommand(sqlText, myConn))
        {
            cmd.CommandType = CommandType.Text;
            cmd.Parameters.Add("@id", SqlDbType.Int).Value = rowid;
            try
            {
                // Dispose the reader deterministically so the connection is released promptly.
                using (SqlDataReader reader = cmd.ExecuteReader())
                {
                    if (reader.Read())
                    {
                        ret.SURF = Convert.ToString(reader["SURF"]);
                    }
                }
            }
            catch (Exception e)
            {
                // Kept from the original so callers see the same behaviour.
                // NOTE(review): a modal dialog in a data-access helper is questionable;
                // consider logging and rethrowing instead.
                MessageBox.Show(e.ToString());
            }
        }
    }

    return ret;
}
我的初始代码如下
/// <summary>
/// Sequential variant: for each unique product, queries its SURF rows through the
/// table adapter, deserializes each row's descriptors, and adds them to the FLANN index.
/// </summary>
public void GenerateTreeFromAllFinalProducts()
{
    var featureSerializer = new XmlSerializer(typeof(ImageFeature<float>[]));
    var productAdapter = new DSTableAdapters.Products_UniqueTableAdapter();
    var surfAdapter = new DSTableAdapters.Products_Unique_SURFTableAdapter();
    DS.Products_UniqueDataTable allProducts = productAdapter.GetData();

    foreach (DS.Products_UniqueRow product in allProducts)
    {
        // Get SURF data for all images found similar to this image
        List<DS.Products_Unique_SURFRow> surfRows = surfAdapter.GetDataByUniqueProductID(product.id).ToList();

        foreach (DS.Products_Unique_SURFRow surfRow in surfRows)
        {
            string xml = Decompress(surfRow.SURF);
            object deserialized = featureSerializer.Deserialize(new StringReader(xml));
            var features = (ImageFeature<float>[])deserialized;
            flann.AddSurfDescriptors(features, product.id);
        }
    }
}
但这太慢了,所以我才尝试改用Parallel.ForEach。
答案 0 :(得分:0)
您应该先测量循环体内部代码所花费的时间。并行化不会让循环体内部的代码本身变快。您可以把Decompress和Deserialize拆开分别计时,而不是把它们嵌套在同一个表达式里调用。
编辑后:
每个线程都会创建一个新连接。我认为这正是一个不适合用并行来加速的例子,改变算法才会有所帮助:在不同的连接/线程上逐行查询,比一次查询出全部(或所需的部分)行、再用单线程foreach遍历它们要慢得多。
我的观点:
我会收集final_products枚举中的所有id,并使用StringBuilder把它们拼成一个字符串。
// Collect every product id into one "(id1, id2, ...)" list so that all SURF rows
// can be fetched with a single query instead of one round trip per product.
StringBuilder sb = new StringBuilder();
bool isFirst = true;
sb.Append("(");
foreach (var prod in final_products)
{
    if (isFirst)
    {
        isFirst = false;
    }
    else
    {
        sb.Append(", ");
    }
    sb.Append(prod.id);
}
sb.Append(")");
// NOTE: if final_products is empty this produces "()", which is not valid SQL;
// skip the query in that case.
// unique_product_id is selected as well so each SURF row can be matched back to its product.
string query = "SELECT unique_product_id, SURF FROM Products_Unique_SURF WHERE unique_product_id IN " + sb.ToString();
// execute the query
// foreach row, decompress, deserialize etc...