我正在从这种格式的 excel 中提取数据
product1 | unnamedcol2 | product2 | unnamedcol4 | product3 | unnamedcol6 | ------------------------------------------------------------------------------- @1foo | 1.10 | @1foo | 0.3 | @1foo | 0.3 @2foo | 1.00 | @2foo | 2 | @2foo | @3foo | 1.52 | @3foo | 2.53 | @3foo | @4foo | 1.47 | | | @4foo | 1.31 @5foo | 1.49 | | | @5foo | 1.31
The file uses all 255 fields. Using dapper-dot-net i get the data through this code
IEnumerable<IDictionary<string, object>> excelDataRaw =
conn.Query(string.Format("select * from {0}", table)).Cast<IDictionary<string, object>>();
我将这些数据传递给这些测试方法。数据作为 IDictionaries 的 IEnumerable 返回,其中每个键是一个产品,每个值都是一个 IDictionary,其中每个键是来自产品列的值,对应的值是来自产品列右侧的 unnamedcol 的值。
var excelDataRefined = new List<IDictionary<string, IDictionary<string, decimal>>>();
excelDataRefined.Add(new Dictionary<string, IDictionary<string, decimal>>());
excelDataRefined[0].Add( "product", new Dictionary<string, decimal>());
excelDataRefined[0]["product"].Add("@1foo", 1.1m);
方法:
private static Dictionary<string, IDictionary<string, decimal>> Benchmark_foreach(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
Console.WriteLine("1. Using foreach");
var watch = new Stopwatch();
watch.Start();
List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
bool isEven = false;
List<string> products = headers.Where(h => isEven = !isEven).ToList();
var dates = new List<IEnumerable<object>>();
var prices = new List<IEnumerable<object>>();
foreach (string field in headers)
{
string product1 = field;
if (headers.IndexOf(field) % 2 == 0)
{
dates.Add(
excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1]).Where(row => row != null));
}
if (headers.IndexOf(field) % 2 == 1)
{
prices.Add(
excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1] ?? 0m).Take(dates.Last().Count()));
}
}
watch.Stop();
Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
watch.Restart();
var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
foreach (IEnumerable<object> datelist in dates)
{
decimal num;
IEnumerable<object> datelist1 = datelist;
IEnumerable<object> pricelist =
prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
content => decimal.TryParse(content.ToString(), out num));
Dictionary<string, decimal> dict =
datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
x => (string)x.k, x => decimal.Parse(x.v.ToString()));
if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
{
excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
}
}
watch.Stop();
Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);
return excelDataRefined;
}
private static Dictionary<string, IDictionary<string, decimal>> Benchmark_AsParallel(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
Console.WriteLine("2. Using AsParallel().AsOrdered().ForAll");
var watch = new Stopwatch();
watch.Start();
List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
bool isEven = false;
List<string> products = headers.Where(h => isEven = !isEven).ToList();
var dates = new List<IEnumerable<object>>();
var prices = new List<IEnumerable<object>>();
headers.AsParallel().AsOrdered().ForAll(
field =>
dates.Add(
excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(
col => col[field]).Where(row => row != null).ToList()));
headers.AsParallel().AsOrdered().ForAll(
field =>
prices.Add(
excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(
col => col[field] ?? 0m).Take(256).ToList()));
dates.RemoveAll(x => x.Count() == 0);
prices.RemoveAll(x => x.Count() == 0);
watch.Stop();
Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
watch.Restart();
var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
foreach (IEnumerable<object> datelist in dates)
{
decimal num;
IEnumerable<object> datelist1 = datelist;
IEnumerable<object> pricelist =
prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
content => decimal.TryParse(content.ToString(), out num));
Dictionary<string, decimal> dict =
datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
x => (string)x.k, x => decimal.Parse(x.v.ToString()));
if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
{
excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
}
}
watch.Stop();
Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);
return excelDataRefined;
}
private static Dictionary<string, IDictionary<string, decimal>> Benchmark_ForEach(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
Console.WriteLine("3. Using ForEach");
var watch = new Stopwatch();
watch.Start();
List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
bool isEven = false;
List<string> products = headers.Where(h => isEven = !isEven).ToList();
var dates = new List<IEnumerable<object>>();
var prices = new List<IEnumerable<object>>();
headers.ForEach(
field =>
dates.Add(
excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(col => col[field]).Where(
row => row != null).ToList()));
headers.ForEach(
field =>
prices.Add(
excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(col => col[field] ?? 0m).
Take(256).ToList()));
dates.RemoveAll(x => x.Count() == 0);
prices.RemoveAll(x => x.Count() == 0);
watch.Stop();
Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
watch.Restart();
var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
foreach (IEnumerable<object> datelist in dates)
{
decimal num;
IEnumerable<object> datelist1 = datelist;
IEnumerable<object> pricelist =
prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
content => decimal.TryParse(content.ToString(), out num));
Dictionary<string, decimal> dict =
datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
x => (string)x.k, x => decimal.Parse(x.v.ToString()));
if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
{
excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
}
}
watch.Stop();
Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);
return excelDataRefined;
}
为什么它的行为是这样的?我希望 AsParallel 是最快的,因为它并行执行而不是顺序执行。我如何优化这个?
最佳答案
为了进行并行计算,您必须拥有多个处理器或内核,否则您只是在线程池中排队等待 CPU 的任务。 IE。单核机器上的 AsParallel 是顺序的,加上线程池和线程上下文切换的开销。即使在双核机器上,您也可能无法同时获得两个核心,因为许多其他东西都在同一台机器上运行。
真正的 .AsParallel()
只有在您长时间运行带有阻塞操作 (I/O) 的任务时才有用,操作系统可以挂起阻塞线程并让另一个线程运行。
关于c# - 为什么在这种情况下使用 AsParallel() 比 foreach 慢?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/7176828/