我最初写了一个功能强力搜索(城市的ADT表示,城市元组作为距离数组的索引,懒惰地用Data.List.permutations
产生排列并用严格的左折叠消耗它们),但这很痛苦慢。我设法通过以更迫切的方式连续重写它并使用Heap的算法进行排列来加速它3倍,但直接将结果转换为(写得不好)C更快另外10倍!发生了什么事?
{-# LANGUAGE TupleSections, BangPatterns, ForeignFunctionInterface #-}
import Data.Array.Unboxed
import Data.Array.Base (unsafeAt)
import qualified Data.Vector.Unboxed.Mutable as M
import qualified Data.Vector.Unboxed as V
import Data.Word (Word)
import Data.IORef
import GHC.Arr (Ix(..))
import Control.Arrow
import Control.Monad
import Data.Array.Storable
import Foreign.Ptr (Ptr)
import Foreign.C (CInt)
import Criterion.Main
distances :: UArray Int Word
distances = listArray (0, 255) [0,629,1023,1470,1276,915,894,1199,760,795,676,903,1394,350,647,922,629,
0,556,1312,674,1097,317,946,784,504,1228,276,1254,878,712,1236,1023,556,
0,861,408,977,280,480,1340,288,1424,533,822,1346,1267,1198,1470,1312,861,
0,1196,751,1125,382,2052,809,1476,1381,78,1817,1957,980,1276,674,408,1196,
0,1384,383,837,1370,676,1777,469,1173,1551,1327,1600,915,1097,977,751,1384,
0,1103,722,1641,719,730,1303,676,1223,1530,249,894,317,280,1125,383,1103,
0,743,1083,392,1409,256,1079,1178,1019,1293,1199,946,480,382,837,722,743,
0,1708,454,1366,999,343,1549,1618,972,760,784,1340,2052,1370,1641,1083,1708,
0,1253,1392,901,1986,640,113,1679,795,504,288,809,676,719,392,454,1253,
0,1138,623,749,1136,1164,925,676,1228,1424,1476,1777,730,1409,1366,1392,1138,
0,1502,1399,786,1283,551,903,276,533,1381,469,1303,256,999,901,623,1502,
0,1335,1127,857,1469,1394,1254,822,78,1173,676,1079,343,1986,749,1399,1335,
0,1741,1889,908,350,878,1346,1817,1551,1223,1178,1549,640,1136,786,1127,1741,
0,543,1182,647,712,1267,1957,1327,1530,1019,1618,113,1164,1283,857,1889,543,
0,1566,922,1236,1198,980,1600,249,1293,972,1679,925,551,1469,908,1182,1566,0]
tsp_mut :: [Int] -> IO ([Int], Word)
tsp_mut [] = error "empty list"
tsp_mut cs = do
route <- V.thaw . V.fromList $ cs
let l = length cs -1
dis a b= unsafeAt distances $ 16*a + b
f (prev, !acc) c = (c, acc + dis prev c)
len = V.unsafeFreeze route >>= return . snd . (\v -> V.foldl' f (V.unsafeLast v, 0) v)
ref <- newIORef . (cs,) =<< len
let permut 0 = do
!l <- len
(_, ol) <- readIORef ref
when (l < ol) (writeIORef ref . (,l) . V.toList =<< V.freeze route)
permut n = let op = if odd n then const 0 else id
in forM_ [0..n] (\ i -> permut (n-1) >> M.unsafeSwap route (op i) n)
permut l >> readIORef ref
foreign import ccall unsafe "tsp_c"
c_tsp_c :: Int -> Ptr CInt -> IO Word
tsp_c :: [Int] -> IO ([Int], Word)
tsp_c cs = do
let l=length cs
marr <- newListArray (0, l-1) $ map fromIntegral cs
r <- withStorableArray marr $ c_tsp_c l
list <-getElems marr
return (map fromIntegral list, fromIntegral r)
main = defaultMain [ pertestIO "tsp_mut" tsp_mut,
pertestIO "tsp_c" tsp_c ]
where
pertestIO str f = bgroup str $ map (uncurry bench . (show &&& nfIO . (f . (`take` [0..15])))) [6..11]
这里是C代码:
#include <stdlib.h>
#include <string.h>
typedef unsigned int word;
word distances[] = { 0,629,1023,1470,1276,915,894,1199,760,795,676,903,1394,350,647,922,629,
0,556,1312,674,1097,317,946,784,504,1228,276,1254,878,712,1236,1023,556,
0,861,408,977,280,480,1340,288,1424,533,822,1346,1267,1198,1470,1312,861,
0,1196,751,1125,382,2052,809,1476,1381,78,1817,1957,980,1276,674,408,1196,
0,1384,383,837,1370,676,1777,469,1173,1551,1327,1600,915,1097,977,751,1384,
0,1103,722,1641,719,730,1303,676,1223,1530,249,894,317,280,1125,383,1103,
0,743,1083,392,1409,256,1079,1178,1019,1293,1199,946,480,382,837,722,743,
0,1708,454,1366,999,343,1549,1618,972,760,784,1340,2052,1370,1641,1083,1708,
0,1253,1392,901,1986,640,113,1679,795,504,288,809,676,719,392,454,1253,
0,1138,623,749,1136,1164,925,676,1228,1424,1476,1777,730,1409,1366,1392,1138,
0,1502,1399,786,1283,551,903,276,533,1381,469,1303,256,999,901,623,1502,
0,1335,1127,857,1469,1394,1254,822,78,1173,676,1079,343,1986,749,1399,1335,
0,1741,1889,908,350,878,1346,1817,1551,1223,1178,1549,640,1136,786,1127,1741,
0,543,1182,647,712,1267,1957,1327,1530,1019,1618,113,1164,1283,857,1889,543,
0,1566,922,1236,1198,980,1600,249,1293,972,1679,925,551,1469,908,1182,1566,0};
word dist (int a, int b) {
return distances[16*a+b];
}
int len (int cities[], int length) {
int l = dist(cities[length-1], cities[0]);
int i;
for (i=0; i < length-1; i++ ) {
l +=dist(cities[i], cities[i+1]);
}
return l;
}
int *route, *bestRoute;
word minL;
int sz, size, cntr;
void permut (int n){
if (n==0) {
cntr++;
int l=len(route, sz);
if (l<minL) {
memcpy(bestRoute, route,size);
minL=l;
}
return;
}
int i, swap, temp;
for (i=0; i<=n; i++) {
permut(n-1);
swap=n% 2 ? i : 0;
temp=route[swap];
route[swap]=route[n];
route[n]=temp;
}
}
word tsp_c (int length, int cities[]) {
size= length * sizeof(int);
cntr=0;
sz=length;
route = malloc(size);
bestRoute = malloc(size);
memcpy( route, cities, size);
memcpy( bestRoute, cities, size);
minL = len (cities, length);
permut(length-1);
memcpy(cities, bestRoute, length * sizeof(int));
free(route);
free(bestRoute);
/* printf("permutations: %d", cntr); */
return minL;
}
我在Windows上使用64位版本的GHC 7.8.3 -O2
答案 0 :(得分:1)
引用Haskell网站:
在不需要任何成本或者需要性能的应用中 目标是详细调整低级算法,这是必要的 像C这样的语言可能是比Haskell更好的选择, 正因为它提供了对确切方式的更密切控制 在其中进行计算......
另一种看待它的方式:C&#39;指令&#39;更接近硬件,因此它本身具有优势。当你从一个高的翻译时,这一点尤其明显。级别语言直接到C。
另一方面,高级语言的优势在于您可以专注于更大的图景,您可以(通常)更快地完成更多优化。