我有一个主要版本,其顺序版本运行良好。我终于想出了如何使内循环并行运行,但是(因为我担心基于其他语言的先前经验)单线程版本更快。
Rust中的这个并行版本可以优化吗?
extern crate crossbeam;
fn main() {
let residues = [1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67,
71, 73, 79, 83, 89, 97,101,103,107,109,113,121,127,131,137,139,
143,149,151,157,163,167,169,173,179,181,187,191,193,197,199,209,211];
let val = 1_000_000;
let md = 210;
let rescnt = 48;
println!("val = {}, mod = {}, rescnt = {}", val, md, rescnt);
let mut posn = [0; 210];
for i in 1..rescnt {posn[residues[i]] = i - 1;}
posn[1] = rescnt - 1;
let mut modk; let mut r; let mut k;
let num = val - 1 | 1;
k = num / md; modk = md * k; r = 1;
while num >= modk + residues[r] {r += 1;}
let maxpcs = k * rescnt + r - 1;
let prms: Vec<u8> = vec![0; maxpcs];
println!("num = {}, k = {}, modk = {}, maxpcs = {}", num, k, modk, maxpcs);
let sqrt_n = (num as f32).sqrt() as usize;
modk = 0; r = 0; k = 0;
// sieve to identify/eliminate nonprimes/locations in prms array
for i in 0..maxpcs {
r += 1; if r > rescnt {r = 1; modk += md; k += 1;};
if prms[i] == 1 {continue;}
let prm_r = residues[r];
let prime = modk + prm_r;
if prime > sqrt_n {break;}
let prmstep = prime * rescnt;
for ri in &residues[1..rescnt + 1] {
let prms = &mut prms;
crossbeam::scope(|scope| {
scope.spawn(move || {
let prod = prm_r * ri;
let mut np = (k * (prime + ri) + (prod - 2) / md) * rescnt + posn[prod % md];
while np < maxpcs {prms[np] = 1; np += prmstep;}
});
});
}
}
// the prms array now has all the positions for primes r1..N
// extract prime numbers and count from prms into prims array
let mut prmcnt = 4;
modk = 0; r = 0;
for i in 0..maxpcs {
r += 1; if r > rescnt {r = 1; modk += md;};
if prms[i] == 0 {prmcnt += 1;}
}
println!("{}", prmcnt);
}
在Linux上使用Rust 1.6。