Question

教授说，这不是检查数字是否可被100,000-150,000整除的有效算法。我在寻找更好的方法时遇到了麻烦。任何帮助将不胜感激。

/*
 * Brute-force check of n against every candidate divisor in the closed
 * interval [100000, 150000].
 *
 * Returns 0 as soon as one candidate divides n without remainder, and 1
 * when none of them does.
 */
unsigned short divisibility_check(unsigned long n) {
    unsigned long candidate = 100000;

    while (candidate <= 150000) {
        if (!(n % candidate)) {
            return 0;
        }
        candidate++;
    }
    return 1;
}

Answer 1

比方说，您需要确定正整数 K 是否可被100,000到150,000之间的整数整除，而且这种操作非常少见，因此进行预计算根本不值得处理器时间或使用的内存。

如果 K <100,000，则不能被100,000到150,000之间的数字整除。

如果100,000≤ K ≤150,000，则其自身可以被整除。由您决定这是否重要。

对于 K > 150,000 的情况，如果 K 可被某个 M 整除（100,000 ≤ M ≤ 150,000），那么 K 也必须能被 L = K / M 整除。这是因为 K = L × M，并且三者都是正整数。因此，您只需要用一组 L 来测试 K 的可整除性，其中 ⌈K / 150,000⌉ ≤ L ≤ ⌊K / 100,000⌋。

但是，当K> = 15,000,000,000时，那组 L s变得大于可能的 M s集合。然后，只需测试 K 与每个 M 的可分性，就不再那么麻烦了，就像现在OP的代码一样。

将其作为程序实施时，实际上，最重要的是添加的注释。不要编写描述代码功能的注释；编写注释以解释您要尝试实现的模型或算法（例如，在功能级别），以及您对每小段代码应该完成什么的意图。

在这种情况下，您应该像上面我所做的那样，在每个if子句中添加一条注释，解释您的推理。

初学者程序员通常会完全忽略注释。这很不幸，因为写出好的评论是事后难以养成的习惯。学习注释您的代码绝对是个好主意（如上所述），描述代码功能的注释没有多用；噪声多于帮助。

一个能写出可维护代码的程序员，抵得上十个只会写“只写”（write-only）代码的天才。这是因为所有代码都有错误，因为人类会犯错误。要成为一名高效的开发人员，您的代码必须是可维护的。否则，您将不得不从头重写每一个有缺陷的部分，从而浪费大量时间。而且，正如您在上面看到的那样，在算法级别进行“优化”，即思考如何避免必须做的工作，可以比尝试优化循环或类似方法产生更好的结果。（您会发现在现实生活中，出乎意料的是，以适当的方式优化循环，往往可以完全消除循环。）

即使是在练习中，恰当的注释也可能决定您得到的评语是“不得分，这行不通”，还是“好吧，这题我给您部分分数，因为您在第 N 行有一个笔误/差一错误/小疏忽，但除此之外您的解决方案是可行的”。

由于bolov不理解上面的内容如何导致“ naive_with_checks”函数，因此我将在此处展示它的实现。

为便于测试，我将展示完整的测试程序。将要测试的整数范围和接受的除数范围作为程序的参数（即thisprogram 1 500000 100000 150000来复制bolov的测试）。

#include <stdlib.h>
#include <inttypes.h>
#include <limits.h>
#include <locale.h>
#include <ctype.h>
#include <stdio.h>
#include <errno.h>

/*
 * Decide whether `number` is evenly divisible by at least one integer in
 * the closed range [minimum_divisor, maximum_divisor].
 *
 * Returns 1 if such a divisor exists, 0 otherwise.
 *
 * Both bounds are used as divisors below, so the caller must pass nonzero
 * bounds with minimum_divisor <= maximum_divisor (main() enforces
 * minimum_divisor >= 2).
 */
int is_divisible(const uint64_t number,
                 const uint64_t minimum_divisor,
                 const uint64_t maximum_divisor)
{
    uint64_t divisor, minimum_result, maximum_result, result;

    /* A number smaller than every candidate divisor has no divisor
       in the range. */
    if (number < minimum_divisor) {
        return 0;
    }

    if (number <= maximum_divisor) {
        /* Number itself is a valid divisor. */
        return 1;
    }

    /* If number = divisor * result with divisor in the range, then result
       lies between number/maximum_divisor and number/minimum_divisor.
       A result of 1 is impossible here because number > maximum_divisor. */
    minimum_result = number / maximum_divisor;
    if (minimum_result < 2) {
        minimum_result = 2;
    }

    maximum_result = number / minimum_divisor;
    if (maximum_result < minimum_result) {
        maximum_result = minimum_result;
    }

    if (maximum_result - minimum_result > maximum_divisor - minimum_divisor) {
        /* The number is so large that it is the least amount of work
           to check each possible divisor. */
        for (divisor = minimum_divisor; divisor <= maximum_divisor; divisor++) {
            if (number % divisor == 0) {
                return 1;
            }
        }
        return 0;
    } else {
        /* There are fewer possible results than divisors,
           so we check the results instead. */
        for (result = minimum_result; result <= maximum_result; result++) {
            if (number % result == 0) {
                /* The lower result bound was rounded down, so confirm that
                   the matching divisor really is inside the range. */
                divisor = number / result;
                if (divisor >= minimum_divisor && divisor <= maximum_divisor) {
                    return 1;
                }
            }
        }
        return 0;
    }
}

/*
 * Parse the string `s` as an unsigned 64-bit integer (base auto-detected
 * by strtoull(): decimal, 0x hexadecimal, or 0 octal).
 *
 * On success stores the value in *to (when `to` is not NULL) and returns 0.
 * Returns -1 for NULL/empty input, a negative number, an out-of-range
 * value, or trailing non-whitespace characters.
 */
int parse_u64(const char *s, uint64_t *to)
{
    unsigned long long value;
    const char *first;
    char *end = NULL;

    /* Empty strings are not valid. */
    if (s == NULL || *s == '\0')
        return -1;

    /* strtoull() accepts a leading minus sign and returns the negated
       value converted to unsigned, so "-1" would silently become
       UINT64_MAX. Reject negative input explicitly. */
    first = s;
    while (isspace((unsigned char)(*first)))
        first++;
    if (*first == '-')
        return -1;

    /* Parse as unsigned long long. */
    errno = 0;
    value = strtoull(s, &end, 0);
    if (errno == ERANGE)
        return -1;
    if (end == s)
        return -1;

    /* Overflow? (Only possible when unsigned long long is wider than 64 bits.) */
    if (value > UINT64_MAX)
        return -1;

    /* Skip trailing whitespace. */
    while (isspace((unsigned char)(*end)))
        end++;

    /* If the string does not end here, it has garbage in it. */
    if (*end != '\0')
        return -1;

    if (to)
        *to = (uint64_t)value;

    return 0;
}

/*
 * Count the integers in [MIN, MAX] that have a divisor in
 * [MIN_DIVISOR, MAX_DIVISOR], and print the count on standard output.
 */
int main(int argc, char *argv[])
{
    uint64_t kmin, kmax, dmin, dmax, k, count;

    if (argc != 5) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help | help ]\n", argv[0]);
        fprintf(stderr, " %s MIN MAX MIN_DIVISOR MAX_DIVISOR\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program counts which positive integers between MIN and MAX,\n");
        fprintf(stderr, "inclusive, are divisible by MIN_DIVISOR to MAX_DIVISOR, inclusive.\n");
        fprintf(stderr, "\n");
        return EXIT_SUCCESS;
    }

    /* Use current locale. This may change which codes isspace() considers whitespace. */
    if (setlocale(LC_ALL, "") == NULL)
        fprintf(stderr, "Warning: Your C library does not support your current locale.\n");

    if (parse_u64(argv[1], &kmin) || kmin < 1) {
        fprintf(stderr, "%s: Invalid minimum positive integer to test.\n", argv[1]);
        return EXIT_FAILURE;
    }

    /* kmax must stay below UINT64_MAX so that the k <= kmax loop below
       can terminate. */
    if (parse_u64(argv[2], &kmax) || kmax < kmin || kmax >= UINT64_MAX) {
        fprintf(stderr, "%s: Invalid maximum positive integer to test.\n", argv[2]);
        return EXIT_FAILURE;
    }

    if (parse_u64(argv[3], &dmin) || dmin < 2) {
        fprintf(stderr, "%s: Invalid minimum divisor to test for.\n", argv[3]);
        return EXIT_FAILURE;
    }

    if (parse_u64(argv[4], &dmax) || dmax < dmin) {
        fprintf(stderr, "%s: Invalid maximum divisor to test for.\n", argv[4]);
        return EXIT_FAILURE;
    }

    count = 0;
    for (k = kmin; k <= kmax; k++)
        count += is_divisible(k, dmin, dmax);

    printf("%" PRIu64 "\n", count);
    return EXIT_SUCCESS;
}

需要注意的是，上面运行bolov的测试（即thisprogram 1 500000 100000 150000）只需要慢得多的Core i5-7200U处理器大约15毫秒的挂钟时间（13毫秒CPU时间）。对于真正大的数字（例如280,000,000,000到280,000,010,000），此测试将完成最大的工作量，并且在此计算机上每10,000个数字花费大约3.5秒。

换句话说，我不相信bolov的数字与正确编写测试用例的时间安排有任何关系。

重要的是要注意，对于介于1到500,000之间的任何 K ，bolov表示其代码所用的相同测试，上述代码最多可进行两个可除性测试找出 K 是否可被100,000到150,000之间的整数整除。

因此，此解决方案非常有效。当被测试的 K 相对较小（例如32位无符号整数或更小），或者无法使用预先计算的表时，绝对可以接受并且接近最佳。

即使可以使用预先计算的表，也不清楚prime factorization是否/何时比直接检查更快。预计算表的大小和内容当然需要权衡。 bolov声称它明显优于其他方法，但并未实施如上所示的适当“幼稚”可除性测试，并基于具有简单素数分解的非常小的整数（1到500,000）进行实验。

例如，一张预先算好 1 至 500,000 每个整数是否存在所需除数的位表只占用 62,500 字节（对于 150,000 至 500,000，则占用 43,750 字节）。使用该表，每个测试花费的时间几乎不变（这仅取决于内存和缓存的影响）。将其扩展到所有 32 位无符号整数将需要 512 MiB（536,870,912 字节）；该表可以存储在内存映射的只读文件中，以使 OS 内核可以随时管理将其映射到 RAM 的数量。
当试验分割的数量超出可能除数的范围（在这种情况下为50,000除数）时，素分解本身（尤其是使用试验除法）变得比天真的方法昂贵。由于1到150,000之间有13848个素数（如果以1和2为素数），则对于足够大的输入值，试算的数量可以轻松接近除数的数量。

对于具有许多主要因子的数字，在组合阶段，发现是否有任何主要因子子集乘以100,000到150,000之间的数字会更加困难。可能组合的数量增长快于指数增长。如果没有仔细检查，仅此阶段就可以为每个较大的输入数字完成更多的工作，而不仅仅是每个可能的除数的尝试除法。

（例如，如果您有16个不同的素数，则您已经有65,535个不同的组合；这比直接试验除法的数量要多。但是，所有此类数都大于64位；最小的是2·3 ·5·7·11·13·17·19·23·29·31·37·41·43·47·53 = 32,589,158,477,190,044,730，是65位数字。）

还有代码复杂性的问题。代码越复杂，调试和维护就越困难。

Answer 2

好，所以我用m69的注释中提到的筛素数和因式分解实现了该版本，它比幼稚的方法要快得多。我必须承认，我完全没想到这一点。

我的符号：left == 100'000和right = 150'000

naive：您的版本
naive_with_checks：您的版本，加上以下检查：
- if (n < left)无除数
- else if (n <= right)除数
- else if (left * 2 >= right && n < left * 2)除数
因式分解（已执行检查）
1. 为right之前的所有素数预先计算Sieve of Eratosthenes。这次没有测量
2. 分解n（仅使用上一步中的素数）
3. 生成所有子集（回溯，深度优先：即首先生成 p1^0 * p2^0 * p3^0，而不是首先生成 p1^5），只要乘积 < left 就继续，或者直到乘积落入 [left, right]（找到除数）。
factorization_opt 优化（不生成子集（不创建子集矢量））。我只是将当前产品从一个回溯迭代传递到下一个。
Nominal Animal's version我也在系统上以相同范围运行了他的版本。

我已经用C++编写了程序，所以在这里我不会分享。

我使用std::uint64_t作为数据类型，并且检查了从1到500'000的所有数字，以查看每个数字是否可被间隔[100'000, 150'000]中的数字整除。所有版本都达到了相同的解决方案：170'836个数字，结果都是肯定的。

设置：

硬件：Intel Core i7-920，具有HT的4个内核（所有算法版本均为单线程），2.66 GHz（增强的2.93 GHz）， 8 MB SmartCache；内存：6 GB DDR3三通道。
编译器：Visual Studio 2017（v141），发布x64模式。

我还必须补充一点，我没有介绍这些程序，因此肯定有改进该实现的空间。但是，这里已经足够了，因为它的目的是找到一种更好的算法。

version                |  elapsed time (milliseconds)
-----------------------+--------------
naive                  |  167'378 ms (yes, it's thousands separator, aka 167 seconds)
naive_with_checks      |   97'197 ms
factorization          |    7'906 ms
factorization_opt      |    7'320 ms
                       |
Nominal Animal version |       14 ms

一些分析：

对于朴素vs naive_with_checks：[1 200'000]中的所有数字都可以通过简单的校验来解决。因为这些占所有检查数字的40％，所以naive_with_checks版本大约完成了朴素工作的60％。执行时间反映了这一点，因为naive_with_checks运行时是朴素版本的58％。

因子分解版本的速度提高了12.3倍。确实令人印象深刻。我还没有分析过算法的时间复杂度。

最终的优化带来了进一步的1.08倍加速。基本上，这是删除子因子的小向量的创建和复制所花费的时间。

对于有兴趣的人，上面没有包括的筛分预计算大约需要1 ms。这是Wikipedia的幼稚实现，没有任何优化。

Answer 3

这是使用质数的递归方法。这里的想法是：如果一个数字可被100000到150000之间的某个数整除，那么只对相关素数的乘积逐步做除法约简，就必然存在一条会经过目标范围内某个状态的约简路径。（注意：以下代码用于大于100000 * 150000的数字）。在测试中，我没有发现堆栈执行超过600次迭代的情况。

# Euler sieve
def getPrimes():
  """Sieve the integers 2..150000 and return two lists of primes.

  Returns a tuple (small, large): `small` holds the primes below 75000
  (the only ones that can build a composite candidate), `large` holds
  the primes above 100000 (a candidate that is itself prime). Primes
  from 75000 up to 100000 are found by the sieve but not returned.
  """
  upper = 150000
  composite = [None] * (upper + 1)
  small, large = [], []
  # Multipliers that get combined with each newly found prime in order
  # to mark composites; rebuilt after every prime.
  bases = []

  for cand in range(2, upper + 1):
    if composite[cand]:
      continue

    bases.append(cand)
    if cand < 75000:
      small.append(cand)
    elif cand > 100000:
      large.append(cand)

    # Largest multiplier whose product with cand still fits in the table.
    bound = upper // cand
    survivors = []

    for base in bases:
      m = base
      while m <= bound:
        survivors.append(m)
        composite[m * cand] = True
        m *= cand
    bases = survivors

  return (small, large)

# ps1: primes below 75000; ps2: primes above 100000 (up to 150000).
ps1, ps2 = getPrimes()

def f(n):
  """Return True if n has a divisor between 100000 and 150000, inclusive.

  Uses only integer arithmetic (`//`) and `range`, so it gives the same
  result under Python 2 and Python 3. With true division, Python 3 would
  turn n into a float and lose precision for large inputs.
  """
  # Prime candidate
  for p in ps2:
    if not (n % p):
      return True

  # (primes, prime_counts)
  ds = ([],[])
  prod = 1
  # Prepare only prime
  # factors that could
  # construct a composite
  # candidate.
  for p in ps1:
    while not (n % p):
      prod *= p
      if (not ds[0] or ds[0][-1] != p):
        ds[0].append(p)
        ds[1].append(1)
      else:
        ds[1][-1] += 1
      n //= p

  # Reduce the primes product to
  # a state where it's between
  # our target range.
  # Each stack entry is (current product, index of the next prime to
  # divide out).
  stack = [(prod,0)]

  while stack:
    prod, i = stack.pop()

    # No point in reducing further
    if prod < 100000:
      continue
    # Exit early
    elif prod <= 150000:
      return True
    # Try reducing the product
    # by different prime powers
    # one prime at a time
    if i < len(ds[0]):
      for p in range(ds[1][i] + 1):
        stack.append((prod // ds[0][i]**p, i + 1))

  return False

输出：

# Count how many of 1000 consecutive integers starting at 2**40 have a
# divisor in the target range. range() and print() behave the same under
# Python 2 and Python 3 here.
c = 0

for ii in range(1099511627776, 1099511628776):
  if f(ii):
    c += 1

print(c) # 239

Answer 4

为进行比较，这是我发表有关使用素因数分解的评论时所想到的。使用 gcc -std=c99 -O3 -m64 -march=haswell 进行编译，并用 64 位范围内最后 10,000 个整数进行测试时，它比带检查和反转（inversion）的朴素方法略快一些（3.469 秒对 3.624 秒）。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>

/*
 * Fill ptr[0 .. size-1] with a sieve-of-Eratosthenes table: for every
 * index >= 2, the entry ends up true exactly when the index is prime.
 * Entries 0 and 1 are set by the initial fill and never cleared.
 */
void eratosthenes(bool *ptr, uint64_t size) {
    /* Flag everything first, then strike out the multiples. */
    memset(ptr, true, size);

    uint64_t base = 2;
    while (base * base < size) {
        if (ptr[base]) {
            /* Smaller multiples of base were struck by smaller primes. */
            uint64_t multiple = base * base;
            while (multiple < size) {
                ptr[multiple] = false;
                multiple += base;
            }
        }
        base++;
    }
}

/*
 * Grow a cached flag array to hold at least `needed` entries.
 *
 * On success updates *flags and *size and returns true. On failure
 * returns false and leaves *flags and *size untouched, so the previous
 * (smaller) buffer stays valid.
 */
static bool divisible_grow_flags(bool **flags, uint64_t *size, uint64_t needed) {
    if (needed > SIZE_MAX / sizeof(bool)) {
        return false;
    }
    bool *bigger = realloc(*flags, (size_t)needed * sizeof(bool));
    if (!bigger) {
        return false;
    }
    *flags = bigger;
    *size = needed;
    return true;
}

/*
 * Return true if n is divisible by at least one integer in the closed
 * range [a, b], using prime factorization of n.
 *
 * Zero is never treated as a divisor, and an empty range (a > b) yields
 * false. The prime sieve and the divisor sieve live in function-static
 * buffers that persist (and are never freed) between calls; if they
 * cannot be allocated, "Out of memory!" is printed and false is returned.
 */
bool divisible(uint64_t n, uint64_t a, uint64_t b) {
    /* check for trivial cases first                                    */
    if (a > b) {
        return false;                       /* empty range              */
    }
    if (n < a) {
        return false;
    }
    if (n <= b) {
        return true;
    }
    /* From here on n > b >= a.                                         */
    if (a <= 1) {
        /* 1 divides everything; 0 is not a valid divisor.              */
        return b >= 1;
    }
    /* Now a >= 2, hence b >= 2. n < 2a, written so it cannot overflow. */
    if (n / 2 < a) {
        return false;
    }

    /* Inversion: use range n/b ~ n/a; see Nominal Animal's answer.     */
    /* n / b < a is equivalent to n < a * b but cannot overflow.        */
    if (n / b < a) {
        uint64_t c = a;
        a = n / b + (n % b != 0); // n/b rounded up, without overflow
        b = n / c;
    }

    /* Create prime sieve when first called, or re-calculate it when    */
    /* called with a higher value of b; place before inversion in case  */
    /* of a large sequential test, to avoid repeated re-calculation.    */
    static bool *prime = NULL;
    static uint64_t prime_size = 0;
    if (prime_size <= b) {
        if (!divisible_grow_flags(&prime, &prime_size, b + 1)) {
            printf("Out of memory!\n");
            return false;
        }
        eratosthenes(prime, prime_size);
    }

    /* Factorize n into prime factors up to b, using trial division;    */
    /* there are more efficient but also more complex ways to do this.  */
    /* You could return here, if a factor in the range a~b is found.    */
    /* A 64-bit value has at most 63 prime factors (2^63).              */
    static uint64_t factor[63];
    uint8_t factors = 0;
    for (uint64_t i = 2; i <= n && i <= b; i++) {
        if (prime[i]) {
            while (n % i == 0) {
                factor[factors++] = i;
                n /= i;
            }
        }
    }

    /* Prepare divisor sieve when first called, or re-allocate it when  */
    /* called with a higher value of b; in a higher-level language, you */
    /* would probably use a different data structure for this, because  */
    /* this method iterates repeatedly over a potentially sparse array. */
    static bool *divisor = NULL;
    static uint64_t div_size = 0;
    if (div_size <= b / 2) {
        if (!divisible_grow_flags(&divisor, &div_size, b / 2 + 1)) {
            printf("Out of memory!\n");
            return false;
        }
    }
    memset(divisor, false, div_size);
    divisor[1] = true;          /* in bounds: b >= 2, so div_size >= 2  */
    uint64_t max = 1;

    /* Iterate over each prime factor, and for every divisor already in */
    /* the sieve, add the product of the divisor and the factor, up to  */
    /* the value b/2. If the product is in the range a~b, return true.  */
    /* Walking j downwards keeps products added in this round (always   */
    /* larger than j) from being combined with the same factor again.   */
    for (uint8_t i = 0; i < factors; i++) {
        for (uint64_t j = max; j > 0; j--) {
            if (divisor[j]) {
                uint64_t product = factor[i] * j;
                if (product >= a && product <= b) {
                    return true;
                }
                if (product < div_size) {
                    divisor[product] = true;
                    if (product > max) {
                        max = product;
                    }
                }
            }
        }
    }
    return false;
}

/*
 * Count how many of the last 10,000 values below UINT64_MAX have a
 * divisor in [100000, 150000], and print the count.
 */
int main(void) {
    uint64_t count = 0;
    /* The upper bound is UINT64_MAX - 1, so n++ never wraps before the
       loop condition fails. */
    for (uint64_t n = 18446744073709541615LLU; n <= 18446744073709551614LLU; n++) {
        if (divisible(n, 100000, 150000)) ++count;
    }
    /* uint64_t is not necessarily unsigned long long; cast to match %llu. */
    printf("%llu", (unsigned long long)count);
    return 0;
}

这是我比较过天真的+检查+反转的实现：

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/*
 * Return true if n is divisible by at least one integer in the closed
 * range [a, b], by trial division with trivial checks and inversion.
 *
 * Zero is never treated as a divisor, and an empty range (a > b) yields
 * false. All comparisons are written so they cannot overflow uint64_t.
 */
bool divisible(uint64_t n, uint64_t a, uint64_t b) {
    if (a > b) {
        return false;                   /* empty range */
    }
    if (n < a) {
        return false;
    }
    if (n <= b) {
        return true;                    /* n itself is in the range */
    }
    /* From here on n > b >= a. */
    if (a <= 1) {
        /* 1 divides everything; 0 is not a valid divisor. */
        return b >= 1;
    }
    /* n < 2a: the only possible divisor would be n itself, which is > b. */
    if (n / 2 < a) {
        return false;
    }
    /* Inversion: when n < a * b there are fewer quotients n/b ~ n/a than
       divisors a ~ b, so test the quotients instead. If q lies in that
       range and divides n, then n / q lies in the original [a, b]. */
    if (n / b < a) {
        uint64_t c = a;
        a = n / b + (n % b != 0);       /* n/b rounded up */
        b = n / c;
    }
    /* b < n <= UINT64_MAX here, so d++ cannot wrap before d exceeds b. */
    for (uint64_t d = a; d <= b; d++) {
        if (n % d == 0) {
            return true;
        }
    }
    return false;
}

/*
 * Count how many of the last 10,000 values below UINT64_MAX have a
 * divisor in [100000, 150000], and print the count.
 */
int main(void) {
    uint64_t count = 0;
    /* The upper bound is UINT64_MAX - 1, so n++ never wraps before the
       loop condition fails. */
    for (uint64_t n = 18446744073709541615LLU; n <= 18446744073709551614LLU; n++) {
        if (divisible(n, 100000, 150000)) ++count;
    }
    /* uint64_t is not necessarily unsigned long long; cast to match %llu. */
    printf("%llu", (unsigned long long)count);
    return 0;
}

Answer 5

这是一个非常简单的带有筛网缓存的解决方案。如果您为序列中的许多数字调用divisibility_check函数，这应该非常有效：

#include <string.h>

/*
 * Sieve-cached divisibility test: returns 0 when n is divisible by some
 * integer in [100000, 150000] and 1 otherwise.
 *
 * Results for one aligned window of sizeof(window) consecutive integers
 * are kept in a static table; the table is rebuilt whenever n falls
 * outside the cached window.
 */
int divisibility_check_sieve(unsigned long n) {
    static unsigned char window[1 << 19]; /* 1/2 megabyte */
    /* window_lo > window_hi initially, so the first call always builds. */
    static unsigned long window_lo = 1, window_hi;

    if (n >= window_lo && n <= window_hi) {
        return window[n - window_lo];
    }

    /* Align the window start down to a multiple of the table size. */
    window_lo = n & ~(sizeof(window) - 1);
    window_hi = window_lo + sizeof(window) - 1;
    memset(window, 1, sizeof window);

    unsigned long d = 100000;
    while (d <= 150000) {
        /* Offset of the first multiple of d at or after window_lo. */
        unsigned long off = window_lo % d;
        if (off != 0) {
            off = d - off;
        }
        while (off < sizeof window) {
            window[off] = 0;
            off += d;
        }
        d++;
    }
    return window[n - window_lo];
}

这是一个比较基准：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/*
 * Reference implementation: try every divisor from 100000 to 150000.
 * Returns 0 if any of them divides n evenly, 1 if none does.
 */
int divisibility_check_naive(unsigned long n) {
    unsigned long d = 100000;

    do {
        if (n % d == 0) {
            return 0;
        }
    } while (++d <= 150000);

    return 1;
}

/*
 * Returns 0 when n is divisible by some integer in [100000, 150000] and
 * 1 otherwise, choosing the cheaper of two searches: over the divisors
 * themselves, or over the possible quotients n/150000 .. n/100000.
 */
int divisibility_check_small(unsigned long n) {
    enum { DIV_LO = 100000, DIV_HI = 150000 };
    unsigned long i, min = n / DIV_HI, max = n / DIV_LO;

    /* 0 % d == 0 for every d. The quotient search below would miss this
       (it computes a divisor of 0), so answer directly and stay
       consistent with the naive and sieve variants. */
    if (n == 0) {
        return 0;
    }

    /* A quotient of 0 is meaningless; start the search at 1. */
    min += (min == 0);
    max += (max == 0);
    if (max - min > DIV_HI - DIV_LO) {
        /* More candidate quotients than divisors: test the divisors. */
        for (i = DIV_LO; i <= DIV_HI; i++) {
            if (n % i == 0) {
                return 0;
            }
        }
        return 1;
    } else {
        /* Fewer quotients than divisors: test each quotient and check
           that the matching divisor falls inside the range. */
        for (i = min; i <= max; i++) {
            if (n % i == 0) {
                unsigned long div = n / i;
                if (div >= DIV_LO && div <= DIV_HI)
                    return 0;
            }
        }
        return 1;
    }
}

/*
 * Sieve-cached divisibility test: returns 0 when n is divisible by some
 * integer in [100000, 150000] and 1 otherwise.
 *
 * Results for one aligned window of sizeof(window) consecutive integers
 * are kept in a static table; the table is rebuilt whenever n falls
 * outside the cached window.
 */
int divisibility_check_sieve(unsigned long n) {
    static unsigned char window[1 << 19]; /* 1/2 megabyte */
    /* window_lo > window_hi initially, so the first call always builds. */
    static unsigned long window_lo = 1, window_hi;

    if (n >= window_lo && n <= window_hi) {
        return window[n - window_lo];
    }

    /* Align the window start down to a multiple of the table size. */
    window_lo = n & ~(sizeof(window) - 1);
    window_hi = window_lo + sizeof(window) - 1;
    memset(window, 1, sizeof window);

    unsigned long d = 100000;
    while (d <= 150000) {
        /* Offset of the first multiple of d at or after window_lo. */
        unsigned long off = window_lo % d;
        if (off != 0) {
            off = d - off;
        }
        while (off < sizeof window) {
            window[off] = 0;
            off += d;
        }
        d++;
    }
    return window[n - window_lo];
}

/*
 * Benchmark driver. Up to two numeric arguments set the inclusive range
 * to test (default 1..500000); the words "naive", "small" and "sieve"
 * select which implementations to time. The sieve variant is enabled by
 * default. Each selected variant prints its count of non-divisible
 * numbers and the CPU time it took.
 */
int main(int argc, char *argv[]) {
    unsigned long range[2] = { 1, 500000 };
    unsigned long n, count = 0, lmin, lmax;
    int pos = 0, naive = 0, small = 0, sieve = 1;
    clock_t t;
    char *p;

    for (int i = 1; i < argc; i++) {
        const char *arg = argv[i];

        /* An argument that parses completely as a number fills the next
           free range slot; anything else must be a variant name. */
        n = strtoul(arg, &p, 0);
        if (*p == '\0' && pos < 2) {
            range[pos++] = n;
            continue;
        }
        if (strcmp(arg, "naive") == 0) {
            naive = 1;
        } else if (strcmp(arg, "small") == 0) {
            small = 1;
        } else if (strcmp(arg, "sieve") == 0) {
            sieve = 1;
        } else {
            printf("invalid argument: %s\n", arg);
        }
    }

    /* lmax is one past the last value; the loops compare with != so the
       end value may wrap around to 0. */
    lmin = range[0];
    lmax = range[1] + 1;

    if (naive) {
        t = clock();
        count = 0;
        for (n = lmin; n != lmax; ++n) {
            count += divisibility_check_naive(n);
        }
        t = clock() - t;
        printf("naive: [%lu..%lu] -> %lu non-divisible numbers, %10.2fms\n",
               lmin, lmax - 1, count, t * 1000.0 / CLOCKS_PER_SEC);
    }

    if (small) {
        t = clock();
        count = 0;
        for (n = lmin; n != lmax; ++n) {
            count += divisibility_check_small(n);
        }
        t = clock() - t;
        printf("small: [%lu..%lu] -> %lu non-divisible numbers, %10.2fms\n",
               lmin, lmax - 1, count, t * 1000.0 / CLOCKS_PER_SEC);
    }

    if (sieve) {
        t = clock();
        count = 0;
        for (n = lmin; n != lmax; ++n) {
            count += divisibility_check_sieve(n);
        }
        t = clock() - t;
        printf("sieve: [%lu..%lu] -> %lu non-divisible numbers, %10.2fms\n",
               lmin, lmax - 1, count, t * 1000.0 / CLOCKS_PER_SEC);
    }

    return 0;
}

以下是一些运行时间：

naive: [1..500000] -> 329164 non-divisible numbers,  158174.52ms
small: [1..500000] -> 329164 non-divisible numbers,      12.62ms
sieve: [1..500000] -> 329164 non-divisible numbers,       1.35ms
sieve: [0..4294967295] -> 3279784841 non-divisible numbers,    8787.23ms
sieve: [10000000000000000000..10000000001000000000] -> 765978176 non-divisible numbers,    2205.36ms

寻找除数的有效方法

5 个答案: