Question

我们知道，Duff's device（Duff 设备）把带贯穿（fallthrough）的 switch 语句与循环这两种结构交织在一起使用，例如：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/**
 * Allocate a zero-filled matrix stored as an array of `row` row pointers,
 * each pointing to `column` ints.
 *
 * Returns NULL if either dimension is negative or if an allocation fails;
 * on failure every row allocated so far is released before returning.
 * The caller owns the result: free each row, then the row-pointer array.
 */
int **matrix_allocate(int row, int column) {
    /* A negative count would turn into a huge size_t below. */
    if (row < 0 || column < 0) {
        return NULL;
    }

    int **matrix = malloc((size_t)row * sizeof(*matrix));
    if (matrix == NULL) {
        return NULL;
    }

    for (int i = 0; i < row; i++) {
        matrix[i] = calloc((size_t)column, sizeof(*matrix[i]));
        /* calloc may legitimately return NULL for a zero-sized request,
         * so only a NULL with column > 0 counts as a failure. */
        if (matrix[i] == NULL && column > 0) {
            while (i-- > 0) {
                free(matrix[i]);
            }
            free(matrix);
            return NULL;
        }
    }
    return matrix;
}

/**
 * Release a matrix made of `row` separately allocated rows plus the array
 * of row pointers that holds them.
 *
 * Passing NULL is a no-op, mirroring free(NULL).
 */
void matrix_free(int **matrix, int row) {
    if (matrix == NULL) {
        return;
    }
    for (int i = 0; i < row; i++) {
        free(matrix[i]);
    }
    free(matrix);
}

/**
 * Print the square `row` x `row` matrix `a` to stdout, labelled with `str`.
 *
 * The first line starts with "<str> = "; the following lines are indented
 * by the width of that label so the rows line up.  Every element is
 * right-aligned in a field wide enough for the longest of the minimum and
 * maximum values (the minimum matters because of a possible minus sign).
 *
 * An empty matrix (NULL pointer or row <= 0) prints nothing.
 */
void matrix_print(const char *str, int **a, int row) {
    /* Guard first: the scan below seeds min/max from a[0][0]. */
    if (a == NULL || row <= 0) {
        fflush(stdout);
        return;
    }

    int min, max, w = 0, n1, n2, nw;
    min = max = a[0][0];
    for (int i = 0; i < row; i++) {
        for (int j = 0; j < row; j++) {
            if (min > a[i][j])
                min = a[i][j];
            if (max < a[i][j])
                max = a[i][j];
        }
    }
    /* snprintf with a NULL buffer only reports the formatted length. */
    n1 = snprintf(NULL, 0, "%d", min);
    n2 = snprintf(NULL, 0, "%d", max);
    nw = n1 > n2 ? n1 : n2;

    for (int i = 0; i < row; i++) {
        if (i == 0)
            w = printf("%s = ", str);   /* remember the label width */
        else
            printf("%*s", w, "");       /* pad by the label width */

        for (int j = 0; j < row; j++) {
            printf(" %*d", nw, a[i][j]);
        }
        printf("\n");
    }
    fflush(stdout);
}

/**
 * Element-wise sum of two square `row` x `row` matrices, returned in a
 * newly allocated matrix.
 *
 * `deallocate` is a bit mask applied after the sum is computed:
 * value 1 frees `a`, value 2 frees `b`, 1+2 frees both.
 */
int **matrix_add(int **a, int **b, int row, int deallocate) {
    int **sum = matrix_allocate(row, row);

    int r = 0;
    while (r < row) {
        const int *lhs = a[r];
        const int *rhs = b[r];
        int *out = sum[r];
        for (int col = 0; col < row; col++) {
            out[col] = lhs[col] + rhs[col];
        }
        r++;
    }

    if ((deallocate & 1) != 0) {
        matrix_free(a, row);
    }
    if ((deallocate & 2) != 0) {
        matrix_free(b, row);
    }

    return sum;
}

/**
 * Multiply two square `row` x `row` matrices by recursive quadrant
 * decomposition and return the product in a newly allocated matrix.
 *
 * `row` is expected to be a power of two: each level halves the size with
 * integer division, so an odd size greater than 1 leaves the last row and
 * column out of the computation.  For a size below 1 no multiplication is
 * attempted and the freshly allocated result is returned as is.
 *
 * `deallocate` is a bit mask applied once the product is computed:
 * value 1 frees `A`, value 2 frees `B`, 1+2 frees both.
 */
int **matrix_multiply(int **A, int **B, int row, int deallocate) {
    int **C = matrix_allocate(row, row);
    if (row == 1) {
        C[0][0] = A[0][0] * B[0][0];
    } else if (row > 1) {
        /* row > 1 only: with row == 0, row2 would also be 0 and the calls
         * below would recurse on size 0 without ever terminating. */
        int row2 = row / 2;
        int **a11 = matrix_allocate(row2, row2);
        int **a12 = matrix_allocate(row2, row2);
        int **a21 = matrix_allocate(row2, row2);
        int **a22 = matrix_allocate(row2, row2);
        int **b11 = matrix_allocate(row2, row2);
        int **b12 = matrix_allocate(row2, row2);
        int **b21 = matrix_allocate(row2, row2);
        int **b22 = matrix_allocate(row2, row2);

        /* Copy the four quadrants of each operand into their own matrices. */
        for (int i = 0; i < row2; i++) {
            for (int j = 0; j < row2; j++) {
                a11[i][j] = A[i][j];
                a12[i][j] = A[i][j + row2];
                a21[i][j] = A[i + row2][j];
                a22[i][j] = A[i + row2][j + row2];
                b11[i][j] = B[i][j];
                b12[i][j] = B[i][j + row2];
                b21[i][j] = B[i + row2][j];
                b22[i][j] = B[i + row2][j + row2];
            }
        }

        /*
         * Each quadrant is used by exactly two of the products below, and
         * the deallocate flag of each product frees a quadrant at its
         * second (last) use, so the order of these four statements matters:
         *   c11: frees nothing
         *   c12: frees a11, a12         (flag 1 = first operand)
         *   c21: frees b11, b21         (flag 2 = second operand)
         *   c22: frees a21, a22, b12, b22
         * matrix_add always frees both intermediate products (1+2).
         */
        int **c11 = matrix_add(matrix_multiply(a11, b11, row2, 0),
                               matrix_multiply(a12, b21, row2, 0), row2, 1+2);
        int **c12 = matrix_add(matrix_multiply(a11, b12, row2, 1),
                               matrix_multiply(a12, b22, row2, 1), row2, 1+2);
        int **c21 = matrix_add(matrix_multiply(a21, b11, row2, 2),
                               matrix_multiply(a22, b21, row2, 2), row2, 1+2);
        int **c22 = matrix_add(matrix_multiply(a21, b12, row2, 1+2),
                               matrix_multiply(a22, b22, row2, 1+2), row2, 1+2);

        /* Assemble the result from the four quadrant products. */
        for (int i = 0; i < row2; i++) {
            for (int j = 0; j < row2; j++) {
                C[i][j] = c11[i][j];
                C[i][j + row2] = c12[i][j];
                C[i + row2][j] = c21[i][j];
                C[i + row2][j + row2] = c22[i][j];
            }
        }
        matrix_free(c11, row2);
        matrix_free(c12, row2);
        matrix_free(c21, row2);
        matrix_free(c22, row2);
    }
    if (deallocate & 1) matrix_free(A, row);
    if (deallocate & 2) matrix_free(B, row);

    return C;
}

/**
 * Multiply two square `row` x `row` matrices with the classic triple loop
 * and return the product in a newly allocated matrix.
 *
 * `deallocate` is a bit mask applied after the product is computed:
 * value 1 frees `A`, value 2 frees `B`, 1+2 frees both.
 */
int **matrix_multiply_direct(int **A, int **B, int row, int deallocate) {
    int **product = matrix_allocate(row, row);

    for (int r = 0; r < row; r++) {
        const int *lhs = A[r];
        int *out = product[r];
        for (int col = 0; col < row; col++) {
            /* Dot product of row r of A with column col of B. */
            int dot = 0;
            int k = 0;
            while (k < row) {
                dot += lhs[k] * B[k][col];
                k++;
            }
            out[col] = dot;
        }
    }

    if ((deallocate & 1) != 0) {
        matrix_free(A, row);
    }
    if ((deallocate & 2) != 0) {
        matrix_free(B, row);
    }

    return product;
}

/**
 * Build two n x n matrices with A[i][j] = B[i][j] = i + j, print them, then
 * multiply them and print each product together with the clock() ticks the
 * multiplication took.
 *
 * n comes from the first command-line argument and defaults to 8.  The
 * recursive multiplication runs only when n is a power of two; the direct
 * multiplication always runs and releases A and B.
 *
 * Returns 0 on success, EXIT_FAILURE if n is not a positive integer.
 */
int main(int argc, char **argv) {
    int n = argc < 2 ? 8 : atoi(argv[1]);
    /* Reject sizes the multiplication routines cannot handle; atoi also
     * yields 0 for non-numeric input. */
    if (n < 1) {
        fprintf(stderr, "matrix size must be a positive integer\n");
        return EXIT_FAILURE;
    }

    int **A = matrix_allocate(n, n);
    int **B = matrix_allocate(n, n);

    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            A[i][j] = i + j;
            B[i][j] = i + j;
        }
    }

    matrix_print("A", A, n);
    matrix_print("B", B, n);

    if ((n & (n - 1)) == 0) {
        /* recursive method can be applied only to powers of 2 */
        clock_t start = clock();
        int **C = matrix_multiply(A, B, n, 0);
        clock_t elapsed = clock() - start;
        matrix_print("C = A * B", C, n);
        /* clock_t has no printf conversion of its own; convert explicitly. */
        printf("%ld ticks\n", (long)elapsed);
        matrix_free(C, n);
    }

    clock_t start = clock();
    /* 1+2: the direct multiplication frees both A and B when done. */
    int **D = matrix_multiply_direct(A, B, n, 1+2);
    clock_t elapsed = clock() - start;

    matrix_print("D = A * B", D, n);
    printf("%ld ticks\n", (long)elapsed);
    matrix_free(D, n);

    return 0;
}

现在，在 Swift 2.1 中，正如我们在 Swift 文档中读到的那样，switch-case 控制流并不会隐式贯穿（fallthrough）：

没有隐式贯穿（No Implicit Fallthrough）

与 C 和 Objective-C 中的 switch 语句不同，Swift 中的 switch 语句默认不会从每个 case 的末尾贯穿进入下一个 case。相反，一旦第一个匹配的 case 执行完毕，整个 switch 语句就结束执行，无需显式的 break 语句。这使得 switch 语句比 C 语言中的更安全、更易于使用，并且避免了错误地执行多个 case。

不过，Swift 中有一个 fallthrough 子句，可以显式地产生贯穿效果：

贯穿（Fallthrough）

Swift 中的 switch 语句不会从每个 case 的末尾贯穿进入下一个 case。相反，第一个匹配的 case 执行完毕后，整个 switch 语句就立即结束执行。相比之下，C 要求你在每个 case 的末尾插入显式的 break 语句以防止贯穿。避免默认贯穿意味着 Swift 的 switch 语句比 C 中的对应语句更加简洁、更可预测，因此能避免错误地执行多个 case。

非常像：

// Fetch every row of $result in one call; MYSQLI_ASSOC requests rows keyed
// by column name.
$row = mysqli_fetch_all($result,MYSQLI_ASSOC);
// NOTE(review): each $value here appears to be a whole row (an array), so
// echo would not print its column values -- confirm the intended output.
foreach($row as $value)
     echo $value;

考虑到正如维基百科所提醒的，这种设备源自下面这个问题：

/*
 * Copy `count` shorts from `from` to the location `to`, with the copy loop
 * unrolled eight times (old-style K&R definition; the return type and the
 * types of `count` and `n` are implicit int).
 *
 * `to` is never advanced: every store goes to the same address.
 * NOTE(review): presumably `to` is a device output register -- confirm.
 *
 * `count` must be greater than zero.  With count == 0, n is 0, the switch
 * enters at case 0 and the body performs eight stores before the
 * `--n > 0` test ends the loop.
 */
send(to, from, count)
register short *to, *from;
register count;
{
    /* Number of passes through the do-while, rounding up. */
    register n = (count + 7) / 8;
    /*
     * The case labels sit inside the do-while body, so the switch jumps
     * into the middle of the loop: the first pass performs only
     * count % 8 stores (eight when the remainder is 0), and every later
     * pass falls through all eight.
     */
    switch (count % 8) {
    case 0: do { *to = *from++;
    case 7:      *to = *from++;
    case 6:      *to = *from++;
    case 5:      *to = *from++;
    case 4:      *to = *from++;
    case 3:      *to = *from++;
    case 2:      *to = *from++;
    case 1:      *to = *from++;
            } while (--n > 0);
    }
}

那么，在 Swift 中，Duff 设备的确切实现会是什么样的？

这只是一个语言和编码层面的问题，并不打算用于真正的 Swift 应用程序。

Answer 1

Duff 设备的意义远不止于优化。如果您查看 IdentityHashMap，那里有关于使用此机制实现协程（co-routine）的讨论（Duff 先生的评论见第 8 段）。

如果您尝试在没有这种机制的情况下编写可移植的协程程序包，最终只能借助汇编语言或改写 jmpbuf 条目（两者都不可移植）。

像 Go 和 Swift 这样的现代语言，其内存模型比 C 语言的限制更多，因此这种机制（我想）会引起各种跟踪问题。即使是 clang、gcc 中类似 lambda 的块结构，最终也会与线程本地存储纠缠在一起；除非您只编写很简单的应用程序，否则可能引发各种破坏。

Answer 2

您可以在最高级别的代码中表达您的意图，并相信Swift编译器会为您优化它，而不是尝试自己优化它。 Swift是一种高级语言。你不用高级语言进行低级循环展开。

特别是在 Swift 中，您不必担心复制数组（Duff 设备最初的用途），因为 Swift 使用“写时复制”（copy on write）：在数组赋值时，它只是表面上复制了数组。这意味着只要您只读取它们，两个变量就会共用同一个数组；但一旦您修改其中一个变量，就会在后台创建副本。

例如，以下内容来自 https://developer.apple.com/documentation/swift/array 中“修改数组的副本”（Modifying Copies of Arrays）一节：

Each array has an independent value that includes the values of all
of its elements. For simple types such as integers and other structures,
this means that when you change a value in one array, the value of that
element does not change in any copies of the array. For example:

// Create an array, then assign it to a second variable.
var numbers = [1, 2, 3, 4, 5]
var numbersCopy = numbers
// Modify the first element through `numbers` only.
numbers[0] = 100
print(numbers)
// Prints "[100, 2, 3, 4, 5]"
// The second variable still holds the original values.
print(numbersCopy)
// Prints "[1, 2, 3, 4, 5]"

Swift 中的 Duff 设备

2 个答案: