Question

我今天对D语言很好奇，所以我看了一下网站，并在网站上看到了以下wc实现：

import std.stdio;
import std.stream;

int main (string[] args) // wc-style tool: prints line/word/byte counts per file, then a sorted word-frequency table
{
    int w_total; // words summed over all files
    int l_total; // lines summed over all files
    ulong c_total; // bytes summed over all files
    int[string] dictionary; // word -> number of occurrences, shared across all files

    writefln("   lines   words   bytes file");
    foreach (arg; args[1 .. args.length]) // args[0] is skipped; every other argument is used as a file path
    {
        int w_cnt, l_cnt; // per-file word and line counts
        bool inword; // true while the scanner is inside a word
        // NOTE(review): std.file is used by fully qualified name but no `import std.file;` is visible in this file - confirm it resolves.
        auto c_cnt = std.file.getSize(arg);
        if (c_cnt < 10_000_000) // smaller files: read everything at once and take words as slices of the data
        {
            size_t wstart; // index in input where the current word started
            auto input = cast(string)std.file.read(arg);

            foreach (j, c; input)
            {
                if (c == '\n')
                ++l_cnt;
                if (c >= '0' && c <= '9') // a digit neither starts nor ends a word, so nothing to do
                {
                }
                else if (c >= 'a' && c <= 'z' ||
                    c >= 'A' && c <= 'Z') // an ASCII letter starts a word if we are not already in one
                {
                    if (!inword)
                    {
                        wstart = j;
                        inword = true;
                        ++w_cnt; // the word is counted when it begins
                    }
                }
                else if (inword) // any other character terminates the current word
                {   
                    auto word = input[wstart .. j]; // slice of input; covers the letters and any digits inside the word

                    dictionary[word]++;
                    inword = false;
                }
            }
            if (inword) // input ended in the middle of a word: record that last word too
            {   
                auto w = input[wstart .. input.length];
                dictionary[w]++;
            }
        }
        else // larger files: read one char at a time and build each word in buf
        {
            auto f = new BufferedFile(arg); // NOTE(review): f is never explicitly closed in this block
            string buf; // the word currently being accumulated

            while (!f.eof())
            {   
                char c;

                f.read(c);
                if (c == '\n')
                ++l_cnt;
                if (c >= '0' && c <= '9')
                {
                    if (inword) // digits are kept only when they appear inside a word
                    buf ~= c;
                }
                else if (c >= 'a' && c <= 'z' ||
                    c >= 'A' && c <= 'Z')
                {
                    if (!inword)
                    {
                        buf.length = 0; // begin a new word in buf
                        buf ~= c;
                        inword = 1;
                        ++w_cnt;
                    }
                    else
                        buf ~= c;
                }
                else if (inword) // any other character terminates the current word
                {
                    if (++dictionary[buf] == 1) // count the word; a result of 1 means buf was just inserted as a new key
                        buf = null; // drop our reference to the key's storage so the next word starts from an empty array
                    inword = 0; // NOTE(review): whether omitting the reset above could overwrite the stored key depends on how this D version handles `length = 0` followed by `~=` - confirm
                }
            }
            if (inword) // input ended in the middle of a word: record that last word too
            {
                dictionary[buf]++;
            }
        }
        writefln("%8s%8s%8s %s\n", l_cnt, w_cnt, c_cnt, arg); // the "\n" in the format adds a blank line after each row
        l_total += l_cnt;
        w_total += w_cnt;
        c_total += c_cnt;
    }

    if (args.length > 2) // totals are printed only when more than one file was given
    {
        writefln("--------------------------------------\n%8s%8s%8s total",
        l_total, w_total, c_total);
    }

    writefln("--------------------------------------");

    foreach (word1; dictionary.keys.sort) // uses the built-in array .sort property; NOTE(review): confirm the target compiler still supports it
    {
        writefln("%3s %s", dictionary[word1], word1); // count followed by the word
    }
    return 0;
}

总之，在第86行，如果某个单词是第一次出现在字典中，代码就会把buf设置为null。

             if (++dictionary[buf] == 1)
                buf = null;
             inword = 0;
            }

这样做有什么好处？我把这一部分去掉后又测试了一遍，得到的结果是相同的。

Answer 1

这只是我的猜测。不过这段代码相当古老，所以原因可能与不可变性（immutable）有关。在旧版本的D中，类型系统里还没有immutable，因此string只是普通char[]的别名。

在关联数组中，如果你修改了某个键，就可能破坏这个数组，因为哈希值不再匹配——树中可能出现两个条目，而本应只有一个，还会引发其他难以发现的错误（因此在较新的D版本中，如果你尝试使用int[char[]]，编译器会报错，提示键必须是不可变的）。

将长度设为零（代码在开始一个新单词时的做法）可能会重用现有的缓冲区。我很确定现在不会这样，但当时也许会。这样就可能覆盖哈希表中已有的条目。将buf设置为null可以确保实际分配一个新的缓冲区。

结论：在这段代码编写的那个年代，如果没有这一行，程序可能会随机出错。

为什么在这个wc示例中字符串设置为null？

1 个答案: