Question

我正在从K＆amp; R书中学习C，我遇到了代码来计算数字。白色空格字符（空白，制表符，换行符）和所有其他字符的出现代码是这样的：

#include <stdio.h>
/* count digits, white space, others */
main()
{
    int c, i, nwhite, nother;
    int ndigit[10];  
    nwhite = nother = 0;
    for (i = 0; i < 10; ++i)
        ndigit[i] = 0;
    while ((c = getchar()) != EOF)
        if (c >= '0' && c <= '9')
            ++ndigit[c-'0'];
        else if (c == ' ' || c == '\n' || c == '\t')
            ++nwhite;
        else
            ++nother;
    printf("digits =");
    for (i = 0; i < 10; ++i)
        printf(" %d", ndigit[i]);
    printf(", white space = %d, other = %d\n",
    nwhite, nother);
}

我需要问两个问题..
第一个问题：

if (c >= '0' && c <= '9')
++ndigit[c-'0'];

我非常清楚＆＃39; 0＆＃39; 0和＆＃39; 9＆＃39;表示0和0的ASCII值。分别为9。但我似乎不明白为什么我们甚至需要使用ASCII值而不是整数本身。就像为什么我们不能简单地使用

if (c >= 0 && c <= 9)

查找c是否在0到9之间？

第二个问题：

++ndigit[c-'0']

以上陈述的作用是什么？为什么我们不在这里取c的ASCII值？
因为如果我们这样做，它应该写成['c'-'0']。

Answer 1

1。 C是一个字符，不是整数。因此，我们需要将它们与ASCII值进行比较。整数0和9对应Nul和Tab，而不是我们正在寻找的东西。

2。通过减去ASCII值，增加了对应于整数的索引。例如，如果我们的号码是'1'。然后'1' - '0' = 1所以一个索引增加，这是跟踪字符的便捷方式。我们不放['c' - '0']因为我们关心变量c而不关注字符'c'

此表显示字符的表示方式，它们与整数不同。主要内容是'1' != 1 http://www.asciitable.com/

Answer 2

使用当前的C标准，这将是本地化广泛输入的完美练习：

#include <stdlib.h>
#include <locale.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>
#include "wdigit.h"

int main(void)
{
    size_t  num_space = 0; /* Spaces, tabs, newlines */
    size_t  num_letter = 0;
    size_t  num_punct = 0; /* Punctuation */
    size_t  num_digit[10] = { 0, }; /* Digits - all initialized to zero */
    size_t  num_other = 0; /* Other printable characters */
    size_t  total = 0;
    wint_t  wc;
    int     digit;

    if (!setlocale(LC_ALL, "")) {
        fprintf(stderr, "Current locale is not supported by the C library.\n");
        return EXIT_FAILURE;
    }
    if (fwide(stdin, 1) < 1) {  
        fprintf(stderr, "The C library does not support wide input for this locale.\n");
        return EXIT_FAILURE;
    }

    while ((wc = fgetwc(stdin)) != WEOF) {
        total++;
        digit = wdigit(wc);
        if (digit >= 0 && digit <= 9)
            num_digit[digit]++;
        else
        if (iswspace(wc))
            num_space++;
        else
        if (iswpunct(wc))
            num_punct++;
        else
        if (iswalpha(wc))
            num_letter++;
        else
        if (iswprint(wc))
            num_other++;
        /* All nonprintable non-whitespace characters are ignored */
    }

    printf("Read %zu wide characters total.\n", total);
    printf("%15zu letters\n", num_letter);
    printf("%15zu zeros (equivalent to '0')\n", num_digit[0]);
    printf("%15zu ones (equivalent to '1')\n", num_digit[1]);
    printf("%15zu twos (equivalent to '2')\n", num_digit[2]);
    printf("%15zu threes (equivalent to '3')\n", num_digit[3]);
    printf("%15zu fours (equivalent to '4')\n", num_digit[4]);
    printf("%15zu fives (equivalent to '5')\n", num_digit[5]);
    printf("%15zu sixes (equivalent to '6')\n", num_digit[6]);
    printf("%15zu sevens (equivalent to '7')\n", num_digit[7]);
    printf("%15zu eights (equivalent to '8')\n", num_digit[8]);
    printf("%15zu nines (equivalent to '9')\n", num_digit[9]);
    printf("%15zu whitespaces (including newlines and tabs)\n", num_space);
    printf("%15zu punctuation characters\n", num_punct);
    printf("%15zu other printable characters\n", num_other);

    return EXIT_SUCCESS;
}

您还需要 wdigit.h ，一个头文件，如果给定的宽字符是十进制数字，则返回十进制数字值（0到9，包括0和9），否则返回-1。如果这是一个练习，将提供头文件。

以下"wdigit.h"应该支持Unicode中定义的所有十进制数字（这是我们对通用字符集最接近的标准）。我不认为它是受版权保护的（因为它基本上只是Unicode标准的列表），但如果是，我dedicate it to public domain：

#ifndef   WDIGIT_H
#define   WDIGIT_H
#include <wchar.h>

/* wdigits[] are wide strings that contain all known versions of a decimal digit.
   For example, wdigits[0] is a wide string that contains all known zero decimal digit
   wide characters.  You can use e.g.
       wcschr(wdigits[0], wc)
   to determine if wc is a zero decimal digit wide character.
*/
static const wchar_t *const wdigits[10] = {
    L"0"  L"\u0660\u06F0\u07C0\u0966\u09E6\u0A66\u0AE6\u0B66\u0BE6\u0C66"
          L"\u0CE6\u0D66\u0DE6\u0E50\u0ED0\u0F20\u1040\u1090\u17E0\u1810"
          L"\u1946\u19D0\u1A80\u1A90\u1B50\u1BB0\u1C40\u1C50\uA620\uA8D0"
          L"\uA900\uA9D0\uA9F0\uAA50\uABF0\uFF10"
          L"\U000104A0\U00011066\U000110F0\U00011136\U000111D0\U000112F0"
          L"\U00011450\U000114D0\U00011650\U000116C0\U00011730\U000118E0"
          L"\U00011C50\U00011D50\U00016A60\U00016B50\U0001D7CE\U0001D7D8"
          L"\U0001D7E2\U0001D7EC\U0001D7F6\U0001E950",
    L"1"  L"\u0661\u06F1\u07C1\u0967\u09E7\u0A67\u0AE7\u0B67\u0BE7\u0C67"
          L"\u0CE7\u0D67\u0DE7\u0E51\u0ED1\u0F21\u1041\u1091\u17E1\u1811"
          L"\u1947\u19D1\u1A81\u1A91\u1B51\u1BB1\u1C41\u1C51\uA621\uA8D1"
          L"\uA901\uA9D1\uA9F1\uAA51\uABF1\uFF11"
          L"\U000104A1\U00011067\U000110F1\U00011137\U000111D1\U000112F1"
          L"\U00011451\U000114D1\U00011651\U000116C1\U00011731\U000118E1"
          L"\U00011C51\U00011D51\U00016A61\U00016B51\U0001D7CF\U0001D7D9"
          L"\U0001D7E3\U0001D7ED\U0001D7F7\U0001E951",
    L"2"  L"\u0662\u06F2\u07C2\u0968\u09E8\u0A68\u0AE8\u0B68\u0BE8\u0C68"
          L"\u0CE8\u0D68\u0DE8\u0E52\u0ED2\u0F22\u1042\u1092\u17E2\u1812"
          L"\u1948\u19D2\u1A82\u1A92\u1B52\u1BB2\u1C42\u1C52\uA622\uA8D2"
          L"\uA902\uA9D2\uA9F2\uAA52\uABF2\uFF12"
          L"\U000104A2\U00011068\U000110F2\U00011138\U000111D2\U000112F2"
          L"\U00011452\U000114D2\U00011652\U000116C2\U00011732\U000118E2"
          L"\U00011C52\U00011D52\U00016A62\U00016B52\U0001D7D0\U0001D7DA"
          L"\U0001D7E4\U0001D7EE\U0001D7F8\U0001E952",
    L"3"  L"\u0663\u06F3\u07C3\u0969\u09E9\u0A69\u0AE9\u0B69\u0BE9\u0C69"
          L"\u0CE9\u0D69\u0DE9\u0E53\u0ED3\u0F23\u1043\u1093\u17E3\u1813"
          L"\u1949\u19D3\u1A83\u1A93\u1B53\u1BB3\u1C43\u1C53\uA623\uA8D3"
          L"\uA903\uA9D3\uA9F3\uAA53\uABF3\uFF13"
          L"\U000104A3\U00011069\U000110F3\U00011139\U000111D3\U000112F3"
          L"\U00011453\U000114D3\U00011653\U000116C3\U00011733\U000118E3"
          L"\U00011C53\U00011D53\U00016A63\U00016B53\U0001D7D1\U0001D7DB"
          L"\U0001D7E5\U0001D7EF\U0001D7F9\U0001E953",
    L"4"  L"\u0664\u06F4\u07C4\u096A\u09EA\u0A6A\u0AEA\u0B6A\u0BEA\u0C6A"
          L"\u0CEA\u0D6A\u0DEA\u0E54\u0ED4\u0F24\u1044\u1094\u17E4\u1814"
          L"\u194A\u19D4\u1A84\u1A94\u1B54\u1BB4\u1C44\u1C54\uA624\uA8D4"
          L"\uA904\uA9D4\uA9F4\uAA54\uABF4\uFF14"
          L"\U000104A4\U0001106A\U000110F4\U0001113A\U000111D4\U000112F4"
          L"\U00011454\U000114D4\U00011654\U000116C4\U00011734\U000118E4"
          L"\U00011C54\U00011D54\U00016A64\U00016B54\U0001D7D2\U0001D7DC"
          L"\U0001D7E6\U0001D7F0\U0001D7FA\U0001E954",
    L"5"  L"\u0665\u06F5\u07C5\u096B\u09EB\u0A6B\u0AEB\u0B6B\u0BEB\u0C6B"
          L"\u0CEB\u0D6B\u0DEB\u0E55\u0ED5\u0F25\u1045\u1095\u17E5\u1815"
          L"\u194B\u19D5\u1A85\u1A95\u1B55\u1BB5\u1C45\u1C55\uA625\uA8D5"
          L"\uA905\uA9D5\uA9F5\uAA55\uABF5\uFF15"
          L"\U000104A5\U0001106B\U000110F5\U0001113B\U000111D5\U000112F5"
          L"\U00011455\U000114D5\U00011655\U000116C5\U00011735\U000118E5"
          L"\U00011C55\U00011D55\U00016A65\U00016B55\U0001D7D3\U0001D7DD"
          L"\U0001D7E7\U0001D7F1\U0001D7FB\U0001E955",
    L"6"  L"\u0666\u06F6\u07C6\u096C\u09EC\u0A6C\u0AEC\u0B6C\u0BEC\u0C6C"
          L"\u0CEC\u0D6C\u0DEC\u0E56\u0ED6\u0F26\u1046\u1096\u17E6\u1816"
          L"\u194C\u19D6\u1A86\u1A96\u1B56\u1BB6\u1C46\u1C56\uA626\uA8D6"
          L"\uA906\uA9D6\uA9F6\uAA56\uABF6\uFF16"
          L"\U000104A6\U0001106C\U000110F6\U0001113C\U000111D6\U000112F6"
          L"\U00011456\U000114D6\U00011656\U000116C6\U00011736\U000118E6"
          L"\U00011C56\U00011D56\U00016A66\U00016B56\U0001D7D4\U0001D7DE"
          L"\U0001D7E8\U0001D7F2\U0001D7FC\U0001E956",
    L"7"  L"\u0667\u06F7\u07C7\u096D\u09ED\u0A6D\u0AED\u0B6D\u0BED\u0C6D"
          L"\u0CED\u0D6D\u0DED\u0E57\u0ED7\u0F27\u1047\u1097\u17E7\u1817"
          L"\u194D\u19D7\u1A87\u1A97\u1B57\u1BB7\u1C47\u1C57\uA627\uA8D7"
          L"\uA907\uA9D7\uA9F7\uAA57\uABF7\uFF17"
          L"\U000104A7\U0001106D\U000110F7\U0001113D\U000111D7\U000112F7"
          L"\U00011457\U000114D7\U00011657\U000116C7\U00011737\U000118E7"
          L"\U00011C57\U00011D57\U00016A67\U00016B57\U0001D7D5\U0001D7DF"
          L"\U0001D7E9\U0001D7F3\U0001D7FD\U0001E957",
    L"8"  L"\u0668\u06F8\u07C8\u096E\u09EE\u0A6E\u0AEE\u0B6E\u0BEE\u0C6E"
          L"\u0CEE\u0D6E\u0DEE\u0E58\u0ED8\u0F28\u1048\u1098\u17E8\u1818"
          L"\u194E\u19D8\u1A88\u1A98\u1B58\u1BB8\u1C48\u1C58\uA628\uA8D8"
          L"\uA908\uA9D8\uA9F8\uAA58\uABF8\uFF18"
          L"\U000104A8\U0001106E\U000110F8\U0001113E\U000111D8\U000112F8"
          L"\U00011458\U000114D8\U00011658\U000116C8\U00011738\U000118E8"
          L"\U00011C58\U00011D58\U00016A68\U00016B58\U0001D7D6\U0001D7E0"
          L"\U0001D7EA\U0001D7F4\U0001D7FE\U0001E958",
    L"9"  L"\u0669\u06F9\u07C9\u096F\u09EF\u0A6F\u0AEF\u0B6F\u0BEF\u0C6F"
          L"\u0CEF\u0D6F\u0DEF\u0E59\u0ED9\u0F29\u1049\u1099\u17E9\u1819"
          L"\u194F\u19D9\u1A89\u1A99\u1B59\u1BB9\u1C49\u1C59\uA629\uA8D9"
          L"\uA909\uA9D9\uA9F9\uAA59\uABF9\uFF19"
          L"\U000104A9\U0001106F\U000110F9\U0001113F\U000111D9\U000112F9"
          L"\U00011459\U000114D9\U00011659\U000116C9\U00011739\U000118E9"
          L"\U00011C59\U00011D59\U00016A69\U00016B59\U0001D7D7\U0001D7E1"
          L"\U0001D7EB\U0001D7F5\U0001D7FF\U0001E959",
};

static int wdigit(const wint_t  wc)
{
    int  i;
    for (i = 0; i < 10; i++)
        if (wcschr(wdigits[i], wc))
            return i;
    return -1;
}

#endif /* WDIGIT_H */

在Linux，* BSD或Mac机器上，您可以使用例如

编译上述内容

gcc -std=c99 -Wall -Wextra -pedantic example.c -o example

或

clang -std=c99 -Wall -Wextra -pedantic example.c -o example

并使用例如

进行测试

printf 'Bengali decimal digit five is ৫.\n' | ./example

输出

Read 33 wide characters total.
         25 letters
          0 zeros (equivalent to '0')
          0 ones (equivalent to '1')
          0 twos (equivalent to '2')
          0 threes (equivalent to '3')
          0 fours (equivalent to '4')
          1 fives (equivalent to '5')
          0 sixes (equivalent to '6')
          0 sevens (equivalent to '7')
          0 eights (equivalent to '8')
          0 nines (equivalent to '9')
          6 whitespaces (including newlines and tabs)
          1 punctuation characters
          0 other printable characters

以上代码完全符合ISO C99（以及ISO C标准的更高版本），并且应完全可移植。

但请注意，并非所有C库都完全支持C99;人们遇到问题的主要问题是Microsoft C.我自己不使用Windows，但如果你使用Windows，请尝试使用UTF-8代码页（chcp 65001）。这完全是一个Microsoft问题，因为它显然可以支持带有一些非标准Windows扩展的UTF-8输入。它们似乎不希望你编写可移植的代码。

Answer 3

我需要问两个问题..

第一个问题：我非常清楚＆＃39; 0＆＃39; 0和＆＃39; 9＆＃39;表示0和0的ASCII值。分别为9。但我似乎不明白为什么我们甚至需要使用ASCII值而不是整数本身。就像为什么我们不能简单地使用

if (c >= 0 && c <= 9)

让我们从基础开始。所有用户输入，文件输入等都以字符给出，因此当您需要比较刚读过的字符时，必须将其与另一个字符进行比较。在字符集中，数字0-9用ASCII值48-57表示，因此字符'0'由48表示，依此类推。

上面的测试测试c是否为数字，48-57之间的ASCII值，因此您必须在比较中使用字符本身，例如if ('0' <= c && c <= '9')您知道c是一个数字。这将我们带到：

第二个问题：

++ndigit[c-'0']

在你遇到的任何分类问题中，你通常会使用一个初始化为零的数组，其中至少有足够的元素（这里是字符）。您可以将它们拆分为十个元素的数组，以保存您的数字，大写，小写等...

您的ndigit数组开始初始化为全零，计划是在每次读取数字时增加数组中的正确元素。这是您使用ASCII值作为数字底部'0'（48）的地方。由于您的ndigit数组可能在每次遇到数字时被编入索引0-9，因此必须将其缩放（或映射）到正确的ndigit索引（以便映射'0'到0，'1'映射到1，依此类推。

通过您的测试，在这种情况下，我们确定c持有一个数字，因此要对该数字进行分类并将其映射到ndigit数组的正确元素，我们使用c - '0'。如果c中的数字为'3'（ASCII 51），则递增

++ndigit[c-'0'];

实际上是索引

++ndigit[51 - 48];

或

++ndigit[3];   /* since c was 3, we no increment ndigit[3] adding one more
                  occurrence of '3' to the data stored at ndigit[3] */

完成后，ndigit数组将保留输入中找到的0, 1, 2, 3, 4, ...位数的确切数字。需要一点时间来围绕这个方案，但总而言之，你只需要从零开始计数，以存储每个字符，数字，标点，看到的总数，以及为字符集大小的数组完成后将完全保留这些值，因为每个字符都已被分类，相应的++ndigits[]元素会增加，以便在您进行操作时捕获信息。

在一般意义上，这些被称为频率数组，因为它们用于存储集合中各个成员出现的频率。除了简单的字符分类之外，它们有很多很多应用程序。

查看所有答案，如果您仍然感到困惑，请告诉我，我非常乐意为您提供进一步的帮助。

Answer 4

getchar()返回字符代码和标记值（EOF）。所以，我们知道c在循环中包含一个字符代码。
c-'0'是字符代码的距离＆＃34;数字行＆＃34;从c（字符代码）的值到＆＃39; 0＆＃39;的代码。根据C标准，字符代码必须按连续顺序包含这些数字＆＃39; 0＆＃39;，＆＃39; 1＆＃39;，＆＃39; 2＆＃39;，＆＃39; 3＆＃39;，＆＃39; 4＆＃39;，＆＃39; 5＆＃39;，＆＃39; 6＆＃39;，＆＃39; 7＆＃39;，＆＃39; 8＆＃39;，＆＃39; 9＆＃39 ;.因此，表达式计算数字字符的整数值。

＆＃39; <letter>＆＃39;

4 个答案: