Question

如何用scanf替换FILE和fopen来获取输入值，并将其传给这两个函数？

我想在Objective-c代码中使用此函数。有关详细信息，您可以在此处查看整个代码link

/*
 * Copy stream f to stdout, stemming every word on the way.
 *
 * A "word" is a maximal run of characters for which LETTER() is true.
 * Each word is lower-cased into the shared buffer s (grown through
 * increase_s() whenever it is full), passed to stem(), cut off just
 * after the index stem() returns, and printed.  Characters that are not
 * letters are echoed unchanged.
 */
static void stemfile(FILE * f)
{
    int ch;

    while ((ch = getc(f)) != EOF)
    {
        int len;

        if (!LETTER(ch))
        {
            putchar(ch);
            continue;
        }

        /* ch is the first letter of a word: gather the rest of it. */
        len = 0;
        do
        {
            if (len == i_max) increase_s();
            s[len++] = tolower(ch);   /* forces lower case */
            ch = getc(f);
        } while (LETTER(ch));

        /* Hand the character that ended the word back to the stream so
           the outer loop sees it again. */
        ungetc(ch, f);

        /* stem() returns the index of the last character to keep. */
        s[stem(s, 0, len - 1) + 1] = 0;
        printf("%s", s);
    }
}

/*
 * Stem every file named on the command line, writing the result to
 * stdout.
 *
 * Allocates the shared word buffer s (i_max + 1 bytes) before processing
 * and frees it afterwards.  Exits with status 1 if the buffer cannot be
 * allocated or if a file cannot be opened; returns 0 otherwise.
 */
int main(int argc, char * argv[])
{
    int i;

    s = malloc(i_max + 1);
    if (s == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }

    for (i = 1; i < argc; i++)
    {
        FILE * f = fopen(argv[i], "r");
        if (f == NULL)
        {
            fprintf(stderr, "File %s not found\n", argv[i]);
            exit(1);
        }
        stemfile(f);
        /* Release the stream before moving on; otherwise one descriptor
           leaks per argument. */
        fclose(f);
    }

    free(s);
    return 0;
}

Answer 1

scanf()函数不能直接替代现有代码。现有代码（在我看来写得不是很好）将输入字符流拆分为字母（LETTER()宏将其定义为大写或小写字符）和非字母，并在对这些字母序列应用stem()函数之前将它们转换为小写。

另一方面，scanf()函数从输入流中提取基本类型（int，char，double等）和有明确分隔符的字符串。给定代码中的分隔符（即任何不是LETTER()的字符）对scanf()来说过于模糊（但对正则表达式来说则不然）。 scanf()需要在子字符串的每一端查找特定字符。此外，scanf()无法自动转换为小写。

假设您的输入仍然是文件，我认为最简单的解决方案可能是保持代码原样并直接使用它，尽管它写得有些绕。没有什么会妨碍它作为更大的Objective-C程序的一部分运行。毕竟，Objective-C仍然提供对C标准库的访问，至少在操作系统设置的限制范围内（如果你在Apple平台上，iOS的限制远比MacOS严格）。

这里的一般问题是标记化：将未分类符号（如字符）的输入序列分解为分类标记（如单词和空格）的序列。解决该问题的常用方法是使用finite state machine/automaton（FSA / FSM）将解析逻辑应用于输入序列，并在遇到令牌时提取令牌。 FSA可能有点难以设置，但它非常强大且通用。

Answer 2

我仍然不确定您为什么要在main()中使用scanf()。这可能意味着更改stemfile()的接口（包括名称，因为它不再处理文件），使其以字符串作为输入。而scanf()会让事情变得困难;它会读取由空白分隔的字符串，这可能正是其吸引力的一部分，但读到的“单词”中会包含其中夹带的任何标点符号。

正如兰德尔所说，现有功能中的代码有点晦涩;我认为可以更简单地写成如下：

#include <stdio.h>
#include <ctype.h>
#define LETTER(x) isalpha(x)

extern int stem(char *s, int lo, int hi);

/*
 * Copy stream f to stdout, replacing each run of letters with its stem.
 *
 * A run of LETTER() characters is collected into a local buffer, passed
 * to stem(), cut off just after the index stem() returns, and printed.
 * Every other character is echoed unchanged.
 *
 * The buffer holds at most sizeof s - 1 letters.  A longer run is split:
 * the first chunk is stemmed and printed, and the remaining letters are
 * picked up by the outer loop as a new word.  Nothing is ever written
 * past the end of the buffer.
 */
static void stemfile(FILE * f)
{
    int ch;
    while ((ch = getc(f)) != EOF)
    {
        if (LETTER(ch))
        {
            char s[1024];
            int i = 0;
            s[i++] = (char)ch;
            for (;;)
            {
                /* Buffer full: stop before reading, so no look-ahead
                   character needs to be pushed back. */
                if (i == (int)sizeof s - 1)
                    break;
                ch = getc(f);
                if (ch == EOF)
                    break;
                if (!LETTER(ch))
                {
                    /* Not part of the word; let the outer loop echo it. */
                    ungetc(ch, f);
                    break;
                }
                s[i++] = (char)ch;
            }
            s[i] = '\0';
            s[stem(s, 0, i-1)+1] = 0;
            /* the previous line calls the stemmer and uses its result to
               zero-terminate the string in s */
            printf("%s", s);
        }
        else
            putchar(ch);
    }
}

通过将s变成一个简单的局部变量（它和i_max似乎都是全局变量），并删除i_max和increase_s()函数，我略微简化了一些事情。这些在很大程度上与函数本身的操作无关。

如果您希望此处理一个（以null结尾的）字符串，那么：

static void stemstring(const char *src)
{
    char ch;
    while ((ch = *src++) != '\0')
    {
        if (LETTER(ch))
        {
            int i = 0;
            char s[1024];
            s[i++] = ch;
            while ((ch = *src++) != '\0' && LETTER(ch))
                s[i++] = ch;
            if (ch != '\0')
                src--;
            s[i-1] = '\0';
            s[stem(s,0,i-1)+1] = 0;
            /* the previous line calls the stemmer and uses its result to
               zero-terminate the string in s */
            printf("%s",s);
        }
        else
            putchar(ch);
    }
}

这系统地将getc(f)更改为*src++，将EOF更改为\0，将ungetc()更改为src--。它还（安全地）将ch的类型从int（I / O必需）更改为char。如果你担心缓冲区溢出，你必须在函数中多下一点功夫，但实际上很少有单词会达到1024字节（你可以像使用1024一样容易地使用4096，真实数据溢出缓冲区的机会也相应更小 - 小到可以忽略）。您需要判断这对您来说是否是“真正的”风险。

主程序可以很简单：

int main(void)
{
    char string[1024];
    while (scanf("%1023s", string) == 1)
        stemstring(string);
    return(0);
}

显然，由于格式中的'1023'，这将永远不会溢出内部缓冲区。（注意：此答案的第一个版本中写的是"%.1023s"，现已删除其中的.；scanf()与printf()不同！）。

挑战：这有用吗？

是的 - 下面的代码（添加了虚拟的stem()函数并略微修改了打印）对我来说效果很好：

示例对话

#include <stdio.h>
#include <ctype.h>
#include <assert.h>

#define LETTER(x) isalpha(x)
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

/*
 * Stand-in for the real stemmer, used only to exercise stemstring().
 *
 * Returns end - begin - 3, but never less than 3.  The string itself is
 * not inspected; s is only asserted to be non-null.
 */
static int stem(const char *s, int begin, int end)
{
    int shortened;

    assert(s != 0);

    shortened = end - begin - 3;
    if (shortened > 3)
        return shortened;
    return 3;
}

/*
 * First-attempt string version of the stemmer driver.
 *
 * Walks the NUL-terminated string src.  Each run of LETTER() characters
 * is copied into a local buffer, passed to stem(), cut off just after
 * the index stem() returns, and printed wrapped in << >> on its own
 * line.  Every other character is echoed with putchar().  A newline is
 * written once the whole string has been processed.
 *
 * NOTE(review): this listing still contains the two off-by-one errors
 * that the "next attempt" text later in this document describes and
 * corrects.  They are flagged inline and deliberately left unchanged
 * here so that the listing matches the sample dialogue shown after it.
 */
static void stemstring(const char *src)
{
    char ch;
    while ((ch = *src++) != '\0')
    {
        if (LETTER(ch))
        {
            int i = 0;
            char s[1024];   /* word buffer; i is not bounds-checked below */
            s[i++] = ch;
            while ((ch = *src++) != '\0' && LETTER(ch))
                s[i++] = ch;
            /* Step back so the outer loop sees the character that ended
               the word.
               NOTE(review): when the word is ended by the terminator, src
               is NOT stepped back, so the outer loop's next read is one
               place past the '\0'. */
            if (ch != '\0')
                src--;
            /* NOTE(review): off by one -- this overwrites the last letter
               collected; the terminator belongs at s[i]. */
            s[i-1] = '\0';
            s[stem(s,0,i-1)+1] = 0;
            /* the previous line calls the stemmer and uses its result to
               zero-terminate the string in s */
            printf("<<%s>>\n",s);
        }
        else
            putchar(ch);
    }
    putchar('\n');
}

/*
 * Read whitespace-separated words from standard input and hand each one
 * to stemstring().  The %1023s width keeps scanf() from writing past the
 * 1024-byte buffer.  Stops when scanf() no longer converts a word.
 */
int main(void)
{
    char word[1024];

    for (;;)
    {
        if (scanf("%1023s", word) != 1)
            break;
        stemstring(word);
    }
    return 0;
}

H: assda23
C: <<assd>>
C: 23
H: 3423///asdrrrf12312
C: 3423///<<asdr>>
C: 12312
H: 12//as//12
C: 12//<<a>>
C: //12

标记为H:的行是人为输入（H:不是输入的一部分）;标记为C:的行是计算机输出。

下一次尝试

只盯着超长单词（1023个字符以上）的麻烦在于你会忽略简单的情况。使用scanf()读取数据，您将自动获得其中没有空格的单个“单词”作为输入。这是stemstring()的调试后版本，其中保留了调试打印代码。问题出在两个差一（off-by-one）错误上。一个是在赋值s[i-1] = '\0';中，其中的-1是不需要的。另一个是对一串字母结尾的处理;这里的while ((ch = *src++) != '\0')使src多前进了一个位置，这导致在长单词之后输入短单词时（长度差为2或更多）出现有趣的现象。下面有一份相当详细的跟踪输出，来自我设计的测试用例，其中用到了'great'和'book'等单词 - 您（正确地）诊断出它们被错误处理了。这里的stem()函数只是打印其输入和输出，并返回字符串的全长（因此没有进行词干提取）。

显示调试负载输出（第一行是输入的输入;其余的是程序的输出）：

#include <stdio.h>
#include <ctype.h>
#include <assert.h>

#define LETTER(x) isalpha(x)
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

/*
 * Tracing stand-in for the real stemmer.
 *
 * Prints the begin/end indices, the first (end - begin + 1) characters
 * of s, and the value about to be returned, then returns that length
 * unchanged -- so no stemming takes place.  s is asserted to be non-null.
 */
static int stem(const char *s, int begin, int end)
{
    const int span = end - begin + 1;

    assert(s != 0);
    printf("ST (%d,%d) <<%*.*s>> RV %d\n", begin, end, span, span, s, span);
    return span;
}

/*
 * Debugging version of the string stemmer driver.
 *
 * Walks the NUL-terminated string src and prints a trace line for every
 * step:
 *   -->> / <<--  entry to and exit from the function
 *   LP           the text still to be processed at the top of the loop
 *   RD           the run of letters just collected, with its length
 *   RS           the buffer after stem()'s result has been applied
 *   NL           a single character that is not a letter
 *
 * Each run of LETTER() characters is copied into a local buffer, passed
 * to stem(), and cut off just after the index stem() returns.
 *
 * The buffer holds at most sizeof s - 1 letters; a longer run is split
 * and the remainder is handled as a new word.  Characters are converted
 * to unsigned char before being classified, because the <ctype.h>
 * functions are undefined for negative char values.
 */
static void stemstring(const char *src)
{
    char ch;
    printf("-->> stemstring: <<%s>>\n", src);
    while ((ch = *src++) != '\0')
    {
        /* src already points past ch, so ch followed by src is the
           whole unprocessed tail. */
        printf("LP <<%c%s>>\n", ch, src);
        if (LETTER((unsigned char)ch))
        {
            int i = 0;
            int last;
            char s[1024];
            s[i++] = ch;
            /* Peek at *src and advance only past letters actually
               stored, so src is left on the character that ended the
               word (or on the terminator). */
            while (i < (int)sizeof s - 1 &&
                   (ch = *src) != '\0' && LETTER((unsigned char)ch))
            {
                s[i++] = ch;
                src++;
            }
            s[i] = '\0';
            printf("RD (%d) <<%s>>\n", i, s);
            /* Call the stemmer and use its result to zero-terminate the
               string in s.  The result is clamped to the last letter:
               the tracing stem() returns the length rather than the
               final index, which would otherwise put the write one
               element past the terminator (and past the buffer for a
               maximal-length word). */
            last = stem(s, 0, i-1);
            if (last > i - 1)
                last = i - 1;
            s[last+1] = '\0';
            printf("RS <<%s>>\n", s);
        }
        else
            printf("NL <<%c>>\n", ch);
    }
    printf("<<-- stemstring\n");
}

/*
 * Driver for the debugging build: feeds every whitespace-separated word
 * read from standard input to stemstring().  The %1023s width matches
 * the 1024-byte buffer, so scanf() cannot overrun it.
 */
int main(void)
{
    char word[1024];
    int converted = scanf("%1023s", word);

    while (converted == 1)
    {
        stemstring(word);
        converted = scanf("%1023s", word);
    }
    return 0;
}

what a great book this is! What.hast.thou.done?
-->> stemstring: <<what>>
LP <<what>>
RD (4) <<what>>
ST (0,3) <<what>> RV 4
RS <<what>>
<<-- stemstring
-->> stemstring: <<a>>
LP <<a>>
RD (1) <<a>>
ST (0,0) <<a>> RV 1
RS <<a>>
<<-- stemstring
-->> stemstring: <<great>>
LP <<great>>
RD (5) <<great>>
ST (0,4) <<great>> RV 5
RS <<great>>
<<-- stemstring
-->> stemstring: <<book>>
LP <<book>>
RD (4) <<book>>
ST (0,3) <<book>> RV 4
RS <<book>>
<<-- stemstring
-->> stemstring: <<this>>
LP <<this>>
RD (4) <<this>>
ST (0,3) <<this>> RV 4
RS <<this>>
<<-- stemstring
-->> stemstring: <<is!>>
LP <<is!>>
RD (2) <<is>>
ST (0,1) <<is>> RV 2
RS <<is>>
LP <<!>>
NL <<!>>
<<-- stemstring
-->> stemstring: <<What.hast.thou.done?>>
LP <<What.hast.thou.done?>>
RD (4) <<What>>
ST (0,3) <<What>> RV 4
RS <<What>>
LP <<.hast.thou.done?>>
NL <<.>>
LP <<hast.thou.done?>>
RD (4) <<hast>>
ST (0,3) <<hast>> RV 4
RS <<hast>>
LP <<.thou.done?>>
NL <<.>>
LP <<thou.done?>>
RD (4) <<thou>>
ST (0,3) <<thou>> RV 4
RS <<thou>>
LP <<.done?>>
NL <<.>>
LP <<done?>>
RD (4) <<done>>
ST (0,3) <<done>> RV 4
RS <<done>>
LP <<?>>
NL <<?>>
<<-- stemstring

这里展示的技术 - 在程序的关键点打印诊断信息 - 是调试此类程序的一种方法。另一种方法是使用源代码调试器（gdb或其等效工具）逐步执行代码。我可能更经常使用print语句，不过我是个老派的人，觉得IDE太难用（因为它们的行为不像我习惯的命令行）。

诚然，它已经不再是你的代码，但我认为你本应能够自己完成大部分调试。我很感激你报告了我代码中的问题。但是，您还需要学习如何诊断其他人代码中的问题;如何为它添加诊断输出;如何描述和定位问题。然后，您就可以精确地报告问题 - “你把单词结束的条件弄错了，并且......”。

如何更改此程序的输入？（C语言）

2 个答案:

示例对话

下一次尝试

如何更改此程序的输入？ （C语言）

2 个答案:

示例对话

下一次尝试

如何更改此程序的输入？（C语言）