Question

此问题基于以下代码：http://nlp.stanford.edu/projects/glove/

以下代码的表现如我所料：它回显来自stdin的用户输入。

stdin：标准输入流是应用程序的默认数据源。在大多数系统中，它通常默认指向键盘。

键入文本并按Enter键，该文本回显到控制台。正常，预期。

// _CRT_SECURE_NO_WARNINGS:
#pragma warning(disable : 4996)

#include <stdio.h>
#include <stdlib.h>

/*
 * Copy every character of `stream` to stdout until fgetc() reports EOF
 * (end-of-file or a read error).
 *
 * The result of fgetc() is tested directly instead of calling feof() before
 * the read: feof() only becomes true AFTER a read has already failed, so a
 * "while (!feof(f))" loop prints one spurious character (the EOF sentinel).
 */
static void echo_stream(FILE *stream)
{
    int ch; /* int, not char, so EOF stays distinguishable from any valid byte */

    while ((ch = fgetc(stream)) != EOF)
    {
        printf("%c", ch);
    }
}

/*
 * Echo the contents of a text file to stdout, then echo stdin to stdout.
 *
 * stdin may be the keyboard or a redirected file/pipe
 * (e.g. `program < corpus.txt`); the code is the same either way.
 *
 * Returns 0 in all cases, matching the original program.
 */
int main(void)
{
    /* Open the file stream. */
    FILE *fid = fopen("<Path to simple text file>/text.txt", "r");

    if (fid == NULL)
    {
        /* Report the failure and skip the file; fclose(NULL) is undefined. */
        printf("Stream Error: File was not opened!\n");
    }
    else
    {
        echo_stream(fid);
        fclose(fid);
    }

    /* Switch the data source to standard input and echo it as well. */
    fid = stdin;
    echo_stream(fid);

    return 0;
}

示例源代码： http://nlp.stanford.edu/projects/glove/ - 特别是来自 cooccur.c 代码文件的第301行

在此代码中：

fid = fopen(vocab_file,"r");
if(fid == NULL) {fprintf(stderr,"Unable to open vocab file %s.\n",vocab_file); return 1;}
while(fscanf(fid, format, str, &id) != EOF) hashinsert(vocab_hash, str, ++j); // Here id is not used: inserting vocab words into hash table with their frequency rank, j
fclose(fid);
vocab_size = j;
j = 0;
if(verbose > 1) fprintf(stderr, "loaded %lld words.\nBuilding lookup table...", vocab_size);

/* Build auxiliary lookup table used to index into bigram_table */
lookup = (long long *)calloc( vocab_size + 1, sizeof(long long) );
if (lookup == NULL) {
    fprintf(stderr, "Couldn't allocate memory!");
    return 1;
}
lookup[0] = 1;
for(a = 1; a <= vocab_size; a++) {
    if((lookup[a] = max_product / a) < vocab_size) lookup[a] += lookup[a-1];
    else lookup[a] = lookup[a-1] + vocab_size;
}
if(verbose > 1) fprintf(stderr, "table contains %lld elements.\n",lookup[a-1]);

/* Allocate memory for full array which will store all cooccurrence counts for words whose product of frequency ranks is less than max_product */
bigram_table = (real *)calloc( lookup[a-1] , sizeof(real) );
if (bigram_table == NULL) {
    fprintf(stderr, "Couldn't allocate memory!");
    return 1;
}

fid = stdin; // <<<--- STDIN Stream Redirect
sprintf(format,"%%%ds",MAX_STRING_LENGTH);
sprintf(filename,"%s_%04d.bin",file_head, fidcounter);
foverflow = fopen(filename,"w");
if(verbose > 1) fprintf(stderr,"Processing token: 0");

/* For each token in input stream, calculate a weighted cooccurrence sum within window_size */
while (1) {
    if(ind >= overflow_length - window_size) { // If overflow buffer is (almost) full, sort it and write it to temporary file
        qsort(cr, ind, sizeof(CREC), compare_crec);
        write_chunk(cr,ind,foverflow);
        fclose(foverflow);
        fidcounter++;
        sprintf(filename,"%s_%04d.bin",file_head,fidcounter);
        foverflow = fopen(filename,"w");
        ind = 0;
    }
    flag = get_word(str, fid); // <<<--- Reading from the Vocab, not STDIN
    if(feof(fid)) break;
    if(flag == 1) {j = 0; continue;} // Newline, reset line index (j)
    counter++;
    if((counter%100000) == 0) if(verbose > 1) fprintf(stderr,"\033[19G%lld",counter);
    htmp = hashsearch(vocab_hash, str); // <<<--- Using the str that was read in the function: 'get_word'
    if (htmp == NULL) continue; // Skip out-of-vocabulary words
    w2 = htmp->id; // Target word (frequency rank)
    for(k = j - 1; k >= ( (j > window_size) ? j - window_size : 0 ); k--) { // Iterate over all words to the left of target word, but not past beginning of line
        w1 = history[k % window_size]; // Context word (frequency rank)
        if ( w1 < max_product/w2 ) { // Product is small enough to store in a full array
            bigram_table[lookup[w1-1] + w2 - 2] += 1.0/((real)(j-k)); // Weight by inverse of distance between words
            if(symmetric > 0) bigram_table[lookup[w2-1] + w1 - 2] += 1.0/((real)(j-k)); // If symmetric context is used, exchange roles of w2 and w1 (ie look at right context too)
        }
        else { // Product is too big, data is likely to be sparse. Store these entries in a temporary buffer to be sorted, merged (accumulated), and written to file when it gets full.
            cr[ind].word1 = w1;
            cr[ind].word2 = w2;
            cr[ind].val = 1.0/((real)(j-k));
            ind++; // Keep track of how full temporary buffer is
            if(symmetric > 0) { // Symmetric context
                cr[ind].word1 = w2;
                cr[ind].word2 = w1;
                cr[ind].val = 1.0/((real)(j-k));
                ind++;
            }
        }
    }

我想知道，在将流更改为stdin之后，str是如何在flag = get_word(str, fid);这次调用中被赋值为一个单词，并在两行之后被使用的：htmp = hashsearch(vocab_hash, str);

此代码对大型语料库进行了数百万次迭代，用户不会坐在那里手动输入每个单词。

如果有人能够在fid = stdin;流更改之后解释这种情况，我将非常感激。

Answer 1

对某些人来说很简单，但对其他人则不然......

stdin是默认输入流，因此可以在代码中通过stdin变量直接访问。所以，当有人看到下面这样的代码时（我现在已经见过几次了）：

FILE *fid;
fid = stdin;

如果此流不是默认流，则说明stdin流已从"某处"被重定向。通常在大多数机器上，默认为键盘。

第301行：fid = fopen(vocab_file,"r"); vocab文件成为流数据源，由fopen函数返回。该文件被读取和处理。

第304行：流被关闭：fclose(fid);

第329行：fid = stdin; stdin被指定为fid的输入流。

从那里开始，没有流变化的迹象，但有对str的赋值：它来自其中一个文本文件，即由get_word方法从语料库中读取。程序的调用命令为：./cooccur -verbose 2 -symmetric 0 -window-size 10 -vocab-file vocab.txt -memory 8.0 -overflow-file tempoverflow < corpus.txt > cooccurrences.bin

命令行输入就是答案： -overflow-file tempoverflow < corpus.txt > cooccurrences.bin

stdin

使用：cplusplus.com


标准输入流

标准输入流是应用程序的默认数据源。在大多数系统中，它通常默认指向键盘。

stdin可以用作任何需要输入流（FILE *）作为参数之一的函数的参数，如fgets或fscanf。

虽然通常假设stdin的数据源是键盘，但即使在常规控制台系统中也可能并非如此，因为在大多数操作系统中，调用应用程序时通常可以重定向stdin。例如，许多系统（包括DOS / Windows和大多数UNIX shell）支持以下命令语法：

myapplication < example.txt

使用文件example.txt的内容（而不是控制台键盘）作为myapplication的主要数据来源。

还可以在程序内部使用freopen函数将stdin重定向到其他数据源。

如果已知stdin不引用交互设备，则该流是完全缓冲的。否则，流默认是行缓冲还是不缓冲取决于库的实现（请参阅setvbuf）。

所以答案就在这里：stdin流被命令行参数重定向：-overflow-file tempoverflow < corpus.txt

因此：corpus.txt是stdin流的重定向数据源！

另外值得注意的是，cooccurrences.bin是stdout流的重定向输出目标 - 通过第232行：fout = stdout; 并在第270行写入：fwrite(&old, sizeof(CREC), 1, fout);

有关以下内容的详细信息："Standard Input and Output Redirection"

注意：如果您想运行此代码，请记住将控制台应用程序设置为64位 - 否则它将无法分配所需的内存！

了解代码文件中的流重定向 - 从`stdin`读取

1 个答案: