了解代码文件中的流重定向 - 从`stdin`读取

时间:2017-01-28 07:38:35

标签: c

此问题基于以下代码:http://nlp.stanford.edu/projects/glove/

以下代码,表现如我所料。它回显来自:stdin的用户输入。

  

stdin:标准输入流是应用程序的默认数据源。在大多数系统中,它通常默认指向键盘。

键入文本并按Enter键,该文本回显到控制台。正常,预期。

// _CRT_SECURE_NO_WARNINGS:
#pragma warning(disable : 4996)

#include <stdio.h>
#include <stdlib.h>

// Copy every character from `in` to standard output until end-of-file
// or a read error occurs.
//
// The value returned by fgetc() is compared against EOF *before* it is
// printed. Checking feof() ahead of the read does not work: the
// end-of-file indicator is only set after a read has already failed, so
// such a loop prints the EOF value once as a spurious extra character.
static void echo_stream(FILE *in)
{
    // int rather than char, so EOF stays distinguishable from any valid byte.
    int ch;

    while ((ch = fgetc(in)) != EOF)
    {
        putchar(ch);
    }
}

// Echo the contents of a text file to the console, then echo standard input.
//
// If the file cannot be opened, an error message is printed and the program
// continues with standard input. Always returns 0.
int main(void)
{
    // Open the File: Stream
    FILE *fid = fopen("<Path to simple text file>/text.txt", "r");

    if (fid == NULL)
    {
        printf("Stream Error: File was not opened!\n");
    }
    else
    {
        echo_stream(fid);

        // Only close a stream that was actually opened; passing NULL to
        // fclose() is undefined behaviour.
        fclose(fid);
    }

    // Read from stdin through the same FILE* interface. Where the data
    // comes from (keyboard, or a file via `program < file`) is decided by
    // whoever launched the program.
    echo_stream(stdin);

    return 0;
}

示例源代码: http://nlp.stanford.edu/projects/glove/ - 特别是 cooccur.c 代码文件的第301行

在此代码中:

fid = fopen(vocab_file,"r");
if(fid == NULL) {fprintf(stderr,"Unable to open vocab file %s.\n",vocab_file); return 1;}
while(fscanf(fid, format, str, &id) != EOF) hashinsert(vocab_hash, str, ++j); // Here id is not used: inserting vocab words into hash table with their frequency rank, j
fclose(fid);
vocab_size = j;
j = 0;
if(verbose > 1) fprintf(stderr, "loaded %lld words.\nBuilding lookup table...", vocab_size);

/* Build auxiliary lookup table used to index into bigram_table */
lookup = (long long *)calloc( vocab_size + 1, sizeof(long long) );
if (lookup == NULL) {
    fprintf(stderr, "Couldn't allocate memory!");
    return 1;
}
lookup[0] = 1;
for(a = 1; a <= vocab_size; a++) {
    if((lookup[a] = max_product / a) < vocab_size) lookup[a] += lookup[a-1];
    else lookup[a] = lookup[a-1] + vocab_size;
}
if(verbose > 1) fprintf(stderr, "table contains %lld elements.\n",lookup[a-1]);

/* Allocate memory for full array which will store all cooccurrence counts for words whose product of frequency ranks is less than max_product */
bigram_table = (real *)calloc( lookup[a-1] , sizeof(real) );
if (bigram_table == NULL) {
    fprintf(stderr, "Couldn't allocate memory!");
    return 1;
}

fid = stdin; // <<<--- STDIN Stream Redirect
sprintf(format,"%%%ds",MAX_STRING_LENGTH);
sprintf(filename,"%s_%04d.bin",file_head, fidcounter);
foverflow = fopen(filename,"w");
if(verbose > 1) fprintf(stderr,"Processing token: 0");

/* For each token in input stream, calculate a weighted cooccurrence sum within window_size */
while (1) {
    if(ind >= overflow_length - window_size) { // If overflow buffer is (almost) full, sort it and write it to temporary file
        qsort(cr, ind, sizeof(CREC), compare_crec);
        write_chunk(cr,ind,foverflow);
        fclose(foverflow);
        fidcounter++;
        sprintf(filename,"%s_%04d.bin",file_head,fidcounter);
        foverflow = fopen(filename,"w");
        ind = 0;
    }
    flag = get_word(str, fid); // <<<--- Reading from the Vocab, not STDIN
    if(feof(fid)) break;
    if(flag == 1) {j = 0; continue;} // Newline, reset line index (j)
    counter++;
    if((counter%100000) == 0) if(verbose > 1) fprintf(stderr,"\033[19G%lld",counter);
    htmp = hashsearch(vocab_hash, str); // <<<--- Using the str that was read in the function: 'get_word'
    if (htmp == NULL) continue; // Skip out-of-vocabulary words
    w2 = htmp->id; // Target word (frequency rank)
    for(k = j - 1; k >= ( (j > window_size) ? j - window_size : 0 ); k--) { // Iterate over all words to the left of target word, but not past beginning of line
        w1 = history[k % window_size]; // Context word (frequency rank)
        if ( w1 < max_product/w2 ) { // Product is small enough to store in a full array
            bigram_table[lookup[w1-1] + w2 - 2] += 1.0/((real)(j-k)); // Weight by inverse of distance between words
            if(symmetric > 0) bigram_table[lookup[w2-1] + w1 - 2] += 1.0/((real)(j-k)); // If symmetric context is used, exchange roles of w2 and w1 (ie look at right context too)
        }
        else { // Product is too big, data is likely to be sparse. Store these entries in a temporary buffer to be sorted, merged (accumulated), and written to file when it gets full.
            cr[ind].word1 = w1;
            cr[ind].word2 = w2;
            cr[ind].val = 1.0/((real)(j-k));
            ind++; // Keep track of how full temporary buffer is
            if(symmetric > 0) { // Symmetric context
                cr[ind].word1 = w2;
                cr[ind].word2 = w1;
                cr[ind].val = 1.0/((real)(j-k));
                ind++;
            }
        }
    }

我想知道的是:在将流更改为stdin之后,str在flag = get_word(str, fid);这一调用中被赋值为一个单词,随后又被用于:htmp = hashsearch(vocab_hash, str);

此代码对大型语料库进行了数百万次迭代,用户不会坐在那里手动输入每个单词。

如果有人能够在fid = stdin;流更改之后解释这种情况,我将非常感激。

1 个答案:

答案 0 :(得分:0)

对某些人来说很简单,但对其他人则不然......

stdin是默认输入流,因此代码可以通过stdin变量直接访问它。所以,当你看到下面这样的代码时(我已经见过好几次了):

FILE *fid;
fid = stdin;

如果此流的数据源不是默认来源,那就说明stdin流已经在“某处”被重定向了。在大多数机器上,默认来源通常是键盘。

在第301行:fid = fopen(vocab_file,"r"); fopen函数返回的流以vocab文件作为数据源。该文件被读取和处理。

在第304行:流被关闭:fclose(fid);

在第329行:fid = stdin; stdin被指定为fid的输入流。

从那里开始,没有流发生变化的迹象,但str被赋值了,其内容是来自某个文本文件的单词,由get_word方法完成。str的内容来自语料库,程序的调用方式为:./cooccur -verbose 2 -symmetric 0 -window-size 10 -vocab-file vocab.txt -memory 8.0 -overflow-file tempoverflow < corpus.txt > cooccurrences.bin

命令行输入就是答案:-overflow-file tempoverflow < corpus.txt > cooccurrences.bin

stdin

使用:cplusplus.com

  

标准输入流

     

标准输入流是应用程序的默认数据源。在大多数系统中,它通常默认指向键盘。

     

stdin可以作为实参传给任何需要输入流(FILE *)作为参数之一的函数,如fgets或fscanf。

     

虽然通常假设stdin的数据源是键盘,但即使在常规的控制台系统中也可能并非如此,因为在大多数操作系统中,stdin通常可以在调用应用程序时被重定向。例如,许多系统(包括DOS/Windows和大多数UNIX shell)支持以下命令语法:

     

myapplication < example.txt

     

这样会使用文件example.txt的内容,而不是控制台键盘,作为myapplication的主要数据来源。

     

还可以在程序内部使用freopen函数将stdin重定向到其他数据源。

     

如果已知stdin不指向交互式设备,则该流是完全缓冲的。否则,该流默认是行缓冲还是不缓冲取决于具体的库实现(请参阅setvbuf)。

所以答案就在这里:stdin流被命令行参数重定向了:-overflow-file tempoverflow < corpus.txt

因此:corpus.txt 就是 stdin 流重定向后的数据源!

另外值得注意的是,cooccurrences.bin 是 stdout 流重定向后的数据目的地——通过第232行:fout = stdout;并在第270行写入:fwrite(&old, sizeof(CREC), 1, fout);

更多详细信息请参阅:“Standard Input and Output Redirection”

注意:如果您想运行此代码,请记住将控制台应用程序设置为64位,否则它将无法分配所需的内存!