此问题基于以下代码:http://nlp.stanford.edu/projects/glove/
以下代码,表现如我所料。它回显来自:stdin
的用户输入。
stdin:标准输入流是应用程序的默认数据源。在大多数系统中,它通常默认指向键盘。
键入文本并按Enter键,该文本回显到控制台。正常,预期。
// _CRT_SECURE_NO_WARNINGS:
#pragma warning(disable : 4996)
#include <stdio.h>
#include <stdlib.h>
/*
 * Copies every character from `stream` to stdout until fgetc() returns EOF.
 *
 * fgetc() returns EOF both at end-of-file and on a read error, so testing its
 * return value (rather than calling feof() before the read) stops the loop in
 * both cases and never prints the EOF sentinel itself.
 */
static void echo_stream(FILE *stream)
{
    /* Char as int: EOF must stay distinguishable from every valid byte. */
    int ch;

    while ((ch = fgetc(stream)) != EOF)
    {
        printf("%c", ch);
    }
}

/*
 * Echoes the contents of a text file to stdout, then echoes stdin to stdout.
 *
 * If the file cannot be opened an error message is printed and the program
 * continues with stdin. Always returns 0.
 */
int main()
{
    // Open the File: Stream
    FILE *fid = fopen("<Path to simple text file>/text.txt", "r");

    if (fid == NULL)
    {
        // Nothing was opened, so there is nothing to read or close.
        printf("Stream Error: File was not opened!\n");
    }
    else
    {
        echo_stream(fid);
        // Close the File: only a successfully opened stream may be closed.
        fclose(fid);
    }

    // Same loop, different data source: stdin is the keyboard unless the
    // shell redirected it (e.g. `program < corpus.txt`).
    echo_stream(stdin);

    return 0;
}
示例源代码: http://nlp.stanford.edu/projects/glove/ - 特别是来自 coocur.c 代码文件的第301行
在此代码中:
fid = fopen(vocab_file,"r");
if(fid == NULL) {fprintf(stderr,"Unable to open vocab file %s.\n",vocab_file); return 1;}
while(fscanf(fid, format, str, &id) != EOF) hashinsert(vocab_hash, str, ++j); // Here id is not used: inserting vocab words into hash table with their frequency rank, j
fclose(fid);
vocab_size = j;
j = 0;
if(verbose > 1) fprintf(stderr, "loaded %lld words.\nBuilding lookup table...", vocab_size);
/* Build auxiliary lookup table used to index into bigram_table */
lookup = (long long *)calloc( vocab_size + 1, sizeof(long long) );
if (lookup == NULL) {
fprintf(stderr, "Couldn't allocate memory!");
return 1;
}
lookup[0] = 1;
for(a = 1; a <= vocab_size; a++) {
if((lookup[a] = max_product / a) < vocab_size) lookup[a] += lookup[a-1];
else lookup[a] = lookup[a-1] + vocab_size;
}
if(verbose > 1) fprintf(stderr, "table contains %lld elements.\n",lookup[a-1]);
/* Allocate memory for full array which will store all cooccurrence counts for words whose product of frequency ranks is less than max_product */
bigram_table = (real *)calloc( lookup[a-1] , sizeof(real) );
if (bigram_table == NULL) {
fprintf(stderr, "Couldn't allocate memory!");
return 1;
}
fid = stdin; // <<<--- STDIN Stream Redirect
sprintf(format,"%%%ds",MAX_STRING_LENGTH);
sprintf(filename,"%s_%04d.bin",file_head, fidcounter);
foverflow = fopen(filename,"w");
if(verbose > 1) fprintf(stderr,"Processing token: 0");
/* For each token in input stream, calculate a weighted cooccurrence sum within window_size */
while (1) {
if(ind >= overflow_length - window_size) { // If overflow buffer is (almost) full, sort it and write it to temporary file
qsort(cr, ind, sizeof(CREC), compare_crec);
write_chunk(cr,ind,foverflow);
fclose(foverflow);
fidcounter++;
sprintf(filename,"%s_%04d.bin",file_head,fidcounter);
foverflow = fopen(filename,"w");
ind = 0;
}
flag = get_word(str, fid); // <<<--- Reading from the Vocab, not STDIN
if(feof(fid)) break;
if(flag == 1) {j = 0; continue;} // Newline, reset line index (j)
counter++;
if((counter%100000) == 0) if(verbose > 1) fprintf(stderr,"\033[19G%lld",counter);
htmp = hashsearch(vocab_hash, str); // <<<--- Using the str that was read in the function: 'get_word'
if (htmp == NULL) continue; // Skip out-of-vocabulary words
w2 = htmp->id; // Target word (frequency rank)
for(k = j - 1; k >= ( (j > window_size) ? j - window_size : 0 ); k--) { // Iterate over all words to the left of target word, but not past beginning of line
w1 = history[k % window_size]; // Context word (frequency rank)
if ( w1 < max_product/w2 ) { // Product is small enough to store in a full array
bigram_table[lookup[w1-1] + w2 - 2] += 1.0/((real)(j-k)); // Weight by inverse of distance between words
if(symmetric > 0) bigram_table[lookup[w2-1] + w1 - 2] += 1.0/((real)(j-k)); // If symmetric context is used, exchange roles of w2 and w1 (ie look at right context too)
}
else { // Product is too big, data is likely to be sparse. Store these entries in a temporary buffer to be sorted, merged (accumulated), and written to file when it gets full.
cr[ind].word1 = w1;
cr[ind].word2 = w2;
cr[ind].val = 1.0/((real)(j-k));
ind++; // Keep track of how full temporary buffer is
if(symmetric > 0) { // Symmetric context
cr[ind].word1 = w2;
cr[ind].word2 = w1;
cr[ind].val = 1.0/((real)(j-k));
ind++;
}
}
}
我想知道,在将流更改为stdin
之后,str
是如何在flag = get_word(str, fid);
方法中被分配一个单词的,并且在两行之后就被使用:htmp = hashsearch(vocab_hash, str);
此代码对大型语料库进行了数百万次迭代,用户不会坐在那里手动输入每个单词。
如果有人能够在fid = stdin;
流更改之后解释这种情况,我将非常感激。
答案 0 :(得分:0)
对某些人来说很简单,但对其他人则不然......
stdin
是默认输入流,因此可以在代码中通过stdin
变量直接访问它。所以,当看到下面这样的代码时(我已经见过好几次了):
FILE *fid;
fid = stdin;
stdin
流已被重定向到"某处"(如果此流不是默认流)。在大多数机器上,默认通常是键盘。
在线:301 fid = fopen(vocab_file,"r");
vocab文件成为流数据源,由fopen
函数返回。该文件被读取和处理。
在线:304流已关闭:fclose(fid);
在线:329 fid = stdin;
stdin
被指定为fid
的输入流。
从那里开始,没有流变化的迹象,但有str
的分配,这是来自其中一个文本文件,方法是:get_word
str
的内容来自语料库 corpus.txt,程序通过如下命令行运行:
./cooccur -verbose 2 -symmetric 0 -window-size 10 -vocab-file vocab.txt -memory 8.0 -overflow-file tempoverflow < corpus.txt > cooccurrences.bin
命令行输入就是答案: -overflow-file tempoverflow < corpus.txt > cooccurrences.bin
stdin
标准输入流
标准输入流是应用程序的默认数据源。在大多数系统中,它通常默认指向键盘。
stdin 可以用作任何需要输入流(FILE *)作为参数之一的函数的参数,例如 fgets 或 fscanf。
虽然通常假设 stdin 的数据源是键盘,但即使在常规控制台系统中也未必如此,因为在大多数操作系统中,stdin 通常可以在调用应用程序时被重定向。例如,许多系统(包括 DOS / Windows 和大多数 UNIX shell)都支持以下命令语法:
myapplication < example.txt
以使用文件 example.txt 的内容(而不是控制台键盘)作为 myapplication 的主要数据源。
还可以在程序内部使用 freopen 函数将 stdin 重定向到其他数据源。
如果已知 stdin 不指向交互设备,则该流是完全缓冲的。否则,流默认是行缓冲还是不缓冲取决于库的实现(请参阅 setvbuf)。
所以答案就在这里,stdin
流被命令行参数重定向:-overflow-file tempoverflow < corpus.txt
因此:corpus.txt
是stdin
Stream的重定向数据源!
另外值得注意的是,cooccurrences.bin
是stdout
Stream的重定向目标 - 通过第232行:fout = stdout;
并在第270行写入:fwrite(&old, sizeof(CREC), 1, fout);
def add_model(request):
if request.method=="POST":
form=UserForm(request.POST)
if form.is_valid:
model_instance=form.save(commit=False)
model_instance.save()
有关以下内容的详细信息:"Standard Input and Output Redirection"
注意:如果您想运行此代码,请记住将控制台应用程序设置为64位 - 否则它将无法分配所需的内存!