将希腊语单词转换为大写

时间:2014-06-05 17:08:40

标签: c pointers crossword

我必须创建一个函数来读取一个名为grwords.txt的文件,其中包含大约540000个用希腊字母书写的单词。

我必须将这些单词转换为大写并填充一个名为char **words的数组。

这是我到目前为止所做的。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <windows.h>
#include <ctype.h>


void fp();

/*
 * Program entry point.
 * Switches the console output code page to 1253 and then runs fp().
 * The command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    SetConsoleOutputCP(1253);
    fp();

    return 0;
}

/*
 * Read the whitespace-separated words of "grwords.txt" into a dynamically
 * allocated array of strings (char **words).
 *
 * At most MAX_WORDS words are stored and each word is limited to
 * MAX_WORD_LEN bytes by the fscanf field width.  The program exits with
 * status 1 if the file cannot be opened or memory runs out.
 *
 * The array is local to this function, so it is released before returning.
 */
void fp(){
    enum { MAX_WORDS = 546490, MAX_WORD_LEN = 24 };
    char **words;
    char word[MAX_WORD_LEN + 1];   /* +1 for the terminating '\0' */
    FILE *file;
    size_t cnt;
    size_t i;

    file = fopen("grwords.txt", "rt");
    if (file == NULL){
        printf("File cannot be opened.\n");
        exit(1);
    }

    /* One pointer per word; sizeof *words keeps the size tied to the type. */
    words = malloc(MAX_WORDS * sizeof *words);
    if (words == NULL){
        printf("Out of memory.\n");
        fclose(file);
        exit(1);
    }

    cnt = 0;
    /* Test the limit first so no word is read once the array is full.
       The width in "%24s" must stay equal to MAX_WORD_LEN. */
    while (cnt < MAX_WORDS && 1 == fscanf(file, "%24s", word)){
        /* Every entry gets the full capacity so it can hold any word. */
        words[cnt] = malloc(sizeof word);
        if (words[cnt] == NULL){
            printf("Out of memory.\n");
            for (i = 0; i < cnt; i++)
                free(words[i]);
            free(words);
            fclose(file);
            exit(1);
        }
        strcpy(words[cnt], word);
        cnt++;
    }
    fclose(file);

    /* words[0] .. words[cnt - 1] now hold the words that were read. */

    for (i = 0; i < cnt; i++)
        free(words[i]);
    free(words);
}

我仍在努力理解指针。我知道 & 可以由一个值得到指向它的指针,而 * 可以由指针得到它所指向的值。我已经更新了程序,它现在能成功地用文件中的单词填充数组!但我仍然不知道如何将希腊语小写字母转换为大写。

1 个答案:

答案 0 :(得分:1)

处理希腊语单词可能取决于您的平台。

首先,您需要了解文件处理的工作原理。这是我写的:

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define bufSize 1024 // max lenght of word
// we are going to receive the .txt from cmd line
// Reads whitespace-separated words from the file named on the command
// line. Each word is printed as read and then printed again with every
// byte passed through toupper().
// Returns 0 on success, 1 on a usage error or if the file cannot be opened.
// At most N words are processed; any further words are ignored.
int main(int argc, char *argv[])
{
  FILE *fp;

  // Assume file has max 10 words
  const size_t N = 10;

  // Allocate a 2D array of N rows
  // and bufSize columns.
  // You can think of it like an array
  // of N strings, where every string
  // has, at most, bufSize - 1 characters
  // plus the terminating '\0'.
  char buf[N][bufSize];

  // make sure we got the .txt
  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }

  // open the file
  if ((fp = fopen(argv[1], "r")) == NULL)
  { /* Open source file. */
    perror("fopen source-file");
    return 1;
  }

  // counters
  size_t i = 0, j;

  // Stop once all N rows are used, so we never write past the array.
  // The field width is bufSize - 1 (1023): fscanf stores up to that many
  // characters and then appends '\0', which must also fit in the row.
  while (i < N && fscanf(fp, "%1023s", buf[i]) == 1)
  {
    // print what we read
    printf("%s\n", buf[i]);

    j = 0;
    // while we are on a letter of word placed
    // in buf[i]
    while (buf[i][j])
    {
      // make the letter capital and print it.
      // toupper() needs a value representable as unsigned char (or EOF);
      // a plain char holding a non-ASCII byte may be negative.
      putchar(toupper((unsigned char)buf[i][j]));
      j++;
    }
    i++;
    printf("\ndone with this word\n");
  }
  // close the file
  fclose(fp);

  return 0;
}

对于此test.txt文件:

Georgios
Samaras
Γιώργος
Σαμαράς

代码的运行方式和输出如下:
./exe test.txt
Georgios
GEORGIOS
done with this word
Samaras
SAMARAS
done with this word
Γιώργος
Γιώργος
done with this word
Σαμαράς
Σαμαράς
done with this word

正如你所看到的,我可以阅读希腊语单词,但未能将它们转换为大写单词。

一旦你掌握了文件处理的方式,就需要使用宽字符来读取希腊文字的文件。

因此,通过修改上面的代码,我们得到:

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>

#define bufSize 1024

// Wide-character version: reads whitespace-separated words from the file
// named on the command line, prints each word, then prints it again with
// every character converted by towupper().
// Returns 0 on success, 1 on a usage error or if the file cannot be opened.
// At most N words are processed; any further words are ignored.
int main(int argc, char *argv[])
{
  // towupper() and the wide I/O functions depend on the LC_CTYPE locale.
  if (setlocale(LC_CTYPE, "en_GB.UTF-8") == NULL)
    fprintf(stderr, "Warning: locale en_GB.UTF-8 is not available\n");

  FILE *fp;
  const size_t N = 15;
  wchar_t buf[N][bufSize];
  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }
  if ((fp = fopen(argv[1], "r")) == NULL)
  {
    perror("fopen source-file");
    return 1;
  }
  size_t i = 0, j;
  // Stop once all N rows are used, so we never write past the array.
  // The field width is bufSize - 1 (1023) wide characters, leaving room
  // for the terminating L'\0' in each row.
  while (i < N && fwscanf(fp, L"%1023ls", buf[i]) == 1)
  {
    wprintf( L"%ls\n\n", buf[i]);
    j = 0;
    while (buf[i][j])
    {
      putwchar (towupper((wint_t)buf[i][j]));
      j++;
    }
    i++;
    wprintf(L"\ndone with this word\n");
  }
  fclose(fp);
  return 0;
}

现在输出是这样的:

Georgios

GEORGIOS
done with this word
Samaras

SAMARAS
done with this word
Γιώργος

ΓΙΏΡΓΟΣ
done with this word
Σαμαράς

ΣΑΜΑΡΆΣ
done with this word

我看到你可能想要创建一个读取单词的函数。如果您需要C语言中的简单函数示例,则可以访问我的伪站点here

至于我上面提到的2D阵列,这张图片可能会有所帮助:

enter image description here

其中N是行数(等于4),M是列数(等于5)。在上面的代码中,行数对应N,列数对应bufSize。我在这里(here)做了更详细的解释,您也可以在那里找到动态分配2D数组的代码。

我知道你在 Windows 上。我在 Ubuntu 中测试了代码。

对于 Windows ,您可能需要仔细查看此question

在阅读并理解了以上所有内容之后,下面就是您所要求的、使用动态内存管理的版本。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>

#define bufSize 1024

wchar_t **get(int N, int M);
void free2Darray(wchar_t** p, int N);

// Dynamic-memory version: reads whitespace-separated words from the file
// named on the command line into a table obtained from get(), prints each
// word, then prints it again with every character converted by towupper().
// Returns 0 on success, 1 on a usage error, if the file cannot be opened,
// or if the table cannot be allocated.
// At most N words are processed; any further words are ignored.
int main(int argc, char *argv[])
{
  // towupper() and the wide I/O functions depend on the LC_CTYPE locale.
  if (setlocale(LC_CTYPE, "en_GB.UTF-8") == NULL)
    fprintf(stderr, "Warning: locale en_GB.UTF-8 is not available\n");

  FILE *fp;
  const size_t N = 15;
  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }
  if ((fp = fopen(argv[1], "r")) == NULL)
  {
    perror("fopen source-file");
    return 1;
  }

  // Allocate only after the arguments have been validated, so the early
  // returns above have nothing to release.
  wchar_t** buf = get((int)N, bufSize);
  if (buf == NULL)
  {
    fprintf(stderr, "Out of memory\n");
    fclose(fp);
    return 1;
  }

  size_t i = 0, j;
  // Stop once all N rows are used, so we never write past the table.
  // The field width is bufSize - 1 (1023) wide characters, leaving room
  // for the terminating L'\0' in each row.
  while (i < N && fwscanf(fp, L"%1023ls", buf[i]) == 1)
  {
    wprintf( L"%ls\n", buf[i]);
    j = 0;
    while (buf[i][j])
    {
      putwchar (towupper((wint_t)buf[i][j]));
      j++;
    }
    i++;
    wprintf(L"\ndone with this word\n");
  }
  fclose(fp);
  // NEVER FORGET, FREE THE DYNAMIC MEMORY
  free2Darray(buf, (int)N);
  return 0;
}

// Allocate a table of N rows, each holding M wide characters.
// Every row is zero-initialised, so it starts out as an empty wide string.
// Returns the table, or NULL if N or M is negative or an allocation fails;
// on failure everything allocated so far is released.
// The caller owns the result and must free each row and then the table.
wchar_t **get(int N, int M) /* Allocate the array */
{
    int i;
    wchar_t **table;

    // Negative sizes would wrap to huge values when converted to size_t.
    if (N < 0 || M < 0)
        return NULL;

    // calloc also guards against overflow in count * size.
    table = calloc((size_t)N, sizeof *table);
    if (table == NULL)
        return NULL;

    for(i = 0 ; i < N ; i++)
    {
        table[i] = calloc((size_t)M, sizeof *table[i]);
        // A zero-sized request may legitimately yield NULL, so only treat
        // NULL as a failure when M is positive.
        if (table[i] == NULL && M > 0)
        {
            while (i > 0)
                free(table[--i]);
            free(table);
            return NULL;
        }
    }
    return table;
}

// Release a table of N rows: frees each row p[0] .. p[N-1] and then the
// array of row pointers itself.
// A NULL table is ignored, mirroring free(NULL), so the function is safe
// to call after a failed allocation.
void free2Darray(wchar_t** p, int N)
{
    int i;

    if (p == NULL)
        return;

    for(i = 0 ; i < N ; i++)
        free(p[i]);
    free(p);
}

请注意,此代码可用于 Linux (在Ubuntu 12.04上测试),而不适用于Windows(在Win 7上测试)。