如何创建具有特定长度的单词列表

时间:2018-09-03 07:51:32

标签: c

我目前正在编写C程序来创建具有特定长度的单词列表。

例如,我有一个十个字母的字符串

ABCDEFGHIJ

并想创建一个长度为2的单词列表

AB,BC,CD,DE,EF,FG,GH,HI,IJ

目前我只能创建

AB,CD,EF,GH,IJ

使用我的编码

#include<stdio.h>
#include<string.h>
/*
 * Reads "string.txt" in chunks of two characters and prints each chunk as
 * a numbered word (">1", ">2", ...). Because every fgets() call consumes
 * two fresh characters, the words do not overlap (AB, CD, EF, ...).
 *
 * At most MAX_WORDS chunks are stored and printed; reading stops once the
 * table is full so that it can never be written out of bounds.
 *
 * Returns 0 on success, 1 if the input file cannot be opened.
 */
int main()
{
    /*q=length of query sequence, W=length of substring, t=(q-w)+1*/
    enum { MAX_WORDS = 5, WORD_BUF = 3 };
    char array[MAX_WORDS][WORD_BUF], length[WORD_BUF];
    int i = 0;
    FILE *input;

    input = fopen("string.txt", "r");
    if (input == NULL) {
        perror("string.txt");
        return 1;
    }

    /*stores and prints the data from the string*/
    /* Slots are 0-based; the label shown to the user stays 1-based. */
    while (i < MAX_WORDS && fgets(length, sizeof length, input)) {
        strcpy(array[i], length);
        printf(">%d\n%s\n\n", i + 1, array[i]);
        i++;
    }

    fclose(input);
    return 0;
}

如何将数组指向从第二个字母而不是第三个字母开始的下一个单词?

谢谢。

2 个答案:

答案 0 :(得分:0)

针对您的问题,您需要逐个字符地读取文件,而不是每次读取 2 个字符:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Prints every overlapping two-character word found on the first line of
 * "string.txt" (AB, BC, CD, ...), one word per output line.
 *
 * The first word is read with fgets(); after that a single character is
 * read at a time and slid into a two-character window.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the file cannot be
 * opened or its first line holds fewer than two characters.
 */
int main(void)
{
    char str[3] = "";
    FILE *input = fopen("string.txt", "r");

    if (!input) {
        perror("string.txt");
        return (EXIT_FAILURE);
    }

    /* A failed read leaves str empty, so str[1] is only inspected after
     * at least one character has been stored. */
    if (!fgets(str, sizeof str, input) || str[1] == '\0' || str[1] == '\n') {
        fprintf(stderr, "string.txt: need at least 2 characters\n");
        fclose(input);
        return (EXIT_FAILURE);
    }
    printf("%s\n", str);

    /* Stop at the end of the line so the newline never becomes part of a
     * word. */
    for (int c = fgetc(input); c != EOF && c != '\n'; c = fgetc(input)) {
        str[0] = str[1];
        str[1] = (char)c;
        printf("%s\n", str);
    }

    fclose(input);
    return (EXIT_SUCCESS);
}

但更好的方法是先读取整行(例如在 Linux 上使用 getline),然后每次向后移动一个字符,依次显示每个长度为 2 的子串……

----------


好的,这不是SO的实际工作方式,但是我要给你一段代码。 但在此之前,一些一般性建议:

首先用少量数据测试代码。 我的意思是,用 10 个字符的输入和长度 2 进行测试是合适的,而用 3K+ 字符的输入和 100+ 的长度做测试则是疯狂的。那样您如何判断代码是对还是错?

将代码拆分成小函数, 并对这些函数做一些单元测试。这样,如果出现故障,您无需调试器就能快速定位错误。如果检索数据的函数工作正常,但最终结果有误,那么问题可能出在构造结果的那一步。如果结果是正确的,但写入文件的内容有误,那么可能是写函数出错了。等等。

使用 str* 系列函数。 对长度为 2 的字符串手动左移还可以接受,但对长度为 115 的字符串逐个手动移动就太疯狂了!您在这上面花了多少时间?


这是一段代码,它以字符串作为输入并完成您想要的操作。 由于数组可以是任意长度,因此数组的最后一个元素为 NULL。 这与字符串类似:要知道字符串的长度,需要查找特殊值 '\0';在这里,结束标记不是 '\0',而是 NULL。

结果数组和其中的每个字符串都使用 calloc 分配,因为 calloc 会隐式地将所有数组指针置为 NULL,并将所有字符串内容置为 \0。 这就是 strncpy 复制之后字符串仍然能正确结束的原因。

/**
 * Build the list of every contiguous substring ("word") of `len`
 * characters found in `input`, in order of appearance.
 *
 * For input "ABCD" and len 2 the result is { "AB", "BC", "CD", NULL }.
 *
 * @param input  NUL-terminated source string; NULL is rejected.
 * @param len    Length of each word; 0 is rejected.
 * @return A newly allocated, NULL-terminated array of newly allocated
 *         strings. When `len` is greater than the length of `input` the
 *         array holds only the NULL terminator. Returns NULL on invalid
 *         arguments or allocation failure. The caller must free() every
 *         string and then the array itself.
 */
char** Magic(const char * const input, size_t len)
{
    char   **array          = NULL;
    size_t arraySize        = 0;
    size_t allocated        = 0;
    size_t inputLen         = 0;
    char   **returnFunction = NULL;

    // Basic check: strlen() must never see a NULL pointer
    if (!input || !len) {
        // TODO Error message
        goto END_FUNCTION;
    }
    inputLen = strlen(input);

    // Number of words: one per possible starting position
    if (len <= inputLen) {
        arraySize = inputLen - len + 1;
    }
    // Last array value must be NULL in order to know the array length
    ++arraySize;

    if (!(array = calloc(arraySize, sizeof(*array)))) {
        // TODO Error message
        goto END_FUNCTION;
    }

    for (allocated = 0; allocated < arraySize - 1; ++allocated) {
        if (!(array[allocated] = calloc(len + 1, sizeof(*array[allocated])))) {
            // TODO Error message
            goto END_FUNCTION;
        }
        // The buffer holds len + 1 zeroed bytes, so copying exactly len
        // characters always leaves the terminating '\0' in place.
        strncpy(array[allocated], input + allocated, len);
    }
    // Explicit terminator: do not rely on zeroed memory being a null pointer
    array[arraySize - 1] = NULL;

    returnFunction = array;
    array = NULL;

    /* GOTO */END_FUNCTION:
    // Only reached with a non-NULL array on failure: release what was built
    if (array) {
        for (size_t i = 0; i < allocated; ++i) {
            free(array[i]);
        }
        free(array);
    }
    return (returnFunction);
}



/*
 * Demonstration driver: for every word length from 1 up to the length of
 * the sample string, asks Magic() for the list of words and prints them
 * on one line, separated by ", ".
 *
 * Returns EXIT_SUCCESS; a length for which Magic() fails is skipped.
 */
int main(void)
{
    /* Same sample string as in the question. */
    const char *input   = "ABCDEFGHIJ";
    size_t     inputLen = strlen(input);
    char       **result = NULL;

    for (size_t len = 1; len <= inputLen; ++len) {
        // Get result
        if (!(result = Magic(input, len))) {
            // TODO error log ?
            continue;
        }
        // Display result: the array is NULL-terminated, so the entry after
        // the last word tells us when to end the line
        printf("result for %zu len : ", len);
        for (size_t i = 0; result[i]; ++i) {
            printf("%s%s", result[i], result[i + 1] ? ", " : "\n");
        }
        // Free result: every word first, then the array itself
        for (size_t i = 0; result[i]; ++i) {
            free(result[i]);
        }
        free(result);
        result = NULL;
    }

    return (EXIT_SUCCESS);
}

剩下的就是:检索数据(然后把它作为 “input” 参数传入)以及显示/写入数据。 这两项都应各自写成一个函数。

然后测试。很多测试。 不要忘记检查调用函数时是否失败。

答案 1 :(得分:0)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Slides a fixed-size window over the contents of "exon11.txt".
 *
 * The first word is whatever fgets() stores in the buffer (at most 114
 * characters). After that, each character read from the file is appended
 * while the oldest character is dropped, so every word keeps the length
 * of the first one. Each word is printed to stdout as ">N", the word and
 * a blank line, and is also written to "2.txt" with no separator.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if a file cannot be
 * opened, the first word cannot be read, or closing "2.txt" fails.
 */
int main(void)
{
    char wmers[115] = {0};
    size_t len = 0;
    int index = 1;
    int status = EXIT_SUCCESS;

    FILE *input = NULL;
    FILE *output = NULL;

    if (!(input = fopen("exon11.txt", "r"))) {
        perror("exon11.txt");
        return (EXIT_FAILURE);
    }

    if (!fgets(wmers, sizeof wmers, input) || wmers[0] == '\0') {
        fprintf(stderr, "exon11.txt: nothing to read\n");
        fclose(input);
        return (EXIT_FAILURE);
    }
    /* The window keeps the length of the first word; the '\0' stored by
     * fgets() right after it is never overwritten. */
    len = strlen(wmers);

    if (!(output = fopen("2.txt", "w"))) {
        perror("2.txt");
        fclose(input);
        return (EXIT_FAILURE);
    }

    printf(">%d\n%s\n\n", index, wmers);
    index++;
    fputs(wmers, output);

    for (int c = fgetc(input); c != EOF; c = fgetc(input)) {
        /* Drop the oldest character and append the new one. memmove() is
         * used because source and destination overlap. */
        memmove(wmers, wmers + 1, len - 1);
        wmers[len - 1] = (char)c;

        printf(">%d\n%s\n\n", index, wmers);
        index++;

        fputs(wmers, output);
    }

    /* fclose() flushes buffered data, so a failure here means "2.txt" may
     * be incomplete. */
    if (fclose(output) != 0) {
        perror("2.txt");
        status = EXIT_FAILURE;
    }
    fclose(input);

    return (status);
}

这是exon11.txt的内容。

  

CTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAACAGCCTGGCTTAGCAAGGAGCCAACATAACAGATGGGCTGGAAGTAAGGAAACATGTAATGATAGGCGGACTCCCAGCACAGAAAAAAAGGTAGATCTGAATGCTGATCCCCTGTGTGAGAGAAAAGAATGGAATAAGCAGAAACTGCCATGCTCAGAGAATCCTAGAGATACTGAAGATGTTCCTTGGATAACACTAAATAGCAGCATTCAGAAAGTTAATGAGTGGTTTTCCAGAAGTGATGAACTGTTAGGTTCTGATGACTCACATGATGGGGAGTCTGAATCAAATGCCAAAGTAGCTGATGTATTGGACGTTCTAAATGAGGTAGATGAATATTCTGGTTCTTCAGAGAAAATAGACTTACTGGCCAGTGATCCTCATGAGGCTTTAATATGTAAAAGTGAAAGAGTTCACTCCAAATCAGTAGAGAGTAATATTGAAGACAAAATATTTGGGAAAACCTATCGGAAGAAGGCAAGCCTCCCCAACTTAAGCCATGTAACTGAAAATCTAATTATAGGAGCATTTGTTACTGAGCCACAGATAATACAAGAGCGTCCCCTCACAAATAAATTAAAGCGTAAAAGGAGACCTACATCAGGCCTTCATCCTGAGGATTTTATCAAGAAAGCAGATTTGGCAGTTCAAAAGACTCCTGAAATGATAAATCAGGGAACTAACCAAACGGAGCAGAATGGTCAAGTGATGAATATTACTAATAGTGGTCATGAGAATAA AACAAAAGGTGATTCTATTCAGAATGAGAAAAATCCTAACCCAATAGAATCACTCGAAAAAGAATCTGCTTTCAAAACGAAAGCTGAACCTATAAGCAGCAGTATAAGCAATATGGAACTCGAATTAAATATCCACAATTCAAAAGCACCTAAAAAGAATAGGCTGAGGAGGAAGTCTTCTACCAGGCATATTCATGCGCTTGAACTAGTAGTCAGTAGAAATCTAAGCCCACCTAATTGTACTGAATTGCAAATTGATAGTTGTTCTAGCAGTGAAGAGATAAAGAAAAAAAAGTACAACCAAATGCCAGTCAGGCACAGCAGAAACCTACAACTCATGGAAGGTAAAGAACCTGCAACTGGAGCCAAGAAGAGTAACAAGCCAAATGAACAGACAAGTAAAAGACATGACAGCGATACTTTCCCAGAGCTGAAGTTAACAAATGCACCTGGTTCTTTTACTAAGTGTTCAAATACCAGTGAACTTAAAGAATTTGTCAATCCTAGCCTTCCAAGAGAAGAAAAAGAAGAGAAACTAGAAACAGTTAAAGTGTCTAATAATGCTGAAGACCCCAAAGATCTCATGTTAAGTGGAGAAAGGGTTTTGCAAACTGAAAGATCTGTAGAGAGTAGCAGTATTTCATTGGTACCTGGTACTGATTATGGCACTCAGGAAAGTATCTCGTTACTGGAAGTTAGCACTCTAGGGAAGGCAAAAACAGAACCAAATAAATGTGTGAGTCAGTGTGCAGCATTTGAAAACCCCAAGGGACTAATTCATGGTTGTTCCAAAGATAATAGAAATGACACAGAAGGCTTTAAGTATCCATTGGGACATGAAGTTAACCACAGTCGGGAAACAAGCATAGAAATGGAAGAAAGTGAACTTGATGCTCAGTATTTGCAGAATACATTCAAGGTTTCAAAGCGCCAGTCATTTGCTCCGTTTTCAAATCCAGGAAATGCAGAAGAGGAATGTGCAACATTCTCTGCCCACTCT 
GGGTCCTTAAAGAAACAAAGTCCAAAAGTCACTTTTGAATGTGAACAAAAGGAAGAAAATCAAGGAAAGAATGAGTCTAATATCAAGCCTGTACAGACAGTTAATATCACTGCAGGCTTTCCTGTGGTTGGTCAGAAAGATAAGCCAGTTGATAATGCCAAATGTAGTATCAAAGGAGGCTCTAGGTTTTGTCTATCATCTCAGTTCAGAGGCAACGAAACTGGACTCATTACTCCAAATAAACATGGACTTTTACAAAACCCATATCGTATACCACCACTTTTTCCCATCAAGTCATTTGTTAAAACTAAATGTAAGAAAAATCTGCTAGAGGAAAACTTTGAGGAACATTCAATGTCACCTGAAAGAGAAATGGGAAATGAGAACATTCCAAGTACAGTGAGCACAATTAGCCGTAATAACATTAGAGAAAATGTTTTTAAAGAAGCCAGCTCAAGCAATATTAATGAAGTAGGTTCCAGTACTAATGAAGTGGGCTCCAGTATTAATGAAATAGGTTCCAGTGATGAAAACATTCAAGCAGAACTAGGTAGAAACAGAGGGCCAAAATTGAATGCTATGCTTAGATTAGGGGTTTTGCAACCTGAGGTCTATAAACAAAGTCTTCCTGGAAGTAATTGTAAGCATCCTGAAATAAAAAAGCAAGAATATGAAGAAGTAGTTCAGACTGTTAATACAGATTTCTCTCCATATCTGATTTCAGATAACTTAGAACAGCCTATGGGAAGTAGTCATGCATCTCAGGTTTGTTCTGAGACACCTGATGACCTGTTAGATGATGGTGAAATAAAGGAAGATACTAGTTTTGCTGAAAATGACATTAAGGAAAGTTCTGCTGTTTTTAGCAAAAGCGTCCAGAAAGGAGAGCTTAGCAGGAGTCCTAGCCCTTTCACCCATACACATTTGGCTCAGGGTTACCGAAGAGGGGCCAAGAAATTAGAGTCCTCAGAAGAGAACTTATCTAGTGAGGATGAAGAGC TTCCCTGCTTCCAACACTTGTTATTTGGTAAAGTAAACAATATACCTTCTCAGTCTACTAGGCATAGCACCGTTGCTACCGAGTGTCTGTCTAAGAACACAGAGGAGAATTTATTATCATTGAAGAATAGCTTAAATGACTGCAGTAACCAGGTAATATTGGCAAAGGCATCTCAGGAACATCACCTTAGTGAGGAAACAAAATGTTCTGCTAGCTTGTTTTCTTCACAGTGCAGTGAATTGGAAGACTTGACTGCAAATACAAACACCCAGGATCCTTTCTTGATTGGTTCTTCCAAACAAATGAGGCATCAGTCTGAAAGCCAGGGAGTTGGTCTGAGTGACAAGGAATTGGTTTCAGATGATGAAGAAAGAGGAACGGGCTTGGAAGAAAATAATCAAGAAGAGCAAAGCATGGATTCAAACTTAG