Question

给出的输入是：the lord of the rings
预期产出为：The Lord of the Rings
但我得到的是：The Lord ofof thethe Rings

CODE：

#include <cs50.h>
#include <string.h>
#include <stdio.h>

/*
 * Reads a string with GetString() and prints it with the first letter of each
 * word in upper case, attempting to leave "the", "and", "but" and "of" in
 * lower case.
 *
 * NOTE(review): toupper() and isspace() are declared in <ctype.h>, which is
 * not among the headers included above - confirm.
 */
int main(void)
{
  string s = GetString();
  // Print the first character in upper case.
  printf("%c", toupper(s[0]));


  for (int i = 1, n = strlen(s); i <n; i++)
  {
    // Do not capitalize "the", "and", "but" or "of".
    if (isspace(s[i]))
    {
        // Each stop-word branch prints the whitespace character followed by
        // the whole word, but leaves i unchanged, so the following loop
        // iterations visit the letters of that word again.
        if (s[i + 1] == 't' && s[i + 2] == 'h' && s[i + 3] == 'e') {
            printf("%c%c%c%c", s[i], s[i + 1], s[i+2], s[i+3]);
        } else if (s[i + 1] == 'a' && s[i + 2] == 'n' && s[i + 3] == 'd') {
            printf("%c%c%c%c", s[i], s[i + 1], s[i+2], s[i+3]);
        } else if (s[i + 1] == 'b' && s[i + 2] == 'u' && s[i + 3] == 't') {
            printf("%c%c%c%c", s[i], s[i + 1], s[i+2], s[i+3]);
        } else if (s[i + 1] == 'o' && s[i + 2] == 'f') {
            printf("%c%c%c", s[i], s[i + 1], s[i+2]);
        } else { // Any other word: print the whitespace and the capitalized first letter.
            printf("%c%c", s[i], toupper(s[i + 1]));
            // Step over the letter that was just printed in upper case.
            i++;
        }            
    } else {
        // Not at whitespace: print nothing for this character when the
        // letters of a stop word start here, otherwise print it unchanged.
        // The comparison does not check word boundaries, and only the first
        // letter of a match is skipped by this iteration.
        if(   (s[i] == 't' && s[i + 1] == 'h' && s[i + 2] == 'e') 
           || (s[i] == 'a' && s[i + 1] == 'n' && s[i + 2] == 'd') 
           || (s[i] == 'b' && s[i + 1] == 'u' && s[i + 2] == 't') 
           || (s[i] == 'o' && s[i + 1] == 'f' ) )
            continue;          
        else
            printf("%c",s[i]);           
     }
   }
}

请帮我解决。

Answer 1

在每个if子句中，跳过停用词之后，您需要根据单词的长度调整索引，例如：

// The whitespace at s[i] is followed by the letters "the": print all four
// characters, then move the index past the word.
if (s[i + 1] == 't' && s[i + 2] == 'h' && s[i + 3] == 'e') {
   printf("%c%c%c%c", s[i], s[i + 1], s[i+2], s[i+3]);
   i += 3; // Skip the 3 letters just printed so the loop does not visit them again
}

这仍然没有解决另一个问题：以停用词开头、"看起来像"停用词的较长单词（例如 android、butter 或 offset）也会被误当作停用词处理。

Answer 2

我建议学习一下 strtok() 函数，可以用它把字符串拆分成单词。然后，准备一个数组存放不应大写的单词，用 strcmp() 把每个单词与该列表逐一比较，在需要时将其首字母大写，再把它追加到保存结果标题的数组中。下面的代码就是这样做的，并且始终将第一个单词大写。请注意，skip_words[] 在最后一个单词之后包含一个 NULL 指针，用于在遍历列表时标记结尾。另请注意，用于存储大写结果的数组 title[] 的大小足以容纳 input 字符串（包括 NUL 终止符）。此外，title 首先被初始化为空字符串，这样就可以用 strcat() 安全地把第一个 word 令牌连接到它上面。

这种方法还可以改进：编写一个 upcase() 函数，将单词的第一个字母大写并将其余字符转为小写；再编写一个 downcase() 函数，将单词中的所有字符转为小写，而不是盲目地只把第一个字符大写。这样就可以更好地处理凌乱的输入，例如 "tHe lorD oF The rINgs"。

最后一点说明：函数 strtok() 会通过向 input 中写入 '\0' 字符来修改它。

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdbool.h>

#define MAX_SKIP  100

bool in_list(const char *word, const char **list);

/*
 * Title-cases a fixed input string and prints the result.
 *
 * The input is split on spaces with strtok(). The first word is always
 * capitalized; every later word is capitalized unless in_list() finds it in
 * skip_words. The words are joined with single spaces into title, which is
 * then printed with puts().
 *
 * Returns 0.
 */
int main(void)
{
    /* NULL marks the end of the list for in_list(). */
    const char *skip_words[MAX_SKIP] = { "and", "but", "of", "the", NULL };
    char input[] = "the lord of the rings";
    /* Joining the tokens with single spaces never needs more room than the
     * original string, so strlen(input) + 1 bytes are enough for title. */
    size_t title_sz = strlen(input) + 1;
    char title[title_sz];
    char *word;

    /* Start from an empty string so strcat() has a terminator to append to. */
    title[0] = '\0';

    /* strtok() writes '\0' into input and returns NULL when the string holds
     * no token (empty or only spaces), so check before dereferencing. */
    word = strtok(input, " ");
    if (word != NULL) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        word[0] = (char) toupper((unsigned char) word[0]);
        strcat(title, word);

        while ((word = strtok(NULL, " ")) != NULL) {
            if (!in_list(word, skip_words)) {
                word[0] = (char) toupper((unsigned char) word[0]);
            }
            strcat(title, " ");
            strcat(title, word);
        }
    }

    puts(title);

    return 0;
}

/*
 * Reports whether word is equal to one of the strings in list.
 *
 * list must end with a NULL pointer, which stops the scan. Entries are
 * compared with strcmp(), so only exact matches count.
 *
 * Returns true if a matching entry is found, false otherwise.
 */
bool in_list(const char *word, const char **list)
{
    for (const char **entry = list; *entry != NULL; ++entry) {
        if (strcmp(word, *entry) == 0) {
            return true;
        }
    }

    return false;
}

程序输出为：

The Lord of the Rings

Answer 3

我基于 Apache 的 WordUtils.capitalize() 方法编写了一段代码。您可以将分隔符设置为正则表达式字符串。如果您想要跳过 ["and", "but", "of"] 之类的单词，请将它们也设置为分隔符。

/**
 * Capitalizes the first character that follows each delimiter match, and the
 * first character of the string.
 *
 * <p>Text matched by the delimiter pattern is copied to the result unchanged.
 * Every other character is copied as-is, except the first one after a
 * delimiter (or at the start of the string), which is converted with
 * {@link Character#toTitleCase(char)}.
 *
 * @param str the text to capitalize; {@code null} or empty yields {@code ""}
 * @param delimitersRegex regular expression describing the delimiters;
 *        {@code null} or empty means {@code "\\W"}
 * @return the capitalized text, never {@code null}
 */
public static String capitalize(String str, final String delimitersRegex) {
    if (str == null || str.length() == 0) {
        return "";
    }

    final Pattern delimPattern;
    if (delimitersRegex == null || delimitersRegex.length() == 0) {
        delimPattern = Pattern.compile("\\W");
    } else {
        delimPattern = Pattern.compile(delimitersRegex);
    }

    final Matcher delimMatcher = delimPattern.matcher(str);
    boolean delimiterFound = delimMatcher.find();

    int delimiterStart = -1;
    if (delimiterFound) {
        delimiterStart = delimMatcher.start();
    }

    final int strLen = str.length();
    final StringBuilder buffer = new StringBuilder(strLen);

    boolean capitalizeNext = true;
    for (int i = 0; i < strLen; i++) {
        if (delimiterFound && i == delimiterStart) {
            final int endIndex = delimMatcher.end();

            // Copy the delimiter text verbatim.
            buffer.append(str, i, endIndex);
            // The loop's i++ runs next, so stop one short of endIndex;
            // otherwise the first character after the delimiter is skipped.
            i = endIndex - 1;

            delimiterFound = delimMatcher.find();
            if (delimiterFound) {
                delimiterStart = delimMatcher.start();
            }

            capitalizeNext = true;
        } else {
            final char ch = str.charAt(i);

            if (capitalizeNext) {
                buffer.append(Character.toTitleCase(ch));
                capitalizeNext = false;
            } else {
                buffer.append(ch);
            }
        }
    }
    return buffer.toString();
}

希望有助于：）

如何将每个单词的首字母大写，但某些单词（"and"、"but"、"of"）除外？

3 个答案: