Question

我正在寻找一种使用 Boost 将句子中每个单词的首字母大写的方法。为了使结果保持一致，即使输入句子中含有空格或制表符，最终输出中也不应保留这些多余的空白。为了获得各个单词，我使用 boost::algorithm::split，然后用 boost::algorithm::join 把它们重新组合起来。但是如何让每个单词的首字母大写？

我试过这段代码

 #include <iostream>                                                              
 #include <vector>                                                                
 #include <boost/algorithm/string/split.hpp>                                      
 #include <boost/algorithm/string.hpp>

 // Asker's attempt: split a fixed sentence on spaces/tabs and try to upper-case
 // the first character of every token, printing one token per line.
 int main()
 {
     using namespace std;

     string str("cONtainS            SoMe    CApiTaL WORDS");

     vector<string> strVec;
     using boost::is_any_of;
     using boost::algorithm::token_compress_on;

     // Tokenize on tab or space; token_compress_on is passed so that runs of
     // adjacent separators are merged (per the Boost split documentation).
     boost::algorithm::split(strVec, str, is_any_of("\t "), token_compress_on);

     vector<string>::iterator i ;

     for(i = strVec.begin() ; i != strVec.end(); i++)
     {
         // NOTE(review): this is the line the question is about. boost::to_upper
         // is documented as converting a whole range in place and returning
         // nothing, so passing a single char and assigning the result is
         // presumably what fails here -- confirm against the Boost reference.
         (*i)[0] = boost::to_upper((*i)[0]);
         cout<<*i<<endl;
     }

     return 0;
 }

Answer 1

问题在于如何界定什么是一个句子。最简单的方案是：句子是以正则表达式 "[.!?][\"\']*" 结尾的任意序列（因为你已经去掉了空白字符）；这其实足够简单，不用正则表达式也能实现。然后记住你已经遇到了句末，并将下一个单词的首字母大写：

// Prints every word of `words` on its own line, upper-casing the first
// character of each word that starts a sentence. A word ends a sentence when
// its last character, ignoring trailing quote characters, satisfies
// isSentenceEnd(). `words` must be a mutable std::vector<std::string>.
bool atEndOfSentence = true;    // the very first word starts a sentence
for ( std::vector<std::string>::iterator current = words.begin();
        current != words.end();
        ++ current ) {
    // Guard against empty tokens: there is no first character to capitalize.
    if ( atEndOfSentence && ! current->empty() ) {
        // Assignment (not comparison) so the word is really modified; the
        // unsigned char cast keeps toupper() defined for non-ASCII bytes.
        (*current)[0] = static_cast<char>(
                toupper( static_cast<unsigned char>( (*current)[0] ) ) );
    }
    std::cout << *current << std::endl;

    // Dereference the reverse iterator itself: its base() designates the
    // element *after* the one found, which may be end().
    std::string::reverse_iterator lastNonQuote
        = std::find_if( current->rbegin(), current->rend(),
                        IsNotQuoteChar() );
    // A word that is empty or made only of quotes carries no information
    // about sentence boundaries, so the previous state is kept.
    if ( lastNonQuote != current->rend() ) {
        atEndOfSentence = isSentenceEnd( *lastNonQuote );
    }
}

使用：

// Unary predicate: true for every character except the single quote (')
// and the double quote (").
struct IsNotQuoteChar
{
    bool operator()( char ch ) const
    {
        const bool isQuote = ( ch == '\'' || ch == '\"' );
        return ! isQuote;
    }
};

和

// Returns true when `ch` is one of the sentence terminators '.', '!' or '?'.
bool
isSentenceEnd( char ch )
{
    switch ( ch ) {
    case '.':
    case '!':
    case '?':
        return true;
    default:
        return false;
    }
}

Answer 2

我知道这个方案没有使用 Boost，也不支持 Unicode，但它提供了一个只用标准库函数的基本解决方案。我用 isalpha 来判断单词的边界。这也许不是最好的方式，但可以作为一种替代方案：

#include <string>
#include <iostream>

using namespace std;

// Title-cases a fixed sample sentence using only the standard library: the
// first alphabetic character of each run of letters is upper-cased, the rest
// of the run is lower-cased, and every non-alphabetic character is copied
// unchanged. Prints the original string, then the converted one.
int main()
{
    const string str("  cONtainS            SoMe    CApiTaL WORDS");

    bool atWordStart = true;   // true until the first letter of a word is seen
    string converted;
    for (string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const char ch = *it;

        if (!isalpha(ch))
        {
            // Any non-letter is a separator: copy it and arm the next word.
            converted += ch;
            atWordStart = true;
            continue;
        }

        if (atWordStart)
        {
            converted += toupper(ch);
            atWordStart = false;
        }
        else
        {
            converted += tolower(ch);
        }
    }

    cout << str << endl;
    cout << converted << endl;
}

Answer 3

这段代码对我有效：

#include <iostream>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <vector>
#include <ctype.h>

// Splits a fixed sentence on spaces/tabs, converts every word to
// "Capitalized" form (first character upper-case, the rest lower-case),
// prints each word on its own line and finally prints the words re-joined
// with single spaces.
int main()
{
    using namespace std;

    string str("contAins Some       CapItal WORDS");
    string result;

    vector<string> strVec;

    using boost::is_any_of;
    using boost::algorithm::token_compress_on;

    // token_compress_on merges runs of adjacent separators into one.
    boost::algorithm::split(strVec, str, is_any_of("\t "), token_compress_on);

    for (vector<string>::iterator i = strVec.begin(); i != strVec.end(); ++i)
    {
        // Leading or trailing separators in the input still produce empty
        // tokens; skip them so that no character of an empty string is
        // touched and no stray space ends up in the joined result.
        if (i->empty())
            continue;

        boost::to_lower(*i);
        // The unsigned char cast keeps toupper() well defined for bytes
        // outside the ASCII range.
        (*i)[0] = static_cast<char>(toupper(static_cast<unsigned char>((*i)[0])));

        cout << *i << endl;
        result += *i;
        result += " ";
    }

    // Drop the separator appended after the last word.
    boost::trim_right(result);
    cout << result;
    return 0;
}

Answer 4

以下是我的 C++11 解决方案，供感兴趣的人参考：

std::string s("some lowercase string");
s[0] = toupper(s[0]);
std::transform(s.begin()+1, s.end(),s.begin(),s.begin()+1, 
[](const char& a, const char& b) -> char
{
    if(b==' ' || b=='\t')
    {
        return toupper(a);
    }
    return a;
});

使用boost库将句子中每个单词的首字母大写

4 个答案: