C++ 使用 wstring 拆分以 Unicode 字符分隔的字符串

时间:2013-03-08 14:26:20

标签: c++ unicode wchar-t wstring

我正在尝试完成标题所述的任务,但我的代码没有把字符串拆分开。这是 main 函数:

#define SQL_TEXT Latin_Text
#include <iostream>
#define SQL_TEXT Latin_Text
#include <sqltypes_td.h>
#include "Split.h"
#include <string>
#include <stdio.h>
#include <vector>
#include <cstring>

using namespace std;
// Reproduction driver from the question: builds a splitstringwc from a
// 'ý'-delimited string, splits it, and prints the resulting fields.
int main ()
{
    // Allocated with new[]; nothing in this function writes to it or frees it.
    VARCHAR_LATIN *result = new VARCHAR_LATIN[512];
    // NOTE(review): the cast only reinterprets the address of a narrow string
    // literal as wchar_t*; it does not convert the characters to wide ones.
    wchar_t *s1 = (wchar_t *)"Myýnameýisýzeeshan";
    // The ** before and after the next two statements looks like emphasis
    // markup from the original post rather than C++ syntax.
    // NOTE(review): 'ý' is a narrow character literal cast to wchar_t.
    **splitstringwc s(s1);
vector<wstring> flds = s.splitwc((wchar_t)'ý');**
    // Takes the second field without checking that at least two fields exist.
    wstring rs = flds[1];
    wcout<<rs<<endl;
// NOTE(review): flds[k].data() is a wchar_t pointer sent to the narrow cout,
// so this presumably does not print the field text - confirm wcout was meant.
for (int k = 0; k < flds.size(); k++)
        cout << k << " => " << flds[k].data() << endl;

    // Prints the buffer allocated at the top, which this function never fills.
    cout<<result;
    return 0;
}

splitstringwc类的代码如下:

public:
// Builds the splitter from a null-terminated wide string by passing it to the
// wstring initializer. The parameter is a non-const wchar_t*.
splitstringwc(wchar_t *s) : wstring(s) { };
// Splits the held text on delim and returns a reference to the field list;
// rep defaults to 0 (see the definition for how rep == 1 differs).
vector<wstring>& splitwc(wchar_t delim, int rep=0);
};


/**
 * Splits the text held by this object into fields separated by delim.
 *
 * @param delim  Wide character that separates fields.
 * @param rep    When 1, every delimiter closes the current field, so adjacent
 *               delimiters produce empty fields. For any other value, empty
 *               fields are skipped.
 * @return Reference to flds1, which is cleared and refilled on every call.
 *         A trailing field is appended only when it is non-empty.
 */
vector<wstring>& splitstringwc::splitwc(wchar_t delim, int rep) {
    flds1.clear();  // start from an empty result on every call

    const wstring ws = data();
    wcout << ws << endl;  // debug echo of the text about to be split

    // Default-constructed, so it starts empty. The previous (wchar_t *)"" cast
    // made wstring read a one-byte narrow literal as wide characters, which
    // runs past the end of that literal.
    wstring buf;
    for (wstring::size_type i = 0; i < ws.size(); ++i) {
        const wchar_t ch = ws[i];  // i < size(), so no bounds check is needed
        if (ch != delim) {
            buf += ch;
        } else if (rep == 1 || !buf.empty()) {
            // Delimiter reached: emit the field (possibly empty when rep == 1).
            flds1.push_back(buf);
            buf.clear();
        }
    }
    if (!buf.empty())
        flds1.push_back(buf);
    return flds1;
}

代码没有拆分输入字符串,当我尝试调试时,我得到分段错误:wstring ws = data();

请帮助...............

2 个答案:

答案 0 :(得分:1)

我改用 strtok 来代替自己编写的 split 函数,它可以按照 Unicode 分隔符拆分字符串。

代码如下:

// strtok writes NUL terminators into the buffer it scans, so the text has to
// live in a writable char array rather than behind a pointer to a literal.
char str[] = "Myýnameýisýzeeshan";
// Every byte of the delimiter string is treated as a separator on its own.
// NOTE(review): if 'ý' is encoded as more than one byte, each of those bytes
// splits the input individually - confirm that is acceptable for the data.
char *pch = strtok(str, "ý");
while (pch != NULL)
{
    printf("%s\n", pch);
    pch = strtok(NULL, "ý");  // NULL continues scanning the same buffer
}

请注意,str由UNICODE分隔符分隔的ANSI字符串组成。

答案 1 :(得分:0)

处理宽字符字符串时,不能使用普通的(窄)字符串字面量和字符字面量。它们也必须是宽字符形式,例如:

// The L prefix makes this a wide (wchar_t) string literal.
// NOTE(review): the splitstringwc constructor in the question takes a
// non-const wchar_t*, so that parameter would need to be const-qualified
// to accept this pointer.
const wchar_t *s1 = L"Myýnameýisýzeeshan";

注意文字前面的L,这使得字符串成为一个宽字符串。

同样用于字符文字:

// Wide character literal for the delimiter, matching splitwc's wchar_t parameter.
s.splitwc(L'ý')