Question

我有一个字符串

Ex：“我们更喜欢可以回答的问题;不仅仅是讨论过”

现在我想以“;”为分隔符拆分此字符串，得到“我们更喜欢可以回答的问题”和“不仅仅是讨论过”两部分。

这在DXL中可行吗？

我正在学习DXL，所以不知道能否这样拆分字符串。

注意：这不是家庭作业。

Answer 1

这是我能想到的快速 join 和 split 实现，看起来工作正常。

// Returns the number of entries found in column 0 of `a`: rows are probed
// from 0 upwards and the index of the first row whose value tests as null
// is returned.
// NOTE(review): a stored empty string presumably also tests as null, which
// would end the count early - confirm against the DXL manual.
int array_size(Array a){
    int row;
    for(row = 0; !null(get(a, row, 0)); row++){
    }
    return row;
}

// Appends `str` to `a`: the string is stored in column 0 at the row index
// reported by array_size(a), i.e. the first row that currently tests as null.
void array_push_str(Array a, string str){
    put(a, str, array_size(a), 0);
}

// Reads the entry stored at row `index`, column 0 of `a` and returns it
// cast to a string.
string array_get_str(Array a, int index){
    string entry = (string get(a, index, 0));
    return entry;
}

// Concatenates the strings held in column 0 of `str_array`, inserting
// `joiner` between consecutive entries, and returns the combined string.
// An array with no entries yields "".
//
// joiner    - text placed between two neighbouring entries
// str_array - Array filled via array_push_str
string str_join(string joiner, Array str_array){
    Buffer joined = create;
    // array_size rescans the array on every call, so ask once up front
    // instead of twice per loop iteration.
    int entry_count = array_size(str_array);
    int array_index;

    joined += "";

    for(array_index = 0; array_index < entry_count; array_index++){
        if( array_index > 0 )
            joined += joiner;
        joined += array_get_str(str_array, array_index);
    }

    // Copy the text out before releasing the Buffer; the previous version
    // returned without ever deleting it.
    string result = stringOf(joined);
    delete joined;
    return result;
}

// Splits `str` at every occurrence of `splitter` and returns the pieces as
// an Array (column 0, filled through array_push_str).
//
// splitter - delimiter text; may be longer than one character. An empty
//            splitter never matches, so the whole of `str` becomes one piece.
// str      - text to split
//
// The delimiter itself is not part of any piece. The text after the last
// delimiter (or all of `str` when no delimiter occurs) is pushed as the
// final piece.
Array str_split(string splitter, string str){
    Array tokens = create(1, 1);
    Buffer buf = create;
    int split_len = length(splitter);
    int str_len = length(str);
    int str_index = 0;
    bool at_splitter;

    buf = "";

    while( str_index < str_len ){
        // Compare a window as wide as the delimiter, not a single character,
        // so multi-character delimiters are recognised too.
        at_splitter = false;
        if( split_len > 0 && str_index + split_len <= str_len )
            at_splitter = ( str[str_index:str_index + split_len - 1] == splitter );

        if( at_splitter ){
            array_push_str(tokens, stringOf(buf));
            buf = "";
            // skip the whole delimiter
            str_index += split_len;
        }else{
            buf += str[str_index:str_index];
            str_index++;
        }
    }
    array_push_str(tokens, stringOf(buf));

    delete buf;
    return tokens;
}

Answer 2

如果你只是拆分字符串，我就会这样做：

// Splits `s` at the first occurrence of the delimiter `sub` and prints the
// part before it and the part after it, one per line.
string s = "We prefer questions that can be answered; not just discussed"

string sub = ";"

// Filled in by findPlainText: start index and length of the match in `s`.
int offset

int len

if ( findPlainText(s, sub, offset, len, false)) {

/* The delimiter itself is left out of the output: the prefix ends one
 character before the match and the suffix starts right after the match.
 Using `len` rather than a fixed 1 keeps the suffix correct when `sub` is
 longer than one character. */

// prefix, followed by a newline so the two parts do not run together
print s[0 : offset -1] "\n"

// suffix
print s[offset + len :]


} else {
// no delimiter found
print "Failed to match"

}

您也可以使用正则表达式，详见DXL参考手册。如果想在多个分隔符处分割字符串，例如 str =“this; is an; example”，那么最好使用正则表达式。

Answer 3

很抱歉翻出这个旧帖。作为DXL的新手，我花了一些时间来应对同样的挑战。我注意到现有的各种实现对“拆分”字符串的定义各不相同。我很喜欢Ruby语言，却找不到一个至少接近Ruby版String#split的实现。也许我的发现能对别人有所帮助。

以下是这几种实现的功能比较：

变体A： niol的实现（乍一看似乎与Capri Soft上常见的实现相同），
变体B： PJT的实施，
变体C： Brett的实施和
变体D：我的实现（提供正确的功能）。

为了消除结构差异，所有实现都在函数中实现，返回Skip列表或Array。

分裂结果

请注意，所有实现都会返回不同的结果，具体取决于它们对“拆分”的定义：

字符串：mellow yellow；分隔符：ello

    splitVariantA returns 1 elements: ["mellow yellow" ]
    splitVariantB returns 2 elements: ["m" "llow yellow" ]
    splitVariantC returns 3 elements: ["w" "w y" "" ]
    splitVariantD returns 3 elements: ["m" "w y" "w" ]

字符串：now's the time；分隔符：（空格）

    splitVariantA returns 3 elements: ["now's" "the" "time" ]
    splitVariantB returns 2 elements: ["" "now's  the time" ]
    splitVariantC returns 5 elements: ["time" "the" "" "now's" "" ]
    splitVariantD returns 3 elements: ["now's" "the" "time" ]

字符串：1,2,,3,4,,；分隔符：,

    splitVariantA returns 4 elements: ["1" "2" "3" "4" ]
    splitVariantB returns 2 elements: ["1" "2,,3,4,," ]
    splitVariantC returns 7 elements: ["" "" "4" "3" "" "2" "" ]
    splitVariantD returns 7 elements: ["1" "2" "" "3" "4" "" "" ]

时序

在我的机器上，把字符串 1,2,,3,4,, 按分隔符 , 拆分10000次，得到以下耗时：

    splitVariantA() : 406 ms
    splitVariantB() : 46 ms
    splitVariantC() : 749 ms
    splitVariantD() : 1077 ms

不幸的是，我的实现D是最慢的。令人惊讶的是，基于正则表达式的实现C相当快。

源代码

// niol, modified
// Walks `str` one character at a time; every character equal to `splitter`
// closes the piece collected so far, every other character is appended to
// it. Pieces are stored through array_push_str, and the text left after the
// last match is stored as the final piece.
Array splitVariantA(string splitter, string str){
    Array tokens = create(1, 1);
    Buffer piece = create;
    int str_len = length(str);
    int idx = 0;
    string ch;

    piece = "";
    while( idx < str_len ){
        ch = str[idx:idx];
        if( ch != splitter ){
            piece += ch;
        }else{
            array_push_str(tokens, stringOf(piece));
            piece = "";
        }
        idx++;
    }

    array_push_str(tokens, stringOf(piece));
    delete piece;
    return tokens;
}

// PJT, modified
// Splits `s` at most once, at the first place findPlainText reports
// `delimiter` (searched without case matching). On a hit the Skip list
// holds the text before the match under key 0 and the text starting one
// character after the match position under key 1; without a hit the list
// is returned empty.
Skip splitVariantB(string s, string delimiter) {
    Skip parts = create
    int hit_at
    int hit_len

    bool hit = findPlainText(s, delimiter, hit_at, hit_len, false)
    if (!hit) return parts

    put(parts, 0, s[0 : hit_at - 1])
    put(parts, 1, s[hit_at + 1 :])
    return parts
}

// Brett, modified
// Splits `s` with a regular expression built around `delim` and returns the
// pieces in a Skip list keyed 0, 1, 2, ... in the order they are peeled off.
// `delim` is inserted into the pattern as-is, so it is interpreted as
// regular-expression text.
Skip splitVariantC (string s, string delim) {

    Skip skp = create
    int i = 0
    // group 1 = text before a delimiter, group 2 = text after it
    Regexp split = regexp "^(.*)" delim "(.*)$"
    while (split s) {
        // remember group 1 first, then store group 2 and continue with group 1
        string temp_s = s[match 1]
        put(skp, i++, s[match 2])
        s = temp_s
    }
    // Runs after the match above has failed. In the comparison results shown
    // in this document the entry stored here is always "" rather than the
    // remaining text of `s`.
    put(skp, i++, s[match 2])
    return  skp
}

// Splits `str` at every case-sensitive occurrence of `pattern` and returns
// the pieces in a Skip list keyed 0, 1, 2, ... in order of appearance.
// A null or empty `pattern` is treated as a single space, and for the
// single-space pattern `str` is first passed through stringSqueeze and
// stringStrip. The text after the last occurrence is always stored as the
// final entry, even when it is empty.
Skip splitVariantD(string str, string pattern) {
    Skip pieces = create;
    int scan_from = 0; // where the not-yet-split remainder of str begins
    int piece_no = 0;  // key for the next entry in pieces
    int hit_pos;
    int hit_len;

    if (null(pattern) || 0 == length(pattern))
        pattern = " ";

    if (pattern == " ")
        str = stringStrip(stringSqueeze(str, ' '));

    while (true) {
        hit_pos = 0;
        hit_len = 0;
        // hit_pos is relative to the remainder str[scan_from:]
        if (!findPlainText(str[scan_from:], pattern, hit_pos, hit_len, true))
            break;

        put(pieces, piece_no, str[scan_from:scan_from+hit_pos-1]);
        piece_no++;
        scan_from += hit_pos + hit_len;
    }

    // whatever follows the last occurrence
    put(pieces, piece_no, str[scan_from:]);

    return pieces;
}

Answer 4

确实可用：

此解决方案会根据需要拆分任意多次；如果字符串中不存在分隔符，则不进行拆分。

这是我实际使用的做法，用来代替传统的“拆分”命令。它跳过了数组的创建，直接依次处理本应放入数组的每个子字符串，并对每个子字符串调用“someFunction”。

// Hands every ";"-separated piece of s to someFunction, working from the
// last piece towards the first, without collecting the pieces anywhere.
string s = "We prefer questions that can be answered; not just discussed"

// ";" is the delimiter in this example: group 1 is the text in front of
// the last ";" and group 2 is the text behind it
Regexp split = regexp "^(.*);(.*)$"

// keep going for as long as s still contains a ";"
while (split s) {

    // read both groups before calling out
    string before_last = s[match 1]
    string after_last = s[match 2]

    // process the piece behind the last ";"
    someFunction(after_last)

    // drop that piece together with its ";"
    s = before_last
}

// what is left is the first (or only) piece
someFunction(s)

抱歉翻出一个旧帖子；我只是觉得其他答案都不太有用。

Answer 5

也许有人会觉得这个综合性的解决方案用起来很方便。它根据分隔符把字符串拆分到一个Skip列表中，而且分隔符的长度可以超过一个字符。

// Splits `s1` at every occurrence of `delimit` (found with findPlainText,
// not case-sensitive) and returns the pieces in a Skip list.
//
// s1      - text to split
// delimit - delimiter text; may be longer than one character. With an
//           empty delimiter no splitting is attempted.
//
// Returns a Skip list whose data are the pieces and whose keys are their
// positions 0, 1, 2, ... in `s1`. Pieces used to be stored under their own
// text as key; a Skip list keeps one entry per key, so a piece that occurred
// twice was kept only once and the original order was not retained.
// Position keys keep every piece, in order. As before, a non-empty piece
// after the last delimiter is stored and an empty one is not.
Skip splitString(string s1, string delimit)
{
    int offset, len
    int part_no = 0
    Skip splited = create

    // NOTE(review): guard added because it is unclear whether findPlainText
    // reports a zero-length match for an empty search text, which would
    // never shorten s1 - confirm against the DXL manual.
    if(length(delimit)>0)
    {
        while(findPlainText(s1, delimit, offset, len, false))
        {
            put(splited, part_no++, s1[0:offset-1])
            s1 = s1[offset+length(delimit):length(s1)-1]
        }
    }


    if(length(s1)>0)
    {
        put (splited, part_no, s1)
    }

    return splited
}

Answer 6

我试过这个，对我来说可行……

// Prints every piece of `s` that is separated by the delimiter `sub`.
string s = "We prefer questions that can be answered,not just discussed,hiyas"

string sub = ","

// Filled in by findPlainText: start index and length of the match in s1.
int offset

int len

// s1 is the part of s that has not been printed yet
string s1=s

while(length(s1)>0){

    if ( findPlainText(s1, sub, offset, len, false)) {

        // piece in front of the delimiter, one per line
        print s1[0 : offset -1]"\n"

        // Continue behind the delimiter. Skipping `len` characters instead
        // of a fixed 1 keeps this correct when `sub` is longer than one
        // character.
        s1= s1[offset+len:length(s1)]

    } else {

        // no delimiter left: the remainder is the last piece
        print s1

        s1=""

    }

}

Answer 7

这是一个更好的实现。这是通过搜索关键字来对字符串进行递归拆分。

// For every occurrence of "Next Line" in s, takes the text from that
// occurrence up to and including the next ";", splits it at "," into three
// fields (Modulename, Attributename, Attributevalue) and prints them.

// NOTE(review): presumably adjusts the DXL execution limit for this
// script - confirm against the DXL manual.
pragma runLim, 10000
string s = "We prefer questions that can be answered,not just discussed,hiyas;
Next Line,Var1,Nemesis;
Next Line,Var2,Nemesis1;
Next Line,Var3,Nemesis2;
New,Var4,Nemesis3;
Next Line,Var5,Nemesis4;
New,Var5,Nemesis5;"
// field delimiter inside one record
string sub = "," 
// Filled in by every findPlainText call below: start index and length of
// the match.
int offset
int len

// searchkey  - text from the current "Next Line" occurrence to the end
// curr       - text that is still to be searched for "Next Line"
// nxt        - assigned here but not read anywhere in this script
// searchline - the record currently being split into fields
string searchkey=null
string curr=s
string nxt=s
string searchline=null
string Modulename=""
string Attributename=""
string Attributevalue=""

while(findPlainText(curr,"Next Line", offset,len,false))
{
    // not read anywhere in this script
    int intlen=offset

    searchkey=curr[offset:length(curr)]

    // searchkey starts at the occurrence just found; continuing one
    // character behind the match reported here lets the next pass of the
    // outer loop move on to a later occurrence.
    if(findPlainText(searchkey,"Next Line",offset,len,false))
    {
        curr=searchkey[offset+1:length(searchkey)]
    }

    // The record runs up to and including the first ";". When no ";" is
    // found, searchline keeps the value left over from the previous pass.
    if(findPlainText(searchkey,";",offset,len,false))
    {       
        searchline=searchkey[0:offset]  
    }

    // counter = number of fields extracted from this record so far
    int counter=0
    while(length(searchline)>0)
    {   
        if (findPlainText(searchline, sub, offset, len, false))
        {
            // first field -> Modulename, second field -> Attributename
            if(counter==0)
            {
                Modulename=searchline[0 : offset -1]
                counter++
            }
            else if(counter==1)
            {
                Attributename=searchline[0 : offset -1]
                counter++
            }
            // continue behind the "," just handled
            searchline= searchline[offset+1:length(searchline)]
        }
        else
        {

            // No "," left: the rest is the third field. Its last character
            // (the ";" that closed the record) is cut off.
            if(counter==2)
            {
                Attributevalue=searchline[0:length(searchline)-2]
                counter++
            }
            // ends the inner loop
            searchline=""
        }       
    }
    print "Modulename="Modulename " Attributename=" Attributename " Attributevalue= "Attributevalue "\n"
}

DXL中的字符串拆分

7 个答案:

分裂结果

时序

源代码