我有以下代码,需要删除所有非字母数字字符。它在delphi 2009中不起作用
unit Unit2;
//Used information from
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls;
Type
TExplodeArray = Array Of String;
TForm2 = class(TForm)
Memo1: TMemo;
ListBox1: TListBox;
Button1: TButton;
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
Function Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Function Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Function StripHTML ( S : String ) : String;
function allwords(data:string):integer;
end;
var
Form2: TForm2;
allword, allphrase: TExplodeArray;
implementation
{$R *.dfm}
Function TForm2.StripHTML ( S : String ) : String;
Var
TagBegin, TagEnd, TagLength : Integer;
Begin
TagBegin := Pos ( '<', S ); // search position of first <
While ( TagBegin > 0 ) Do
Begin // while there is a < in S
TagEnd := Pos ( '>', S ); // find the matching >
TagLength := TagEnd - TagBegin + 1;
Delete ( S, TagBegin, TagLength ); // delete the tag
TagBegin := Pos ( '<', S ); // search for next <
End;
Result := S; // give the result
End;
Function TForm2.Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Var
i : Integer;
Begin
Result := '';
For i := 0 To Length ( cArray ) - 1 Do
Begin
Result := Result + cSeparator + cArray [i];
End;
System.Delete ( Result, 1, Length ( cSeparator ) );
End;
Function TForm2.Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Var
i : Integer;
S : String;
Begin
S := vString;
SetLength ( Result, 0 );
i := 0;
While Pos ( cSeparator, S ) > 0 Do
Begin
SetLength ( Result, Length ( Result ) + 1 );
Result[i] := Copy ( S, 1, Pos ( cSeparator, S ) - 1 );
Inc ( i );
S := Copy ( S, Pos ( cSeparator, S ) + Length ( cSeparator ), Length ( S ) );
End;
SetLength ( Result, Length ( Result ) + 1 );
Result[i] := Copy ( S, 1, Length ( S ) );
End;
//Copied from JclStrings
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
var
Source, Dest: PChar;
begin
SetLength(Result, Length(S));
UniqueString(Result);
Source := PChar(S);
Dest := PChar(Result);
while (Source <> nil) and (Source^ <> #0) do
begin
if Source^ in Chars then
begin
Dest^ := Source^;
Inc(Dest);
end;
Inc(Source);
end;
SetLength(Result, (Longint(Dest) - Longint(PChar(Result))) div SizeOf(AnsiChar));
end;
function ReplaceNewlines(const AValue: string): string;
var
SrcPtr, DestPtr: PChar;
begin
SrcPtr := PChar(AValue);
SetLength(Result, Length(AValue));
DestPtr := PChar(Result);
while SrcPtr <> {greater than less than} #0 do begin
if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then begin
DestPtr[0] := '\';
DestPtr[1] := 't';
Inc(SrcPtr);
Inc(DestPtr);
end else
DestPtr[0] := SrcPtr[0];
Inc(SrcPtr);
Inc(DestPtr);
end;
SetLength(Result, DestPtr - PChar(Result));
end;
function StripNonAlphaNumeric(const AValue: string): string;
var
SrcPtr, DestPtr: PChar;
begin
SrcPtr := PChar(AValue);
SetLength(Result, Length(AValue));
DestPtr := PChar(Result);
while SrcPtr <> #0 do begin
if SrcPtr[0] in ['a'..'z', 'A'..'Z', '0'..'9'] then begin
DestPtr[0] := SrcPtr[0];
Inc(DestPtr);
end;
Inc(SrcPtr);
end;
SetLength(Result, DestPtr - PChar(Result));
end;
function TForm2.allwords(data:string):integer;
var i:integer;
begin
listbox1.Items.add(data);
data:= StripHTML ( data );
listbox1.Items.add(data);
//////////////////////////////////////////////////////////////
data := StrKeepChars(data, ['A'..'Z', 'a'..'z', '0'..'9']);
// Strips out everything data comes back blank in Delphi 2009
//////////////////////////////////////////////////////////////
listbox1.Items.add(data);
data := stringreplace(data,' ',' ', [rfReplaceAll, rfIgnoreCase] );
//Replace two spaces with one.
listbox1.Items.add(data);
allword:= explode(' ',data);
{ // Converting the following PHP code to Delphi
$text = ereg_replace("[^[:alnum:]]", " ", $text);
while(strpos($text,' ')!==false) $text = ereg_replace(" ", " ", $text);
$text=$string=strtolower($text);
$text=explode(" ",$text);
return count($text);
}
for I := 0 to Length(allword) - 1 do
listbox1.Items.Add(allword[i]);
end;
procedure TForm2.Button1Click(Sender: TObject);
begin
//[^[:alnum:]]
allwords(memo1.Text);
end;
end.
我还能怎样做呢?
答案 0 :(得分:1)
最容易想到的解决方案是定义一个正则表达式,返回输入字符串减去其中的任何非字母字符。
答案 1 :(得分:1)
我用Delphi做了很多次 - 第5版是我的游乐场。
默认情况下,它不是Delphi 2009的主要功能之一,它现在是Unicode。
这对尝试逐个字符处理的任何事物都有影响。它可能是你问题的根源吗?
答案 2 :(得分:1)
Uses StrUtils; //StuffString
var
Regex: TPerlRegEx;
I:Integer;
begin
Regex := TPerlRegEx.Create(nil);
Regex.RegEx := '[^[:alnum:]]';
Regex.Options := [preMultiLine];
Regex.Subject := data;
if Regex.Match then begin
repeat
data := StuffString(data,Regex.MatchedExpressionOffset,Regex.MatchedExpressionLength,' ');
until not Regex.MatchAgain;
end;