Question

我正在实现 MP3 ID3 v2 标签的读写，基本上已经可以正常工作了，只剩下这一个问题——而它可能与 ID3 本身根本无关。无论如何，我使用下面的代码来读取包含文本的标签帧中的数据。

我遇到的问题是（我猜？）某些字符串中含有 Unicode 字符。我尝试用下面的代码对其进行转换，转换本身是有效的。但是在字符串前面多出了一个 $3F，后面又多出了 $3F $3F。我能否对下面的代码做些修改来去掉这些多余的字符，还是必须自己手动解析？这些文件是由 iTunes 编码的，如果这一信息有帮助的话。

function Id3v2_string(currp: pointer; datasize: integer): string;
{ Converts the raw bytes of an ID3v2 text field into a string.
    currp    - pointer to the first byte of the field data
    datasize - number of bytes available at currp
  Returns the text, either converted from wide characters or copied
  byte-for-byte, depending on what IsTextUnicode reports. }
  const
    // Flag mask passed (via uscan) to IsTextUnicode to select its tests.
    IS_TEXT_UNICODE_UNICODE_MASK = $0F;
  var
    outstr: string;
    uscan: integer;
  begin
    outstr := '';
    // Pre-size the result so the raw copy in the else-branch has room.
    SetLength(outstr, datasize);
    uscan := IS_TEXT_UNICODE_UNICODE_MASK;
    // The encoding is guessed from the buffer contents, not read from the tag.
    if IsTextUnicode(currp, datasize, @uscan) then
      // NOTE(review): datasize is not passed here, so the conversion length
      // is not bounded by the field size, and any leading BOM is converted
      // like ordinary text - presumably the source of the stray $3F ('?')
      // characters; confirm against the data.
      outstr := WideCharToString(currp)
    else
      // Raw byte copy of the field into the pre-sized string.
      // NOTE(review): outstr[1] presumes datasize > 0 - confirm callers
      // never pass an empty field.
      move(currp^, outstr[1], datasize);
    Result := outstr;
  end;

注意，我真的对媒体库不感兴趣，因为我要做的就是编辑ID3标签而不是播放文件 - 除了像这样的一些小问题之外，实现已经完成。

Answer 1

根据所使用的 ID3 v2 版本，文本字符串前面可能有、也可能没有一个用于指明该字符串实际编码的字节。请不要使用 IsTextUnicode() 来猜测编码（尤其是因为它可能给出错误的结果）。

在 v2.3 及更早的 ID3 v2 版本中，没有编码字节，文本要么是 ISO-8859-1，要么是 UCS-2；UCS-2 字符串始终以 BOM 开头，因此可以据此判断字节序。例如：

// prior to Delphi 2009 - String is Ansi
function Id3v2_string(currp: Pointer; datasize: Integer): String;
{ Decodes an ID3v2 text field whose encoding is detected from a leading BOM
  (pre-Delphi 2009, String is Ansi).
    currp    - pointer to the first byte of the field data
    datasize - number of bytes available at currp
  Data starting with a UTF-16 byte-order mark is read as 16-bit characters
  (byte-swapped when the mark is big-endian); anything else is converted
  from ISO-8859-1 (code page 28591).
  Returns the text with trailing whitespace/control characters (including a
  NUL terminator) removed, or '' for a nil or empty buffer. }
var
  W: WideString;
  I: Integer;
  Ch: WideChar;
  Bom: Word;
begin
  Result := '';
  // A non-positive size must not reach MultiByteToWideChar: a negative
  // length would make it scan for a NUL terminator instead.
  if (currp = nil) or (datasize <= 0) then Exit;

  W := '';
  Bom := 0;
  if datasize >= SizeOf(Word) then Bom := PWord(currp)^;

  if (Bom = $FEFF) or (Bom = $FFFE) then begin
    // 16-bit text with BOM. Copy the raw code units that follow the BOM
    // untouched; converting to Ansi first (WideCharLenToString) would destroy
    // big-endian data before it can be byte-swapped.
    SetString(W, PWideChar(PAnsiChar(currp) + SizeOf(Word)),
      (datasize - SizeOf(Word)) div SizeOf(WideChar));
    if Bom = $FFFE then begin
      // BOM read back swapped => data is big-endian, convert to little-endian
      for I := 1 to Length(W) do begin
        Ch := W[I];
        W[I] := WideChar(((Word(Ch) and $FF) shl 8) or (Word(Ch) shr 8));
      end;
    end;
  end else begin
    // ISO-8859-1: first call sizes the buffer, second call converts
    I := MultiByteToWideChar(28591, 0, PAnsiChar(currp), datasize, nil, 0);
    if I > 0 then begin
      SetLength(W, I);
      MultiByteToWideChar(28591, 0, PAnsiChar(currp), datasize, PWideChar(W), I);
    end;
  end;
  Result := TrimRight(W);
end;

// Delphi 2009+ - String is Unicode
function Id3v2_string(currp: Pointer; datasize: Integer): String;
{ Decodes an ID3v2 text field whose encoding is detected from a leading BOM
  (Delphi 2009+, String is Unicode).
    currp    - pointer to the first byte of the field data
    datasize - number of bytes available at currp
  Data starting with a UTF-16 byte-order mark is decoded as little- or
  big-endian UTF-16 accordingly; anything else is decoded as ISO-8859-1
  (code page 28591).
  Returns the text with trailing whitespace/control characters (including a
  NUL terminator) removed, or '' for a nil or empty buffer. }
var
  Enc: TEncoding;
  Bom: Word;

  // Copies Size bytes starting at P and decodes them with Enc.
  function Convert(P: Pointer; Size: Integer): String;
  var
    Buf: TBytes;
  begin
    Result := '';
    if Size <= 0 then Exit;
    SetLength(Buf, Size);
    Move(P^, Buf[0], Size);
    Result := Enc.GetString(Buf);
  end;

begin
  Result := '';
  if (currp = nil) or (datasize <= 0) then Exit;

  Bom := 0;
  if datasize >= SizeOf(Word) then Bom := PWord(currp)^;

  if (Bom = $FEFF) or (Bom = $FFFE) then begin
    // 16-bit text with BOM; a BOM that reads back swapped means big-endian
    if Bom = $FFFE then
      Enc := TEncoding.BigEndianUnicode
    else
      Enc := TEncoding.Unicode;
    // Skip the BOM with byte-pointer arithmetic (PWord + 1 needs
    // $POINTERMATH ON) and drop a dangling odd byte, which cannot form a
    // complete 16-bit code unit.
    Result := Convert(PByte(currp) + SizeOf(Word),
      (datasize - SizeOf(Word)) and not 1);
  end else begin
    // ISO-8859-1; GetEncoding returns a new instance that the caller owns
    Enc := TEncoding.GetEncoding(28591);
    try
      Result := Convert(currp, datasize);
    finally
      Enc.Free;
    end;
  end;
  // Strip the NUL terminator/padding, matching the sibling implementations.
  Result := TrimRight(Result);
end;

ID3 v2.4 将 UCS-2 改为 UTF-16，并新增了对 UTF-8 以及不带 BOM 的 UTF-16BE 的支持，例如：

// prior to Delphi 2009 - String is Ansi
function Id3v2_string(currp: Pointer; datasize: Integer; Encoding: Byte): String;
{ Decodes an ID3v2 text field using an explicit encoding selector
  (pre-Delphi 2009, String is Ansi).
    currp    - pointer to the first byte of the field data
    datasize - number of bytes available at currp
    Encoding - $00 ISO-8859-1, $01 UTF-16 with BOM,
               $02 UTF-16BE without BOM, $03 UTF-8
  Returns the text with trailing whitespace/control characters (including a
  NUL terminator) removed. Returns '' for a nil or empty buffer or an
  unrecognised Encoding value. }
var
  W: WideString;
  Bom: Word;

  // Converts the whole buffer from the given Windows code page into W.
  procedure FromCodePage(CodePage: Cardinal);
  var
    N: Integer;
  begin
    // first call sizes the buffer, second call converts
    N := MultiByteToWideChar(CodePage, 0, PAnsiChar(currp), datasize, nil, 0);
    if N > 0 then begin
      SetLength(W, N);
      MultiByteToWideChar(CodePage, 0, PAnsiChar(currp), datasize, PWideChar(W), N);
    end;
  end;

  // Copies the 16-bit code units starting Offset bytes into the buffer into W,
  // byte-swapping each one when the data is big-endian.
  procedure FromUtf16(Offset: Integer; BigEndian: Boolean);
  var
    I: Integer;
    Ch: Word;
  begin
    SetString(W, PWideChar(PAnsiChar(currp) + Offset),
      (datasize - Offset) div SizeOf(WideChar));
    if BigEndian then
      for I := 1 to Length(W) do begin
        Ch := Word(W[I]);
        W[I] := WideChar(((Ch and $FF) shl 8) or (Ch shr 8));
      end;
  end;

begin
  Result := '';
  // A non-positive size must not reach MultiByteToWideChar/SetString.
  if (currp = nil) or (datasize <= 0) then Exit;

  W := '';
  case Encoding of
    $00: FromCodePage(28591);   // ISO-8859-1
    $01: begin
      // UTF-16 with BOM. Only read the BOM when two bytes are available.
      Bom := 0;
      if datasize >= SizeOf(Word) then Bom := PWord(currp)^;
      if Bom = $FFFE then
        FromUtf16(SizeOf(Word), True)     // BOM reads back swapped => BE
      else if Bom = $FEFF then
        FromUtf16(SizeOf(Word), False)
      else
        // BOM missing: keep every character rather than discarding the
        // first one, and assume little-endian.
        FromUtf16(0, False);
    end;
    $02: FromUtf16(0, True);    // UTF-16BE without BOM
    $03: FromCodePage(65001);   // UTF-8
  end;
  Result := TrimRight(W);
end;

// Delphi 2009+ - String is Unicode
function Id3v2_string(currp: Pointer; datasize: Integer; Encoding: Byte): String;
{ Decodes an ID3v2 text field using an explicit encoding selector
  (Delphi 2009+, String is Unicode).
    currp    - pointer to the first byte of the field data
    datasize - number of bytes available at currp
    Encoding - $00 ISO-8859-1, $01 UTF-16 with BOM,
               $02 UTF-16BE without BOM, $03 UTF-8
  Returns the text with trailing whitespace/control characters (including a
  NUL terminator) removed. Returns '' for a nil or empty buffer or an
  unrecognised Encoding value. }
var
  Bom: Word;
  Latin1: TEncoding;

  // Decodes the bytes from Offset to the end of the buffer with Enc.
  // UnitSize is the code-unit width in bytes; a trailing partial unit is
  // dropped because it cannot be decoded.
  function Convert(Enc: TEncoding; Offset, UnitSize: Integer): String;
  var
    Buf: TBytes;
    Size: Integer;
  begin
    Result := '';
    Size := datasize - Offset;
    Size := Size - (Size mod UnitSize);
    if Size <= 0 then Exit;
    SetLength(Buf, Size);
    // byte-pointer arithmetic; PWord + 1 would need $POINTERMATH ON
    Move((PByte(currp) + Offset)^, Buf[0], Size);
    Result := Enc.GetString(Buf);
  end;

begin
  Result := '';
  if (currp = nil) or (datasize <= 0) then Exit;

  case Encoding of
    $00: begin
      // ISO-8859-1; GetEncoding returns a new instance that the caller owns
      Latin1 := TEncoding.GetEncoding(28591);
      try
        Result := Convert(Latin1, 0, 1);
      finally
        Latin1.Free;
      end;
    end;
    $01: begin
      // UTF-16 with BOM. Only read the BOM when two bytes are available.
      Bom := 0;
      if datasize >= SizeOf(Word) then Bom := PWord(currp)^;
      if Bom = $FFFE then
        // BOM reads back swapped => big-endian
        Result := Convert(TEncoding.BigEndianUnicode, SizeOf(Word), SizeOf(WideChar))
      else if Bom = $FEFF then
        Result := Convert(TEncoding.Unicode, SizeOf(Word), SizeOf(WideChar))
      else
        // BOM missing: keep every character rather than discarding the
        // first one, and assume little-endian.
        Result := Convert(TEncoding.Unicode, 0, SizeOf(WideChar));
    end;
    $02:
      // UTF-16BE without BOM
      Result := Convert(TEncoding.BigEndianUnicode, 0, SizeOf(WideChar));
    $03:
      // UTF-8
      Result := Convert(TEncoding.UTF8, 0, 1);
  end;
  Result := TrimRight(Result);
end;

Unicode字符串中的奇数字符

1 个答案: