SevenZipSharp无法解压缩某些tar存档

时间:2018-11-15 15:57:18

标签: c# archive 7zip

我使用SevenZipSharp将文件打包到7z档案中,并从各种档案中解压缩。多年来效果很好。

今天,我有一个.tgz存档,在第二阶段解压缩失败:
从.tgz提取.tar可行,但是解压缩.tar失败。只是此单个存档受到了影响。所有其他.tgz都可以正常工作。 .tar本身并不存在问题,因为使用7-zip软件解压缩也可以。

1 个答案:

答案 0 :(得分:5)

经过大量测试,我和一位同事发现了原因:
我们必须调试SevenZipSharp DLL来查找其中的故障。 DLL通过读取前16个字节并将其与签名列表进行比较来检测档案的类型。这对于大多数类型的存档都是正确的,但对于.tar存档来说是错误的,因为.tar文件头以存档的文件名:TAR @ Wikipedia开头。签名“ ustar”(如果存在)位于地址257(0x0101)。

SevenZipSharp知道这一点,并在此地址检查“ ustar”,但前提是先前的检测失败。不幸的是,我们的TAR档案的名称是“ x42202.tar”。 .dmg文件(Apple Disk Image)的标头由单个“ x”组成(这很愚蠢,仅使用一个字节作为签名?)。因此实际上可以成功检测到文件类型,只是检测结果是错误的。
(我知道,链接的Wikipedia说.dmg标头签名是“ koly”,但我在网上找到了下载的.dmg文件来确认。)

因此,我们修改了FileSignatureChecker.cs中的代码,以避免对.tar存档进行错误的存档类型检测。
在下面找到原始代码和修改后的代码。
代码库是最新的SevenZipSharp版本,可以在CodePlex archive中找到。显然,它不再处于活跃的开发中,因为版本号多年来没有变化,如果它仍然处于活跃状态,它将在CodePlex退役后迁移。

更新2018-11-16
修改后的代码中的错误修正:如果找到,则未返回enSpecialFormat。

原始代码

public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
  offset = 0;
  if (!stream.CanRead)
  {
    throw new ArgumentException ("The stream must be readable.");
  }
  if (stream.Length < SIGNATURE_SIZE)
  {
    throw new ArgumentException ("The stream is invalid.");
  }

  #region Get file signature

  var signature = new byte[SIGNATURE_SIZE];
  int bytesRequired = SIGNATURE_SIZE;
  int index = 0;
  stream.Seek (0, SeekOrigin.Begin);
  while (bytesRequired > 0)
  {
    int bytesRead = stream.Read (signature, index, bytesRequired);
    bytesRequired -= bytesRead;
    index += bytesRead;
  }
  string actualSignature = BitConverter.ToString (signature);

  #endregion

  InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
  isExecutable = false;

  foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
  {
    if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
        actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
        Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
    {
      if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
      {
        suspectedFormat = InArchiveFormat.PE;
        isExecutable = true;
      }
      else
      {
        return Formats.InSignatureFormats[expectedSignature];
      }
    }
  }

  // Many Microsoft formats
  if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
  {
    suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
  }

  #region SpecialDetect
  try
  {
    SpecialDetect (stream, 257, InArchiveFormat.Tar);
  }
  catch (ArgumentException) { }
  if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
  {
    return InArchiveFormat.Iso;
  }
  if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
  {
    return InArchiveFormat.Iso;
  }
  if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    return InArchiveFormat.Iso;
  }
  if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    return InArchiveFormat.Iso;
  }
  if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
  {
    return InArchiveFormat.Hfs;
  }
  #region Last resort for tar - can mistake
  if (stream.Length >= 1024)
  {
    stream.Seek (-1024, SeekOrigin.End);
    byte[] buf = new byte[1024];
    stream.Read (buf, 0, 1024);
    bool istar = true;
    for (int i = 0; i < 1024; i++)
    {
      istar = istar && buf[i] == 0;
    }
    if (istar)
    {
      return InArchiveFormat.Tar;
    }
  }
  #endregion
  #endregion

  #region Check if it is an SFX archive or a file with an embedded archive.
  if (suspectedFormat != InArchiveFormat.XZ)
  {
    #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
    var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
    signature = new byte[scanLength];
    bytesRequired = (int)scanLength;
    index = 0;
    stream.Seek (0, SeekOrigin.Begin);
    while (bytesRequired > 0)
    {
      int bytesRead = stream.Read (signature, index, bytesRequired);
      bytesRequired -= bytesRead;
      index += bytesRead;
    }
    actualSignature = BitConverter.ToString (signature);
    #endregion

    foreach (var format in new InArchiveFormat[]
    {
                    InArchiveFormat.Zip,
                    InArchiveFormat.SevenZip,
                    InArchiveFormat.Rar,
                    InArchiveFormat.Cab,
                    InArchiveFormat.Arj
    })
    {
      int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
      if (pos > -1)
      {
        offset = pos / 3;
        return format;
      }
    }
    // Nothing
    if (suspectedFormat == InArchiveFormat.PE)
    {
      return InArchiveFormat.PE;
    }
  }
  #endregion

  throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}

修改后的代码

public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
  offset = 0;
  if (!stream.CanRead)
  {
    throw new ArgumentException ("The stream must be readable.");
  }
  if (stream.Length < SIGNATURE_SIZE)
  {
    throw new ArgumentException ("The stream is invalid.");
  }

  #region Get file signature

  var signature = new byte[SIGNATURE_SIZE];
  int bytesRequired = SIGNATURE_SIZE;
  int index = 0;
  stream.Seek (0, SeekOrigin.Begin);
  while (bytesRequired > 0)
  {
    int bytesRead = stream.Read (signature, index, bytesRequired);
    bytesRequired -= bytesRead;
    index += bytesRead;
  }
  string actualSignature = BitConverter.ToString (signature);

  #endregion Get file signature

  InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
  isExecutable = false;

  InArchiveFormat enDetectedFormat = (InArchiveFormat)(-1);
  InArchiveFormat enSpecialFormat = (InArchiveFormat)(-1);

  foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
  {
    if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
        actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
        Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
    {
      if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
      {
        suspectedFormat = InArchiveFormat.PE;
        isExecutable = true;
      }
      else
      {
        enDetectedFormat = Formats.InSignatureFormats[expectedSignature];
        break;
      }
    }
  }

  // Many Microsoft formats
  if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
  {
    suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
  }

  #region SpecialDetect

  if (SpecialDetect (stream, 257, InArchiveFormat.Tar))
  {
    enSpecialFormat = InArchiveFormat.Tar;
  }
  else if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
  {
    enSpecialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
  {
    enSpecialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    enSpecialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    enSpecialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
  {
    enSpecialFormat = InArchiveFormat.Hfs;
  }

  #region Last resort for tar - can mistake

  bool bPossiblyTAR = false;
  if (stream.Length >= 1024)
  {
    stream.Seek (-1024, SeekOrigin.End);
    byte[] buf = new byte[1024];
    stream.Read (buf, 0, 1024);
    bPossiblyTAR = true;
    for (int i = 0; i < 1024; i++)
    {
      bPossiblyTAR = bPossiblyTAR && buf[i] == 0;
    }
  }

  // TAR header starts with the filename of the archive.
  // The filename can be anything, including the Identifiers of the various archive formats.
  // This means that a TAR can be misinterpreted as any type of archive.
  if (enSpecialFormat == InArchiveFormat.Tar
  || bPossiblyTAR)
  {
    var fs = stream as FileStream;
    if (fs != null)
    {
      string sStreamFilename = fs.Name;
      if (sStreamFilename.EndsWith (".tar", StringComparison.InvariantCultureIgnoreCase))
        enDetectedFormat = InArchiveFormat.Tar;
    }
  }

  #endregion Last resort for tar - can mistake

  if (enDetectedFormat != (InArchiveFormat)(-1))
    return enDetectedFormat;
  if (enSpecialFormat != (InArchiveFormat)(-1))
    return enSpecialFormat;

  #endregion SpecialDetect

  #region Check if it is an SFX archive or a file with an embedded archive.

  if (suspectedFormat != InArchiveFormat.XZ)
  {
    #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

    var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
    signature = new byte[scanLength];
    bytesRequired = (int)scanLength;
    index = 0;
    stream.Seek (0, SeekOrigin.Begin);
    while (bytesRequired > 0)
    {
      int bytesRead = stream.Read (signature, index, bytesRequired);
      bytesRequired -= bytesRead;
      index += bytesRead;
    }
    actualSignature = BitConverter.ToString (signature);

    #endregion Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

    foreach (var format in new InArchiveFormat[]
    {
                InArchiveFormat.Zip,
                InArchiveFormat.SevenZip,
                InArchiveFormat.Rar,
                InArchiveFormat.Cab,
                InArchiveFormat.Arj
    })
    {
      int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
      if (pos > -1)
      {
        offset = pos / 3;
        return format;
      }
    }
    // Nothing
    if (suspectedFormat == InArchiveFormat.PE)
    {
      return InArchiveFormat.PE;
    }
  }

  #endregion Check if it is an SFX archive or a file with an embedded archive.

  throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}