我正在尝试使用zlib(http://www.zlib.net/)附带的minizip包装库来压缩文件夹。只要文件名是英文,它就可以正常工作。有没有人尝试过修改minizip来支持unicode?
修改后的代码发布在下面。问题在于这个函数,第二个参数将const char *作为输入。当我进行转换时,它会丢失数据,文件名也不一样。
例如:中文 - 统一码.txt成为zip中的中文-t +ƒS+Çtáü.txt。
err = zipOpenNewFileInZip3_64( zf,outstr.c_str(),&zi,
NULL,0,NULL,0,NULL /* comment*/,
(opt_compress_level != 0) ? Z_DEFLATED : 0,
opt_compress_level,0,
/* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */
-MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY,
password,crcFile, zip64);
/*
minizip.c
Version 1.1, February 14h, 2010
sample part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html )
Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html )
Modifications of Unzip for Zip64
Copyright (C) 2007-2008 Even Rouault
Modifications for Zip64 support on both zip and unzip
Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com )
*/
/* On targets that are neither Windows nor Apple, define the large-file
   feature macros before any system header is included (the #include lines
   follow this section). */
#if (!defined(_WIN32)) && (!defined(WIN32)) && (!defined(__APPLE__))
#ifndef __USE_FILE_OFFSET64
#define __USE_FILE_OFFSET64
#endif
#ifndef __USE_LARGEFILE64
#define __USE_LARGEFILE64
#endif
#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE
#endif
/* NOTE(review): the feature-test macro documented by glibc is spelled
   _FILE_OFFSET_BITS (trailing S); this spelling may have no effect -- confirm. */
#ifndef _FILE_OFFSET_BIT
#define _FILE_OFFSET_BIT 64
#endif
#endif
/* Wrappers that select the 64-bit capable stdio calls used by this file. */
#ifdef __APPLE__
// In darwin and perhaps other BSD variants off_t is a 64 bit value, hence no need for specific 64 bit functions
#define FOPEN_FUNC(filename, mode) fopen(filename, mode)
#define FTELLO_FUNC(stream) ftello(stream)
#define FSEEKO_FUNC(stream, offset, origin) fseeko(stream, offset, origin)
#else
/* Every other target (including Windows) goes through the explicit *64 names.
   NOTE(review): presumably a zlib/minizip header maps these names on MSVC -- confirm. */
#define FOPEN_FUNC(filename, mode) fopen64(filename, mode)
#define FTELLO_FUNC(stream) ftello64(stream)
#define FSEEKO_FUNC(stream, offset, origin) fseeko64(stream, offset, origin)
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <fcntl.h>
#ifdef _WIN32
# include <direct.h>
# include <io.h>
#define GetCurrentDir _getcwd
#else
# include <unistd.h>
# include <utime.h>
# include <sys/types.h>
# include <sys/stat.h>
#endif
#include "zip.h"
#include "Shlwapi.h"
#ifdef _WIN32
#define USEWIN32IOAPI
#include "iowin32.h"
#endif
#include <windows.h>
#include <string>
#include <iostream>
#include <list>
#include <fstream>
#include <sstream>
#include <set>
using namespace std;
#define WRITEBUFFERSIZE (16384)
#define MAXFILENAME (256)
#ifdef _WIN32
/*
 * Looks up the last-write time of `f` and stores it, converted to local time
 * and packed in DOS date/time format, into `*dt` (date in the high 16-bit
 * word, time in the low one).
 *
 *   f      wide-character name of the file to query
 *   tmzip  not written by this implementation
 *   dt     receives the DOS date/time; left untouched when the lookup fails
 *
 * Returns 1 when the file was found, 0 otherwise.
 */
uLong filetime(
    wchar_t *f,     /* name of file to get info on */
    tm_zip *tmzip,  /* return value: access, modific. and creation times */
    uLong *dt)      /* dostime */
{
    _WIN32_FIND_DATAW findData;
    HANDLE findHandle = FindFirstFileW(f, &findData);
    if (findHandle == INVALID_HANDLE_VALUE)
        return 0;

    FILETIME localWriteTime;
    LPWORD dosWords = (LPWORD)dt;
    FileTimeToLocalFileTime(&findData.ftLastWriteTime, &localWriteTime);
    FileTimeToDosDateTime(&localWriteTime, dosWords + 1, dosWords);
    FindClose(findHandle);
    return 1;
}
#else
#ifdef unix || __APPLE__
/*
 * Fills `tmzip` with the local-time modification stamp of the file `f`.
 *
 *   f      name of the file to query; "-" skips the stat() lookup
 *   tmzip  receives seconds/minutes/hours/day/month/year of the timestamp
 *   dt     not used by this implementation
 *
 * Returns 1 when stat() succeeded, 0 otherwise. When stat() is skipped or
 * fails, `tmzip` is filled from time 0. `tmzip` is left untouched only if
 * localtime() itself fails.
 */
uLong filetime(char *f, tm_zip *tmzip, uLong *dt)
{
    int ret = 0;
    struct stat s;          /* results of stat() */
    struct tm* filedate;
    time_t tm_t = 0;
    (void)dt;

    if (strcmp(f, "-") != 0)
    {
        char name[MAXFILENAME + 1];
        size_t len = strlen(f);
        if (len > MAXFILENAME)
            len = MAXFILENAME;
        /* Copy exactly `len` bytes and terminate explicitly, so no byte of
           `name` is read uninitialized when the input is truncated. */
        memcpy(name, f, len);
        name[len] = '\0';

        /* not all systems allow stat'ing a file with / appended */
        if (len > 0 && name[len - 1] == '/')
            name[len - 1] = '\0';

        if (stat(name, &s) == 0)
        {
            tm_t = s.st_mtime;
            ret = 1;
        }
    }

    filedate = localtime(&tm_t);
    if (filedate == NULL)
        return 0;           /* localtime() may fail; do not dereference NULL */

    tmzip->tm_sec  = filedate->tm_sec;
    tmzip->tm_min  = filedate->tm_min;
    tmzip->tm_hour = filedate->tm_hour;
    tmzip->tm_mday = filedate->tm_mday;
    tmzip->tm_mon  = filedate->tm_mon;
    tmzip->tm_year = filedate->tm_year;
    return ret;
}
#else
/*
 * Fallback for platforms with no supported way to query file times.
 * Leaves `tmzip` and `dt` untouched and always returns 0.
 */
uLong filetime(char *f, tm_zip *tmzip, uLong *dt)
{
    (void)f;
    (void)tmzip;
    (void)dt;
    return 0;
}
#endif
#endif
/*
 * Appends `directory + fileName` to `fileList` unless the file's extension
 * (the text after the last '.') is a member of `excludeFilterSet`.
 *
 * A name without any '.' has no extension and is always added. Previously
 * such a name was compared against the filter as a whole, so a file called
 * "txt" was dropped by a "txt" filter.
 */
void addFileToList(std::list<std::wstring>& fileList, const std::wstring& directory, const std::set<std::wstring>& excludeFilterSet, const std::wstring& fileName )
{
    const std::wstring::size_type dotPos = fileName.find_last_of(L'.');
    if (dotPos != std::wstring::npos)
    {
        const std::wstring fileExtension = fileName.substr(dotPos + 1);
        if (!fileExtension.empty() &&
            excludeFilterSet.find(fileExtension) != excludeFilterSet.end())
        {
            return;
        }
    }
    fileList.push_back(directory + fileName);
}
void GetFileListing(list<wstring>& fileList, wstring directory,const set<wstring>& excludeFilterSet,bool recursively=true)
{
directory = directory + L"\\";
wstring filter = directory + L"*";
_WIN32_FIND_DATAW FindFileData;
HANDLE hFind = FindFirstFileW(filter.c_str(), &FindFileData);
if (hFind == INVALID_HANDLE_VALUE)
{
DWORD dwError = GetLastError();
if (dwError != ERROR_FILE_NOT_FOUND)
{
//cout << "Invalid file handle for filter " << filter << ". Error is " << GetLastError() << endl;
}
return;
}
do
{
if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
{
if ((recursively) && (wcscmp(FindFileData.cFileName, L".") != 0) && (wcscmp(FindFileData.cFileName, L"..") != 0))
{
GetFileListing(fileList, directory + FindFileData.cFileName, excludeFilterSet);
}
}
else
{
addFileToList(fileList, directory, excludeFilterSet,FindFileData.cFileName);
}
} while (FindNextFileW(hFind, &FindFileData) != 0);
DWORD dwError = GetLastError();
FindClose(hFind);
if (dwError != ERROR_NO_MORE_FILES)
{
//cout << "FindNextFile error. Error is "<< dwError << endl;
}
}
/*
 * Reports whether `filename` can be opened for binary reading.
 * Returns 1 when the open succeeds (the file is closed again), 0 otherwise.
 */
int check_exist_file(wchar_t* filename)
{
    FILE* probe = _wfopen(filename, L"rb");
    if (probe == NULL)
        return 0;

    fclose(probe);
    return 1;
}
/* calculate the CRC32 of a file,
because to encrypt a file, we need known the CRC32 of the file before */
/*
 * Computes the CRC32 of a file; needed before encrypting an entry.
 *
 *   filenameinzip  wide-character path of the file to read
 *   buf            scratch buffer used for reading
 *   size_buf       size of `buf` in bytes
 *   result_crc     receives the CRC of the bytes read (0 if none were read)
 *
 * Returns ZIP_OK on success, or ZIP_ERRNO when the file cannot be opened or
 * a read error occurs. Progress is printed to stdout.
 */
int getFileCrc(const wchar_t * filenameinzip,void*buf,unsigned long size_buf,unsigned long* result_crc)
{
    unsigned long calculate_crc = 0;
    unsigned long size_read = 0;
    int err = ZIP_OK;
    FILE * fin = _wfopen(filenameinzip, L"rb");

    if (fin == NULL)
    {
        err = ZIP_ERRNO;
    }

    if (err == ZIP_OK)
        do
        {
            err = ZIP_OK;
            size_read = (unsigned long)fread(buf, 1, size_buf, fin);
            if (size_read < size_buf && feof(fin) == 0)
            {
                /* %ls, not %s: the file name is a wide-character string. */
                printf("error in reading %ls\n", filenameinzip);
                err = ZIP_ERRNO;
            }

            if (size_read > 0)
                calculate_crc = crc32(calculate_crc, (const Bytef *)buf, (uInt)size_read);
        } while ((err == ZIP_OK) && (size_read > 0));

    if (fin)
        fclose(fin);

    *result_crc = calculate_crc;
    printf("file %ls crc %lx\n", filenameinzip, calculate_crc);
    return err;
}
/*
 * Reports whether `filename` is at least 0xffffffff bytes long, i.e. too big
 * for the 32-bit size fields of a ZIP entry.
 *
 * Returns 1 for such a file and 0 otherwise, including when the file cannot
 * be opened. The measured size is printed to stdout.
 */
int isLargeFile(const wchar_t * filename)
{
    int largeFile = 0;
    ZPOS64_T pos = 0;
    FILE* pFile = _wfopen(filename, L"rb");

    if (pFile != NULL)
    {
        FSEEKO_FUNC(pFile, 0, SEEK_END);
        pos = FTELLO_FUNC(pFile);

        /* %ls for the wide-character name; the position is cast so the
           argument type matches %llu exactly. */
        printf("File : %ls is %llu bytes\n", filename, (unsigned long long)pos);

        if (pos >= 0xffffffff)
            largeFile = 1;

        fclose(pFile);
    }
    return largeFile;
}
/*
 * Splits `text` at every `delimiter` and inserts each piece into `result`
 * after removing the piece's first two characters (fewer if it is shorter).
 *
 * NOTE(review): the two-character strip presumably removes a "*." wildcard
 * prefix such as in "*.txt" -- confirm against the callers.
 * The text after the last delimiter (the whole text when there is none) is
 * also inserted, so an empty `text` still adds one empty string.
 */
void split( const wstring& text, wchar_t delimiter,set<wstring>& result )
{
    wstring::size_type cursor = 0;
    for (;;)
    {
        const wstring::size_type hit = text.find(delimiter, cursor);
        const bool isLastPiece = (hit == wstring::npos);

        wstring piece = isLastPiece ? text.substr(cursor)
                                    : text.substr(cursor, hit - cursor);
        piece.erase(0, 2);
        result.insert(piece);

        if (isLastPiece)
            break;
        cursor = hit + 1;
    }
}
/*
 * Reads one Unicode code point from a NUL-terminated wide string and advances
 * `cursor` past it. `*cursor` must not be the terminator on entry.
 *
 * A high surrogate followed by a low surrogate is combined into a single
 * supplementary code point. Lone surrogates and values above U+10FFFF are
 * replaced with U+FFFD so the resulting UTF-8 is always well formed.
 */
static unsigned long decodeWideCodePoint(const wchar_t *&cursor)
{
    unsigned long unit = static_cast<unsigned long>(*cursor++);
    if (sizeof(wchar_t) == 2)
        unit &= 0xFFFFul;           /* undo sign extension of a 16-bit unit */

    if (unit >= 0xD800ul && unit <= 0xDBFFul)
    {
        unsigned long low = static_cast<unsigned long>(*cursor);
        if (sizeof(wchar_t) == 2)
            low &= 0xFFFFul;
        if (low >= 0xDC00ul && low <= 0xDFFFul)
        {
            ++cursor;
            return 0x10000ul + ((unit - 0xD800ul) << 10) + (low - 0xDC00ul);
        }
        return 0xFFFDul;            /* high surrogate without its partner */
    }
    if ((unit >= 0xDC00ul && unit <= 0xDFFFul) || unit > 0x10FFFFul)
        return 0xFFFDul;
    return unit;
}

/* Number of UTF-8 bytes needed to encode the code point `cp`. */
static long utf8SequenceLength(unsigned long cp)
{
    if (cp < 0x80ul)
        return 1;
    if (cp < 0x800ul)
        return 2;
    if (cp < 0x10000ul)
        return 3;
    return 4;
}

/*
 * Size helper for WChar_to_UTF8: returns the number of bytes the UTF-8
 * encoding of `string` occupies, excluding the terminating NUL.
 * Returns 0 for a null pointer.
 */
long getUTF8size(const wchar_t *string){
    if (!string)
        return 0;
    long res = 0;
    while (*string)
        res += utf8SequenceLength(decodeWideCodePoint(string));
    return res;
}

/*
 * Converts a NUL-terminated wide string to a newly allocated, NUL-terminated
 * UTF-8 string. A null pointer yields an empty string.
 *
 * The result is allocated with new[]; the caller must release it with
 * delete[]. Surrogate pairs are encoded as one 4-byte sequence rather than
 * two 3-byte sequences, which would not be valid UTF-8.
 */
char *WChar_to_UTF8(const wchar_t *string){
    const long fSize = getUTF8size(string);
    char *res = new char[fSize + 1];
    res[fSize] = 0;
    if (!string)
        return res;

    long b = 0;
    while (*string){
        const unsigned long cp = decodeWideCodePoint(string);
        if (cp < 0x80ul){
            res[b++] = static_cast<char>(cp);
        }else if (cp < 0x800ul){
            res[b++] = static_cast<char>((cp >> 6) | 0xC0ul);
            res[b++] = static_cast<char>((cp & 0x3Ful) | 0x80ul);
        }else if (cp < 0x10000ul){
            res[b++] = static_cast<char>((cp >> 12) | 0xE0ul);
            res[b++] = static_cast<char>(((cp >> 6) & 0x3Ful) | 0x80ul);
            res[b++] = static_cast<char>((cp & 0x3Ful) | 0x80ul);
        }else{
            res[b++] = static_cast<char>((cp >> 18) | 0xF0ul);
            res[b++] = static_cast<char>(((cp >> 12) & 0x3Ful) | 0x80ul);
            res[b++] = static_cast<char>(((cp >> 6) & 0x3Ful) | 0x80ul);
            res[b++] = static_cast<char>((cp & 0x3Ful) | 0x80ul);
        }
    }
    return res;
}
/*
 * Converts a wide string to UTF-8 using WideCharToMultiByte(CP_UTF8).
 *
 * Returns the encoded bytes, without a terminating NUL inside the string.
 * An empty input or a failed conversion yields an empty string.
 */
std::string utf8_encode(const std::wstring &wstr)
{
    /* WideCharToMultiByte rejects a zero-length input, so answer the empty
       case directly instead of relying on the call failing. */
    if (wstr.empty())
        return std::string();

    const int wideLength = (int)wstr.size();
    const int size_needed = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLength, NULL, 0, NULL, NULL);
    if (size_needed <= 0)
        return std::string();

    std::string strTo(size_needed, 0);
    const int written = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLength, &strTo[0], size_needed, NULL, NULL);
    if (written <= 0)
        return std::string();   /* do not hand back a buffer of NUL bytes */

    strTo.resize(written);
    return strTo;
}
wstring zipper( const wstring& directoryPath, const wstring& strExcludeFilter, wstring & zipFileName )
{
int opt_overwrite=0,opt_compress_level=Z_BEST_COMPRESSION,opt_exclude_path=0,err=0,size_buf=0;
void* buf=NULL;
const char* password=NULL;
list<wstring> fileList;
DWORD dwRet;
wchar_t cCurrentPath[MAX_PATH];
dwRet = GetCurrentDirectoryW(MAX_PATH, cCurrentPath);
if( dwRet == 0 )
{
return wstring();
}
// Change the directory to the current folder
_wchdir(directoryPath.c_str());
set<wstring> excludeFilterSet;
split(strExcludeFilter,'|',excludeFilterSet);
GetFileListing(fileList, directoryPath,excludeFilterSet);
opt_overwrite = 1;
size_buf = WRITEBUFFERSIZE;
buf = (void*)malloc(size_buf);
if (buf==NULL) return wstring();
wchar_t tempDirPath[MAX_PATH];
dwRet = GetTempPathW (MAX_PATH, tempDirPath);
if( dwRet == 0 ) return wstring();
wstring directoryName,zipFilePath;
_WIN32_FIND_DATAW FindFileData;
HANDLE hFind = FindFirstFileW(directoryPath.c_str(), &FindFileData);
if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
{
directoryName = FindFileData.cFileName;
}
zipFilePath = wstring(tempDirPath)+L"\\"+directoryName+L".zip";
zipFile zf;
int errclose;
#ifdef USEWIN32IOAPI
zlib_filefunc64_def ffunc;
fill_win32_filefunc64W (&ffunc);
zf = zipOpen2_64(zipFilePath.c_str(),(opt_overwrite==2) ? 2 : 0,NULL,&ffunc);
# else
zf = zipOpen64(zipFilePath.c_str(),(opt_overwrite==2) ? 2 : 0);
# endif
if (zf == NULL)
{
//printf("error opening %s\n",filename_try);
err= ZIP_ERRNO;
}
else
{
//printf("creating %s\n",filename_try);
}
for(list<wstring>::iterator it = fileList.begin() ; it!=fileList.end();++it)
{
FILE * fin;
int size_read;
//const char* filenameinzip = (*it).c_str();
wstring filenameinzip = (*it).c_str();
wchar_t szOut[MAX_PATH];
PathRelativePathToW(szOut,
directoryPath.c_str(),
FILE_ATTRIBUTE_DIRECTORY,
filenameinzip.c_str(),
FILE_ATTRIBUTE_NORMAL);
wchar_t *savefilenameinzip;
zip_fileinfo zi;
unsigned long crcFile=0;
int zip64 = 0;
zi.tmz_date.tm_sec = zi.tmz_date.tm_min = zi.tmz_date.tm_hour =
zi.tmz_date.tm_mday = zi.tmz_date.tm_mon = zi.tmz_date.tm_year = 0;
zi.dosDate = 0;
zi.internal_fa = 0;
zi.external_fa = 0;
filetime(szOut,&zi.tmz_date,&zi.dosDate);
if ((password != NULL) && (err==ZIP_OK))
err = getFileCrc(szOut,buf,size_buf,&crcFile);
zip64 = isLargeFile(szOut);
/* The path name saved, should not include a leading slash. */
/*if it did, windows/xp and dynazip couldn't read the zip file. */
savefilenameinzip = szOut;
while( savefilenameinzip[0] == '\\' || savefilenameinzip[0] == '/' )
{
savefilenameinzip++;
}
string outstr = utf8_encode(savefilenameinzip);
//char * op = (char*)outstr.c_str();
err = zipOpenNewFileInZip3_64( zf,outstr.c_str(),&zi,
NULL,0,NULL,0,NULL /* comment*/,
(opt_compress_level != 0) ? Z_DEFLATED : 0,
opt_compress_level,0,
/* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */
-MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY,
password,crcFile, zip64);
if (err != ZIP_OK)
{
//printf("error in opening %s in zipfile\n",szOut);
}
else
{
//fin = FOPEN_FUNC(szOut,"rb");
fin = _wfopen(szOut,L"rb");
if (fin==NULL)
{
err=ZIP_ERRNO;
//printf("error in opening %s for reading\n",szOut);
}
}
if (err == ZIP_OK)
do
{
err = ZIP_OK;
size_read = (int)fread(buf,1,size_buf,fin);
if (size_read < size_buf)
if (feof(fin)==0)
{
//printf("error in reading %s\n",szOut);
err = ZIP_ERRNO;
}
if (size_read>0)
{
err = zipWriteInFileInZip (zf,buf,size_read);
if (err<0)
{
//printf("error in writing %s in the zipfile\n",szOut);
}
}
} while ((err == ZIP_OK) && (size_read>0));
if (fin)
fclose(fin);
if (err<0)
err=ZIP_ERRNO;
else
{
err = zipCloseFileInZip(zf);
if (err!=ZIP_OK)
{
//printf("error in closing %s in the zipfile\n",szOut);
}
}
}
errclose = zipClose(zf,NULL);
if (errclose != ZIP_OK)
{
//printf("error in closing %s\n",filename_try);
}
free(buf);
// Change back the executabe context
_wchdir(cCurrentPath);
return zipFilePath;
}
答案 0 :(得分:8)
根据ZIP标准(the standard),在ZIP文件中存储UTF-8文件名的官方方法是设置“通用标志位11”。从minizip的源代码来看,minizip在任何情况下都不会替你设置这个位,并且zipOpenNewFileInZip3_64
无法通过这个位。但是有zipOpenNewFileInZip4_64
接受另外两个参数versionMadeBy
和flagBase
。因此,您可以通过将呼叫更改为
err = zipOpenNewFileInZip4_64(zf, outstr.c_str(), […], crcFile, 36, 1<<11, zip64);
这假设outstr
实际上确实包含您的文件名的有效UTF-8编码,源代码建议它应该这样,但我还没有验证。我建议你打印outstr
字节的十六进制值来验证这一点。如果我在换算过程中没有出错,你的“统一码.txt”的UTF-8编码用十六进制表示应该是e7 b5 b1 e4 b8 80 e7 a2 bc 2e 74 78 74
。
有关此versionMadeBy
字段(我在通话中设置为36
)的详细信息,请参阅标准的第4.4.2节。这取决于您使用的平台,zipfi
参数(在您的情况下为&zi
)具有的文件属性的格式,以及所有标准的所有版本。当您使用unicode文件名时,我会说您使用的是标准版本6.3。*,因此低位字节应为36
。由于minizip.c
包装器根本不存储任何文件属性,因此您不必在那里指定平台。从这些方面可以看出缺乏属性:
zi.internal_fa = 0;
zi.external_fa = 0;
请注意,尽管标准确实提供了一种表示unicode文件名的方法,但该部分直到2006年才加入,可能仍有许多ZIP应用程序不支持它。因此,即使您的存档是正确的,您的解压缩工具仍可能错误地解压此文件,把UTF-8字节当作代码页437、Latin-1或其他类似编码来解释。
答案 1 :(得分:0)
修改程序使其正确处理UTF-8并非易事,请参阅Unicode FAQ。处理可能含有恶意数据的程序(例如压缩/解压缩程序)必须格外小心。这件事值得去做,但绝不简单。