在Windows程序中,将从GetCommandLine获取的命令行解析为多个参数(类似于Unix中的argv数组)的规范方法是什么?CommandLineToArgvW似乎可以对Unicode命令行执行此操作,但我找不到非Unicode的等效函数。我应该使用Unicode吗?如果不应该,我该如何解析命令行?
答案 0 :(得分:16)
这是CommandLineToArgvA的一个实现,它将工作委托给CommandLineToArgvW,MultiByteToWideChar和WideCharToMultiByte。
/*
 * ANSI counterpart of CommandLineToArgvW.
 *
 * The command line is converted to UTF-16 with MultiByteToWideChar, split by
 * CommandLineToArgvW, and every resulting argument is converted back to the
 * ANSI code page (CP_ACP) with WideCharToMultiByte.
 *
 * lpCmdLine  NUL-terminated ANSI command line; must not be NULL.
 * pNumArgs   receives the number of arguments; must not be NULL.
 *
 * Returns a single LocalAlloc'ed block holding the pointer table followed by
 * the argument strings, so the caller releases everything with one
 * LocalFree(result). result[*pNumArgs] is NULL. Returns NULL on any failure.
 */
LPSTR* CommandLineToArgvA(LPSTR lpCmdLine, INT *pNumArgs)
{
    int wideLen;
    int numArgs = 0;
    int stringBytes = 0;
    int bufLen;
    int i;
    LPWSTR wideCmdLine;
    LPWSTR *wideArgs;
    LPSTR *result;
    LPSTR buffer;

    if (lpCmdLine == NULL || pNumArgs == NULL)
        return NULL;

    /* The conversion functions return 0 on failure. They do not return an
     * HRESULT, so SUCCEEDED() (which is true for 0) must not be used. */
    wideLen = MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, lpCmdLine, -1, NULL, 0);
    if (wideLen <= 0)
        return NULL;

    wideCmdLine = (LPWSTR)malloc((size_t)wideLen * sizeof(WCHAR));
    if (wideCmdLine == NULL)
        return NULL;

    if (MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, lpCmdLine, -1, wideCmdLine, wideLen) <= 0)
    {
        free(wideCmdLine);
        return NULL;
    }

    wideArgs = CommandLineToArgvW(wideCmdLine, &numArgs);
    free(wideCmdLine);
    if (wideArgs == NULL)
        return NULL;

    /* First pass: number of bytes (terminators included) all strings need. */
    for (i = 0; i < numArgs; ++i)
    {
        int needed = WideCharToMultiByte(CP_ACP, 0, wideArgs[i], -1, NULL, 0, NULL, NULL);
        if (needed <= 0)
        {
            LocalFree(wideArgs);
            return NULL;
        }
        stringBytes += needed;
    }

    /* Pointer table (plus one NULL terminator slot) followed by the strings. */
    result = (LPSTR*)LocalAlloc(LMEM_FIXED,
                                ((size_t)numArgs + 1) * sizeof(LPSTR) + (size_t)stringBytes);
    if (result == NULL)
    {
        LocalFree(wideArgs);
        return NULL;
    }

    /* Second pass: convert each argument into the string area. */
    buffer = (LPSTR)(result + numArgs + 1);
    bufLen = stringBytes;
    for (i = 0; i < numArgs; ++i)
    {
        int written;

        assert(bufLen > 0);
        written = WideCharToMultiByte(CP_ACP, 0, wideArgs[i], -1, buffer, bufLen, NULL, NULL);
        if (written <= 0)
        {
            LocalFree(result);
            LocalFree(wideArgs);
            return NULL;
        }
        result[i] = buffer;
        buffer += written;
        bufLen -= written;
    }
    result[numArgs] = NULL;

    LocalFree(wideArgs);
    *pNumArgs = numArgs;
    return result;
}
答案 1 :(得分:9)
这篇文章声称提供了ANSI版本的CommandLineToArgvW。
答案 2 :(得分:6)
显然你可以在main()之外使用__argv来访问预先解析的参数向量......
答案 3 :(得分:4)
我跟踪了parse_cmdline的源代码(请参阅最新SDK中的“argv_parsing.cpp”),将其修改为与CommandLineToArgvW的范式和行为一致,并开发了以下函数。注意:根据Microsoft的建议(参见https://msdn.microsoft.com/en-us/library/windows/desktop/aa366723(v=vs.85).aspx),我改用了HeapAlloc,而不是LocalAlloc。另外还有一处SAL注解上的变化:我对lpCmdLine使用了_In_opt_,与原声明略有不同
——因为CommandLineToArgvW确实允许它为NULL
,在这种情况下它返回一个只包含程序名称的参数列表。
最后还有一点需要注意:parse_cmdline解析命令行的方式只在一个方面与CommandLineToArgvW略有不同——当处于“in quote”(引号内)模式时,连续的两个双引号字符会被解释为一个转义的双引号字符。两个函数都会消耗第一个双引号并输出第二个双引号。区别在于:CommandLineToArgvW此时会退出“in quote”模式,而parse_cmdline仍保持在“in quote”模式。这一点已在下面的函数中得到相应体现。
您可以按如下方式使用以下功能:
int argc = 0;
LPSTR *argv = CommandLineToArgvA(GetCommandLineA(), &argc);
// ... use argv[0] .. argv[argc - 1] here ...
// The second parameter of HeapFree is a DWORD flag mask, not a pointer: pass 0 rather than NULL.
HeapFree(GetProcessHeap(), 0, argv);
LPSTR* CommandLineToArgvA(_In_opt_ LPCSTR lpCmdLine, _Out_ int *pNumArgs)
{
if (!pNumArgs)
{
SetLastError(ERROR_INVALID_PARAMETER);
return NULL;
}
*pNumArgs = 0;
/*follow CommandLinetoArgvW and if lpCmdLine is NULL return the path to the executable.
Use 'programname' so that we don't have to allocate MAX_PATH * sizeof(CHAR) for argv
every time. Since this is ANSI the return can't be greater than MAX_PATH (260
characters)*/
CHAR programname[MAX_PATH] = {};
/*pnlength = the length of the string that is copied to the buffer, in characters, not
including the terminating null character*/
DWORD pnlength = GetModuleFileNameA(NULL, programname, MAX_PATH);
if (pnlength == 0) //error getting program name
{
//GetModuleFileNameA will SetLastError
return NULL;
}
if (*lpCmdLine == NULL)
{
/*In keeping with CommandLineToArgvW the caller should make a single call to HeapFree
to release the memory of argv. Allocate a single block of memory with space for two
pointers (representing argv[0] and argv[1]). argv[0] will contain a pointer to argv+2
where the actual program name will be stored. argv[1] will be nullptr per the C++
specifications for argv. Hence space required is the size of a LPSTR (char*) multiplied
by 2 [pointers] + the length of the program name (+1 for null terminating character)
multiplied by the sizeof CHAR. HeapAlloc is called with HEAP_GENERATE_EXCEPTIONS flag,
so if there is a failure on allocating memory an exception will be generated.*/
LPSTR *argv = static_cast<LPSTR*>(HeapAlloc(GetProcessHeap(),
HEAP_ZERO_MEMORY | HEAP_GENERATE_EXCEPTIONS,
(sizeof(LPSTR) * 2) + ((pnlength + 1) * sizeof(CHAR))));
memcpy(argv + 2, programname, pnlength+1); //add 1 for the terminating null character
argv[0] = reinterpret_cast<LPSTR>(argv + 2);
argv[1] = nullptr;
*pNumArgs = 1;
return argv;
}
/*We need to determine the number of arguments and the number of characters so that the
proper amount of memory can be allocated for argv. Our argument count starts at 1 as the
first "argument" is the program name even if there are no other arguments per specs.*/
int argc = 1;
int numchars = 0;
LPCSTR templpcl = lpCmdLine;
bool in_quotes = false; //'in quotes' mode is off (false) or on (true)
/*first scan the program name and copy it. The handling is much simpler than for other
arguments. Basically, whatever lies between the leading double-quote and next one, or a
terminal null character is simply accepted. Fancier handling is not required because the
program name must be a legal NTFS/HPFS file name. Note that the double-quote characters are
not copied.*/
do {
if (*templpcl == '"')
{
//don't add " to character count
in_quotes = !in_quotes;
templpcl++; //move to next character
continue;
}
++numchars; //count character
templpcl++; //move to next character
if (_ismbblead(*templpcl) != 0) //handle MBCS
{
++numchars;
templpcl++; //skip over trail byte
}
} while (*templpcl != '\0' && (in_quotes || (*templpcl != ' ' && *templpcl != '\t')));
//parsed first argument
if (*templpcl == '\0')
{
/*no more arguments, rewind and the next for statement will handle*/
templpcl--;
}
//loop through the remaining arguments
int slashcount = 0; //count of backslashes
bool countorcopychar = true; //count the character or not
for (;;)
{
if (*templpcl)
{
//next argument begins with next non-whitespace character
while (*templpcl == ' ' || *templpcl == '\t')
++templpcl;
}
if (*templpcl == '\0')
break; //end of arguments
++argc; //next argument - increment argument count
//loop through this argument
for (;;)
{
/*Rules:
2N backslashes + " ==> N backslashes and begin/end quote
2N + 1 backslashes + " ==> N backslashes + literal "
N backslashes ==> N backslashes*/
slashcount = 0;
countorcopychar = true;
while (*templpcl == '\\')
{
//count the number of backslashes for use below
++templpcl;
++slashcount;
}
if (*templpcl == '"')
{
//if 2N backslashes before, start/end quote, otherwise count.
if (slashcount % 2 == 0) //even number of backslashes
{
if (in_quotes && *(templpcl +1) == '"')
{
in_quotes = !in_quotes; //NB: parse_cmdline omits this line
templpcl++; //double quote inside quoted string
}
else
{
//skip first quote character and count second
countorcopychar = false;
in_quotes = !in_quotes;
}
}
slashcount /= 2;
}
//count slashes
while (slashcount--)
{
++numchars;
}
if (*templpcl == '\0' || (!in_quotes && (*templpcl == ' ' || *templpcl == '\t')))
{
//at the end of the argument - break
break;
}
if (countorcopychar)
{
if (_ismbblead(*templpcl) != 0) //should copy another character for MBCS
{
++templpcl; //skip over trail byte
++numchars;
}
++numchars;
}
++templpcl;
}
//add a count for the null-terminating character
++numchars;
}
/*allocate memory for argv. Allocate a single block of memory with space for argc number of
pointers. argv[0] will contain a pointer to argv+argc where the actual program name will be
stored. argv[argc] will be nullptr per the C++ specifications. Hence space required is the
size of a LPSTR (char*) multiplied by argc + 1 pointers + the number of characters counted
above multiplied by the sizeof CHAR. HeapAlloc is called with HEAP_GENERATE_EXCEPTIONS
flag, so if there is a failure on allocating memory an exception will be generated.*/
LPSTR *argv = static_cast<LPSTR*>(HeapAlloc(GetProcessHeap(),
HEAP_ZERO_MEMORY | HEAP_GENERATE_EXCEPTIONS,
(sizeof(LPSTR) * (argc+1)) + (numchars * sizeof(CHAR))));
//now loop through the commandline again and split out arguments
in_quotes = false;
templpcl = lpCmdLine;
argv[0] = reinterpret_cast<LPSTR>(argv + argc+1);
LPSTR tempargv = reinterpret_cast<LPSTR>(argv + argc+1);
do {
if (*templpcl == '"')
{
in_quotes = !in_quotes;
templpcl++; //move to next character
continue;
}
*tempargv++ = *templpcl;
templpcl++; //move to next character
if (_ismbblead(*templpcl) != 0) //should copy another character for MBCS
{
*tempargv++ = *templpcl; //copy second byte
templpcl++; //skip over trail byte
}
} while (*templpcl != '\0' && (in_quotes || (*templpcl != ' ' && *templpcl != '\t')));
//parsed first argument
if (*templpcl == '\0')
{
//no more arguments, rewind and the next for statement will handle
templpcl--;
}
else
{
//end of program name - add null terminator
*tempargv = '\0';
}
int currentarg = 1;
argv[currentarg] = ++tempargv;
//loop through the remaining arguments
slashcount = 0; //count of backslashes
countorcopychar = true; //count the character or not
for (;;)
{
if (*templpcl)
{
//next argument begins with next non-whitespace character
while (*templpcl == ' ' || *templpcl == '\t')
++templpcl;
}
if (*templpcl == '\0')
break; //end of arguments
argv[currentarg] = ++tempargv; //copy address of this argument string
//next argument - loop through it's characters
for (;;)
{
/*Rules:
2N backslashes + " ==> N backslashes and begin/end quote
2N + 1 backslashes + " ==> N backslashes + literal "
N backslashes ==> N backslashes*/
slashcount = 0;
countorcopychar = true;
while (*templpcl == '\\')
{
//count the number of backslashes for use below
++templpcl;
++slashcount;
}
if (*templpcl == '"')
{
//if 2N backslashes before, start/end quote, otherwise copy literally.
if (slashcount % 2 == 0) //even number of backslashes
{
if (in_quotes && *(templpcl+1) == '"')
{
in_quotes = !in_quotes; //NB: parse_cmdline omits this line
templpcl++; //double quote inside quoted string
}
else
{
//skip first quote character and count second
countorcopychar = false;
in_quotes = !in_quotes;
}
}
slashcount /= 2;
}
//copy slashes
while (slashcount--)
{
*tempargv++ = '\\';
}
if (*templpcl == '\0' || (!in_quotes && (*templpcl == ' ' || *templpcl == '\t')))
{
//at the end of the argument - break
break;
}
if (countorcopychar)
{
*tempargv++ = *templpcl;
if (_ismbblead(*templpcl) != 0) //should copy another character for MBCS
{
++templpcl; //skip over trail byte
*tempargv++ = *templpcl;
}
}
++templpcl;
}
//null-terminate the argument
*tempargv = '\0';
++currentarg;
}
argv[argc] = nullptr;
*pNumArgs = argc;
return argv;
}
答案 4 :(得分:3)
CommandLineToArgvW()
在shell32.dll中。我猜想Shell开发人员是为自己使用而创建了这个函数,后来之所以公开,要么是因为有人认为第三方开发者会觉得它有用,要么是因为某些法律诉讼迫使他们这样做。
由于Shell开发人员只需要Unicode版本,他们也就只写了这一个版本。编写一个ANSI包装函数相当简单:先将ANSI转换为Unicode,调用该函数,再将Unicode结果转换回ANSI(如果Shell32.dll提供过此API的ANSI变体,它很可能也是这样实现的)。
答案 5 :(得分:1)
当不想处理UNICODE时,以上答案都没有完全解决问题,所以我的解决方案修改自WINE项目——该项目包含shell32.dll中CommandLineToArgvW的源代码。我将其修改为如下版本,它对我来说非常适用:
/*************************************************************************
 * CommandLineToArgvA            [SHELL32.@]
 *
 * MODIFIED FROM https://www.winehq.org/ project
 *
 * Splits an ANSI command line into an argv-style array.
 *
 * PARAMS
 *  lpCmdline [I] NUL-terminated command line; must not be NULL or empty
 *  numargs   [O] receives the number of arguments; must not be NULL
 *
 * RETURNS
 *  Success: a single LocalAlloc'ed block holding the pointer table (ended by
 *           a NULL pointer) followed by the argument strings; release it
 *           with one LocalFree() call.
 *  Failure: NULL. For a NULL/empty lpCmdline or a NULL numargs the last
 *           error is set to ERROR_INVALID_PARAMETER.
 *
 * We must interpret the quotes in the command line to rebuild the argv
 * array correctly:
 * - arguments are separated by spaces or tabs
 * - quotes serve as optional argument delimiters
 *   '"a b"'   -> 'a b'
 * - escaped quotes must be converted back to '"'
 *   '\"'      -> '"'
 * - consecutive backslashes preceding a quote see their number halved with
 *   the remainder escaping the quote:
 *   2n   backslashes + quote -> n backslashes + quote as an argument delimiter
 *   2n+1 backslashes + quote -> n backslashes + literal quote
 * - backslashes that are not followed by a quote are copied literally:
 *   'a\b'     -> 'a\b'
 *   'a\\b'    -> 'a\\b'
 * - in quoted strings, consecutive quotes see their number divided by three
 *   with the remainder modulo 3 deciding whether to close the string or not.
 *   Note that the opening quote must be counted in the consecutive quotes,
 *   that's the (1+) below:
 *   (1+) 3n   quotes -> n quotes
 *   (1+) 3n+1 quotes -> n quotes plus closes the quoted string
 *   (1+) 3n+2 quotes -> n+1 quotes plus closes the quoted string
 * - in unquoted strings, the first quote opens the quoted string and the
 *   remaining consecutive quotes follow the above rule.
 */
LPSTR* WINAPI CommandLineToArgvA(LPSTR lpCmdline, int* numargs)
{
    DWORD argc;
    LPSTR *argv;
    LPSTR s;
    LPSTR d;
    LPSTR cmdline;
    int qcount,bcount;

    /* check the pointer itself before reading the first character */
    if(!numargs || !lpCmdline || *lpCmdline==0)
    {
        SetLastError(ERROR_INVALID_PARAMETER);
        return NULL;
    }

    /* --- First count the arguments */
    argc=1;
    s=lpCmdline;
    /* The first argument, the executable path, follows special rules */
    if (*s=='"')
    {
        /* The executable path ends at the next quote, no matter what */
        s++;
        while (*s)
            if (*s++=='"')
                break;
    }
    else
    {
        /* The executable path ends at the next space, no matter what */
        while (*s && *s!=' ' && *s!='\t')
            s++;
    }
    /* skip to the first argument, if any */
    while (*s==' ' || *s=='\t')
        s++;
    if (*s)
        argc++;

    /* Analyze the remaining arguments */
    qcount=bcount=0;
    while (*s)
    {
        if ((*s==' ' || *s=='\t') && qcount==0)
        {
            /* skip to the next argument and count it if any */
            while (*s==' ' || *s=='\t')
                s++;
            if (*s)
                argc++;
            bcount=0;
        }
        else if (*s=='\\')
        {
            /* '\', count them */
            bcount++;
            s++;
        }
        else if (*s=='"')
        {
            /* '"' */
            if ((bcount & 1)==0)
                qcount++; /* unescaped '"' */
            s++;
            bcount=0;
            /* consecutive quotes, see comment in copying code below */
            while (*s=='"')
            {
                qcount++;
                s++;
            }
            qcount=qcount % 3;
            if (qcount==2)
                qcount=0;
        }
        else
        {
            /* a regular character */
            bcount=0;
            s++;
        }
    }

    /* Allocate in a single lump, the string array, and the strings that go
     * with it. This way the caller can make a single LocalFree() call to free
     * both, as per MSDN.
     * The explicit cast keeps the snippet valid when compiled as C++.
     */
    argv=(LPSTR*)LocalAlloc(LMEM_FIXED, (argc+1)*sizeof(LPSTR)+(strlen(lpCmdline)+1)*sizeof(char));
    if (!argv)
        return NULL;
    cmdline=(LPSTR)(argv+argc+1);
    strcpy(cmdline, lpCmdline);

    /* --- Then split and copy the arguments.
     * The copy is rewritten in place: s reads, d writes, and d never gets
     * ahead of s.
     */
    argv[0]=d=cmdline;
    argc=1;
    /* The first argument, the executable path, follows special rules */
    if (*d=='"')
    {
        /* The executable path ends at the next quote, no matter what */
        s=d+1;
        while (*s)
        {
            if (*s=='"')
            {
                s++;
                break;
            }
            *d++=*s++;
        }
    }
    else
    {
        /* The executable path ends at the next space, no matter what */
        while (*d && *d!=' ' && *d!='\t')
            d++;
        s=d;
        if (*s)
            s++;
    }
    /* close the executable path */
    *d++=0;
    /* skip to the first argument and initialize it if any */
    while (*s==' ' || *s=='\t')
        s++;
    if (!*s)
    {
        /* There are no parameters so we are all done */
        argv[argc]=NULL;
        *numargs=argc;
        return argv;
    }

    /* Split and copy the remaining arguments */
    argv[argc++]=d;
    qcount=bcount=0;
    while (*s)
    {
        if ((*s==' ' || *s=='\t') && qcount==0)
        {
            /* close the argument */
            *d++=0;
            bcount=0;

            /* skip to the next one and initialize it if any */
            do {
                s++;
            } while (*s==' ' || *s=='\t');
            if (*s)
                argv[argc++]=d;
        }
        else if (*s=='\\')
        {
            *d++=*s++;
            bcount++;
        }
        else if (*s=='"')
        {
            if ((bcount & 1)==0)
            {
                /* Preceded by an even number of '\', this is half that
                 * number of '\', plus a quote which we erase.
                 */
                d-=bcount/2;
                qcount++;
            }
            else
            {
                /* Preceded by an odd number of '\', this is half that
                 * number of '\' followed by a '"'
                 */
                d=d-bcount/2-1;
                *d++='"';
            }
            s++;
            bcount=0;
            /* Now count the number of consecutive quotes. Note that qcount
             * already takes into account the opening quote if any, as well as
             * the quote that lead us here.
             */
            while (*s=='"')
            {
                if (++qcount==3)
                {
                    *d++='"';
                    qcount=0;
                }
                s++;
            }
            if (qcount==2)
                qcount=0;
        }
        else
        {
            /* a regular character */
            *d++=*s++;
            bcount=0;
        }
    }
    *d='\0';
    argv[argc]=NULL;
    *numargs=argc;

    return argv;
}
解析空字符串""时要小心:它会返回NULL而不是可执行文件路径,这与标准CommandLineToArgvW的行为不同。
用法如下:
/* Usage example: argc is filled in by CommandLineToArgvA on success. */
int argc;
LPSTR * argv = CommandLineToArgvA(GetCommandLineA(), &argc);
/* ... use argv here; the function returns NULL on failure ... */
/* The pointer table and the strings share one allocation, so a single
 * LocalFree() call releases everything once argv is no longer needed. */
LocalFree(argv);
答案 6 :(得分:0)
以下是我想到的在WinMain顶部获得老式argc / argv对的最简单方法。假设命令行确实是ANSI文本,那么实际上您不需要进行任何更高级的转换。
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd) {
int argc;
LPWSTR *szArglist = CommandLineToArgvW(GetCommandLineW(), &argc);
char **argv = new char*[argc];
for (int i=0; i<argc; i++) {
int lgth = wcslen(szArglist[i]);
argv[i] = new char[lgth+1];
for (int j=0; j<=lgth; j++)
argv[i][j] = char(szArglist[i][j]);
}
LocalFree(szArglist);