在python 2.7中编码sys.argv

时间:2017-08-14 14:55:31

标签: python encoding utf-8

我正在编写一个脚本,它从软件(Maltego)获取输入,并使用 sys.argv[1] 作为变量。当来自 Maltego 的信息是希伯来语时,我得到的是问号而不是实际文本。我尝试用不同的方式对文本进行编码,但都失败了。

我正在使用Python 2.7。

非常感谢任何解决方案的想法。

该脚本正在Maltego内部运行:

# -*- coding: utf-8 -*-

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from MaltegoTransform import *
import codecs
import sys
# Python 2 only: reload the sys module and then set the process-wide default
# string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')


# Transform object used to talk back to Maltego (presumably provided by the
# star import from MaltegoTransform -- verify against that module).
me = MaltegoTransform()
# First command-line argument as handed over by Maltego.
# NOTE(review): this is the value reported to contain '?' instead of Hebrew
# text; calling .encode() here cannot restore characters that were already
# replaced before the script received them.
search_value = sys.argv[1].encode("utf-8")

1 个答案:

答案 0 :(得分:1)

以下是我的一点建议:

import sys


def win32_utf8_argv():
    """Return the process command-line arguments as UTF-8 encoded strings.

    Python 2.x does not support Unicode in sys.argv on Windows; characters
    that cannot be represented are replaced with '?'.  This helper re-reads
    the original wide-character command line with kernel32.GetCommandLineW
    and splits it with shell32.CommandLineToArgvW.

    Leading entries that are not part of sys.argv (the Python executable and
    any interpreter options) are dropped, so the result lines up
    index-for-index with sys.argv.

    Returns:
        A list of UTF-8 encoded arguments, or None on failure (for example
        on a non-Windows platform, where ctypes.windll cannot be imported).
    """
    try:
        from ctypes import POINTER, byref, c_int, c_void_p, cdll, windll
        from ctypes.wintypes import LPCWSTR, LPWSTR

        GetCommandLineW = cdll.kernel32.GetCommandLineW
        GetCommandLineW.argtypes = []
        GetCommandLineW.restype = LPCWSTR

        CommandLineToArgvW = windll.shell32.CommandLineToArgvW
        CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
        CommandLineToArgvW.restype = POINTER(LPWSTR)

        # CommandLineToArgvW allocates one block that the caller must
        # release with LocalFree once the strings have been copied out.
        LocalFree = windll.kernel32.LocalFree
        LocalFree.argtypes = [c_void_p]
        LocalFree.restype = c_void_p

        argc = c_int(0)
        argv = CommandLineToArgvW(GetCommandLineW(), byref(argc))
        if not argv:
            # NULL pointer: the API call itself failed.
            return None
        try:
            if argc.value <= 0:
                return None
            # Skip everything the interpreter consumed itself.  Using the
            # length difference (rather than testing for exactly one extra
            # entry) also handles invocations such as
            # "python -u script.py arg".
            start = max(argc.value - len(sys.argv), 0)
            # Each item is copied into a new Python string here, before
            # the native buffer is freed in the finally clause.
            return [argv[i].encode('utf-8')
                    for i in range(start, argc.value)]
        finally:
            LocalFree(argv)
    except Exception:
        # Deliberate best effort: any import or API failure means "not
        # available" and is reported to the caller as None.
        return None

if __name__ == '__main__':
    # win32_utf8_argv() returns None when the Win32 lookup is unavailable
    # (e.g. not running on Windows); fall back to the interpreter-provided
    # arguments instead of failing with a TypeError on None.
    a = win32_utf8_argv() or sys.argv
    if len(a) > 1:
        print (a[1])
    else:
        # No argument supplied: exit with a message rather than IndexError.
        sys.exit('expected one command-line argument')