我已经在 Stack Overflow 和 Google 上查阅了有关 Python 段错误(segmentation fault)的资料,但我的情况与目前读到的任何案例都不相符。
基本上我已经编写了一个接受来自外部客户端的HTTP的API。客户端将二进制文件POST到API,我将在其上运行一些进程并在之后存储。
我正在导入一个用Python编码的模块来进行实际的数据处理。
运行 API 的机器可以通过 SSH 登录,在命令行解释器里我能正常执行这些处理步骤。但在 API 中执行同样的过程时却得到段错误(segmentation fault)。奇怪的是,逐字复制粘贴到命令行解释器中的完全相同的代码却运行正常。段错误发生在调用被导入模块函数的位置。
--------------------- 以下是终端会话记录 ---------------------
Python 2.7.3 (default, Apr 20 2012, 22:39:59)
[GCC 4.6.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> postdata = open('binfile').read()
>>> postdata[0:10]
'MZ\x90\x00\x03\x00\x00\x00\x04\x00'
>>> import atklite
>>> metadata = atklite.FileAnalysis(data=postdata).return_analysis()
>>> metadata
{'sha1': '1f0c33a18a417bb09f85add08b2a675c1deda5ca', 'analyzetime': 'Thu Jun 21 12:03:23 2012', 'ftype': 'PE32 executable (GUI) Intel 80386, for MS Windows', 'ttime': 1340294603.405788, 'sha256': '287744fc9d7b4f509fc271cce0af873c2d53fd1aa678d96dfc9afa8d6d1ad7a0', 'ssdeep': '768:HA/gyc4PWUSrxWoG+RUlyesIkJAkV+xQHz3ClxEEAdK88uDC4wOVdfxCO/ysRXA9:g/gyc4PWUSrxWoG+RUlyesIkJAkV+xQc', 'md5': '000038fe4b46c210c37bdde767835007', 'size': 28672}
-------------- 以下是发生崩溃的 API 代码片段 --------------
def POST(self):
a = web.ctx.env.get("HTTP_X_AUTH_TOKEN", None)
creds = authenticator(a)
x = web.input()
b = x.values()
written = open('binfile', 'wb')
for char in b[0]:
written.write(char)
written.close()
postdata = open('binfile').read()
print postdata[0:10]
analysis = atklite.FileAnalysis(data=postdata) #SEG FAULTS HERE!!!!
metadata = analysis.return_analysis()
--------------------------- 被导入模块的代码 ---------------------------
class FileAnalysis:
    """Compute cryptographic hashes, size, libmagic file type and ssdeep
    fuzzy hash for either a file on disk or an in-memory byte string.

    Results are exposed as a dict via return_analysis() / dump().
    """

    # Names of hashlib constructors to run over the input.
    hash_libs = ('md5', 'sha1', 'sha256')

    if ERROR:
        # Module-level flag set when ssdeep/magic dependencies failed to import.
        sys.exit('This function requires dependencies that aren\'t satisfied')

    def __init__(self, data=None, filename=None):
        # BUGFIX: `results`, `__ssd` and `__ms` used to be CLASS attributes,
        # so every instance shared one results dict (with timestamps frozen
        # at import time) and one set of ssdeep/libmagic C handles.  Sharing
        # those native handles across requests/threads is unsafe and is a
        # plausible cause of the reported segfault; make them per-instance.
        self.results = {'analyzetime': time.ctime(), 'ttime': time.time()}
        self.__ssd = ssdeep()
        self.__ms = magic.open(magic.MAGIC_NONE)
        self.__ms.load()
        # BUGFIX: __init__ must return None.  The original returned the
        # analyzers' return value; analyze_data() returns False for empty
        # input, and returning non-None from __init__ raises TypeError.
        if filename:
            self.analyze_file(filename)
        elif data:
            self.analyze_data(data)

    def hash_data(self, data):
        """Store the hexdigest of `data` for each algorithm in hash_libs."""
        for lib in self.hash_libs:
            hl = getattr(hashlib, lib)
            self.results[lib] = hl(data).hexdigest()

    def hash_file(self, filename):
        """Stream `filename` once, updating every digest and the total size."""
        hlibs = {}
        size = 0
        for lib in self.hash_libs:
            hlibs[lib] = getattr(hashlib, lib)()
        # BUGFIX: the file handle was never closed; use a context manager.
        with open(filename, 'rb') as fh:
            for chunk in read_chunks(fh):
                # We accumulate the size here so we don't have to read twice.
                size += len(chunk)
                for hl in hlibs:
                    hlibs[hl].update(chunk)
        self.results['size'] = size
        for hl in hlibs:
            self.results[hl] = hlibs[hl].hexdigest()

    def analyze_data(self, data):
        """Analyze an in-memory byte string; returns False for empty input."""
        if not data:
            return False
        self.hash_data(data)
        self.results['size'] = len(data)
        self.results['ftype'] = self.__ms.buffer(data)
        self.results['ssdeep'] = self.__ssd.hash_bytes(data)

    def analyze_file(self, filename):
        """Analyze a file on disk; raises IOError if it does not exist."""
        if not os.path.isfile(filename):
            raise IOError("File: %s doesn't exist" % filename)
        # Size is accumulated inside hash_file so the file is read only once.
        self.hash_file(filename)
        self.results['ftype'] = self.__ms.file(filename)
        self.results['ssdeep'] = self.__ssd.hash_file(filename)

    def return_analysis(self):
        """Return the accumulated results dict."""
        return (self.results)

    def dump(self):
        """Return the results as a newline-separated 'key: value' string."""
        res = []
        for result in self.results:
            res.append("%s: %s" % (result, self.results[result]))
        return("\n".join(res))
按照 Duncan 的建议用 pdb 跟踪的结果:
> /home/spezzino/ProcessingCode/atklite.py(93)analyze_data()
-> self.results['ftype'] = self.__ms.buffer(data)
(Pdb) s
--Call--
> /usr/lib/python2.7/dist-packages/magic.py(126)buffer()
-> def buffer(self, buf):
(Pdb) s
> /usr/lib/python2.7/dist-packages/magic.py(132)buffer()
-> return _buffer(self._magic_t, buf, len(buf))
(Pdb) s
--Return--
> /usr/lib/python2.7/dist-packages/magic.py(132)buffer()->'PE32 executa...or MS Windows'
-> return _buffer(self._magic_t, buf, len(buf))
(Pdb) s
> /home/spezzino/ProcessingCode/atklite.py(94)analyze_data()
-> self.results['ssdeep'] = self.__ssd.hash_bytes(data)
(Pdb) s
Segmentation fault
答案 0(得分:1)
我没法解释原因,但搜索后发现 ssdeep 有一个名为 pydeep 的替代绑定,可能更可靠(你所用的那个包在 OS X 上似乎经常导致段错误——也许换用 pydeep 在 Linux 上同样有效)。
它提供了几乎相同的接口(参见上面的链接),因此应该很容易替换。