我正在尝试在C中制作一个快速简单的签名检测程序。它应该读取二进制文件(.exe,ELF,库等...)并搜索二进制数据(有时是字符串,有时是字节);
我在C中有一个简单的测试程序:
#include <stdio.h>
#include <unistd.h>
const char *str = "TestingOneTwoThree";
int main()
{
while(1)
{
fprintf(stdout, "%s %ld\n", str, (long)getpid());
sleep(1);
}
}
这是我正在使用的horspool算法。我直接从这里找到的维基百科伪代码中调整了它:https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define HORSPOOL_COUNT 256
#define BLOCK_SIZE 1024
#define MAX(a, b) a > b ? a : b
ssize_t horspool_find(const char *buf, size_t buflen, const char *egg, size_t egglen)
{
int table[HORSPOOL_COUNT];
ssize_t shift = 0, i, tmp;
for(i = 0; i < HORSPOOL_COUNT; ++i)
{
table[i] = (int)egglen;
}
for(i = 0; i < egglen - 1; ++i)
{
table[(int)egg[i]] = egglen - i - 1;
}
while(shift <= buflen - egglen)
{
i = egglen - 1;
while(buf[shift + i] == egg[i])
{
if(i == 0)
{
return shift;
}
i--;
}
shift += MAX(1, table[(int)buf[shift + egglen - 1]]);
}
return -1;
}
char *readfile(const char *filename, size_t *size)
{
int ch;
size_t used = 0, allocated = 0;
char *buf = NULL, *tmp = NULL;
FILE *f;
if((f = fopen(filename, "rb")) == NULL)
{
if(size) *size = 0;
return perror("fopen"), NULL;
}
while((ch=fgetc(f)) != EOF)
{
if(used >= allocated)
{
allocated += BLOCK_SIZE;
tmp = realloc(buf, allocated);
if(tmp == NULL)
{
free(buf);
if(size) *size = 0;
fclose(f);
return perror("realloc"), NULL;
}
buf = tmp;
}
buf[used++] = (char)ch;
}
fclose(f);
if(size) *size = used;
return realloc(buf, used);
}
ssize_t naivealg_find(const char *buf, size_t buflen, const char *find, size_t findlen)
{
size_t i, j, diff = buflen - findlen;
for(i = 0; i < diff; ++i)
{
for(j = 0; j < findlen; ++j)
{
if(buf[i+j] != find[j])
{
break;
}
}
if(j == findlen)
{
return (ssize_t)i;
}
}
return -1;
}
int main()
{
size_t size;
char *buf = readfile("./a.out", &size);
char *pat = "TestingOneTwoThree";
ssize_t pos1 = horspool_find(buf, size, pat, strlen(pat));
ssize_t pos2 = naivealg_find(buf, size, pat, strlen(pat));
fprintf(stdout, "Offsets: %zd ~ %zd\n", pos1, pos2);
return 0;
}
输出类似于:
Offsets: -1 ~ 2052
注意:
buf
和egg
参数正常工作。答案 0 :(得分:2)
代码使用带签名的char
并且二进制数据会不时地使用负索引进行错误索引。
// table[(int)buf[shift + egglen - 1]]
table[(unsigned char )buf[shift + egglen - 1]]
此问题也存在于egg
模式中。
// table[(int) egg[i]] = egglen - i - 1;
table[(unsigned char) egg[i]] = egglen - i - 1;
buflen < egglen
// while (shift <= buflen - egglen)
// change to avoid underflow
while (shift + egglen <= buflen)
还考虑以二进制文件打开文件:
ssize_t shift,i; --> size_t shift,i;
int table[HORSPOOL_COUNT]; -- > size_t table[HORSPOOL_COUNT];
将()
添加到#define MAX(a, b) (((a) > (b)) ? (a) : (b))