原型:size_t offsetof(type, member);
我知道第一个参数是类型(type);但如果我手头只有以字符串形式给出的类型名称,而不是类型本身,该怎么办?我想让offsetof
只使用字符串字面量来指定类型和成员。
希望社区能帮助我实现这一目标。
例如:
#include <stdio.h>
#include <stddef.h>
/* Example structure from the question: two pointer members whose offsets
 * the asker wants to look up by name. */
typedef struct example_ {
void *member1; /* first member */
void *member2; /* second member */
} example;
/*
 * The asker's attempt at a string-driven offsetof(): both the structure
 * name (ds) and the member name (member) arrive as run-time strings.
 *
 * NOTE(review): offsetof() is a macro that needs a type name and a member
 * designator at compile time; here it is handed the char pointer variables
 * ds and member instead, so this is the non-working code the question is
 * about. It is intentionally left unchanged.
 */
unsigned int
offset_gen(char *ds, char *member)
{
return (offsetof(ds, member));
}
/*
 * Demo driver for the question: prints the value offset_gen() reports for
 * each member of "example", each preceded by a newline and a space.
 *
 * main returns int (void main() is not a standard signature), and the
 * unsigned int results are printed with %u rather than %d.
 */
int
main(void)
{
    printf("\n %u", offset_gen("example", "member1"));
    printf("\n %u", offset_gen("example", "member2"));
    return 0;
}
答案 0 :(得分:2)
这是一个关于如何开始实现这一目标的真实示例。
注意:这不是可直接用于应用程序的代码。我是从头开始写的,因此只应被视为概念验证;可以把它当作开发团队讨论的基础。此版本没有使用真正的C解析器,而是假定C源代码遵循某些约定。
此帖子中包含的所有文件均以CC0授权,即贡献给公共领域。但请记住,没有任何保证:如果它出了故障,或者弄坏了别的东西,不要怪我。
本质上,我们使用Bash + Awk脚本生成C程序,在编译和运行时,生成具有预先计算数据的哈希表,以及可用于查找结构类型的成员偏移的member_offset()
函数,结构类型和成员名称作为字符串给出。
为了便于说明,这是一个完整的工作示例,包括Makefile
。
文件mytypes.h
包含我们感兴趣的类型:
#include <stdlib.h>
/*
 * First demo structure. It is declared packed (GCC attribute), and mixes a
 * char, a char array, a float and a function pointer so that the lookup
 * has differently sized members to report.
 */
struct type1 {
char one, two[2];                               /* a char followed by a two-element char array */
float three;                                    /* floating-point member */
int (*callback)(const char *, void *, size_t);  /* function-pointer member */
} __attribute__((__packed__));
/*
 * Second demo structure. It is not packed and embeds a struct type1 by
 * value, so it also exercises nested structures and array members.
 */
struct type2 {
char four;          /* single char member */
struct type1 five;  /* embedded struct type1, stored by value */
int six, seven[3];  /* an int followed by a three-element int array */
};
您不需要将所有类型都塞进单个头文件中;如果您将它们放在不同的文件中,则只需编辑Makefile
即可。但是,有一个要求:所有类型都必须定义在头文件中,以便可以在中间C生成器文件中用#include
包含它们;该生成器文件仅在构建时编译和运行。
为了说明,我们有一个main.c
,允许用户在命令行上指定结构类型和成员名称,并将偏移量打印到标准输出:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
extern size_t member_offset(const char *type, const char *name, const size_t not_found);
/*
 * Command-line front end for member_offset().
 *
 * Arguments are TYPE NAME pairs. For every pair the offset of member NAME
 * in struct TYPE is printed on standard output. With too few arguments, an
 * unpaired argument, or -h / --help as the first argument, a usage text is
 * written to standard error and the program exits successfully. The first
 * pair that cannot be resolved ends the program with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
    /* Pairs only: a valid argument count is odd and at least three. */
    const int unpaired = (argc < 3) || (argc % 2 == 0);

    if (unpaired || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0) {
        const char *const prog = argv[0];

        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", prog);
        fprintf(stderr, " %s TYPE NAME [ TYPE NAME ... ]\n", prog);
        fprintf(stderr, "\n");
        return EXIT_SUCCESS;
    }

    int arg = 1;
    while (arg < argc - 1) {
        const char *const type = argv[arg];
        const char *const name = argv[arg + 1];
        /* All bits set is passed as the "not found" return value. */
        const size_t offset = member_offset(type, name, ~(size_t)0);

        /* errno is tested right after the call to tell success from failure. */
        if (errno != 0) {
            fprintf(stderr, "struct %s unknown, or has no member %s.\n", type, name);
            return EXIT_FAILURE;
        }

        printf("struct %s has member %s at offset %zu.\n", type, name, offset);
        fflush(stdout);
        arg += 2;
    }

    return EXIT_SUCCESS;
}
要构建项目,我们使用Makefile
。请注意,缩进必须使用 Tab ,而不是空格;make对此很挑剔。
# Toolchain settings; override on the command line, e.g. make CFLAGS="-Wall -O2 -m32".
CC      := gcc
CFLAGS  := -Wall -O2
LDFLAGS :=

.PHONY: all clean

# Default goal: always start from a clean tree, then build the example.
all: clean example

# Remove every build product, including the intermediate generator files.
clean:
	rm -f *.o example member-offset.c member-offset-generator.c member-offset-generator

# member-offset.c is produced in two stages: the shell script writes a C
# generator program, which is compiled, run to emit member-offset.c, and then
# deleted. The script itself is a prerequisite so that editing it also
# regenerates the output.
member-offset.c: mytypes.h member-offset-generator.bash
	rm -f $@ member-offset-generator member-offset-generator.c
	./member-offset-generator.bash mytypes.h:type1 mytypes.h:type2 > member-offset-generator.c
	$(CC) $(CFLAGS) member-offset-generator.c $(LDFLAGS) -o member-offset-generator
	./member-offset-generator > $@
	rm -f member-offset-generator member-offset-generator.c

# Compile one C source into one object file ($< is the first prerequisite only).
%.o: %.c
	$(CC) $(CFLAGS) -c $<

# Link the example program from the generated lookup code and main.c.
example: member-offset.o main.c
	$(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@
请注意上面的member-offset.c
规则。它指的是自动生成的C源文件,其中将包含member_offset()
函数。如果该文件还不存在,就会被生成;并且每当mytypes.h
被修改时都会重新生成。
命令./member-offset-generator.bash mytypes.h:type1 mytypes.h:type2 > member-offset-generator.c
使用尚未显示的第四个文件(请参见下文),检查mytypes.h
,并在类型数据库哈希表中包含struct type1
和struct type2
。输出是member-offset-generator.c
,这是一个C程序,在编译和运行时,会生成我们真正想要的C代码。将此规则拆分为单独的规则可能会更好,但是现在,我让它自动编译并运行member-offset-generator.c
并删除它(因为只需输出member-offset.c
一次)。
生成该中间C程序member-offset-generator.bash
的shell脚本非常复杂:
#!/bin/bash
# Run every tool below in the C locale (presumably so that sed, awk and tr
# treat the preprocessed source byte by byte).
export LANG=C LC_ALL=C

# Use gcc with basic warning and optimisation flags unless the caller has
# already set CC and/or CFLAGS.
if [ -z "$CC" ]; then
    export CC="gcc"
fi
if [ -z "$CFLAGS" ]; then
    export CFLAGS="-Wall -O2"
fi

# Without arguments, or when help is requested explicitly, print the usage
# text on standard error and exit with status 1.
want_usage=no
if [ $# -lt 1 ]; then
    want_usage=yes
else
    case "$1" in
        -h|--help) want_usage=yes ;;
    esac
fi

if [ "$want_usage" = yes ]; then
    exec >&2
    printf '\n'
    printf 'Usage: %s [ -h | --help ]\n' "$0"
    printf ' %s HEADER[:TYPE] ...\n' "$0"
    printf '\n'
    printf 'This script autogenerates a C program, that when run,\n'
    printf 'emits a C implementation of function member_offset()\n'
    printf 'which returns the offset of "member" within type "struct type".\n'
    printf '\n'
    printf 'The generated C program includes all HEADER files,\n'
    printf 'but each one only once. Only the specified struct types\n'
    printf 'will be supported by the final function.\n'
    printf '\n'
    exit 1
fi
# Print the C source of hash_of() on standard output.
#
# The script uses this text twice: once verbatim, so the generator program
# can hash names itself, and once converted into a C string literal, so the
# generator can copy the very same function into the code it emits.
#
# The here-document is indented by eight spaces for readability and sed
# strips exactly that leading indentation again. The pattern must be
# anchored with ^: an unanchored single-space pattern deletes the first
# space anywhere on each line and fuses tokens such as "size_t hash_of".
function hash_of_function() {
    sed -e 's|^        ||' << END
        /* DJB2 xor hash, http://www.cse.yorku.ca/~oz/hash.html */
        size_t hash_of(const void *data, const size_t size)
        {
            const unsigned char *p = (const unsigned char *)data;
            const unsigned char *const q = (const unsigned char *)data + size;
            size_t h = 5381;
            while (p < q)
                h = ((h << 5) + h) ^ (*(p++));
            return h;
        }
END
}
# Emit all headers as includes, but each one only once.
#
# Every command-line argument is fed to awk as one record. Records are split
# on ':' and only the first field (the header) is used here. The four standard
# headers printed in BEGIN are pre-marked as seen, so they are never emitted a
# second time. One leading '<' or '"' and one trailing '>' or '"' are removed
# from the header name; a header that was given with a leading '<' is emitted
# in angle-bracket form, any other header in double-quoted form.
printf '%s\n' "$@" | awk \
'BEGIN {
RS="\n"
FS=":"
split("", seen)
printf "#include <stdlib.h>\n"
printf "#include <stddef.h>\n"
printf "#include <string.h>\n"
printf "#include <stdio.h>\n"
seen["stdlib.h"] = 1
seen["stddef.h"] = 1
seen["string.h"] = 1
seen["stdio.h"] = 1
}
{
header = $1
sub(/^[<"]/, "", header)
sub(/[>"]$/, "", header)
if (length(header) > 0 && !(header in seen)) {
seen[header] = 1
if (substr($1, 1, 1) == "<")
printf "#include <%s>\n", header
else
printf "#include \"%s\"\n", header
}
}'
# emit the hash function as a string.
# The sed expression turns each source line into one piece of a C string
# literal: backslashes and double quotes are escaped, an opening quote is
# put in front of the line, and trailing whitespace is replaced by \n
# followed by the closing quote. The generator later prints this string
# into the code it emits.
printf '\nstatic const char hash_of_def[] =\n'
hash_of_function | sed -e 's|\\|\\\\|g; s|"|\\"|g; s|^| "|g; s|[\t\v\f ]*$|\\n"|g'
printf ' ;\n\n'
# and the hash function itself.
# (The generator program calls hash_of() directly, so it needs the real
# definition in addition to the string copy above.)
hash_of_function
# emit structures and code used by the generator itself.
sed -e 's|^ ||' <<END
/* One member of a registered struct type; nodes form a singly linked list. */
struct type_member_list {
struct type_member_list *next;  /* next member of the same type, or NULL */
size_t offset;                  /* byte offset of the member, as passed to add_type_member() */
size_t hash;                    /* hash_of() of the member name */
size_t namelen;                 /* length of name, not counting the terminating NUL */
char name[];                    /* NUL-terminated member name (flexible array member) */
};
/* One registered struct type together with the list of its members. */
struct type_list {
struct type_list *next;            /* next registered type, or NULL */
struct type_member_list *members;  /* members registered for this type */
size_t hash;                       /* hash_of() of the type name */
size_t slots;                      /* starts at 0; main() stores the result of generate_type() here */
size_t typelen;                    /* length of type, not counting the terminating NUL */
char type[];                       /* NUL-terminated type name (flexible array member) */
};
/* Return the number of nodes in a type list; an empty (NULL) list has 0. */
static size_t type_list_size(const struct type_list *list)
{
    size_t count;

    for (count = 0; list != NULL; list = list->next)
        count++;

    return count;
}
/* Return the number of nodes in a member list; an empty (NULL) list has 0. */
static size_t type_member_list_size(const struct type_member_list *list)
{
    size_t count;

    for (count = 0; list != NULL; list = list->next)
        count++;

    return count;
}
/* Head of the list of all struct types registered so far. */
static struct type_list *types = NULL;

/*
 * Register member NAME of struct TYPE, located OFFSET bytes into the struct.
 *
 * The type is looked up in the global list and created on first use; the
 * new member is then put at the front of that type's member list. Both
 * strings are copied. An empty or NULL type or name, or a failed
 * allocation, prints a message on standard error and terminates the
 * program with EXIT_FAILURE.
 */
static void add_type_member(const char *type, const char *name, const size_t offset)
{
    const size_t typelen = (type != NULL) ? strlen(type) : 0;
    const size_t namelen = (name != NULL) ? strlen(name) : 0;
    struct type_list *owner;
    struct type_member_list *entry;

    if (typelen == 0 || namelen == 0) {
        if (typelen == 0)
            fprintf(stderr, "Error: add_type_member() called with empty type.\n");
        if (namelen == 0)
            fprintf(stderr, "Error: add_type_member() called with empty name.\n");
        exit(EXIT_FAILURE);
    }

    /* Search the registered types for one with the same name. */
    owner = types;
    while (owner != NULL) {
        if (owner->typelen == typelen && strcmp(owner->type, type) == 0)
            break;
        owner = owner->next;
    }

    /* First member seen for this type: create its list head. */
    if (owner == NULL) {
        owner = malloc(sizeof *owner + typelen + 1);
        if (owner == NULL) {
            fprintf(stderr, "Error: Out of memory.\n");
            exit(EXIT_FAILURE);
        }

        memcpy(owner->type, type, typelen);
        owner->type[typelen] = '\0';
        owner->typelen = typelen;
        owner->hash = hash_of(type, typelen);
        owner->slots = 0;
        owner->members = NULL;

        /* The new type goes to the front of the global list. */
        owner->next = types;
        types = owner;
    }

    /* Build the member record. */
    entry = malloc(sizeof *entry + namelen + 1);
    if (entry == NULL) {
        fprintf(stderr, "Error: Out of memory.\n");
        exit(EXIT_FAILURE);
    }

    memcpy(entry->name, name, namelen);
    entry->name[namelen] = '\0';
    entry->namelen = namelen;
    entry->hash = hash_of(name, namelen);
    entry->offset = offset;

    /* The new member goes to the front of the type's member list. */
    entry->next = owner->members;
    owner->members = entry;
}
void add_types_and_members(void)
{
END
# Characters that may wrap a header name on the command line: a leading
# '<' or '"' and a trailing '>' or '"'.
ignorefirst=$'<"'
ignorelast=$'>"'
# Extract the member names from each structure.
#
# This loop runs between the here-document that opens the generated function
# add_types_and_members() and the one that closes it, so every line printed
# here becomes one statement in the body of that function.
for pair in "$@"; do
# The type name is whatever follows the first ':' of the argument.
name="${pair#*:}"
# No ':' at all (name equals the whole argument) or nothing after it:
# the argument only names a header, so there is nothing to extract.
[ ":$name" = ":$pair" ] && continue
[ -n "$name" ] || continue
# The header is whatever precedes the first ':', with one wrapping
# character removed from each end if present.
file="${pair%%:*}"
file="${file#[$ignorefirst]}"
file="${file%[$ignorelast]}"
# Pipeline stages:
#  1. preprocess the header ($CC -P -E);
#  2. drop every line that contains a '#', then squeeze all whitespace,
#     newlines included, into single spaces;
#  3. put each "struct NAME {" and each "}" on a line of its own, end a
#     line at every ';', and delete ")(...)" sequences (the parameter
#     list of a function-pointer declarator);
#  4. in awk, between "struct NAME {" and the next "}", split each line on
#     commas, strip the first [...] subscript and everything up to the last
#     space, '*' or '(' from each piece, and remember each distinct name.
#     The END rule prints one add_type_member() call per remembered name, in
#     whatever order awk iterates over the array.
# A non-zero exit status of the pipeline aborts the whole script.
$CC $CFLAGS -P -E "$file" | \
sed -e '/#/ d' | tr -s '\t\n\v\f\r ' ' ' | \
sed -e 's|\(struct [^ ]*\) {|\n\1 {\n|g; s|}|\n}\n|g; s| *;|\n|g; s|)([^)]*)||g' | \
awk -v name="$name" \
'BEGIN {
RS = " *\n"
FS = " *,"
split("", members)
}
$0 == ("struct " name " {") {
inside = 1
next
}
$0 == "}" {
inside = 0
next
}
inside {
for (i = 1; i <= NF; i++) {
member = $i
sub(/\[[^\[\]]*\]/, "", member)
sub(/^.*[ \*(]/, "", member)
if (!(member in members))
members[member] = member
}
}
END {
for (member in members)
printf " add_type_member(\"%s\", \"%s\", offsetof(struct %s, %s));\n", name, member, name, member
}' || exit 1
done
# emit the rest of the generator code.
sed -e 's|^ ||' <<END
}
/*
 * Return the smallest hash table size (number of slots) at which no two
 * types of LIST share a slot, i.e. hash modulo slots is unique per type.
 *
 * The search starts at the number of types and gives up, printing an error
 * and exiting with EXIT_FAILURE, once the table would need 4*size + 1
 * slots or more.
 *
 * An empty list yields 1: the caller then emits a table holding a single
 * empty slot, which keeps both the emitted array and the modulo operation
 * in the generated member_offset() well defined, so every lookup simply
 * reports "not found". Without this guard the occupancy scan would read an
 * element of the scratch array that was never initialised.
 */
size_t type_slots(struct type_list *list)
{
    const size_t size = type_list_size(list);
    const size_t max_slots = 4 * size + 1;
    size_t slots = size;
    size_t *used, i, n;
    struct type_list *item;

    /* Nothing registered (for example only plain HEADER arguments were given). */
    if (size < 1)
        return 1;

    used = malloc(max_slots * sizeof used[0]);
    if (!used) {
        fprintf(stderr, "Error: Out of memory.\n");
        exit(EXIT_FAILURE);
    }

    while (1) {
        if (slots >= max_slots) {
            fprintf(stderr, "Error: Weak hash function; hash table grows too large.\n");
            fprintf(stderr, " (Need more than %zu slots for %zu data entries.)\n", max_slots, size);
            exit(EXIT_FAILURE);
        }

        /* Clear the per-slot counters for this candidate size. */
        for (i = 0; i < slots; i++)
            used[i] = 0;

        /* Count how many types land in each slot. */
        for (item = list; item != NULL; item = item->next)
            ++used[item->hash % slots];

        /* Find the most crowded slot. */
        n = used[0];
        for (i = 1; i < slots; i++)
            if (used[i] > n)
                n = used[i];

        /* At most one type per slot: this size works. */
        if (n <= 1) {
            free(used);
            return slots;
        }

        slots++;
    }
}
/*
 * Emit, on standard output, the C definition of the member hash table for
 * one struct type: an array named struct_TYPE_members.
 *
 * LIST holds the SIZE members of the type. The table size is the smallest
 * slot count, starting at SIZE, at which every member hashes to a slot of
 * its own; unused slots are emitted as all-zero entries with a NULL name.
 * If 4*SIZE + 1 slots or more would be needed, an error is printed and the
 * program exits with EXIT_FAILURE.
 *
 * Returns the number of slots in the emitted table, or 0 (with nothing
 * printed) when SIZE is 0.
 *
 * Hash values are written with a UL suffix, as the type table does, so a
 * large hash never appears as a plain decimal constant that is out of
 * range for the signed integer types.
 */
size_t generate_type(const char *type, struct type_member_list *list, const size_t size)
{
    /* Maximum size for current hash table. */
    const size_t max_slots = 4*size + 1;
    size_t slots = size;
    size_t *used, i, n;
    struct type_member_list *item;

    if (size < 1)
        return 0;

    used = malloc(max_slots * sizeof used[0]);
    if (!used) {
        fprintf(stderr, "Error: Out of memory.\n");
        exit(EXIT_FAILURE);
    }

    while (1) {
        if (slots >= max_slots) {
            fprintf(stderr, "Error: Weak hash function; hash table grows too large.\n");
            fprintf(stderr, " (Need more than %zu slots for %zu data entries.)\n", max_slots, size);
            exit(EXIT_FAILURE);
        }

        /* Clear slot use counts. */
        for (i = 0; i < slots; i++)
            used[i] = 0;

        /* Count slot occupancies. */
        for (item = list; item != NULL; item = item->next)
            ++used[item->hash % slots];

        /* Find the maximum slot occupancy. */
        n = used[0];
        for (i = 1; i < slots; i++)
            if (used[i] > n)
                n = used[i];

        /* Suitable size? */
        if (n <= 1)
            break;

        /* Try a larger hash table, then. */
        slots++;
    }

    free(used);

    /* Print out the contents of this hash table. */
    printf("static const struct member struct_%s_members[%zu] = {\n", type, slots);
    for (i = 0; i < slots; i++) {
        /* Find the member, if any, that belongs in slot i. */
        for (item = list; item != NULL; item = item->next)
            if (item->hash % slots == i)
                break;

        if (item) {
            printf(" { .offset = %zu,\n", item->offset);
            printf(" .hash = %zuUL,\n", item->hash);
            printf(" .namelen = %zu,\n", item->namelen);
            printf(" .name = \"%s\" },\n", item->name);
        } else {
            printf(" { .offset = 0,\n");
            printf(" .hash = 0,\n");
            printf(" .namelen = 0,\n");
            printf(" .name = NULL },\n");
        }
    }
    printf("};\n\n");

    return slots;
}
/*
 * Entry point of the intermediate generator program.
 *
 * It registers all types and members, then writes the final C source to
 * standard output, in this order: the includes, the definitions of struct
 * member and struct type, the text of hash_of(), one member table per type,
 * the type table, and finally the member_offset() lookup function.
 */
int main(void)
{
struct type_list *list;
size_t main_slots, i;
/* Fill the global type list. */
add_types_and_members();
/* Headers and record types needed by the emitted code. */
printf("#include <stdlib.h>\n");
printf("#include <string.h>\n");
printf("#include <errno.h>\n");
printf("\n");
printf("struct member {\n");
printf(" const size_t offset;\n");
printf(" const size_t hash;\n");
printf(" const size_t namelen;\n");
printf(" const char *const name;\n");
printf("};\n");
printf("\n");
printf("struct type {\n");
printf(" const size_t hash;\n");
printf(" const size_t namelen;\n");
printf(" const size_t members;\n");
printf(" const struct member *const member;\n");
printf(" const char *const name;\n");
printf("};\n");
printf("\n");
/* Copy the hash function, kept as a string, into the output. */
printf("%s\n", hash_of_def);
printf("\n");
/* One member table per type; remember how many slots each one has. */
for (list = types; list != NULL; list = list->next)
list->slots = generate_type(list->type, list->members, type_member_list_size(list->members));
/* The table of types; the entry for slot i is the type whose hash maps to i. */
main_slots = type_slots(types);
printf("static const size_t num_types = %zu;\n", main_slots);
printf("static const struct type types[%zu] = {\n", main_slots);
for (i = 0; i < main_slots; i++) {
for (list = types; list != NULL; list = list->next)
if (list->hash % main_slots == i)
break;
if (list) {
printf(" { .hash = %zuUL,\n", list->hash);
printf(" .namelen = %zu,\n", list->typelen);
printf(" .members = %zu,\n", list->slots);
printf(" .member = struct_%s_members,\n", list->type);
printf(" .name = \"%s\" },\n", list->type);
} else {
/* Unused slot: all-zero entry. */
printf(" { .hash = 0,\n");
printf(" .namelen = 0,\n");
printf(" .members = 0,\n");
printf(" .member = NULL,\n");
printf(" .name = NULL },\n");
}
}
printf("};\n");
printf("\n");
/*
 * The lookup function itself. It probes one slot of the type table and,
 * on a match, one slot of that type's member table, comparing hash, then
 * length, then the string. It sets errno to 0 on success and to ENOENT
 * when it returns not_found.
 */
printf("size_t member_offset(const char *type, const char *name, const size_t not_found)\n");
printf("{\n");
printf(" const size_t typelen = (type) ? strlen(type) : 0;\n");
printf(" const size_t namelen = (name) ? strlen(name) : 0;\n");
printf("\n");
printf(" if (typelen > 0 && namelen > 0) {\n");
printf(" const size_t typehash = hash_of(type, typelen);\n");
printf(" const size_t t = typehash %% num_types;\n");
printf(" if (types[t].hash == typehash &&\n");
printf(" types[t].namelen == typelen &&\n");
printf(" !strcmp(types[t].name, type)) {\n");
printf(" const size_t namehash = hash_of(name, namelen);\n");
printf(" const struct member *const member = types[t].member + (namehash %% types[t].members);\n");
printf(" if (member->hash == namehash &&\n");
printf(" member->namelen == namelen &&\n");
printf(" !strcmp(member->name, name)) {\n");
printf(" errno = 0;\n");
printf(" return member->offset;\n");
printf(" }\n");
printf(" }\n");
printf(" }\n");
printf(" errno = ENOENT;\n");
printf(" return not_found;\n");
printf("}\n\n");
return EXIT_SUCCESS;
}
END
此版本使用djb2 xor哈希函数。如果您想使用其他哈希函数,请在脚本的hash_of_function
函数中,在sed ... <<END
行之后、END
行之前,用C语言编写它。 (sed只是为了删除八个缩进空格,使脚本更容易阅读。)它快速,简单。对于任何现实世界的用例是否足够,我并不知道;对于我拿来试验的一些测试头文件,它工作得很好。
已知的结构类型和每种已知结构类型的成员都存储在哈希表中。由于条目很小,并且这样做是为了提高性能,因此每个哈希表槽最多存放一个条目,并留有一些空槽。这意味着每次查找最多需要两次探测(每个哈希表一次)。中间C程序会搜索能让每个槽最多放置一个类型或成员的最小表大小(槽数),以便可以使用简单数组。这使哈希表查找具有恒定时间($O(1)$)的复杂度。不过,因为我们确实需要根据提供的两个字符串计算哈希值,所以严格来说,时间复杂度取决于它们的长度。这意味着,您需要使用快速的哈希函数;哈希函数不需要完美或加密安全。
每个哈希表的一次探测首先比较哈希值,然后是字符串长度,最后是字符串本身,以确保没有错误匹配。这意味着当找到匹配时,恰好执行了两次strcmp()
。
如果您知道永远不会调用该函数来查找不存在的成员的偏移量,或者使用不存在的类型,则可以安全地省略strcmp()
检查。
您可以通过运行
./member-offset-generator.bash mytypes.h:type1 mytypes.h:type2 | less
来检查生成的中间程序。
正如您在此时可能已经注意到的那样,编写生成C代码的C程序是复杂的;并且编写生成生成C代码的C程序的脚本通常不值得维护。但是,它绝对可行,尽管维护脚本需要比生成的代码值得花费更多的努力。请注意这种风险。
Makefile
中的默认操作(运行make
时)与make clean example
相同。如果将以上所有内容保存到各自的文件中,然后运行
make
你应该看到像这样的东西
rm -f *.o example member-offset.c member-offset-generator.c member-offset-generator
rm -f member-offset.c member-offset-generator member-offset-generator.c
./member-offset-generator.bash mytypes.h:type1 mytypes.h:type2 > member-offset-generator.c
gcc -Wall -O2 member-offset-generator.c -o member-offset-generator
./member-offset-generator > member-offset.c
rm -f member-offset-generator member-offset-generator.c
gcc -Wall -O2 -c member-offset.c
gcc member-offset.o main.c -o example
因为make
会输出它运行的命令,而我没有隐藏其中任何一个(隐藏的方法是在相应的命令之前添加@
)。
然后,如果你运行
./example type1 one type1 two type1 three type1 callback
示例程序应输出
struct type1 has member one at offset 0.
struct type1 has member two at offset 1.
struct type1 has member three at offset 3.
struct type1 has member callback at offset 7.
在x86-64上,这是一个LP64架构(int
是32位,long
和指针是64位),运行
./example type2 four type2 five type2 six type2 seven
输出
struct type2 has member four at offset 0.
struct type2 has member five at offset 1.
struct type2 has member six at offset 16.
struct type2 has member seven at offset 20.
在x86-64上,可以使用-m32
GCC选项编译32位代码。所以,运行
make CFLAGS="-Wall -O2 -m32" clean all
然后
./example type2 four type2 five type2 six type2 seven
输出
struct type2 has member four at offset 0.
struct type2 has member five at offset 1.
struct type2 has member six at offset 12.
struct type2 has member seven at offset 16.
如果我们在哈希表条目中添加对结构成员类型的支持,这可以扩展为允许某种内省。
但是,我不能强调考虑维持这项工作所需的维护工作的重要性。如果代码库有一套严格的编码标准,并且有人知道这个代码生成器生成器足以定期检查它正确解析结构,并且多个开发人员可以长期维护它,那么肯定;我不明白为什么不使用这样的东西。否则,它可能会成为一个沉重的负担,可能会拉下项目的其余部分。特别是如果只有一个开发人员有足够的知识来维护代码生成器生成器,他们就会离开。在我看来,没有任何项目应该依赖于特定的人。
如果您有任何具体问题,请随时在评论中提问,我会尝试解释。但是,我不会逐行解释整个member-offset-generator.bash脚本(正如我过去偶尔为我编写的其他示例所做的那样),因为它长达435行,又带有固有的层层嵌套的复杂性(由脚本创建的C程序再输出C代码),这样做对任何人来说都不值得。
答案 1 :(得分:1)
我认为你以错误的方式使用offsetof
。您必须将结构类型和成员名称传递给宏,而不是包含其名称的字符串。
例如,如果你的结构是:
/* The structure from the question, repeated here so the offsetof() calls
 * that follow have a concrete type to refer to. */
typedef struct example_ {
void *member1; /* first member */
void *member2; /* second member */
}example;
然后,您可以将member1
的偏移量计算为:
offsetof(example, member1)
但是如果您仍然希望使用字符串文字,则必须手动将offset_gen
中的member
参数与结构成员名称进行比较,并调用相应的宏。
示例:
/*
 * Map a structure name and a member name, both given as strings, to the
 * byte offset of that member.
 *
 * Each supported (structure, member) pair is compared explicitly and
 * answered with the matching offsetof() expression, because offsetof()
 * itself only accepts a type and a member written out in the source.
 *
 * ds      name of the structure type; only "example" is known
 * member  name of the member inside that structure
 *
 * Returns the offset, or (unsigned int)-1 when either argument is NULL or
 * the pair is not known. The parameters are const-qualified because the
 * function only reads the strings and is typically called with literals.
 */
unsigned int
offset_gen(const char *ds, const char *member)
{
    /* strcmp() must not be handed a NULL pointer. */
    if (ds == NULL || member == NULL)
        return (unsigned int)-1;

    if (strcmp(ds, "example") == 0) {
        if (strcmp(member, "member1") == 0)
            return (unsigned int)offsetof(example, member1);
        if (strcmp(member, "member2") == 0)
            return (unsigned int)offsetof(example, member2);
    }

    /* No match for the input parameters. */
    return (unsigned int)-1;
}