Skip to content

Build failure with corrupted test-triggers--usdt-tst-special.o #98

@thesamesam

Description

@thesamesam

(Please don't spend time on this yet as I don't really feel like I've got a clear idea of how to reproduce it or why this environment is triggering it.)

I hit this build failure via the ebuild, which failed as follows (with a debug print added):

/var/tmp/portage/dev-debug/dtrace-9999/work/dtrace-9999/build/run-dtrace -x nolibs -G -o /var/tmp/portage/dev-debug/dtrace-9999/work/dtrace-9999/build/test-triggers--usdt-tst-special-prov.o -s test/triggers/usdt-tst-special-prov.d /var/tmp/portage/dev-debug/dtrace-9999/work/dtrace-9999/build/test-triggers--usdt-tst-special.o
process_obj: elf_update write failed
dtrace: failed to link script test/triggers/usdt-tst-special-prov.d: an error was encountered while processing /var/tmp/portage/dev-debug/dtrace-9999/work/dtrace-9999/build/test-triggers--usdt-tst-special.o
make: *** [Makerules:31: /var/tmp/portage/dev-debug/dtrace-9999/work/dtrace-9999/build/test-triggers--usdt-tst-special-prov.o] Error 1
 * ERROR: dev-debug/dtrace-9999::gentoo failed (compile phase):
 *   emake failed

Broken

~/git/dtrace-utils-bad $ make CFLAGS="-O2 -march=native -g" verbose=yes
[...]
cc -Iinclude -Iuts/common -Iinclude/i386 -I/home/sam/git/dtrace-utils-bad/build  -O2 -march=native -g -std=gnu99 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_DT_VERSION=\"2.0.1\" -fno-inline -O2 -MP -MMD -MF /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o.deps -MT /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o -c -o /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o test/triggers/usdt-tst-special.c
/home/sam/git/dtrace-utils-bad/build/run-dtrace -x nolibs -G -o /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special-prov.o -s test/triggers/usdt-tst-special-prov.d /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o
dtrace: failed to link script test/triggers/usdt-tst-special-prov.d: an error was encountered while processing /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o
make: *** [Makerules:31: /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special-prov.o] Error 1

or

# Trying again
$ make CFLAGS="-O2 -march=native -g" verbose=yes
ln -sf /home/sam/git/dtrace-utils-bad/build/drti.o /home/sam/git/dtrace-utils-bad/build/dlibs/drti/drti.o
if [[ -f .git/index ]]; then \
        git log --no-walk --pretty=format:%H > .git-version.tmp; \
else \
        cp .git-archive-version .git-version.tmp; \
fi
if test -r ".git-version" && cmp -s ".git-version" ".git-version.tmp"; then \
        rm -f ".git-version.tmp"; \
else \
        printf "VERSION: .git-version\n"; \
        mv -f ".git-version.tmp" ".git-version"; \
fi
bpf-unknown-none-gcc -D__amd64 -Ilibdtrace -Iinclude -I/home/sam/git/dtrace-utils-bad/build/include -idirafter /usr/include -O2 -Wall -Wno-unknown-pragmas -mcpu=v3 -masm=normal -S \
        -o - bpf/get_bvar.c | \
        awk '/dt_get_bvar:/ { \
                 getline; \
                 if(/stxdw/ && /%r6$/) { \
                     print "ERROR: bpf-unknown-none-gcc is too old"; \
                     exit(1); \
                 } \
                 exit(0); \
             }'
/home/sam/git/dtrace-utils-bad/build/run-dtrace -x nolibs -G -o /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special-prov.o -s test/triggers/usdt-tst-special-prov.d /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o
cc -O2 -march=native -g -std=gnu99 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_DT_VERSION=\"2.0.1\" -fno-inline -O2  -o /home/sam/git/dtrace-utils-bad/build/usdt-tst-special /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special-prov.o -L/home/sam/git/dtrace-utils-bad/build
/usr/lib/gcc/x86_64-pc-linux-gnu/15/../../../../x86_64-pc-linux-gnu/bin/ld: warning: /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o has a section extending past end of file
/usr/lib/gcc/x86_64-pc-linux-gnu/15/../../../../x86_64-pc-linux-gnu/bin/ld: error: /home/sam/git/dtrace-utils-bad/build/test-triggers--usdt-tst-special.o: ELF section name out of range
collect2: error: ld returned 1 exit status
make: *** [Makerules:31: /home/sam/git/dtrace-utils-bad/build/usdt-tst-special] Error 1

Hacky debugging patch

$ git diff
diff --git a/libdtrace/dt_link.c b/libdtrace/dt_link.c
index b2148a8b..17cb8df2 100644
--- a/libdtrace/dt_link.c
+++ b/libdtrace/dt_link.c
@@ -1111,8 +1111,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 
 	scn_rel = NULL;
 	while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) {
-		if (gelf_getshdr(scn_rel, &shdr_rel) == NULL)
+		if (gelf_getshdr(scn_rel, &shdr_rel) == NULL) {
+			__builtin_printf("process_obj: gelf_getshdr(scn_rel, &shdr_rel) failed\n");
 			goto err;
+                }
 
 		/*
 		 * Skip any non-relocation sections.
@@ -1120,8 +1122,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 		if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL)
 			continue;
 
-		if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL)
+		if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL) {
+			__builtin_printf("process_obj: elf_getdata(scn_rel, NULL) failed\n");
 			goto err;
+		}
 
 		/*
 		 * Grab the section, section header and section data for the
@@ -1129,16 +1133,20 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 		 */
 		if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL ||
 		    gelf_getshdr(scn_sym, &shdr_sym) == NULL ||
-		    (data_sym = elf_getdata(scn_sym, NULL)) == NULL)
+		    (data_sym = elf_getdata(scn_sym, NULL)) == NULL) {
+			__builtin_printf("process_obj: section grab/hdr/data failed\n");
 			goto err;
+		}
 
 		/*
 		 * Ditto for that symbol table's string table.
 		 */
 		if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL ||
 		    gelf_getshdr(scn_str, &shdr_str) == NULL ||
-		    (data_str = elf_getdata(scn_str, NULL)) == NULL)
+		    (data_str = elf_getdata(scn_str, NULL)) == NULL) {
+			__builtin_printf("process_obj: string table\n");
 			goto err;
+		}
 
 		/*
 		 * Grab the section, section header and section data for the
@@ -1148,8 +1156,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 		 */
 		if ((scn_tgt = elf_getscn(elf, shdr_rel.sh_info)) == NULL ||
 		    gelf_getshdr(scn_tgt, &shdr_tgt) == NULL ||
-		    (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL)
+		    (data_tgt = elf_getdata(scn_tgt, NULL)) == NULL) {
+			__builtin_printf("process_obj: target reloc search failed\n");
 			goto err;
+		}
 
 		/*
 		 * We're looking for relocations to symbols matching this form:
@@ -1200,6 +1210,7 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 			if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info),
 			    &rsym) == NULL) {
 				dt_strtab_destroy(strtab);
+				__builtin_printf("process_obj: failed after destroy\n");
 				goto err;
 			}
 
@@ -1211,6 +1222,7 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 			if (dt_elf_symtab_lookup(data_sym, isym, rela.r_offset,
 			    shdr_rel.sh_info, &fsym) != 0) {
 				dt_strtab_destroy(strtab);
+				__builtin_printf("process_obj: failed after destroy 2\n");
 				goto err;
 			}
 
@@ -1219,6 +1231,7 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 
 			if (fsym.st_name > data_str->d_size) {
 				dt_strtab_destroy(strtab);
+				__builtin_printf("process_obj: failed after destroy 3\n");
 				goto err;
 			}
 
@@ -1238,6 +1251,7 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 			    objkey, s) + 1;
 			if ((p = dt_alloc(dtp, len)) == NULL) {
 				dt_strtab_destroy(strtab);
+				__builtin_printf("process_obj: failed after destroy 4\n");
 				goto err;
 			}
 			snprintf(p, len, dt_symfmt, dt_symprefix, objkey, s);
@@ -1270,12 +1284,15 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 
 			dt_strtab_destroy(strtab);
 
-			if ((pair = dt_alloc(dtp, sizeof(*pair))) == NULL)
+			if ((pair = dt_alloc(dtp, sizeof(*pair))) == NULL) {
+				__builtin_printf("process_obj: failed dt_alloc pair\n");
 				goto err;
+			}
 
 			if ((pair->dlp_str = dt_alloc(dtp, data_str->d_size +
 			    len)) == NULL) {
 				dt_free(dtp, pair);
+				__builtin_printf("process_obj: failed dt_alloc pair->dlp_str\n");
 				goto err;
 			}
 
@@ -1283,6 +1300,7 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 			    nsym * symsize)) == NULL) {
 				dt_free(dtp, pair->dlp_str);
 				dt_free(dtp, pair);
+				__builtin_printf("process_obj: failed dt_alloc pair->dlp_sym\n");
 				goto err;
 			}
 
@@ -1333,8 +1351,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 			ndx = GELF_R_SYM(rela.r_info);
 
 			if (gelf_getsym(data_sym, ndx, &rsym) == NULL ||
-			    rsym.st_name > data_str->d_size)
+			    rsym.st_name > data_str->d_size) {
+				__builtin_printf("process_obj: failed getsym/size check\n");
 				goto err;
+			}
 
 			s = (char *)data_str->d_buf + rsym.st_name;
 
@@ -1358,12 +1378,16 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 				dt_dprintf("normal probe\n");
 			}
 
-			if (*s++ != '_')
+			if (*s++ != '_') {
+				__builtin_printf("process_obj: not _\n");
 				goto err;
+			}
 
 			if ((p = strstr(s, "___")) == NULL ||
-			    p - s >= sizeof(pname))
+			    p - s >= sizeof(pname)) {
+				__builtin_printf("process_obj: not __ or bad size\n");
 				goto err;
+			}
 
 			memcpy(pname, s, p - s);
 			pname[p - s] = '\0';
@@ -1371,11 +1395,15 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 			p = strhyphenate(p + 3); /* strlen("___") */
 
 			if (dt_elf_symtab_lookup(data_sym, isym, rela.r_offset,
-			    shdr_rel.sh_info, &fsym) != 0)
+			    shdr_rel.sh_info, &fsym) != 0) {
+				__builtin_printf("process_obj: lookup failed after strhyphenate\n");
 				goto err;
+			}
 
-			if (fsym.st_name > data_str->d_size)
+			if (fsym.st_name > data_str->d_size) {
+				__builtin_printf("process_obj: bad size check on fsym\n");
 				goto err;
+			}
 
 			assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC);
 
@@ -1409,8 +1437,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 			} else if (strncmp(s, dt_symprefix,
 			    strlen(dt_symprefix)) == 0) {
 				r = s;
-				if ((s = strchr(s, '.')) == NULL)
+				if ((s = strchr(s, '.')) == NULL) {
+					__builtin_printf("process_obj: bad .\n");
 					goto err;
+				}
 				s++;
 			}
 
@@ -1425,8 +1455,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 			assert(fsym.st_value <= rela.r_offset);
 
 			off = rela.r_offset - fsym.st_value;
-			if (dt_modtext(dtp, data_tgt->d_buf, &rela, &off) != 0)
+			if (dt_modtext(dtp, data_tgt->d_buf, &rela, &off) != 0) {
+				__builtin_printf("process_obj: dt_modtext bad\n");
 				goto err;
+			}
 
 			if (dt_probe_define(pvp, prp, s, r, off, eprobe) != 0)
 				return dt_link_error(dtp, elf, fd, bufs,
@@ -1470,8 +1502,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
 		}
 	}
 
-	if (mod && elf_update(elf, ELF_C_WRITE) == -1)
+	if (mod && elf_update(elf, ELF_C_WRITE) == -1) {
+		__builtin_printf("process_obj: elf_update write failed\n");
 		goto err;
+	}
 
 	elf_end(elf);
 	close(fd);

Working

~/git/dtrace-utils-good $ make CFLAGS="-O2 -march=native" verbose=yes
# fine

What I don't get yet is:

  • it looks like a classic race where something truncates/clobbers a file, but it still happens with -j1
  • it only happens with -g
  • expanding -march=native into the explicit flags -march=icelake-server -mabm -mno-pconfig -mno-sgx --param=l1-cache-line-size=64 --param=l1-cache-size=48 --param=l2-cache-size=49152 makes it work?

This is with gcc version 15.0.0 20240826 (experimental) 92c5265d22afaac146b2a7ecbc3dac9fc3382877 (Gentoo 15.0.9999 p, commit 24f7b8a07ce29ac39d8d3245a1ba7f7abf3dcfa1) but I reproduced it with GCC 14 and 13 too.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions