Blob


/*
 * gcc2 name demangler.
 *
 * gcc2 follows the C++ Annotated Reference Manual section 7.2.1
 * name mangling description with a few changes.
 * See gpcompare.texi, gxxint_15.html in this directory for the changes.
 *
 * Not implemented:
 *	Unicode mangling
 *	renaming of operator functions
 */
/*
RULES TO ADD:

_10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_

*/
19 #include <u.h>
20 #include <libc.h>
21 #include <bio.h>
22 #include <mach.h>
/* Set to non-zero to have parse failures reported on file descriptor 2. */
#define debug 0
/*
 * One entry of a character-to-string lookup table.
 * Tables are arrays of Chartab terminated by an entry whose c is 0.
 */
typedef struct Chartab Chartab;
struct Chartab
{
	char c;		/* key character from the mangled name */
	char *s;	/* text associated with that character */
};
33 static char*
34 chartabsearch(Chartab *ct, int c)
35 {
36 for(; ct->c; ct++)
37 if(ct->c == c)
38 return ct->s;
39 return nil;
40 }
42 static Chartab typetab[] =
43 {
44 'b', "bool",
45 'c', "char",
46 'd', "double",
47 'e', "...",
48 'f', "float",
49 'i', "int",
50 'J', "complex",
51 'l', "long",
52 'r', "long double",
53 's', "short",
54 'v', "void",
55 'w', "wchar_t",
56 'x', "long long",
57 0, 0
58 };
60 static Chartab modifiertab[] =
61 {
62 'C', "const",
63 'S', "signed", /* means static for member functions */
64 'U', "unsigned",
65 'V', "volatile",
67 'G', "garbage", /* no idea what this is */
68 0, 0
69 };
/*
 * Placeholder names for functions that have no name of their own.
 * demanglegcc2 compares name pointers against constructor and
 * destructor, so these must stay distinct arrays.
 */
static char constructor[] = "constructor";
static char destructor[] = "destructor";
static char gconstructor[] = "$gconstructor";	/* global constructor */
static char gdestructor[] = "$gdestructor";	/* global destructor */

/* Characters that may begin the mangled part of a name. */
static char manglestarts[] = "123456789CFHQSUVt";
/* Forward declarations for the helpers defined further down in this file. */
static int	gccname(char **ps, char **pp);
static char	*demanglegcc2a(char *s, char *buf);
static char	*demanglegcc2b(char *s, char *buf);
static char	*demanglegcc2c(char *s, char *buf);
static int	gccnumber(char **ps, int *pn, int many);
84 char*
85 demanglegcc2(char *s, char *buf)
86 {
87 char *name, *os, *p, *t;
88 int isfn, namelen;
91 /*
92 * Pick off some cases that seem not to fit the pattern.
93 */
94 if((t = demanglegcc2a(s, buf)) != nil)
95 return t;
96 if((t = demanglegcc2b(s, buf)) != nil)
97 return t;
98 if((t = demanglegcc2c(s, buf)) != nil)
99 return t;
101 /*
102 * First, figure out whether this is a mangled name.
103 * The name begins with a short version of the name, then __.
104 * Of course, some C names begin with __ too, so the ultimate
105 * test is whether what follows __ looks reasonable.
106 * We use a test on the first letter instead.
108 * Constructors have no name - they begin __ (double underscore).
109 * Destructors break the rule - they begin _._ (underscore, dot, underscore).
110 */
111 os = s;
112 isfn = 0;
113 if(memcmp(s, "_._", 3) == 0){
114 isfn = 1;
115 name = destructor;
116 namelen = strlen(name);
117 s += 3;
118 }else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){
119 isfn = 1;
120 name = gdestructor;
121 namelen = strlen(name);
122 s += 13;
123 }else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){
124 isfn = 0;
125 name = gdestructor;
126 namelen = strlen(name);
127 s += 12;
128 }else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){
129 isfn = 1;
130 name = gconstructor;
131 namelen = strlen(name);
132 s += 13;
133 }else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){
134 isfn = 0;
135 name = gconstructor;
136 namelen = strlen(name);
137 s += 12;
138 }else{
139 t = strstr(os, "__");
140 if(t == nil)
141 return os;
142 do{
143 s = t;
144 if(strchr(manglestarts, *(s+2)))
145 break;
146 }while((t = strstr(s+1, "__")) != nil);
148 name = os;
149 namelen = s - os;
150 if(namelen == 0){
151 isfn = 1;
152 name = constructor;
153 namelen = strlen(name);
155 s += 2;
158 /*
159 * Now s points at the mangled crap (maybe).
160 * and name is the final element of the name.
161 */
162 if(strchr(manglestarts, *s) == nil)
163 return os;
165 p = buf;
166 if(*s == 'F'){
167 /* global function, no extra name pieces, just types */
168 isfn = 1;
169 }else{
170 /* parse extra name pieces */
171 if(!gccname(&s, &p)){
172 if(debug)
173 fprint(2, "parsename %s: %r\n", s);
174 return os;
177 /* if we have a constructor or destructor, try to use the C++ name */
178 t = nil;
179 if(name == constructor || name == destructor){
180 *p = 0;
181 t = strrchr(buf, ':');
182 if(t)
183 t++;
184 else
185 t = buf;
187 strcpy(p, "::");
188 p += 2;
189 if(t){
190 namelen = strlen(t)-2;
191 if(name == destructor)
192 *p++ = '~';
193 name = t;
196 if(p >= buf+2 && memcmp(p-2, "::", 2) == 0 && *(p-3) == ')')
197 p -= 2;
198 memmove(p, name, namelen);
199 p += namelen;
201 if(*s == 'F'){
202 /* might be from above, or might follow name pieces */
203 s++;
204 isfn = 1;
207 /* the rest of the name is argument types - could skip this */
208 if(*s || isfn){
209 *p++ = '(';
210 while(*s != 0 && *s != '_'){
211 if(!gccname(&s, &p))
212 break;
213 *p++ = ',';
215 if(*(p-1) == ',')
216 p--;
217 *p++ = ')';
220 if(*s == '_'){
221 /* return type (left over from H) */
224 *p = 0;
225 return buf;
228 /*
229 * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
230 * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos
231 * (maybe the funny syntax means they are private)
232 */
233 static char*
234 demanglegcc2a(char *s, char *buf)
236 char *p;
238 if(*s != '_' || strchr(manglestarts, *(s+1)) == nil)
239 return nil;
240 p = buf;
241 s++;
242 if(!gccname(&s, &p))
243 return nil;
244 if(*s != '.')
245 return nil;
246 s++;
247 strcpy(p, "::");
248 p += 2;
249 strcpy(p, s);
250 return buf;
253 /*
254 * _tfb => type info for bool
255 * __vt_7ostream => vtbl for ostream
256 */
257 static char*
258 demanglegcc2b(char *s, char *buf)
260 char *p;
261 char *t;
263 if(memcmp(s, "__ti", 4) == 0){
264 t = "$typeinfo";
265 s += 4;
266 }else if(memcmp(s, "__tf", 4) == 0){
267 t = "$typeinfofn";
268 s += 4;
269 }else if(memcmp(s, "__vt_", 5) == 0){
270 t = "$vtbl";
271 s += 5;
272 }else
273 return nil;
275 p = buf;
276 for(;;){
277 if(*s == 0 || !gccname(&s, &p))
278 return nil;
279 if(*s == 0)
280 break;
281 if(*s != '.' && *s != '$')
282 return nil;
283 strcpy(p, "::");
284 p += 2;
285 s++;
287 strcpy(p, "::");
288 p += 2;
289 strcpy(p, t);
290 return buf;
293 /*
294 * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream
295 */
296 static char*
297 demanglegcc2c(char *s, char *buf)
299 int n;
300 char *p;
302 if(memcmp(s, "__thunk_", 8) != 0)
303 return nil;
304 s += 8;
305 if(!gccnumber(&s, &n, 1))
306 return nil;
307 if(memcmp(s, "__._", 4) != 0) /* might as well be morse code */
308 return nil;
309 s += 4;
310 p = buf;
311 if(!gccname(&s, &p))
312 return nil;
313 strcpy(p, "::$thunk");
314 return buf;
317 /*
318 * Parse a number, a non-empty run of digits.
319 * If many==0, then only one digit is used, even
320 * if it is followed by more. When we need a big
321 * number in a one-digit slot, it gets bracketed by underscores.
322 */
323 static int
324 gccnumber(char **ps, int *pn, int many)
326 char *s;
327 int n, eatunderscore;
329 s = *ps;
330 eatunderscore = 0;
331 if(!many && *s == '_'){
332 many = 1;
333 s++;
334 eatunderscore = 1;
336 if(!isdigit((uchar)*s)){
337 bad:
338 werrstr("bad number %.20s", *ps);
339 return 0;
341 if(many)
342 n = strtol(s, &s, 10);
343 else
344 n = *s++ - '0';
345 if(eatunderscore){
346 if(*s != '_')
347 goto bad;
348 s++;
350 *ps = s;
351 *pn = n;
352 return 1;
355 /*
356 * Pick apart the next mangled name section.
357 * Names and types are treated as the same.
358 * Let's see how far we can go before that becomes a problem.
359 */
360 static int
361 gccname(char **ps, char **pp)
363 int i, n, m, val;
364 char *os, *s, *t, *p, *p0, *p1;
366 s = *ps;
367 os = s;
368 p = *pp;
370 /* print("\tgccname: %s\n", s); */
372 /* basic types */
373 if((t = chartabsearch(typetab, *s)) != nil){
374 s++;
375 strcpy(p, t);
376 p += strlen(t);
377 goto out;
380 /* modifiers */
381 if((t = chartabsearch(modifiertab, *s)) != nil){
382 s++;
383 if(!gccname(&s, &p))
384 return 0;
385 /*
386 * These don't end up in the right place
387 * and i don't care anyway
388 * (AssertHeld__C17ReaderWriterMutex)
389 */
390 /*
391 *p++ = ' ';
392 strcpy(p, t);
393 p += strlen(p);
394 */
395 goto out;
398 switch(*s){
399 default:
400 bad:
401 if(debug)
402 fprint(2, "gccname: %s (%s)\n", os, s);
403 werrstr("bad name %.20s", s);
404 return 0;
406 case '1': case '2': case '3': case '4': /* length-prefixed string */
407 case '5': case '6': case '7': case '8': case '9':
408 if(!gccnumber(&s, &n, 1))
409 return 0;
410 memmove(p, s, n);
411 p += n;
412 s += n;
413 break;
415 case 'A': /* array */
416 t = s;
417 s++;
418 if(!gccnumber(&s, &n, 1))
419 return 0;
420 if(*s != '_'){
421 werrstr("bad array %.20s", t);
422 return 0;
424 s++;
425 sprint(p, "array[%d] ", n);
426 p += strlen(p);
427 break;
429 case 'F': /* function */
430 t = s;
431 s++;
432 strcpy(p, "fn(");
433 p += 3;
434 /* arguments */
435 while(*s && *s != '_')
436 if(!gccname(&s, &p))
437 return 0;
438 if(*s != '_'){
439 werrstr("unexpected end in function: %s", t);
440 return 0;
442 s++;
443 strcpy(p, " => ");
444 p += 4;
445 /* return type */
446 if(!gccname(&s, &p))
447 return 0;
448 *p++ = ')';
449 break;
451 case 'H': /* template specialization */
452 if(memcmp(s-2, "__", 2) != 0)
453 fprint(2, "wow: %s\n", s-2);
454 t = s;
455 s++;
456 if(!gccnumber(&s, &n, 0))
457 return 0;
458 p0 = p;
459 /* template arguments */
460 *p++ = '<';
461 for(i=0; i<n; i++){
462 val = 1;
463 if(*s == 'Z'){ /* argument is a type, not value */
464 val = 0;
465 s++;
467 if(!gccname(&s, &p))
468 return 0;
469 if(val){
470 if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */
471 return 0;
472 sprint(p, "=%d", m);
473 p += strlen(p);
475 if(i+1<n)
476 *p++ = ',';
478 *p++ = '>';
479 if(*s != '_'){
480 werrstr("bad template %s", t);
481 return 0;
483 s++;
485 /*
486 * Can't seem to tell difference between a qualifying name
487 * and arguments. Not sure which is which. It appears that if
488 * you get a name, use it, otherwise look for types.
489 * The G type qualifier appears to have no effect other than
490 * turning an ambiguous name into a definite type.
492 * SetFlag__H1Zb_P15FlagSettingMode_v
493 * => void SetFlag<bool>(FlagSettingMode *)
494 * SetFlag__H1Zb_15FlagSettingMode_v
495 * => void FlagSettingMode::SetFlag<bool>()
496 * SetFlag__H1Zb_G15FlagSettingMode_v
497 * => void SetFlag<bool>(FlagSettingMode)
498 */
499 if(strchr("ACFGPRSUVX", *s)){
500 /* args */
501 t = s;
502 p1 = p;
503 *p++ = '(';
504 while(*s != '_'){
505 if(*s == 0 || !gccname(&s, &p)){
506 werrstr("bad H args: %s", t);
507 return 0;
510 *p++ = ')';
511 s++;
512 }else{
513 p1 = p;
514 /* name */
515 if(!gccname(&s, &p))
516 return 0;
518 /*
519 * Need to do some rearrangement of <> () and names here.
520 * Doesn't matter since we strip out the <> and () anyway.
521 */
522 break;
524 case 'M': /* M1S: pointer to member */
525 if(*(s+1) != '1' || *(s+2) != 'S')
526 goto bad;
527 s += 3;
528 strcpy(p, "mptr ");
529 p += 5;
530 if(!gccname(&s, &p))
531 return 0;
532 break;
534 case 'N': /* multiply-repeated type */
535 s++;
536 if(!gccnumber(&s, &n, 0) || !gccnumber(&s, &m, 0))
537 return 0;
538 sprint(p, "T%dx%d", m, n);
539 p += strlen(p);
540 break;
542 case 'P': /* pointer */
543 s++;
544 strcpy(p, "ptr ");
545 p += 4;
546 if(!gccname(&s, &p))
547 return 0;
548 break;
550 case 'Q': /* qualified name */
551 s++;
552 if(!gccnumber(&s, &n, 0))
553 return 0;
554 for(i=0; i<n; i++){
555 if(!gccname(&s, &p)){
556 werrstr("in hierarchy: %r");
557 return 0;
559 if(i+1 < n){
560 strcpy(p, "::");
561 p += 2;
564 break;
566 case 'R': /* reference */
567 s++;
568 strcpy(p, "ref ");
569 p += 4;
570 if(!gccname(&s, &p))
571 return 0;
572 break;
574 case 't': /* class template instantiation */
575 /* should share code with case 'H' */
576 t = s;
577 s++;
578 if(!gccname(&s, &p))
579 return 0;
580 if(!gccnumber(&s, &n, 0))
581 return 0;
582 p0 = p;
583 /* template arguments */
584 *p++ = '<';
585 for(i=0; i<n; i++){
586 val = 1;
587 if(*s == 'Z'){ /* argument is a type, not value */
588 val = 0;
589 s++;
591 if(!gccname(&s, &p))
592 return 0;
593 if(val){
594 if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */
595 return 0;
596 sprint(p, "=%d", m);
597 p += strlen(p);
599 if(i+1<n)
600 *p++ = ',';
602 *p++ = '>';
603 break;
605 case 'T': /* once-repeated type */
606 s++;
607 if(!gccnumber(&s, &n, 0))
608 return 0;
609 sprint(p, "T%d", n);
610 p += strlen(p);
611 break;
613 case 'X': /* type parameter in 'H' */
614 if(!isdigit((uchar)*(s+1)) || !isdigit((uchar)*(s+2)))
615 goto bad;
616 memmove(p, s, 3);
617 p += 3;
618 s += 3;
619 break;
622 USED(p1);
623 USED(p0);
625 out:
626 *ps = s;
627 *pp = p;
628 return 1;