forked from shizmob/smol
-
Notifications
You must be signed in to change notification settings - Fork 4
/
smoldd.py
executable file
·224 lines (182 loc) · 8.01 KB
/
smoldd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#!/usr/bin/env python3
import os.path, struct, sys
import argparse, glob, shutil, subprocess
import smol.hackyelf as hackyelf
import smol.linkmap as linkmap
from smol.shared import *
from smol.parse import *
# TODO: support for hashes that aren't djb2
def readbyte(blob, off):
    """Decode one unsigned 8-bit value from ``blob`` at offset ``off``.

    Returns a ``(value, next_offset)`` tuple where ``next_offset`` is
    ``off + 1``. ``struct.unpack`` raises ``struct.error`` when the slice
    is shorter than one byte.
    """
    end = off + 1
    (value,) = struct.unpack('<B', blob[off:end])
    return value, end
def readshort(blob, off):
    """Decode one little-endian unsigned 16-bit value from ``blob``.

    Returns a ``(value, next_offset)`` tuple where ``next_offset`` is
    ``off + 2``. ``struct.unpack`` raises ``struct.error`` when the slice
    is shorter than two bytes.
    """
    end = off + 2
    (value,) = struct.unpack('<H', blob[off:end])
    return value, end
def readint(blob, off):
    """Decode one little-endian unsigned 32-bit value from ``blob``.

    Returns a ``(value, next_offset)`` tuple where ``next_offset`` is
    ``off + 4``. ``struct.unpack`` raises ``struct.error`` when the slice
    is shorter than four bytes.
    """
    end = off + 4
    (value,) = struct.unpack('<I', blob[off:end])
    return value, end
def readlong(blob, off):
    """Decode one little-endian unsigned 64-bit value from ``blob``.

    Returns a ``(value, next_offset)`` tuple where ``next_offset`` is
    ``off + 8``. ``struct.unpack`` raises ``struct.error`` when the slice
    is shorter than eight bytes.
    """
    end = off + 8
    (value,) = struct.unpack('<Q', blob[off:end])
    return value, end
def readstr(blob, off):
    """Read a NUL-terminated UTF-8 string from ``blob`` starting at ``off``.

    Returns a ``(text, next_offset)`` tuple where ``next_offset`` points
    just past the terminating NUL byte.

    Raises ``ValueError`` when no NUL byte exists at or after ``off``
    (the string is unterminated), and ``UnicodeDecodeError`` when the
    bytes are not valid UTF-8.
    """
    # Let the bytes object locate the terminator instead of unpacking
    # one byte at a time.
    end = blob.find(b'\x00', off)
    if end < 0:
        raise ValueError("unterminated string at offset 0x%x" % off)
    return bytes(blob[off:end]).decode('utf-8'), end + 1
def get_def_libpaths(cc_bin, is32bit):
    """Return the default library search directories.

    For 32-bit targets a fixed pair of lib32 directories is returned;
    otherwise the 'libraries' entry of ``get_cc_paths(cc_bin)`` is used.
    """
    if not is32bit:
        return get_cc_paths(cc_bin)['libraries']
    # FIXME: HACK
    return ['/usr/lib32/', '/lib32/']
def find_libs(deflibs, libname):
    """Yield paths of files whose name starts with ``libname``.

    Directories listed in the colon-separated ``LD_LIBRARY_PATH``
    environment variable are searched first, followed by the directories
    in the list ``deflibs``. Within each directory every entry matching
    ``<dir>/<libname>*`` is yielded, in the order ``glob.glob`` returns
    them.

    Empty ``LD_LIBRARY_PATH`` entries are skipped: an unset or empty
    variable splits into ``['']``, which would otherwise make the search
    glob ``/<libname>*`` in the filesystem root.
    """
    env_dirs = [d for d in os.environ.get('LD_LIBRARY_PATH', '').split(':') if d]
    for d in env_dirs + deflibs:
        # Escape the directory and name so only the trailing '*' is a wildcard.
        pattern = glob.escape("%s/%s" % (d, libname)) + '*'
        yield from glob.glob(pattern)
def build_hashtab(readelf_bin, lib, hashid):
    """Build a reverse lookup table from symbol hash to symbol name.

    The symbols come from ``build_symbol_map`` invoked for the single
    library ``lib``; each key of that map is hashed with the function
    returned by ``get_hash_fn(hashid)``. When two names hash to the same
    value, the one iterated last wins.
    """
    symmap = build_symbol_map(readelf_bin, {lib: lib})
    hash_of = get_hash_fn(hashid)
    table = {}
    for name, _ in symmap.items():
        table[hash_of(name)] = name
    return table
def addr2off(elf, addr):
    """Translate virtual address ``addr`` into an offset within the file.

    The first PT_LOAD program header whose ``[vaddr, vaddr + memsz)``
    range contains ``addr`` is used. An address that lies beyond the
    segment's ``filesz`` (no backing file data) trips an assertion. When
    no segment matches, ``error`` is called with a diagnostic message.
    """
    for seg in elf.phdrs:
        if seg.ptype == hackyelf.PT_LOAD and seg.vaddr <= addr < seg.vaddr + seg.memsz:
            delta = addr - seg.vaddr
            assert delta < seg.filesz, ".bss address!"
            return delta + seg.off

    error("E: Address %08x not in the static address range!" % addr)
def get_needed_libs(elf, blob):
    """Return the names of all DT_NEEDED entries of ``elf``.

    Each DT_NEEDED value is added to the DT_STRTAB address, translated
    to a file offset, and the NUL-terminated string at that offset is
    read from ``blob``. Asserts that a dynamic table exists and that it
    holds exactly one DT_STRTAB entry.
    """
    assert elf.dyn is not None, "No DYNAMIC table present in the ELF file!"

    strtab_addrs = [ent.val for ent in elf.dyn if ent.tag == hackyelf.DT_STRTAB]
    assert len(strtab_addrs) == 1, "Only one DT_STRTAB may be present in an ELF file."
    strtab = strtab_addrs[0]

    names = []
    for ent in elf.dyn:
        if ent.tag != hackyelf.DT_NEEDED:
            continue
        name, _ = readstr(blob, addr2off(elf, strtab + ent.val))
        names.append(name)
    return names
def _find_push_imm32(blob, off, what):
    """Return the offset of the operand of the next 'push IMM32'.

    Scans ``blob`` byte by byte from ``off`` for opcode 0x68 and returns
    the offset of the byte following it (the start of the 32-bit
    immediate). ``what`` names the pushed value in the assertion message
    raised when the end of ``blob`` is reached without a match.
    """
    while True:
        assert off < len(blob), \
            "wtf??? (can't find a push IMM32 instruction which pushes the %s address)" % what
        if blob[off] == 0x68:
            return off + 1
        off += 1


def get_hashtbl(elf, blob, args):
    """Extract the list of symbol hashes stored in the binary.

    The hash table address is taken from the '_symbols' entry of the
    linker map when ``args.map`` is given. Otherwise it is recovered by
    scanning the code at the ELF entry point for a 'push IMM32'
    instruction (opcode 0x68) and reading its immediate operand.

    Entries are then read starting at that address until a terminator is
    hit: an entry whose low 16 bits are zero (low 8 bits when
    ``args.break_on_zerobyte`` is set), or the end of ``blob``.

    Returns the list of hash values in table order.
    """
    htaddr = None
    if args.map is not None:
        lmap = linkmap.parse(args.map.read())
        tabs = [x for x in lmap.mmap if x.sym == '_symbols']
        assert len(tabs) == 1, "One '_symbols' symbol must be present."
        htaddr = tabs[0].org
    elif elf.is32bit:
        txtoff = _find_push_imm32(blob, addr2off(elf, elf.entry), "hashtable")
        htaddr, _ = readint(blob, txtoff)
    else:  # 64-bit
        # The first push we encounter may push the entrypoint address...
        txtoff = _find_push_imm32(blob, addr2off(elf, elf.entry),
                                  "hashtable or entrypoint")
        # ...except that it is already the value we're looking for when the
        # binary had been linked with -fuse-dnload-loader, so check the value.
        htaddr, _ = readint(blob, txtoff)
        if htaddr == elf.entry:
            # Resume the scan *after* the 4-byte immediate just read:
            # scanning through it would produce a false match whenever the
            # entry address itself contains a 0x68 byte.
            txtoff = _find_push_imm32(blob, txtoff + 4, "hashtable")
            htaddr, _ = readint(blob, txtoff)

    assert htaddr is not None, "wtf? (no hashtable address)"
    htoff = addr2off(elf, htaddr)

    # None of these change while walking the table.
    hashsz = 2 if elf.is32bit and args.hash16 else 4
    stride = 4 if elf.is32bit else 8  # slot spacing, independent of hashsz
    endmask = 0xFF if args.break_on_zerobyte else 0xFFFF
    read_hash = readshort if hashsz == 2 else readint

    tbl = []
    while True:
        if htoff + hashsz > len(blob):
            # Fewer than hashsz bytes remain: the table must end here.
            if len(blob) <= htoff and len(tbl) > 0:
                break
            if readbyte(blob, htoff)[0] == 0:
                break
            # Explicit raise so the check is not stripped under -O.
            raise AssertionError("AAAAA rest is %s" % repr(blob[htoff:]))

        val, _ = read_hash(blob, htoff)
        if (val & endmask) == 0:
            break
        tbl.append(val)
        htoff += stride

    return tbl
def do_smoldd_run(args):
    """Resolve the symbol hashes of a smol-linked binary and print them.

    Reads the ELF image from ``args.input``, determines its needed
    libraries and its hash table, hashes the symbols of every needed
    library and prints, per library, which hash maps to which symbol.
    Hashes that match no library symbol are listed under "UNRESOLVED:".

    Returns 0 on success and 1 when the binary is unsupported or a
    needed library cannot be located.
    """
    blob = args.input.read()
    elf = hackyelf.parse(blob)

    if elf.dyn is not None and any(d.tag == hackyelf.DT_JMPREL for d in elf.dyn):
        # TODO
        print("Binaries using -fuse-dlfixup-loader are not yet supported by "
              "smoldd. For a temporary workaround, use strings(1) instead.")
        return 1

    deflibs = get_def_libpaths(args.cc, elf.is32bit)
    needed = get_needed_libs(elf, blob)

    # Only the first match per library is used; report a missing library
    # explicitly instead of failing with a bare IndexError.
    neededpaths = {}
    for lib in needed:
        path = next(iter(find_libs(deflibs, lib)), None)
        if path is None:
            print("E: Cannot find needed library '%s' in the library search path."
                  % lib, file=sys.stderr)
            return 1
        neededpaths[lib] = path

    htbl = get_hashtbl(elf, blob, args)
    hashid = get_hash_id(args.hash16, args.crc32c)
    libhashes = {lib: build_hashtab(args.readelf, neededpaths[lib], hashid)
                 for lib in needed}

    hashresolves = {}
    noresolves = []
    for h in htbl:
        for lib, table in libhashes.items():
            if h in table:
                hashresolves.setdefault(lib, {})[h] = table[h]
                break
        else:
            # No library provides a symbol with this hash.
            noresolves.append(h)

    for lib, resolved in hashresolves.items():
        print("%s:" % lib)
        for h, name in resolved.items():
            print("\t%08x -> %s" % (h, name))

    if noresolves:
        print("UNRESOLVED:")
        for h in noresolves:
            print("\t%08x" % h)

    return 0
def main():
    """Parse the command line and run smoldd.

    Returns the value of ``do_smoldd_run`` for the parsed arguments.
    """
    parser = argparse.ArgumentParser()

    # nargs='?' makes the positional optional so that the stdin default
    # actually takes effect; without it argparse always requires the
    # argument and never uses the default.
    parser.add_argument('input', nargs='?', type=argparse.FileType('rb'),
                        default=sys.stdin.buffer, help="input file")
    parser.add_argument('--cc',
                        default=shutil.which('cc'), help="C compiler binary")
    parser.add_argument('--readelf',
                        default=shutil.which('readelf'), help="readelf binary")
    parser.add_argument('--map', type=argparse.FileType('r'),
                        help="Get the address of the symbol hash table from the "
                             "linker map output instead of attempting to parse the"
                             " binary.")

    hashgrp = parser.add_mutually_exclusive_group()
    hashgrp.add_argument('-s', '--hash16', default=False, action='store_true',
                         help="Use 16-bit (BSD2) hashes instead of 32-bit djb2 hashes. "
                              "Only usable for 32-bit output.")
    hashgrp.add_argument('-c', '--crc32c', default=False, action='store_true',
                         help="Use Intel's crc32 intrinsic for hashing. Conflicts with `--hash16'.")
    hashgrp.add_argument('-B', '--break-on-zerobyte', default=False, action='store_true',
                         help="Specify that the hash table ends on an entry with a LSByte of 0"
                              " (by default, smoldd checks the lowest 2 bytes).")

    args = parser.parse_args()
    return do_smoldd_run(args)
if __name__ == '__main__':
    # Exit with main()'s return value as the status code; a None result
    # falls through to a normal exit, and a value that cannot be
    # converted to int exits with status 1.
    rv = main()
    if rv is not None:
        try:
            status = int(rv)
        except Exception:
            status = 1
        sys.exit(status)