Remove old callgraph scripts; recommend calltool instead.

This commit is contained in:
Nick Mathewson 2017-06-14 17:44:15 -04:00
parent 3f40d9ec20
commit 80ad374b84
4 changed files with 3 additions and 324 deletions

View File

@ -226,17 +226,10 @@ performance! See the gperftools manual for more info, but basically:
Generating and analyzing a callgraph Generating and analyzing a callgraph
------------------------------------ ------------------------------------
1. Run `./scripts/maint/generate_callgraph.sh`. This will generate a 0. Build Tor on linux or mac, ideally with -O0 or -fno-inline.
bunch of files in a new ./callgraph directory.
2. Run `./scripts/maint/analyze_callgraph.py callgraph/src/*/*`. This 1. Clone 'https://gitweb.torproject.org/user/nickm/calltool.git/' .
will do a lot of graph operations and then dump out a new Follow the README in that repository.
`callgraph.pkl` file, containing data in Python's 'pickle' format.
3. Run `./scripts/maint/display_callgraph.py`. It will display:
- the number of functions reachable from each function.
- all strongly-connected components in the Tor callgraph
- the largest bottlenecks in the largest SCC in the Tor callgraph.
Note that currently the callgraph generator can't detect calls that pass Note that currently the callgraph generator can't detect calls that pass
through function pointers. through function pointers.

View File

@ -1,259 +0,0 @@
#!/usr/bin/python
import re
import sys
import copy
import cPickle
import os
class Parser:
    """Accumulate a function-level call graph from textual call-graph dumps.

    Feed each dump to parse_callgraph_file().  Afterwards:

    * ``calls`` maps a function name to the set of function names it calls;
    * ``definedIn`` maps a function name to the module it was recorded for,
      or to the string 'nil' when the same name was recorded more than once.

    Only functions that call at least one named function, and that have no
    "calls external node" line, are recorded.
    """

    # Line patterns recognised in a dump.  They are applied with match(), so
    # they are anchored at the start of the line and the leading space in the
    # call-site patterns is significant.
    # NOTE(review): confirm that leading whitespace against the real dump
    # format before relying on these patterns.
    _FUNC_RE = re.compile(r"Call graph node for function: '([^']+)'")
    _EXTERN_RE = re.compile(r" CS<[^>]+> calls external node")
    _CALL_RE = re.compile(r" CS<[^>]+> calls function '([^']+)'")

    def __init__(self):
        self.calls = {}
        self.definedIn = {}
        # Per-file parsing state.  It is initialised here so that enter_func()
        # is safe to call on a fresh instance; parse_callgraph_file() resets
        # it at the start of every file.
        self.infunc = None
        self.extern = False
        self.calledfns = set()
        self.module = None

    def enter_func(self, name):
        """Record the function collected so far, then start collecting *name*.

        Pass None as *name* to flush the last function of a file.
        """
        finished = self.infunc
        if finished and not self.extern and self.calledfns:
            if finished in self.definedIn:
                # The name was already recorded earlier, so its module is
                # ambiguous; mark it with the 'nil' placeholder.
                self.definedIn[finished] = 'nil'
            else:
                self.definedIn[finished] = self.module
            self.calls.setdefault(finished, set()).update(self.calledfns)

        self.calledfns = set()
        self.infunc = name
        self.extern = False

    def parse_callgraph_file(self, inp, module):
        """Parse one dump.

        *inp* is an iterable of text lines (for example an open file) and
        *module* is the module name to associate with the functions found.
        """
        self.infunc = None
        self.extern = False
        self.calledfns = set()
        self.module = module

        for line in inp:
            m = self._FUNC_RE.match(line)
            if m:
                self.enter_func(m.group(1))
                continue

            if self._EXTERN_RE.match(line):
                self.extern = True
                continue

            m = self._CALL_RE.match(line)
            if m:
                self.calledfns.add(m.group(1))

        # Flush the function that was still open at end of input.
        self.enter_func(None)

    def extract_callgraph(self):
        """Return the accumulated ``calls`` mapping and reset it to empty.

        ``definedIn`` is left untouched.
        """
        c = self.calls
        self.calls = {}
        return c
def transitive_closure(g):
    """Return the transitive closure of the directed graph *g*.

    *g* maps each node to the set of nodes it points at.  The result is a
    new dict with the same keys, where each value is the set of every node
    reachable through one or more edges.  *g* is deep-copied first, so the
    caller's graph is not modified.

    The closure is found by repeating full passes over the graph until a
    pass adds nothing.  A progress counter is written to stdout, followed by
    a newline once the computation is finished.
    """
    closure = copy.deepcopy(g)
    # The key set never changes while we iterate (only the value sets grow),
    # so it is enough to take the node list once.
    nodes = list(closure)
    total = len(nodes)

    passno = 0
    changed = True
    while changed:
        passno += 1
        changed = False
        for idx, node in enumerate(nodes, 1):
            sys.stdout.write("Pass %d/?: %d/%d\r" % (passno, idx, total))
            sys.stdout.flush()
            reachable = closure[node]
            # Build the grown set separately: `reachable` must not change
            # size while it is being iterated.
            grown = reachable.copy()
            for fn in reachable:
                grown.update(closure.get(fn, ()))
            if len(grown) != len(reachable):
                reachable.update(grown)
                changed = True

    sys.stdout.write("\n")
    return closure
def strongly_connected_components(g):
    """Return the strongly connected components of the directed graph *g*.

    *g* maps each node to an iterable of its successors; a node that only
    appears as a successor is treated as having no outgoing edges.  The
    result is a list of components, each a list of nodes.

    This is a plain recursive rendering of Tarjan's algorithm, see
    https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
    so the recursion depth grows with the length of the paths explored.
    """
    order = {}       # node -> discovery index
    low = {}         # node -> lowest discovery index known to be reachable
    stack = []       # nodes of the components still under construction
    stacked = set()  # same nodes as `stack`, for O(1) membership tests
    found = []

    def visit(node):
        # Discovery indices are handed out consecutively, so the next free
        # one is simply the number of nodes discovered so far.
        order[node] = low[node] = len(order)
        stack.append(node)
        stacked.add(node)

        for succ in g.get(node, []):
            if succ not in order:
                visit(succ)
                low[node] = min(low[node], low[succ])
            elif succ in stacked:
                low[node] = min(low[node], order[succ])

        if low[node] != order[node]:
            return

        # `node` is the root of a component: unwind the stack down to it.
        component = []
        # iter(callable, sentinel) keeps popping until `node` itself comes
        # off the stack; the sentinel is not yielded, so handle it after.
        for member in iter(stack.pop, node):
            stacked.remove(member)
            component.append(member)
        stacked.remove(node)
        component.append(node)
        found.append(component)

    for start in g:
        if start not in order:
            visit(start)

    return found
def biggest_component(sccs):
    """Return the size of the largest component in *sccs*.

    *sccs* is an iterable of sized collections.  An empty *sccs* yields 0
    instead of letting max() raise ValueError.
    """
    return max([len(c) for c in sccs] or [0])
def _report_progress(label, done, total):
    """Overwrite the current terminal line with '<label>: done/total'."""
    sys.stdout.write("%s: %d/%d\r" % (label, done, total))
    sys.stdout.flush()


def _largest_scc_size(graph):
    """Return the size of the largest SCC of *graph*, or 0 if it has none."""
    sccs = strongly_connected_components(graph)
    return biggest_component(sccs) if sccs else 0


def _without_edge(graph, caller, callee):
    """Return a shallow copy of *graph* lacking the edge caller -> callee."""
    trimmed = dict(graph)
    trimmed[caller] = graph[caller] - {callee}
    return trimmed


def connection_bottlenecks(callgraph):
    """Find the nodes and edges that hold the largest SCC together.

    *callgraph* maps each function to the set of functions it calls.  The
    largest strongly connected component is located, and then:

    1. every function in it is removed in turn;
    2. for every function in it with exactly one caller inside the
       component, that single call is removed;
    3. for every function in it making exactly one call inside the
       component, that single call is removed (unless step 2 already
       covered the same call).

    After each removal the largest SCC is measured again.  Returns a pair
    ``(function_bottlenecks, call_bottlenecks)``:

    * ``function_bottlenecks`` holds ``(shrink, fn)`` tuples;
    * ``call_bottlenecks`` holds ``(shrink, callee, "called by", caller)``
      tuples;

    where ``shrink`` is the original size of the largest SCC minus the size
    of the largest SCC after the removal.  An empty graph gives
    ``([], [])``.  Progress counters are written to stdout.
    """
    # Reverse index: function -> set of functions that call it.
    callers = {}
    for fn, callees in callgraph.items():
        for callee in callees:
            callers.setdefault(callee, set()).add(fn)

    components = strongly_connected_components(callgraph)
    if not components:
        return ([], [])
    # Stable sort followed by [-1]: among equally large components the one
    # found last wins.
    components.sort(key=len)
    big_component_fns = components[-1]
    size = total = len(big_component_fns)
    bcf_set = set(big_component_fns)

    # The SCC search only reads the graph, so each experiment works on a
    # shallow copy with one entry replaced rather than a deep copy.

    # Pass 1: drop one function at a time.
    function_bottlenecks = []
    for idx, fn in enumerate(big_component_fns, 1):
        _report_progress("Pass 1/3", idx, total)
        without_fn = dict(callgraph)
        # Tolerate a member that is only ever a callee and so is not a key.
        without_fn.pop(fn, None)
        function_bottlenecks.append(
            (size - _largest_scc_size(without_fn), fn))
    sys.stdout.write("\n")

    # Pass 2: functions with a single caller inside the component.
    call_bottlenecks = []
    seen_edges = set()
    for idx, fn in enumerate(big_component_fns, 1):
        inside_callers = callers.get(fn, set()) & bcf_set
        if len(inside_callers) != 1:
            continue
        _report_progress("Pass 2/3", idx, total)
        caller = inside_callers.pop()
        shrink = size - _largest_scc_size(_without_edge(callgraph, caller, fn))
        call_bottlenecks.append((shrink, fn, "called by", caller))
        seen_edges.add((caller, fn))
    sys.stdout.write("\n")

    # Pass 3: functions making a single call inside the component.
    for idx, fn in enumerate(big_component_fns, 1):
        inside_calls = callgraph.get(fn, set()) & bcf_set
        if len(inside_calls) != 1:
            continue
        _report_progress("Pass 3/3", idx, total)
        callee = inside_calls.pop()
        if (fn, callee) in seen_edges:
            # Pass 2 already measured this exact call.
            continue
        shrink = size - _largest_scc_size(_without_edge(callgraph, fn, callee))
        call_bottlenecks.append((shrink, callee, "called by", fn))
    sys.stdout.write("\n")

    return (function_bottlenecks, call_bottlenecks)
if __name__ == '__main__':
    # Every command-line argument is a call-graph dump file.  All of them
    # are parsed, function-level and module-level graphs are analysed, and
    # the results are pickled to ./callgraph.pkl.
    p = Parser()
    for fname in sys.argv[1:]:
        # "some/dir/foo.callgraph" -> module name "foo.c"
        modname = re.sub(r'.*/', '', fname).replace('.callgraph', '.c')
        with open(fname, 'r') as f:
            p.parse_callgraph_file(f, modname)

    sys.stdout.flush()

    sys.stdout.write("Building callgraph\n")
    callgraph = p.extract_callgraph()
    inModule = p.definedIn

    sys.stdout.write("Deriving module callgraph\n")
    modCallgraph = {}
    for fn in callgraph:
        fnMod = inModule[fn]
        for called in callgraph[fn]:
            calledMod = inModule.get(called)
            if calledMod is None:
                # The callee was never recorded by the parser; skip it.
                continue
            modCallgraph.setdefault(fnMod, set()).add(calledMod)
    # 'nil' is the parser's placeholder for functions whose module is
    # ambiguous.  It is only present as a key when such a function makes
    # calls, so remove it without assuming it exists.
    modCallgraph.pop('nil', None)

    sys.stdout.write("Finding strongly connected components\n")
    sccs = strongly_connected_components(callgraph)

    sys.stdout.write("Finding the transitive closure of the callgraph..\n")
    closure = transitive_closure(callgraph)

    sys.stdout.write("Finding bottlenecks...\n")
    bottlenecks = connection_bottlenecks(callgraph)

    sys.stdout.write("Finding module SCCs\n")
    modSCCS = strongly_connected_components(modCallgraph)

    sys.stdout.write("Finding module TC\n")
    modTC = transitive_closure(modCallgraph)

    sys.stdout.write("Finding module bottlenecks\n")
    modB = connection_bottlenecks(modCallgraph)

    # 'modItems' deliberately mirrors the top-level keys so a consumer can
    # switch to the module-level view by descending into it.
    data = {
        'callgraph': callgraph,
        'sccs': sccs,
        'closure': closure,
        'bottlenecks': bottlenecks,
        'modules': p.definedIn,
        'modItems': {
            'callgraph': modCallgraph,
            'sccs': modSCCS,
            'closure': modTC,
            'bottlenecks': modB,
        },
    }

    # Pickle streams are byte data: always write them in binary mode.
    with open('callgraph.pkl', 'wb') as f:
        cPickle.dump(data, f)

View File

@ -1,41 +0,0 @@
#!/usr/bin/python
"""Print a report from the analysis results stored in ./callgraph.pkl.

The report lists how many functions each function can reach, every
strongly connected component with at least two members, and the 30
highest-scoring function and call bottlenecks.
"""
from __future__ import print_function

import cPickle

# NOTE: unpickling can execute arbitrary code.  Only load a callgraph.pkl
# that you generated yourself.
with open("callgraph.pkl") as f:
    data = cPickle.load(f)

# To report on the module-level graph instead of the function-level one,
# uncomment the next line.
# data = data['modItems']

callgraph = data['callgraph']
closure = data['closure']
sccs = data['sccs']
fn_bottle, call_bottle = data['bottlenecks']

# Fewest reachable functions first.
for n_reachable, fn in sorted((len(r), fn) for fn, r in closure.items()):
    print("%s can reach %s other functions." % (fn, n_reachable))

# Smallest components first; single-function components are skipped.
c = [(len(component), component) for component in sccs]
c.sort()

print("\n================================")

for n, component in c:
    if n < 2:
        continue
    print("Strongly connected component of size %d:" % n)
    print(component)

print("\n================================")

print("====== Number of functions pulled into blob, by function in blob.")
fn_bottle.sort()
# After an ascending sort, the last 30 entries are the highest scores.
for n, fn in fn_bottle[-30:]:
    print("%3d: %s" % (n, fn))

print("====== Number of functions pulled into blob, by call in blob.")
call_bottle.sort()
# Entries are (score, callee, "called by", caller); print as caller -> callee.
for n, fn1, _, fn2 in call_bottle[-30:]:
    print("%3d: %s -> %s " % (n, fn2, fn1))

View File

@ -1,14 +0,0 @@
#!/bin/sh
# Write one call-graph dump per C source file in src/common, src/or and
# src/tools into a parallel tree under ./callgraph/.

# Expanded unquoted below on purpose: it must split into separate compiler
# arguments.  The escaped quotes define the two *DIR macros as empty string
# literals; -Dinline= defines `inline` as nothing.
CFLAGS="-Isrc/ext/trunnel -Isrc/trunnel -I. -Isrc/ext -Isrc/common -DLOCALSTATEDIR=\"\" -DSHARE_DATADIR=\"\" -Dinline="

mkdir -p callgraph/src/common callgraph/src/or callgraph/src/tools

for fn in src/common/*.c src/or/*.c src/tools/*.c; do
    echo "$fn"
    # Every path ends in ".c", so "${fn}allgraph" spells "<path>.callgraph".
    # The dump is captured from opt's stderr; its stdout is discarded.
    clang $CFLAGS -S -emit-llvm -fno-inline -o - "$fn" | \
        opt -analyze -print-callgraph >/dev/null 2> "callgraph/${fn}allgraph"
done