From 6a763b5fc5dba5751d58f9f8d49511f3a3775df0 Mon Sep 17 00:00:00 2001
From: agone
Date: Tue, 27 Sep 2022 16:27:57 +0200
Subject: [PATCH] add outputformat: elf graph jsonl

distinct jsonl-files for vertexes and edges
replace tmp-output with fixed file names for better artifact handling
---
 src/elf/README.md             |   1 +
 src/elf/generate_elf_graph.py | 148 +++++++++++++++++++++++++++++++++-
 src/elf/graph.config          |   3 +
 3 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/src/elf/README.md b/src/elf/README.md
index a4447a752..abe90f4c9 100644
--- a/src/elf/README.md
+++ b/src/elf/README.md
@@ -8,6 +8,7 @@ The scripts can output in several formats. Currently supported:
 
 * Cypher (Neo4J) (default, this might change)
 * dot (graphviz), output in PNG and SVG
+* jsonl (arangodb / arangoimport)
 
 This script originally comes from the following repository:
 
diff --git a/src/elf/generate_elf_graph.py b/src/elf/generate_elf_graph.py
index e688dfbf1..3be157398 100644
--- a/src/elf/generate_elf_graph.py
+++ b/src/elf/generate_elf_graph.py
@@ -250,6 +250,127 @@ def createcypher(outputdir, binaries, linked_libraries,
                     pass
 
 
+def _unique_placeholder(taken):
+    '''Return a random 8 letter key that is not in taken yet.
+
+    The key is also added to taken, so keys stay unique across all
+    the collections that share the same set.
+    '''
+    while True:
+        placeholdername = ''.join(secrets.choice(string.ascii_letters) for _ in range(8))
+        if placeholdername not in taken:
+            taken.add(placeholdername)
+            return placeholdername
+
+
+def _is_graph_symbol(symbol):
+    '''Return True if an exported symbol should be stored in the graph.'''
+    # Symbols without a size or without a type are not needed. Both
+    # spellings of "no type" are rejected, so that symbol.jsonl and
+    # symbol.exports.jsonl always agree on which symbols exist.
+    return symbol['size'] != 0 and symbol['type'] not in ('NOTYPE', 'no_type')
+
+
+def createjsonl(outputdir, binaries, linked_libraries,
+                filename_to_full_path, elf_to_exported_symbols,
+                elf_to_imported_symbols, hashes):
+    '''Write the ELF graph as JSON Lines files (arangodb / arangoimport).
+
+    Nodes are written to elf.jsonl and symbol.jsonl, edges to
+    elf.linkswith.jsonl, symbol.exports.jsonl and symbol.uses.jsonl.
+    The files are created in outputdir (a pathlib.Path) under these
+    fixed names and overwrite the files of an earlier run.
+
+    hashes is not used: edges have to refer to the "_key" of a node,
+    which is the random placeholder generated here. The parameter is
+    kept so the signature matches the call in main().
+    '''
+    # only this output format needs json, so import it here
+    import json
+
+    # keys are kept unique across both node collections
+    all_placeholder_names = set()
+
+    elf_to_placeholder = {}
+    for filename in binaries:
+        elf_to_placeholder[filename] = _unique_placeholder(all_placeholder_names)
+
+    with open(outputdir / 'elf.jsonl', 'w', encoding='utf-8') as fileopen:
+        for filename in binaries:
+            # define a node. json.dumps escapes quotes and backslashes
+            # in names, which plain string formatting does not do.
+            record = {'_key': elf_to_placeholder[filename], 'name': str(filename)}
+            fileopen.write(json.dumps(record) + '\n')
+
+    with open(outputdir / 'elf.linkswith.jsonl', 'w', encoding='utf-8') as fileopen:
+        # add links between files
+        for filename in binaries:
+            # record the dependencies that are linked
+            for l in linked_libraries[filename]:
+                # only record dependencies that are
+                # in the same collection of binaries
+                linked = None
+                if l in filename_to_full_path:
+                    for fl in filename_to_full_path[l]:
+                        if fl in binaries:
+                            linked = fl
+                            break
+                if linked is None:
+                    # problem here, ignore for now
+                    continue
+
+                # point to the key of the library node: a value from
+                # hashes is not the key of any node in elf.jsonl
+                record = {'_from': 'elf/%s' % elf_to_placeholder[filename],
+                          '_to': 'elf/%s' % elf_to_placeholder[linked]}
+                fileopen.write(json.dumps(record) + '\n')
+
+    symbol_to_placeholder = {}
+    with open(outputdir / 'symbol.jsonl', 'w', encoding='utf-8') as fileopen:
+        # then add all the exported symbols just once. Edges look up
+        # symbols by (name, type), so that is what makes a node unique:
+        # also using the binding would create nodes no edge refers to.
+        for filename in binaries:
+            for exp in elf_to_exported_symbols[filename]:
+                if not _is_graph_symbol(exp):
+                    continue
+                symbol = (exp['name'], exp['type'])
+                if symbol in symbol_to_placeholder:
+                    continue
+                symbol_to_placeholder[symbol] = _unique_placeholder(all_placeholder_names)
+                record = {'_key': symbol_to_placeholder[symbol],
+                          'name': str(exp['name']), 'type': str(exp['type'])}
+                fileopen.write(json.dumps(record) + '\n')
+
+    with open(outputdir / 'symbol.exports.jsonl', 'w', encoding='utf-8') as fileopen:
+        # then declare for all the symbols which are exported
+        for filename in binaries:
+            for exp in elf_to_exported_symbols[filename]:
+                if not _is_graph_symbol(exp):
+                    continue
+                symbol = (exp['name'], exp['type'])
+                record = {'_from': 'elf/%s' % elf_to_placeholder[filename],
+                          '_to': 'symbol/%s' % symbol_to_placeholder[symbol]}
+                fileopen.write(json.dumps(record) + '\n')
+
+    with open(outputdir / 'symbol.uses.jsonl', 'w', encoding='utf-8') as fileopen:
+        # store which files use which symbols
+        for filename in binaries:
+            for imp in elf_to_imported_symbols[filename]:
+                if imp['size'] == 0:
+                    continue
+                if imp['binding'] in ('local', 'weak'):
+                    # skip LOCAL symbols, and WEAK symbols for now
+                    continue
+                symbol = (imp['name'], imp['type'])
+                if symbol in symbol_to_placeholder:
+                    record = {'_from': 'elf/%s' % elf_to_placeholder[filename],
+                              '_to': 'symbol/%s' % symbol_to_placeholder[symbol]}
+                    fileopen.write(json.dumps(record) + '\n')
+                else:
+                    print('something is horribly wrong here...')
+
+
 @click.command(short_help='process BANG result files and output ELF graphs')
 @click.option('--config-file', '-c', required=True, help='configuration file', type=click.File('r'))
 @click.option('--directory', '-d', 'result_directory', required=True, help='BANG result directory', type=click.Path(exists=True))
@@ -270,7 +391,7 @@ def main(config_file, result_directory, output, root_directory):
         sys.exit(1)
 
     #supported_formats = ['text', 'cypher', 'graphviz']
-    supported_formats = ['cypher', 'dot']
+    supported_formats = ['cypher', 'dot', 'jsonl']
 
     # check the output format. By default it is cypher.
     outputformat = 'cypher'
@@ -331,9 +452,28 @@ def main(config_file, result_directory, output, root_directory):
                           file=sys.stderr)
                     config_file.close()
                     sys.exit(1)
+        if outputformat == 'jsonl':
+            if section == 'jsonl':
+                try:
+                    outputdir = pathlib.Path(config.get(section, 'outputdir'))
+                except:
+                    print("Directory to write jsonl files not configured",
+                          file=sys.stderr)
+                    config_file.close()
+                    sys.exit(1)
+                if not outputdir.exists():
+                    print("Directory to write jsonl files does not exist",
+                          file=sys.stderr)
+                    config_file.close()
+                    sys.exit(1)
+                if not outputdir.is_dir():
+                    print("Directory to write jsonl files is not a directory",
+                          file=sys.stderr)
+                    config_file.close()
+                    sys.exit(1)
     config_file.close()
 
-    if outputformat == 'cypher':
+    if outputformat == 'cypher' or outputformat == 'jsonl':
         if outputdir is None:
             print("Directory to write output files to not configured",
                   file=sys.stderr)
@@ -515,6 +655,10 @@ def main(config_file, result_directory, output, root_directory):
         createdot(outputdir, binaries, linked_libraries,
                   filename_to_full_path, elf_to_exported_symbols,
                   elf_to_imported_symbols, hashes)
+    elif outputformat == 'jsonl':
+        createjsonl(outputdir, binaries, linked_libraries,
+                    filename_to_full_path, elf_to_exported_symbols,
+                    elf_to_imported_symbols, hashes)
 
 if __name__ == "__main__":
     main()
diff --git a/src/elf/graph.config b/src/elf/graph.config
index 5e38ada4c..42f68b91e 100644
--- a/src/elf/graph.config
+++ b/src/elf/graph.config
@@ -7,3 +7,6 @@ outputdir = /home/armijn/cypher
 outputdir = /home/armijn/dot
 
 [neo4j]
+
+[jsonl]
+outputdir = /home/builder/jsonl