-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocessor.rb
More file actions
113 lines (90 loc) · 2.75 KB
/
preprocessor.rb
File metadata and controls
113 lines (90 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
require 'digest'
require 'json'
require 'fileutils'
require_relative 'index_database'
# Builds and caches lookup indexes for a Ruby heap dump file.
#
# The heap dump is read as one JSON document per line. The indexes are stored
# as JSON files under `<cache_location>/<MD5 of the dump>/`, so each distinct
# dump content gets its own cache directory.
class Preprocessor
  ADDR_POS_INDEX = "pos.idx".freeze
  VAL_ADDR_INDEX = "val_addr.idx".freeze
  ADDR_REF_INDEX = "addr_ref.idx".freeze

  # @param ruby_heap_dump_file [String] path to the heap dump to index
  # @param cache_location [String] directory under which per-dump index
  #   directories are created
  def initialize(ruby_heap_dump_file, cache_location: '.')
    @heap_file = ruby_heap_dump_file
    @cache_location = cache_location
  end

  # Builds the index files for the heap dump unless a complete set is already
  # present in the cache. The digest is recomputed on every call.
  def process
    @digest = calculate_digest
    return if was_processed?(@digest)

    build_index_db(@digest)
  end

  # Creates an IndexDatabase pointing at the cached index files and calls
  # `load` on it. The digest is computed on demand, so this no longer fails
  # with a TypeError when `process` has not been called on this instance.
  # It does not build missing indexes; call `process` for that.
  #
  # @return [IndexDatabase]
  def get_db
    IndexDatabase.new(heap_file: @heap_file,
                      addr_pos_path: addr_pos_path,
                      val_addr_path: val_addr_path,
                      addr_ref_path: addr_ref_path).tap(&:load)
  end

  private

  # Digest of the heap dump, computed once if `process` has not set it yet.
  def current_digest
    @digest ||= calculate_digest
  end

  # Path of the index file `name` inside the cache directory for `digest`.
  def index_path(name, digest = current_digest)
    File.join(@cache_location, digest, name)
  end

  def addr_pos_path(digest = current_digest)
    index_path(ADDR_POS_INDEX, digest)
  end

  def val_addr_path(digest = current_digest)
    index_path(VAL_ADDR_INDEX, digest)
  end

  def addr_ref_path(digest = current_digest)
    index_path(ADDR_REF_INDEX, digest)
  end

  # Reads the heap dump once and writes three index mappings:
  #   1. `address` -> byte offset in the dump of the line describing it
  #   2. `value`   -> list of `address` whose line carries that value
  #   3. `address` -> list of `address` whose line references it
  #
  # @param digest [String] digest naming the cache directory to write into
  def build_index_db(digest)
    FileUtils.mkpath(File.join(@cache_location, digest))

    addr_pos = {}
    val_addr = {}
    addr_ref = {}

    # Block form so the handle is closed even when a line fails to parse.
    File.open(@heap_file) do |f|
      line_start = f.pos
      f.each_line do |line|
        data = JSON.parse(line)
        # TODO: lines without "address" are assumed to be roots and are keyed
        # by their "root" field instead.
        addr = data["address"] || data["root"]

        addr_pos[addr] = line_start
        # After a line is yielded, f.pos is the offset where the next begins.
        line_start = f.pos

        if (value = data["value"])
          (val_addr[value] ||= []) << addr
        end

        if (references = data["references"])
          references.each { |ref| (addr_ref[ref] ||= []) << addr }
        end
      end
    end

    write_index(addr_pos_path(digest), addr_pos)
    write_index(val_addr_path(digest), val_addr)
    write_index(addr_ref_path(digest), addr_ref)
  end

  # Serializes `index` as JSON to `path`. The data goes to a temporary sibling
  # file first and is then renamed, so a file under its final name is never
  # half-written.
  def write_index(path, index)
    tmp_path = "#{path}.tmp"
    File.open(tmp_path, "w") { |f| f.write(index.to_json) }
    File.rename(tmp_path, path)
  end

  # MD5 hex digest of the heap dump, fed line by line so the whole file is
  # never held in memory. File.foreach closes the file when iteration ends.
  #
  # @return [String]
  def calculate_digest
    md5 = Digest::MD5.new
    File.foreach(@heap_file) { |line| md5 << line }
    md5.hexdigest
  end

  # A dump counts as processed only when all three index files exist. A bare
  # cache directory (e.g. left by an interrupted build) is not enough.
  #
  # @param digest [String]
  # @return [Boolean]
  def was_processed?(digest)
    [ADDR_POS_INDEX, VAL_ADDR_INDEX, ADDR_REF_INDEX].all? do |name|
      File.file?(index_path(name, digest))
    end
  end
end