Module: Google::SafeBrowsingParser
- Extended by:
- SafeBrowsingParser
- Included in:
- SafeBrowsingParser
- Defined in:
- lib/google/safe_browsing_parser.rb
Constant Summary collapse
- CHUNKNUM_SIZE =
Bytes
HOST_KEY_SIZE = 4
- FULL_HASH_SIZE =
Bytes, 256 bit
32
Instance Method Summary collapse
- #parse_add_data(byte_arr, hash_len) ⇒ Object
- #parse_count_number(char) ⇒ Object
- #parse_full_hash_data(byte_arr, full_hash_arr) ⇒ Object
-
#parse_full_hash_entries(str) ⇒ Object
Returns { ‘goog-malware-shavar’ (list_name) => { :add_chunk_num1 => [full_hash0, full_hash1, …] :add_chunk_num2 => [full_hash0] } }.
- #parse_hash_prefix(char_arr, hash_len) ⇒ Object
- #parse_host_key(char_arr) ⇒ Object
-
#parse_shavar_list(str, test_mode = false) ⇒ Object
The shavar list data has the following structure For ADD { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343, :chunk_data => { :host_key_one => [prefix0, prefix1, …], :host_key_two => [] } }.
- #parse_sub_data(byte_arr, hash_len) ⇒ Object
Instance Method Details
#parse_add_data(byte_arr, hash_len) ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/google/safe_browsing_parser.rb', line 89 def parse_add_data byte_arr, hash_len ret = {} total_chars = 0 pointer = 0 while pointer < byte_arr.size host_key = parse_host_key byte_arr[pointer...pointer+HOST_KEY_SIZE] total_chars += HOST_KEY_SIZE pointer += HOST_KEY_SIZE ret[host_key] ||= [] count = parse_count_number byte_arr[pointer] pointer += 1 total_chars += 1 if count > 0 sub_count = 0 while sub_count < count ret[host_key] << parse_hash_prefix(byte_arr[pointer...pointer+hash_len], hash_len) total_chars += hash_len pointer += hash_len sub_count += 1 end end end ret end |
#parse_count_number(char) ⇒ Object
154 155 156 |
# File 'lib/google/safe_browsing_parser.rb', line 154 def parse_count_number char char.unpack('C').first end |
#parse_full_hash_data(byte_arr, full_hash_arr) ⇒ Object
207 208 209 210 211 |
# File 'lib/google/safe_browsing_parser.rb', line 207 def parse_full_hash_data byte_arr, full_hash_arr byte_arr.each_slice(FULL_HASH_SIZE) do |slice| full_hash_arr << slice.join('').unpack("H#{FULL_HASH_SIZE*2}").first end end |
#parse_full_hash_entries(str) ⇒ Object
Returns { ‘goog-malware-shavar’ (list_name)
=> {
:add_chunk_num1 => [full_hash0, full_hash1, ...]
:add_chunk_num2 => [full_hash0]
}
}
BODY = ([MAC LF] HASHENTRY+) | (REKEY LF) EOF HASHENTRY = LISTNAME “:” ADDCHUNK “:” HASHDATALEN LF HASHDATA ADDCHUNK = DIGIT+ # Add chunk number HASHDATALEN = DIGIT+ # Length of HASHDATA HASHDATA = <HASHDATALEN number of unsigned bytes> # Full length hashes in binary MAC = (LOALPHA | DIGIT)+
Ignore rekey response for now
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 |
# File 'lib/google/safe_browsing_parser.rb', line 180 def parse_full_hash_entries str full_list = {} scanner = StringScanner.new(str) count = 0; scanner.pos = 0 while !(head = scanner.scan_until(FULL_HASH_HEAD)).nil? m = FULL_HASH_HEAD.match head return full_list if m[:rekey] count += 1 list_name, chunk_num, chunk_len = m[:list].to_s.to_sym, m[:chunk_num].to_i, m[:chunk_len].to_i pointer = 0; chunk_data = [] my_list = (full_list[list_name] ||= {}) my_list[chunk_num] ||= [] while pointer < chunk_len chunk_data << scanner.get_byte pointer += 1 end parse_full_hash_data chunk_data, my_list[chunk_num] end full_list end |
#parse_hash_prefix(char_arr, hash_len) ⇒ Object
158 159 160 |
# File 'lib/google/safe_browsing_parser.rb', line 158 def parse_hash_prefix char_arr, hash_len char_arr.join('').unpack("H#{hash_len*2}").first end |
#parse_host_key(char_arr) ⇒ Object
150 151 152 |
# File 'lib/google/safe_browsing_parser.rb', line 150 def parse_host_key char_arr char_arr.join('').unpack('H8').first end |
#parse_shavar_list(str, test_mode = false) ⇒ Object
The shavar list data has the following structure For ADD { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343,
:chunk_data => {
:host_key_one => [prefix0, prefix1, ...],
:host_key_two => []
}
}
For SUB { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343,
:chunk_data => {
:host_key_one => {
:add_chunknum_one => [prefix0, prefix1, ...],
:add_chunknum_two => []
}
:host_key_two => {
:add_chunknum_three => [prefix0, prefix1, ...],
:add_chunknum_four => []
}
}
}
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/google/safe_browsing_parser.rb', line 42 def parse_shavar_list str, test_mode = false adds = []; subs = [] scanner = StringScanner.new(str) count = 0; scanner.pos = 0 while !(head = scanner.scan_until(ADD_SUB_HEAD)).nil? if test_mode && count > 0 break end count += 1 m = ADD_SUB_HEAD.match head chunk_num, hash_len, chunk_len = m[:chunk_num].to_i, m[:hash_len].to_i, m[:chunk_len].to_i pointer = 0; chunk_data = [] while pointer < chunk_len chunk_data << scanner.get_byte pointer += 1 end if m[:add_sub] == 'a' data_arr = parse_add_data(chunk_data, hash_len) elsif m[:add_sub] == 's' data_arr = parse_sub_data(chunk_data, hash_len) end obj = { chunk_num: chunk_num, hash_len: hash_len, chunk_len: chunk_len, chunk_data: data_arr } if m[:add_sub] == ADD adds << obj elsif m[:add_sub] == SUB subs << obj end end Rails.logger.info "Total # of ADD/SUB section is #{count}, #{adds.size} adds, #{subs.size} subs" [adds, subs] end |
#parse_sub_data(byte_arr, hash_len) ⇒ Object
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/google/safe_browsing_parser.rb', line 116 def parse_sub_data byte_arr, hash_len ret = {} total_chars = 0 pointer = 0 while pointer < byte_arr.size host_key = parse_host_key byte_arr[pointer...pointer+HOST_KEY_SIZE] total_chars += HOST_KEY_SIZE pointer += HOST_KEY_SIZE count = parse_count_number byte_arr[pointer] total_chars += 1 pointer += 1 ret[host_key] ||= {} if count > 0 sub_count = 0 while sub_count < count add_chunknum = byte_arr[pointer...pointer+CHUNKNUM_SIZE].join('').unpack('L>').first pointer += CHUNKNUM_SIZE; total_chars += CHUNKNUM_SIZE ret[host_key][add_chunknum] ||= [] ret[host_key][add_chunknum] << parse_hash_prefix(byte_arr[pointer...pointer+hash_len], hash_len) pointer += hash_len; total_chars += hash_len sub_count += 1 end else add_chunknum = byte_arr[pointer...pointer+CHUNKNUM_SIZE].join('').unpack('L>').first ret[host_key][add_chunknum] = [] pointer += CHUNKNUM_SIZE; total_chars += CHUNKNUM_SIZE end end ret end |