Class: EmployMe::Parser::Salary::Strategies::PatternMatch
- Inherits:
-
Object
- Object
- EmployMe::Parser::Salary::Strategies::PatternMatch
- Defined in:
- lib/employ_me/parser/salary/strategies/pattern_match.rb
Class Method Summary collapse
-
.perform(root_node) ⇒ Object
Return [salary floor, salary ceiling].
Class Method Details
.perform(root_node) ⇒ Object
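Returns [salary floor, salary ceiling] as a pair of Integer dollar amounts taken from the first node whose text contains a recognized salary range, or nil when no node matches.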
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/employ_me/parser/salary/strategies/pattern_match.rb', line 7

# Searches a node tree for the first salary range written in a known format
# and returns it as whole-dollar integers.
#
# Nodes are visited breadth-first (the queue is consumed from the front and
# children are appended to the back). Only nodes whose children are all
# "text"/"comment" nodes (including nodes with no children) have their text
# inspected.
#
# Recognized formats, in priority order within a node (case-insensitive):
#   $100K - $200K                 (any single non-space separator character)
#   $100,000 - $200,000           (optionally followed by " USD")
#   $100,000 to $200,000
#   $100,000 and up to $200,000
# Comma-grouped amounts may carry two-digit cents (e.g. "$100,000.00"), which
# are dropped, and may have several comma groups (e.g. "$1,200,000").
#
# @param root_node [#children, #name, #text] root of the tree to search
#   (presumably a Nokogiri node -- confirm against the caller)
# @return [Array(Integer, Integer), nil] [salary floor, salary ceiling], or
#   nil when no inspected node contains a recognized range
def self.perform(root_node)
  # Regexp literals are compiled once at parse time, so nothing is rebuilt
  # per node. Each amount is captured whole (all comma groups) so digits
  # after the first comma are not lost.
  k_pattern = /\$([0-9]+)K \S \$([0-9]+)K/i
  comma_patterns = [
    /\$([0-9]+(?:,[0-9]+)+)(?:\.[0-9]{2})? - \$([0-9]+(?:,[0-9]+)+)/i,
    /\$([0-9]+(?:,[0-9]+)+)(?:\.[0-9]{2})? to \$([0-9]+(?:,[0-9]+)+)/i,
    /\$([0-9]+(?:,[0-9]+)+)(?:\.[0-9]{2})? and up to \$([0-9]+(?:,[0-9]+)+)/i
  ]

  queue = [root_node]

  until queue.empty?
    node = queue.shift

    if node.children.all? { |child| %w[comment text].include?(child.name) }
      text = node.text

      # "K" shorthand takes priority over the comma-grouped formats.
      found = k_pattern.match(text)
      return [found[1].to_i * 1000, found[2].to_i * 1000] if found

      comma_patterns.each do |pattern|
        found = pattern.match(text)
        next unless found

        # Strip the thousands separators and read the full dollar amount.
        return [found[1].delete(',').to_i, found[2].delete(',').to_i]
      end
    end

    queue.concat(node.children)
  end

  nil
end