Class: EmployMe::Parser::Location::Strategies::PatternMatch
- Inherits:
-
Object
- Object
- EmployMe::Parser::Location::Strategies::PatternMatch
- Defined in:
- lib/employ_me/parser/location/strategies/pattern_match.rb
Class Method Summary collapse
-
.perform(root_node) ⇒ Object
Returns [state code, city name, state name] for the first recognized location, or nil when no known location is found.
Class Method Details
.perform(root_node) ⇒ Object
Returns [state code, city name, state name] for the first recognized location, or nil when no known location is found
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/employ_me/parser/location/strategies/pattern_match.rb', line 7

# Walks the node tree breadth-first and returns the location triple for the
# first node whose text mentions a known location.
#
# Only nodes whose children are all "text"/"comment" nodes (including nodes
# with no children at all) have their text inspected; other nodes are just
# expanded into the queue.
#
# Matching is an unanchored, case-insensitive substring test, so each
# pattern below also covers every longer spelling that contains it, e.g.
# "Seattle, WA" matches "Seattle, Washington, United States" and "Remote"
# matches "US & Canada Remote". Patterns are tried in the order written;
# "Remote" therefore wins over any city mentioned in the same text.
#
# @param root_node [#children, #text, #name] root of the tree to search;
#   every node must respond to +children+, +text+ and +name+
# @return [Array, nil] +[state code, city name, state name]+ for a city
#   match (the third element currently repeats the state code),
#   +['REMOTE', nil, nil]+ for remote jobs, or +nil+ when nothing matches
def self.perform(root_node)
  queue = [root_node]

  # FIFO queue => breadth-first: shallower nodes are examined before deeper ones.
  until queue.empty?
    node = queue.shift

    if node.children.all? { |child| %w[comment text].include?(child.name) }
      # Regexp literals are compiled once, not once per visited node.
      location =
        case node.text
        when /Remote/i
          ['REMOTE', nil, nil]
        when /San Francisco/i
          ['CA', 'San Francisco', 'CA']
        when /San Mateo, CA/i
          ['CA', 'San Mateo', 'CA']
        when /Boca Raton, FL/i # also covers "Boca Raton, Florida[, United States]"
          ['FL', 'Boca Raton', 'FL']
        when /Raleigh, NC/i
          ['NC', 'Raleigh', 'NC']
        when /New York, NY/i, /New York, New York/i, /New York Office/i
          ['NY', 'New York', 'NY']
        when /Seattle, WA/i # also covers "Seattle, Washington[, United States]"
          ['WA', 'Seattle', 'WA']
        end

      return location if location
    end

    queue.concat(node.children)
  end

  nil
end