Class: TeRex::StopWord

Inherits:
Object
  • Object
show all
Defined in:
lib/te_rex/stop_word.rb

Constant Summary collapse

# Domain-specific stop words: travel-industry vendor/brand names and
# technical terms (plus a stray "-" token).
# NOTE(review): presumably these are dropped before classification because
# they carry no signal for this corpus -- confirm against the callers.
#
# Frozen so the shared list cannot be mutated in place at runtime. The freeze
# is shallow: the array is immutable, the individual strings are not.
ENTERPRISE =
[
  "-", # bayes_data should handle this, but it is still coming through: look at stemmer.
  "amadeus",
  "ean",
  "error",
  "expedia",
  "java",
  "json",
  "orbitz",
  "priceline",
  "sabre",
  "travelocity",
  "xml",
  "xmlst",
  "xmlws"
].freeze
# Date and time stop words: full month names, month abbreviations, weekday
# names and abbreviations, the "am"/"pm" markers, and the single digits
# "0" through "9" (stored as strings).
#
# NOTE(review): the month abbreviations skip "jun", "jul" and "oct" (and use
# "sept" rather than "sep") -- confirm whether that omission is intentional.
#
# Frozen so the shared list cannot be mutated in place at runtime. The freeze
# is shallow: the array is immutable, the individual strings are not.
DATES_TIMES =
[
  # Full month names
  "january",
  "february",
  "march",
  "april",
  "may",
  "june",
  "july",
  "august",
  "september",
  "october",
  "november",
  "december",
  # Month abbreviations
  "jan",
  "feb",
  "mar",
  "apr",
  "aug",
  "sept",
  "nov",
  "dec",
  # Weekdays, each followed by its abbreviation
  "monday",
  "mon",
  "tuesday",
  "tue",
  "wednesday",
  "wed",
  "thursday",
  "thur",
  "friday",
  "fri",
  "saturday",
  "sat",
  "sunday",
  "sun",
  # Meridiem markers
  "pm",
  "am",
  # Single digits
  "0",
  "1",
  "2",
  "3",
  "4",
  "5",
  "6",
  "7",
  "8",
  "9"
].freeze
# Common English function words: articles, prepositions, pronouns, auxiliary
# verbs and a few contractions.
#
# NOTE(review): "hotel" and "sorry" are not connectors; presumably they were
# added because they are too common in this corpus to be useful -- confirm.
# "am" also appears in DATES_TIMES.
#
# Frozen so the shared list cannot be mutated in place at runtime. The freeze
# is shallow: the array is immutable, the individual strings are not.
CONNECTOR =
[
  "a",
  "all",
  "am",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "by",
  "can",
  "do",
  "does",
  "doesn't",
  "for",
  "get",
  "has",
  "hotel",
  "in",
  "into",
  "is",
  "it",
  "it's",
  "its",
  "of",
  "on",
  "or",
  "so",
  "sorry",
  "than",
  "that",
  "that's",
  "this",
  "the",
  "there",
  "their",
  "to",
  "us",
  "was",
  "we",
  "we're",
  "were",
  "what",
  "what's",
  "where",
  "when",
  "which",
  "with",
  "you",
  "you've"
].freeze
# Every stop word in one array: ENTERPRISE, then DATES_TIMES, then CONNECTOR.
#
# "am" appears in both DATES_TIMES and CONNECTOR, so the plain concatenation
# would hold it twice; `uniq` keeps the first occurrence and preserves order.
# Frozen (shallowly) so the combined list cannot be mutated in place.
LIST =
(ENTERPRISE + DATES_TIMES + CONNECTOR).uniq.freeze