Class: Keyphrase
- Inherits:
-
Object
- Object
- Keyphrase
- Defined in:
- lib/keyphrase.rb,
lib/keyphrase/version.rb,
lib/keyphrase/stoplist/afr.rb,
lib/keyphrase/stoplist/aka.rb,
lib/keyphrase/stoplist/amh.rb,
lib/keyphrase/stoplist/ara.rb,
lib/keyphrase/stoplist/aze.rb,
lib/keyphrase/stoplist/bel.rb,
lib/keyphrase/stoplist/ben.rb,
lib/keyphrase/stoplist/bul.rb,
lib/keyphrase/stoplist/cat.rb,
lib/keyphrase/stoplist/ces.rb,
lib/keyphrase/stoplist/cmn.rb,
lib/keyphrase/stoplist/dan.rb,
lib/keyphrase/stoplist/deu.rb,
lib/keyphrase/stoplist/ell.rb,
lib/keyphrase/stoplist/eng.rb,
lib/keyphrase/stoplist/epo.rb,
lib/keyphrase/stoplist/est.rb,
lib/keyphrase/stoplist/fin.rb,
lib/keyphrase/stoplist/fra.rb,
lib/keyphrase/stoplist/guj.rb,
lib/keyphrase/stoplist/heb.rb,
lib/keyphrase/stoplist/hin.rb,
lib/keyphrase/stoplist/hrv.rb,
lib/keyphrase/stoplist/hun.rb,
lib/keyphrase/stoplist/hye.rb,
lib/keyphrase/stoplist/ind.rb,
lib/keyphrase/stoplist/ita.rb,
lib/keyphrase/stoplist/jav.rb,
lib/keyphrase/stoplist/jpn.rb,
lib/keyphrase/stoplist/kan.rb,
lib/keyphrase/stoplist/kat.rb,
lib/keyphrase/stoplist/khm.rb,
lib/keyphrase/stoplist/kor.rb,
lib/keyphrase/stoplist/lat.rb,
lib/keyphrase/stoplist/lav.rb,
lib/keyphrase/stoplist/lit.rb,
lib/keyphrase/stoplist/mal.rb,
lib/keyphrase/stoplist/mar.rb,
lib/keyphrase/stoplist/mkd.rb,
lib/keyphrase/stoplist/mya.rb,
lib/keyphrase/stoplist/nep.rb,
lib/keyphrase/stoplist/nld.rb,
lib/keyphrase/stoplist/nob.rb,
lib/keyphrase/stoplist/ori.rb,
lib/keyphrase/stoplist/pan.rb,
lib/keyphrase/stoplist/pes.rb,
lib/keyphrase/stoplist/pol.rb,
lib/keyphrase/stoplist/por.rb,
lib/keyphrase/stoplist/ron.rb,
lib/keyphrase/stoplist/rus.rb,
lib/keyphrase/stoplist/sin.rb,
lib/keyphrase/stoplist/slk.rb,
lib/keyphrase/stoplist/slv.rb,
lib/keyphrase/stoplist/sna.rb,
lib/keyphrase/stoplist/spa.rb,
lib/keyphrase/stoplist/srp.rb,
lib/keyphrase/stoplist/swe.rb,
lib/keyphrase/stoplist/tam.rb,
lib/keyphrase/stoplist/tel.rb,
lib/keyphrase/stoplist/tgl.rb,
lib/keyphrase/stoplist/tha.rb,
lib/keyphrase/stoplist/tuk.rb,
lib/keyphrase/stoplist/tur.rb,
lib/keyphrase/stoplist/ukr.rb,
lib/keyphrase/stoplist/urd.rb,
lib/keyphrase/stoplist/uzb.rb,
lib/keyphrase/stoplist/vie.rb,
lib/keyphrase/stoplist/yid.rb,
lib/keyphrase/stoplist/zul.rb
Defined Under Namespace
Modules: Stoplist
Constant Summary collapse
- CLEAN_REGEX =
Don't remove the apostrophe (') because it might be part of a stop word.
/([^\p{L}a-zA-Z0-9\'\- \.]|(?<!\w)\.)/
- BLACKLIST_REGEX =
Remove words with no letters, e.g. 123.23.12. This is also the last chance to remove apostrophes (') and hyphens (-).
/(?:^|\s)[^a-zA-Z\p{L}]+\b|\'|\-/
- CLEAN_SPACES_REGEX =
/\s+/
- SENTENCES_REGEX =
/[+!?,;:&\[\]\{\}\<\>\=\/\n\t\\"\\(\\)\u2019\u2013\|]|-(?!\w)|'(?=s)|(?<!\s)\.(?![a-zA-Z0-9])|(?<!\w)\#(?=\w)/u
- VERSION =
"0.2.0"
Class Method Summary collapse
Instance Method Summary collapse
- #analyse(text, options = {}) ⇒ Object
-
#initialize ⇒ Keyphrase
constructor
A new instance of Keyphrase.
Constructor Details
#initialize ⇒ Keyphrase
Returns a new instance of Keyphrase.
19 20 21 |
# File 'lib/keyphrase.rb', line 19

# Returns a new instance of Keyphrase.
#
# Each instance starts with its own empty @cached_regex hash.
# NOTE(review): presumably used to memoize compiled stopword patterns —
# confirm against the helper that reads it.
def initialize
  @cached_regex = {}
end
Class Method Details
.analyse(text, options = {}) ⇒ Object
14 15 16 17 |
# File 'lib/keyphrase.rb', line 14

# Class-level convenience wrapper around Keyphrase#analyse.
#
# Lazily creates a single Keyphrase instance, stores it in @@keyphrase and
# reuses it on every later call, then delegates to the instance method.
#
# @param text [String] text to analyse (passed through to #analyse)
# @param options [Hash] options passed through unchanged to #analyse
# @return [Object] whatever #analyse returns
def self.analyse(text, options = {})
  @@keyphrase ||= Keyphrase.new
  @@keyphrase.analyse(text, options)
end
Instance Method Details
#analyse(text, options = {}) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/keyphrase.rb', line 23

# Splits +text+ into sentences, builds candidate phrases from them, scores
# the candidates and returns them.
#
# @param text [String] text to analyse; it is split with the sentences regex
# @param options [Hash] optional overrides
# @option options [Object] :stopwords passed to buildStopwordRegExPattern
# @option options [Symbol] :lang (:eng) language passed to buildStopwordRegExPattern
# @option options [Regexp] :clean (CLEAN_REGEX) cleaning pattern
# @option options [Boolean] :position_bonus (true) passed to generateCandidateKeywordScores
# @option options [Boolean] :sort (true) order the result by descending score
# @option options [Regexp] :blacklist (BLACKLIST_REGEX)
# @option options [Regexp] :sentences_regex (SENTENCES_REGEX) pattern +text+ is split on
# @option options [Regexp] :clean_spaces_regex (CLEAN_SPACES_REGEX)
# @option options [Boolean] :verbose print each "score - phrase" pair to stdout
# @return [Object] the scored candidates; a Hash ordered by descending score
#   when sorting is enabled, otherwise whatever generateCandidateKeywordScores
#   returned
def analyse(text, options = {})
  stopwords          = options[:stopwords]
  lang               = options[:lang] || :eng
  clean_regex        = options[:clean] || CLEAN_REGEX
  blacklist          = options[:blacklist] || BLACKLIST_REGEX
  sentences_regex    = options[:sentences_regex] || SENTENCES_REGEX
  clean_spaces_regex = options[:clean_spaces_regex] || CLEAN_SPACES_REGEX

  # Boolean flags cannot use `|| true`: that turns an explicit `false` back
  # into `true`. Fall back to the default only when the value is nil/absent.
  position_bonus = options[:position_bonus]
  position_bonus = true if position_bonus.nil?
  sort = options[:sort]
  sort = true if sort.nil?

  pattern    = buildStopwordRegExPattern(lang, stopwords)
  sentences  = text.split(sentences_regex)
  phrases    = generateCandidateKeywords(sentences, pattern, clean_regex, blacklist, clean_spaces_regex)
  wordscores = calculateWordScores(phrases)
  candidates = generateCandidateKeywordScores(phrases, wordscores, position_bonus)

  # Highest score first.
  candidates = candidates.sort_by { |_phrase, score| -score }.to_h if sort

  if options[:verbose]
    candidates.each do |word, score|
      puts format('%.2f - %s', score, word)
    end
  end

  candidates
end