Class: TextSentinel

Inherits:
Object
  • Object
show all
Defined in:
lib/text_sentinel.rb

Overview

Given a string, tell us whether or not it is acceptable.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, opts = nil) ⇒ TextSentinel

Returns a new instance of TextSentinel.



15
16
17
18
# File 'lib/text_sentinel.rb', line 15

# Builds a sentinel around the given text.
#
# @param text [#to_s] content to inspect; it is stringified and re-encoded
#   as UTF-8, with invalid or undefined sequences replaced by an empty string
# @param opts [Hash, nil] options consulted by the checks
#   (e.g. :min_entropy, :max_word_length); a falsy value becomes {}
def initialize(text, opts = nil)
  @opts = opts || {}

  # Offending byte sequences are dropped rather than raising.
  replacement_rules = { invalid: :replace, undef: :replace, replace: "" }
  @text = text.to_s.encode("UTF-8", **replacement_rules)
end

Instance Attribute Details

#textObject

Returns the value of attribute text.



11
12
13
# File 'lib/text_sentinel.rb', line 11

# Reader for the text held by this sentinel.
#
# @return [Object] the current value of the @text instance variable
def text
  @text
end

Class Method Details

.body_sentinel(text, opts = {}) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/text_sentinel.rb', line 20

# Builds a TextSentinel for a post body, deriving the minimum entropy from
# site settings.
#
# Starts from SiteSetting.body_min_entropy. For private messages the value is
# first scaled by the ratio of the personal-message minimum length to the
# regular post minimum length. If the resulting entropy exceeds the relevant
# minimum length, it is replaced by that length multiplied by ENTROPY_SCALE.
#
# @param text [#to_s] the body text to check
# @param opts [Hash] pass :private_message => truthy for personal messages
# @return [TextSentinel]
def self.body_sentinel(text, opts = {})
  required = SiteSetting.body_min_entropy

  if opts[:private_message]
    length_limit = SiteSetting.min_personal_message_post_length
    ratio = length_limit.to_f / SiteSetting.min_post_length.to_f
    required = (required * ratio).to_i
  else
    length_limit = SiteSetting.min_post_length
  end

  # Entropy larger than the minimum length is capped relative to that length.
  required = (length_limit.to_f * ENTROPY_SCALE).to_i if required > length_limit

  TextSentinel.new(text, min_entropy: required)
end

.title_sentinel(text) ⇒ Object



36
37
38
39
40
41
42
43
44
# File 'lib/text_sentinel.rb', line 36

# Builds a TextSentinel for a topic title.
#
# Uses SiteSetting.title_min_entropy when the minimum title length is strictly
# greater than it; otherwise uses the minimum title length multiplied by
# ENTROPY_SCALE. The maximum word length comes from
# SiteSetting.title_max_word_length.
#
# @param text [#to_s] the title text to check
# @return [TextSentinel]
def self.title_sentinel(text)
  min_length = SiteSetting.min_topic_title_length
  required = SiteSetting.title_min_entropy

  # When the length minimum does not exceed the entropy, derive it from the length.
  required = (min_length.to_f * ENTROPY_SCALE).to_i unless min_length > required

  TextSentinel.new(
    text,
    min_entropy: required,
    max_word_length: SiteSetting.title_max_word_length,
  )
end

Instance Method Details

#entropyObject

Entropy is a measure of how many unique characters the string contains. Non-ASCII characters are weighted more heavily since they contain more “information”.



48
49
50
51
# File 'lib/text_sentinel.rb', line 48

# Rough measure of how varied the text is.
#
# The stripped text is split into characters and each one is packed as
# quoted-printable ("M"). Newlines are removed, the result is split on "=",
# and the distinct tokens are counted. Characters that quoted-printable
# escapes (e.g. non-ASCII bytes) produce extra tokens and so count for more.
#
# The result is memoized in @entropy. The character split now happens inside
# the memoized block, so repeated calls no longer re-split the text.
#
# @return [Integer] number of distinct tokens
def entropy
  @entropy ||=
    begin
      chars = @text.to_s.strip.split("")
      chars.pack("M*" * chars.size).gsub("\n", "").split("=").uniq.size
    end
end

#seems_meaningful?Boolean

Returns:

  • (Boolean)


58
59
60
61
# File 'lib/text_sentinel.rb', line 58

# Checks the text against the optional :min_entropy option.
#
# @return [Boolean] true when no minimum is configured, otherwise whether
#   #entropy is at least the configured minimum
def seems_meaningful?
  threshold = @opts[:min_entropy]

  # No threshold configured means the entropy check is skipped.
  return true if threshold.blank?

  entropy >= threshold
end

#seems_pronounceable?Boolean

Returns:

  • (Boolean)


63
64
65
66
67
# File 'lib/text_sentinel.rb', line 63

# Checks that the text is not made up solely of symbols.
#
# The symbol list is not comprehensive, but stripping the characters matched
# by symbols_regex filters out some noise.
#
# @return [Boolean] true when at least one character remains after removal
def seems_pronounceable?
  without_symbols = @text.gsub(symbols_regex, "")
  !without_symbols.empty?
end

#seems_quiet?Boolean

Returns:

  • (Boolean)


77
78
79
80
81
82
# File 'lib/text_sentinel.rb', line 77

# Rejects content that is written entirely in upper case.
#
# The check is skipped when the site's default locale is in skipped_locale,
# or when SiteSetting.allow_uppercase_posts is truthy (that value is returned
# as-is).
#
# @return [Boolean, Object] truthy unless the text changes when downcased and
#   is unchanged when upcased
def seems_quiet?
  return true if skipped_locale.include?(SiteSetting.default_locale)

  uppercase_allowed = SiteSetting.allow_uppercase_posts
  return uppercase_allowed if uppercase_allowed

  # "Shouting": downcasing changes the text but upcasing leaves it untouched.
  shouting = @text != @text.mb_chars.downcase.to_s && @text == @text.mb_chars.upcase.to_s
  !shouting
end

#seems_unpretentious?Boolean

Returns:

  • (Boolean)


69
70
71
72
73
74
75
# File 'lib/text_sentinel.rb', line 69

# Rejects text containing overly long words when a :max_word_length option
# is set.
#
# The check is skipped when the site's default locale is in skipped_locale.
# Words are obtained by splitting on whitespace, "/", "-", "." and ":".
#
# @return [Boolean] true when no limit is configured or the longest word does
#   not exceed it
def seems_unpretentious?
  return true if skipped_locale.include?(SiteSetting.default_locale)

  limit = @opts[:max_word_length]
  return true if limit.blank?

  # Text with no words at all has a longest word of 0.
  longest = @text.split(%r{\s|/|-|\.|:}).map(&:size).max || 0
  longest <= limit
end

#valid?Boolean

Returns:

  • (Boolean)


53
54
55
56
# File 'lib/text_sentinel.rb', line 53

# Runs every check against the text.
#
# The checks short-circuit in order: presence, #seems_meaningful?,
# #seems_pronounceable?, #seems_unpretentious?, #seems_quiet?.
#
# @return [Boolean] whether the text is present and passes all checks
def valid?
  return false unless @text.present?

  seems_meaningful? && seems_pronounceable? && seems_unpretentious? && seems_quiet?
end