Class: HomographDetector

Inherits:
Object
  • Object
show all
Defined in:
lib/homograph_detector.rb

Constant Summary collapse

SCRIPT_BOPOMOFO =

Unicode Script names returned by the ‘unicode-scripts’ gem

'Bopomofo'
SCRIPT_COMMON =
'Common'
SCRIPT_CYRILLIC =
'Cyrillic'
SCRIPT_GREEK =
'Greek'
SCRIPT_HAN =
'Han'
SCRIPT_HANGUL =
'Hangul'
SCRIPT_HIRAGANA =
'Hiragana'
SCRIPT_INHERITED =
'Inherited'
SCRIPT_KATAKANA =
'Katakana'
SCRIPT_LATIN =
'Latin'
SPECIAL_SCRIPTS =

Groups of Unicode Scripts

Set[SCRIPT_COMMON, SCRIPT_INHERITED].freeze
JAPANESE_SCRIPTS =
Set[SCRIPT_HAN, SCRIPT_HIRAGANA, SCRIPT_KATAKANA].freeze
CHINESE_SCRIPTS =
Set[SCRIPT_BOPOMOFO, SCRIPT_HAN].freeze
KOREAN_SCRIPTS =
Set[SCRIPT_HAN, SCRIPT_HANGUL].freeze
APPROVED_SCRIPT_COMBINATIONS =

Certain combinations of Unicode Scripts are okay

[
  Set[*JAPANESE_SCRIPTS, SCRIPT_LATIN].freeze,
  Set[*CHINESE_SCRIPTS, SCRIPT_LATIN].freeze,
  Set[*KOREAN_SCRIPTS, SCRIPT_LATIN].freeze
].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(address) ⇒ HomographDetector

Returns a new instance of HomographDetector.



36
37
38
# File 'lib/homograph_detector.rb', line 36

# Builds a detector for a single address.
#
# The argument is stored as given; no validation or normalization happens
# at construction time.
#
# @param address [Object] the value to inspect
#   (NOTE(review): presumably a URL or domain String — confirm against callers)
def initialize(address)
  @address = address
end

Instance Attribute Details

#address ⇒ Object (readonly)

Returns the value of attribute address.



34
35
36
# File 'lib/homograph_detector.rb', line 34

# Read-only accessor for the address under inspection.
#
# @return [Object] the current value of @address
def address
  @address
end

Class Method Details

.homograph_attack?(address) ⇒ Boolean

Returns:

  • (Boolean)


40
41
42
# File 'lib/homograph_detector.rb', line 40

# Convenience entry point: wraps +address+ in a new detector instance and
# asks that instance whether the address looks like a homograph attack.
#
# @param address [Object] passed unchanged to the constructor
# @return [Boolean] whatever the instance-level #homograph_attack? returns
def self.homograph_attack?(address)
  detector = new(address)
  detector.homograph_attack?
end

Instance Method Details

#homograph_attack? ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/homograph_detector.rb', line 44

# Decides whether the address looks like a homograph attack.
#
# The checks run in a fixed order and stop at the first one that settles
# the answer:
#
# 1. Unknown scripts (domain_scripts is nil)      -> false
# 2. An approved combination of Unicode Scripts   -> false
# 3. A sketchy combination of Unicode Scripts     -> true
# 4. Cyrillic characters that are all confusable
#    with Latin characters                        -> true
# 5. Anything else                                -> false
#
# @return [Boolean] true when the domain is considered suspicious
def homograph_attack?
  scripts_unknown = domain_scripts.nil?

  if scripts_unknown || domain_has_approved_combination_of_scripts?
    # Either nothing could be determined about the domain's scripts, or the
    # mix of scripts is one that is explicitly allowed.
    false
  elsif domain_has_sketchy_combination_of_scripts? ||
        domain_has_confusable_cyrillic_chars?
    # Either the mix of scripts is problematic, or the domain is made up
    # entirely of Cyrillic characters that can each pass for a Latin one.
    true
  else
    false
  end
end