Class: Linguist::Heuristics

Inherits:
Object
  • Object
show all
Defined in:
lib/linguist/heuristics.rb

Overview

A collection of simple heuristics that can be used to better analyze languages.

Constant Summary collapse

ACTIVE =
true

Class Method Summary collapse

Class Method Details

.active? ⇒ Boolean

Returns:

  • (Boolean)


154
155
156
# File 'lib/linguist/heuristics.rb', line 154

# Public: Report whether the heuristics are switched on.
#
# Returns true when the ACTIVE constant is truthy, false otherwise.
def self.active?
  ACTIVE ? true : false
end

.disambiguate_asc(data) ⇒ Object



138
139
140
141
142
# File 'lib/linguist/heuristics.rb', line 138

# Internal: Look for AsciiDoc in the given data.
#
# data - String data to analyze.
#
# A line that begins with one or more "=" characters followed by whitespace
# is treated as evidence of AsciiDoc.
#
# Returns an Array containing Language["AsciiDoc"], or [] when no such line
# is found.
def self.disambiguate_asc(data)
  return [Language["AsciiDoc"]] if /^=+(\s|\n)/.match(data)

  []
end

.disambiguate_c(data) ⇒ Object

.h extensions are ambiguous between C, C++, and Objective-C. As a shortcut, we look for Objective-C, and now C++ too!

Returns an array of Languages or []



50
51
52
53
54
55
56
57
58
# File 'lib/linguist/heuristics.rb', line 50

# Internal: Shortcut check for Objective-C and C++ markers in the given data.
#
# data - data to analyze; it only needs to respond to include?.
#
# "@interface" is checked first, so it wins when both markers are present.
#
# Returns an Array with Language["Objective-C"] or Language["C++"], or []
# when neither marker is present.
def self.disambiguate_c(data)
  return [Language["Objective-C"]] if data.include?("@interface")
  return [Language["C++"]] if data.include?("#include <cstdint>")

  []
end

.disambiguate_cl(data) ⇒ Object



100
101
102
103
104
105
106
107
108
# File 'lib/linguist/heuristics.rb', line 100

# Internal: Choose between Common Lisp and OpenCL for the given data.
#
# data - String data to analyze.
#
# A "(defun " form takes precedence. Otherwise a C-style comment opener
# followed by a space, or a line starting with "}", selects OpenCL.
#
# Returns an Array with Language["Common Lisp"] or Language["OpenCL"], or []
# when neither check succeeds.
def self.disambiguate_cl(data)
  return [Language["Common Lisp"]] if data.include?("(defun ")
  return [Language["OpenCL"]] if /\/\* |\/\/ |^\}/.match(data)

  []
end

.disambiguate_ecl(data) ⇒ Object



70
71
72
73
74
75
76
77
78
# File 'lib/linguist/heuristics.rb', line 70

# Internal: Choose between Prolog and ECL for the given data.
#
# data - data to analyze; it only needs to respond to include?.
#
# ":-" is checked before ":=", so Prolog wins when both appear.
#
# Returns an Array with Language["Prolog"] or Language["ECL"], or [] when
# neither operator is present.
def self.disambiguate_ecl(data)
  return [Language["Prolog"]] if data.include?(":-")
  return [Language["ECL"]] if data.include?(":=")

  []
end

.disambiguate_f(data) ⇒ Object



144
145
146
147
148
149
150
151
152
# File 'lib/linguist/heuristics.rb', line 144

# Internal: Choose between Forth and FORTRAN for the given data.
#
# data - String data to analyze.
#
# A line starting with ": " selects Forth. Failing that, FORTRAN is selected
# by a line starting with "c" or "*" followed by a non-letter, or by a line
# starting with six spaces and "subroutine" (both matched case-insensitively).
#
# Returns an Array with Language["Forth"] or Language["FORTRAN"], or [] when
# neither pattern matches.
def self.disambiguate_f(data)
  return [Language["Forth"]] if /^: /.match(data)
  return [Language["FORTRAN"]] if /^([c*][^a-z]|      subroutine\s)/i.match(data)

  []
end

.disambiguate_hack(data) ⇒ Object



117
118
119
120
121
122
123
124
125
# File 'lib/linguist/heuristics.rb', line 117

# Internal: Choose between Hack and PHP for the given data.
#
# data - String data to analyze.
#
# Returns an Array with Language["Hack"] when data contains "<?hh",
# Language["PHP"] when it contains a "<?" open tag followed by any character
# other than "h", or [] when no open tag is found.
def self.disambiguate_hack(data)
  matches = []
  if data.include?("<?hh")
    matches << Language["Hack"]
  elsif /<\?[^h]/.match(data)
    # The "?" must be escaped: left bare it makes the "<" optional, so the
    # pattern would accept any character other than "h" and report PHP for
    # almost every input.
    matches << Language["PHP"]
  end
  matches
end

.disambiguate_pl(data) ⇒ Object



60
61
62
63
64
65
66
67
68
# File 'lib/linguist/heuristics.rb', line 60

# Internal: Choose between Perl and Prolog for the given data.
#
# data - data to analyze; it only needs to respond to include?.
#
# "use strict" is checked before ":-", so Perl wins when both appear.
#
# Returns an Array with Language["Perl"] or Language["Prolog"], or [] when
# neither marker is present.
def self.disambiguate_pl(data)
  return [Language["Perl"]] if data.include?("use strict")
  return [Language["Prolog"]] if data.include?(":-")

  []
end

.disambiguate_pro(data) ⇒ Object



80
81
82
83
84
85
86
87
88
# File 'lib/linguist/heuristics.rb', line 80

# Internal: Choose between Prolog and IDL for the given data.
#
# data - data to analyze; it only needs to respond to include?.
#
# Returns a one-element Array: Language["Prolog"] when data contains ":-",
# otherwise Language["IDL"].
def self.disambiguate_pro(data)
  [data.include?(":-") ? Language["Prolog"] : Language["IDL"]]
end

.disambiguate_r(data) ⇒ Object



110
111
112
113
114
115
# File 'lib/linguist/heuristics.rb', line 110

# Internal: Look for Rebol and R in the given data.
#
# data - String data to analyze.
#
# The two checks are independent, so both languages can be reported at once
# (Rebol first).
#
# Returns an Array holding Language["Rebol"] when the word "Rebol" appears
# (case-insensitively) and Language["R"] when "<-" appears; [] when neither
# does.
def self.disambiguate_r(data)
  rebol = /\bRebol\b/i.match(data) ? [Language["Rebol"]] : []
  r_lang = data.include?("<-") ? [Language["R"]] : []
  rebol + r_lang
end

.disambiguate_sc(data) ⇒ Object



127
128
129
130
131
132
133
134
135
136
# File 'lib/linguist/heuristics.rb', line 127

# Internal: Look for SuperCollider and Scala in the given data.
#
# data - String data to analyze.
#
# Each language has its own list of patterns, and one hit is enough to report
# it. The two languages are checked independently, so both may be returned
# (SuperCollider first).
#
# Returns an Array of the matching Languages, or [] when nothing matches.
def self.disambiguate_sc(data)
  supercollider_patterns = [
    /\^(this|super)\./,
    /^\s*(\+|\*)\s*\w+\s*{/,
    /^\s*~\w+\s*=\./
  ]
  scala_patterns = [
    /^\s*import (scala|java)\./,
    /^\s*val\s+\w+\s*=/,
    /^\s*class\b/
  ]

  found = []
  found << Language["SuperCollider"] if supercollider_patterns.any? { |pattern| pattern.match(data) }
  found << Language["Scala"] if scala_patterns.any? { |pattern| pattern.match(data) }
  found
end

.disambiguate_ts(data) ⇒ Object



90
91
92
93
94
95
96
97
98
# File 'lib/linguist/heuristics.rb', line 90

# Internal: Choose between XML and TypeScript for the given data.
#
# data - data to analyze; it only needs to respond to include?.
#
# Returns a one-element Array: Language["XML"] when data contains a closing
# "</translation>" tag, otherwise Language["TypeScript"].
def self.disambiguate_ts(data)
  [data.include?("</translation>") ? Language["XML"] : Language["TypeScript"]]
end

.find_by_heuristics(data, languages) ⇒ Object

Public: Given an array of String language names, apply heuristics against the given data and return an array of matching languages, or nil.

data - Array of tokens or String data to analyze. languages - Array of language name Strings to restrict to.

Returns an array of Languages (possibly empty), or nil when heuristics are not active.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/linguist/heuristics.rb', line 14

# Public: Apply the disambiguation heuristics relevant to a set of candidate
# language names.
#
# data      - Array of tokens or String data to analyze.
# languages - Array of language name Strings to restrict to.
#
# Each entry in the table below pairs a set of language names with a
# disambiguation method. An entry applies when every name in `languages`
# belongs to its set. Entries are consulted in order and the last applicable
# one decides the result. An empty `languages` Array satisfies every entry.
#
# Returns the Array produced by the last applicable disambiguation method,
# [] when no entry applies, or nil when heuristics are not active.
def self.find_by_heuristics(data, languages)
  return unless active?

  dispatch = [
    [["Perl", "Prolog"],         :disambiguate_pl],
    [["ECL", "Prolog"],          :disambiguate_ecl],
    [["IDL", "Prolog"],          :disambiguate_pro],
    [["Common Lisp", "OpenCL"],  :disambiguate_cl],
    [["Hack", "PHP"],            :disambiguate_hack],
    [["Scala", "SuperCollider"], :disambiguate_sc],
    [["AsciiDoc", "AGS Script"], :disambiguate_asc],
    [["FORTRAN", "Forth"],       :disambiguate_f]
  ]

  # Fold over the table so a later applicable entry overrides an earlier one.
  dispatch.inject([]) do |result, (names, handler)|
    languages.all? { |l| names.include?(l) } ? send(handler, data) : result
  end
end