Class: String

Inherits:
Object show all
Defined in:
lib/arachni/ruby/string.rb

Overview

Overloads the String class.

Author:

Constant Summary collapse

HAS_HTML_TAG_CACHE =
Arachni::Support::Cache::LeastRecentlyPushed.new( 1_000 )
BINARY_CACHE =
Arachni::Support::Cache::LeastRecentlyPushed.new( 1_000 )

Instance Method Summary collapse

Instance Method Details

#binary? ⇒ Boolean

Returns:

  • (Boolean)


179
180
181
182
183
184
185
# File 'lib/arachni/ruby/string.rb', line 179

# Heuristically decides whether the string looks like binary (non-text) data.
#
# The check is routed through `BINARY_CACHE.fetch`, keyed by the string
# itself; presumably the cache returns the block's value (fresh or
# memoized) -- confirm against the cache implementation.
#
# @return [Integer, Boolean]
#   Value of the check: the index of the first NUL byte when there is one
#   (truthy), otherwise whether more than 30% of the characters are
#   non-printable ASCII.
def binary?
    # Heuristic borrowed from YAML.
    BINARY_CACHE.fetch( self ) do
        nul_position = index( "\x00" )

        # String#count intersects its arguments: ASCII characters that are
        # neither printable (" " through "~") nor tab, CR or LF.
        nul_position ||
            count( "\x00-\x7F", "^ -~\t\r\n" ).fdiv( length ) > 0.3
    end
end

#diff_ratio(other) ⇒ Float

Calculates the difference ratio (at a word level) between self and other

Parameters:

Returns:

  • (Float)

    0.0 (identical strings) to 1.0 (completely different)



120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/arachni/ruby/string.rb', line 120

# Calculates the difference ratio (at a word level) between `self` and
# `other`.
#
# The ratio is the share of distinct words (as returned by `words( true )`)
# which appear in only one of the two strings.
#
# @param [String] other
#   String to compare against.
#
# @return [Float]
#   `0.0` (identical strings) to `1.0` (completely different).
def diff_ratio( other )
    return 0.0 if self == other
    return 1.0 if empty? || other.empty?

    s_words = words( true )
    o_words = other.words( true )

    union = (s_words | o_words).size

    # Neither string contains a single word (only punctuation, whitespace,
    # etc.) yet they are not equal. Dividing by the empty union would yield
    # NaN, so treat them as completely different -- same as an empty string
    # compared against a non-empty one above.
    return 1.0 if union.zero?

    common = (s_words & o_words).size
    (union - common).fdiv( union )
end

#escape_double_quote ⇒ Object



57
58
59
# File 'lib/arachni/ruby/string.rb', line 57

# Builds a copy of the string in which every double quote is preceded by a
# backslash.
#
# @return [String]
#   Escaped copy; the receiver is left untouched.
def escape_double_quote
    # Block form: the replacement is used verbatim for each match.
    gsub( '"' ) { '\"' }
end

#has_html_tag?(tag, attributes = nil) ⇒ Boolean

Parameters:

  • tag (String)

    Tag name to look for, in lower case.

  • attributes (String, Regexp) (defaults to: nil)

    Content to look for in attributes, in lower case.

Returns:

  • (Boolean)


35
36
37
38
39
40
41
42
43
# File 'lib/arachni/ruby/string.rb', line 35

# Checks, with a regular expression, whether the string contains an opening
# HTML tag whose name starts with `tag` and, optionally, whose remainder
# contains `attributes`.
#
# The check is routed through `HAS_HTML_TAG_CACHE.fetch`, keyed by
# `[self, tag, attributes]`; presumably the cache returns the block's value
# (fresh or memoized) -- confirm against the cache implementation.
#
# NOTE: `tag` and `attributes` are interpolated into the pattern as-is and
# the tag name is not terminated by a boundary, so e.g. `'a'` also matches
# `<abbr>`.
#
# @param [String] tag
#   Tag name to look for, in lower case.
# @param [String, Regexp] attributes
#   Content to look for in attributes, in lower case.
#
# @return [Integer, nil]
#   Value of the pattern match: index of the first match (truthy) or `nil`.
def has_html_tag?( tag, attributes = nil )
    cache_key = [self, tag, attributes]

    HAS_HTML_TAG_CACHE.fetch( cache_key ) do
        # Only when attributes were given, allow anything between the tag
        # name and the attribute content.
        attribute_pattern = attributes ? ".*#{attributes}" : attributes

        self =~ /<\s*#{tag}#{attribute_pattern}.*?>/mi
    end
end

#longest_word ⇒ String

Returns Longest word.

Returns:



153
154
155
# File 'lib/arachni/ruby/string.rb', line 153

# Picks the longest word of the string, considering only the strict words
# returned by `words( true )`.
#
# @return [String, nil]
#   Longest word, or `nil` when the string contains no words. Which word is
#   picked among several of equal length is not specified.
def longest_word
    by_length = words( true ).sort_by( &:size )
    by_length.last
end

#persistent_hash ⇒ Integer

Returns an integer with the property that:

If str1 == str2 then str1.persistent_hash == str2.persistent_hash.

It basically has the same function as Ruby's #hash method, but does not use a random seed per Ruby process -- making it suitable for use in distributed systems.

Returns:

  • (Integer)

    An integer with the property that:

    If str1 == str2 then str1.persistent_hash == str2.persistent_hash.

    It basically has the same function as Ruby's #hash method, but does not use a random seed per Ruby process -- making it suitable for use in distributed systems.



165
166
167
# File 'lib/arachni/ruby/string.rb', line 165

# Computes a hash of the string that does not depend on a per-process seed,
# by taking its CRC32 checksum.
#
# @return [Integer]
#   CRC32 checksum of the string; equal strings yield equal values.
def persistent_hash
    Zlib.crc32( self )
end

#rdiff(other) ⇒ String

Gets the reverse diff between self and other on a word level, i.e. the parts of self which also appear in other.

str = <<END
This is the first test.
Not really sure what else to put here...
END

str2 = <<END
This is the second test.
Not really sure what else to put here...
Boo-Yah!
END

str.rdiff( str2 )
# => "This is the  test.\nNot really sure what else to put here...\n"
# (note the two spaces: only the word "first" is dropped, both surrounding space tokens are kept)

Parameters:

Returns:



104
105
106
107
108
109
110
111
112
# File 'lib/arachni/ruby/string.rb', line 104

# Gets the reverse diff between `self` and `other` on a word level: the
# tokens of `self` (as returned by `words`, boundary characters included)
# which also occur in `other`, joined back together in their original order.
#
# @param [String] other
#   String to compare against.
#
# @return [String]
#   `self` itself when both strings are equal, otherwise a new string made
#   of the tokens shared with `other`.
def rdiff( other )
    return self if self == other

    own_tokens = words

    # Tokens which exist only in self...
    exclusive_tokens = own_tokens - other.words

    # ...are dropped, leaving what has not changed (the rdiff, so to speak).
    (own_tokens - exclusive_tokens).join
end

#recode ⇒ Object



175
176
177
# File 'lib/arachni/ruby/string.rb', line 175

# Non-destructive counterpart of `recode!`: re-encodes a duplicate of the
# string and leaves the receiver untouched.
#
# @return [String]
#   Whatever `recode!` returns for the duplicate.
def recode
    copy = dup
    copy.recode!
end

#recode! ⇒ Object



169
170
171
172
173
# File 'lib/arachni/ruby/string.rb', line 169

# Re-labels the string as UTF-8 in place and then re-encodes it to UTF-8,
# asking `encode!` to replace invalid and undefined sequences.
#
# @return [String]
#   `self`, modified in place.
def recode!
    replacement_policy = { invalid: :replace, undef: :replace }

    force_encoding( 'utf-8' ).encode!( 'utf-8', **replacement_policy )
    self
end

#scan_in_groups(regexp) ⇒ Hash

Returns Grouped matches.

Parameters:

  • regexp (Regexp)

    Regular expression with named captures.

Returns:

  • (Hash)

    Grouped matches.

Raises:

  • (ArgumentError)


24
25
26
27
28
29
# File 'lib/arachni/ruby/string.rb', line 24

# Extracts the named captures of the first match of `regexp` in the string.
#
# @param [Regexp] regexp
#   Regular expression with named captures.
#
# @return [Hash<String, String>]
#   Grouped matches (capture name => captured text) of the first match.
#   Groups which captured nothing -- either an empty string or because they
#   did not participate in the match -- are left out. Empty when `regexp`
#   does not match at all.
#
# @raise [ArgumentError]
#   When `regexp` has no named captures.
def scan_in_groups( regexp )
    names = regexp.names
    raise ArgumentError, 'Regexp does not contain any names.' if names.empty?

    # Only the first match is of interest, no need to scan the whole string.
    match = regexp.match( self )
    return {} if !match

    names.zip( match.captures ).each_with_object( {} ) do |(name, value), groups|
        # Optional groups which did not participate in the match are nil.
        next if value.nil? || value.empty?

        groups[name] = value
    end
end

#shortest_word ⇒ String

Returns Shortest word.

Returns:



147
148
149
# File 'lib/arachni/ruby/string.rb', line 147

# Picks the shortest word of the string, considering only the strict words
# returned by `words( true )`.
#
# @return [String, nil]
#   Shortest word, or `nil` when the string contains no words. Which word is
#   picked among several of equal length is not specified.
def shortest_word
    by_length = words( true ).sort_by( &:size )
    by_length.first
end

#sub_in_groups(regexp, substitutions) ⇒ String

Returns Updated copy of self.

Parameters:

  • regexp (Regexp)

    Regular expression with named captures.

  • substitutions (Hash)

    Hash (with capture names as keys) with which to replace the regexp matches.

Returns:

  • (String)

    Updated copy of self.



53
54
55
# File 'lib/arachni/ruby/string.rb', line 53

# Non-destructive counterpart of `sub_in_groups!`: performs the
# substitutions on a duplicate of the string.
#
# @param [Regexp] regexp
#   Regular expression with named captures.
# @param [Hash] substitutions
#   Hash (with capture names as keys) with which to replace the `regexp`
#   matches.
#
# @return [String]
#   Updated copy of self; an unmodified copy when `regexp` does not match.
def sub_in_groups( regexp, substitutions )
    copy = dup

    # The bang variant returns nil when there is no match, so its return
    # value cannot be relied upon -- always hand back the copy.
    copy.sub_in_groups!( regexp, substitutions )
    copy
end

#sub_in_groups!(regexp, updates) ⇒ String

Returns Updated self.

Parameters:

  • regexp (Regexp)

    Regular expression with named captures.

  • updates (Hash)

    Hash (with capture names as keys) with which to replace the regexp matches.

Returns:



69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/arachni/ruby/string.rb', line 69

# Replaces, in place, the text captured by the named groups of the first
# match of `regexp` with the corresponding values of `updates`.
#
# @param [Regexp] regexp
#   Regular expression with named captures.
# @param [Hash] updates
#   Hash (with capture names as keys) with which to replace the `regexp`
#   matches. Keys whose group exists but did not participate in the match
#   are skipped.
#
# @return [String, nil]
#   Updated `self`, or `nil` when `regexp` does not match.
#
# @raise [IndexError]
#   From `MatchData#offset`, when a key of `updates` does not name a group
#   of `regexp`.
def sub_in_groups!( regexp, updates )
    match = regexp.match( self )
    return if !match

    # Pair every key with the [start, end] offsets of its group.
    spans = updates.keys.map { |key| [key, match.offset( key )] }

    # Optional groups which did not participate have [nil, nil] offsets;
    # these can neither be ordered nor used as a range to replace.
    spans.reject! { |_, span| span.first.nil? }

    # Substitute from the rightmost group to the leftmost one, so that the
    # offsets of the groups still to be processed remain valid.
    spans.sort_by { |_, span| span }.reverse_each do |key, span|
        self[span.first...span.last] = updates[key]
    end

    self
end

#words(strict = false) ⇒ Array<String>

Returns the words in self.

Parameters:

  • strict (Bool) (defaults to: false)

    Include only words, no boundary characters (like spaces, etc.).

Returns:



139
140
141
142
143
# File 'lib/arachni/ruby/string.rb', line 139

# Splits the string at word boundaries.
#
# @param [Bool] strict
#   Include only words, no boundary characters (like spaces, etc.), i.e.
#   keep only the tokens containing at least one word character.
#
# @return [Array<String>]
#   The words in self, in order of appearance; when not `strict`, the tokens
#   in between them are included as well.
def words( strict = false )
    tokens = split( /\b/ )
    return tokens if !strict

    tokens.grep( /\w/ )
end