Class: Bioinform::StringParser

Inherits:
Parser show all
Includes:
MultipleMotifsParser
Defined in:
lib/bioinform/parsers/string_parser.rb

Direct Known Subclasses

JasparParser, StringFantomParser

Instance Attribute Summary collapse

Attributes inherited from Parser

#input

Instance Method Summary collapse

Methods inherited from Parser

array_from_acgt_hash, choose, need_tranpose?, normalize_hash_keys, parse, #parse, parse!, transform_input, try_convert_to_array, valid_matrix?

Methods included from Parser::SingleMotifParser

#each, included

Constructor Details

#initialize(input) ⇒ StringParser

Returns a new instance of StringParser.

Raises:

  • (ArgumentError)


10
11
12
13
14
# File 'lib/bioinform/parsers/string_parser.rb', line 10

# Builds a parser over a String input.
#
# Rejects non-String input, forwards the argument to the superclass
# initializer, then wraps `input.multiline_squish` in a StringScanner
# stored in @scanner.
#
# @param input [String] text to be parsed
# @raise [ArgumentError] if +input+ is not a String
def initialize(input)
  unless input.is_a?(String)
    raise ArgumentError, 'StringParser should be initialized with a String'
  end

  super
  squished_input = input.multiline_squish
  @scanner = StringScanner.new(squished_input)
end

Instance Attribute Details

#row_acgt_markers ⇒ Object (readonly)

Returns the value of attribute row_acgt_markers.



8
9
10
# File 'lib/bioinform/parsers/string_parser.rb', line 8

# Reader for @row_acgt_markers.
#
# The flag is assigned by #parse_matrix when its lookahead finds four
# consecutive lines beginning with A, C, G and T; #parse! uses it to decide
# whether to transpose the parsed matrix.
#
# @return [true, nil] truthy when the row markers were detected
def row_acgt_markers
  @row_acgt_markers
end

#scanner ⇒ Object (readonly)

Returns the value of attribute scanner.



8
9
10
# File 'lib/bioinform/parsers/string_parser.rb', line 8

# Reader for @scanner, the StringScanner created in #initialize over
# `input.multiline_squish`.
#
# @return [StringScanner]
def scanner
  @scanner
end

Instance Method Details

#header_pat ⇒ Object



20
21
22
# File 'lib/bioinform/parsers/string_parser.rb', line 20

# Pattern for a motif header line: an optional '>', optional whitespace,
# then a run of non-whitespace characters captured as +name+, terminated by
# a newline. Names containing whitespace do not match.
#
# @return [Regexp]
def header_pat
  />?\s*(?<name>\S+)\n/
end

#number_pat ⇒ Object



16
17
18
# File 'lib/bioinform/parsers/string_parser.rb', line 16

# Pattern for a single numeric token: optional sign, one or more digits,
# an optional fractional part, and an optional exponent (e/E, optional sign,
# one to three digits). A leading-dot form such as ".5" does not match.
#
# @return [Regexp]
def number_pat
  /[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?/
end

#parse! ⇒ Object



59
60
61
62
63
64
65
66
# File 'lib/bioinform/parsers/string_parser.rb', line 59

# Parses one motif starting at the scanner's current position.
#
# Steps, in order: skip leading whitespace, read the optional name line,
# skip the optional "A C G T" header line, read the matrix rows, and
# transpose them when #row_acgt_markers is set (rows were labelled A/C/G/T).
# The rows are then handed to Parser.parse! and the name is assigned to the
# object it returns.
#
# @return [Object] the result of Parser.parse! with +name+ set
def parse!
  scan_any_spaces
  motif_name = parse_name
  parse_acgt_header

  rows = parse_matrix
  rows = rows.transpose if row_acgt_markers

  parsed = Parser.parse!(rows)
  parsed.name = motif_name
  parsed
end

#parse_acgt_header ⇒ Object



55
56
57
# File 'lib/bioinform/parsers/string_parser.rb', line 55

# Consumes an optional column-header line made of the letters A, C, G, T
# (case-insensitive, optionally separated by whitespace) ending in a newline.
#
# @return [String, nil] the consumed text, or nil if no such line is at the
#   scanner's current position (the scanner is then left untouched)
def parse_acgt_header
  acgt_line = /A\s*C\s*G\s*T\s*\n/i
  scanner.scan(acgt_line)
end

#parse_matrix ⇒ Object



46
47
48
49
50
51
52
53
# File 'lib/bioinform/parsers/string_parser.rb', line 46

# Reads consecutive matrix rows from the scanner's current position.
#
# Before consuming anything, looks ahead for four lines that start with
# A, C, G and T (in that order) and records the outcome in
# @row_acgt_markers. Rows are then pulled with #scan_row until it returns a
# falsy value, and each row string is converted with #split_row.
#
# @return [Array] one element per scanned row, in input order; empty when
#   no row matches at the current position
def parse_matrix
  # Reassign the flag on every call. Previously it was only ever set to true,
  # so parsing a second matrix without row markers on the same instance kept
  # the stale flag and #parse! transposed that matrix by mistake.
  @row_acgt_markers = scanner.check(/A.*\nC.*\nG.*\nT.*\n?/) ? true : nil

  matrix = []
  while (row_string = scan_row)
    matrix << split_row(row_string)
  end
  matrix
end

#parse_name ⇒ Object



41
42
43
44
# File 'lib/bioinform/parsers/string_parser.rb', line 41

# Tries to read a header line (see #header_pat) via the scanner's
# `advanced_scan` and extracts the motif name from it.
#
# @return [String, nil] the captured +name+ group, or the falsy value
#   returned by `advanced_scan` when no header is present
def parse_name
  header = scanner.advanced_scan(header_pat)
  return header unless header

  header[:name]
end

#row_pat ⇒ Object



24
25
26
# File 'lib/bioinform/parsers/string_parser.rb', line 24

# Pattern for one matrix row: an optional nucleotide label (A, C, G or T,
# optionally followed by ':' or '|' and whitespace), then numbers matching
# #number_pat separated by single spaces and captured as +row+, then an
# optional trailing newline.
#
# @return [Regexp]
def row_pat
  number = number_pat
  /([ACGT]\s*[:|]?\s*)?(?<row>(#{number} )*#{number})\n?/
end

#scan_any_spaces ⇒ Object



37
38
39
# File 'lib/bioinform/parsers/string_parser.rb', line 37

# Consumes a run of whitespace at the scanner's current position, if any.
#
# @return [String, nil] the consumed whitespace, or nil when the next
#   character is not whitespace
def scan_any_spaces
  whitespace_run = /\s+/
  scanner.scan(whitespace_run)
end

#scan_row ⇒ Object



28
29
30
31
# File 'lib/bioinform/parsers/string_parser.rb', line 28

# Tries to read one matrix row (see #row_pat) via the scanner's
# `advanced_scan` and returns only the numeric part of the row.
#
# @return [String, nil] the captured +row+ group, or the falsy value
#   returned by `advanced_scan` when no row is present
def scan_row
  row_match = scanner.advanced_scan(row_pat)
  return row_match unless row_match

  row_match[:row]
end

#scanner_reset ⇒ Object



68
69
70
# File 'lib/bioinform/parsers/string_parser.rb', line 68

# Calls `reset` on the underlying scanner (see #scanner), so that a
# following parse starts from the beginning of the input again.
def scanner_reset
  scanner.reset
end

#split_row(row_string) ⇒ Object



33
34
35
# File 'lib/bioinform/parsers/string_parser.rb', line 33

# Converts the textual form of a row into numbers.
#
# The string is split on whitespace and every token is converted with
# String#to_f, so a token that is not numeric becomes 0.0 rather than
# raising.
#
# @param row_string [String] whitespace-separated numeric tokens
# @return [Array<Float>] one Float per token, in order
def split_row(row_string)
  tokens = row_string.split
  tokens.map { |token| token.to_f }
end