Class: Polytrix::Documentation::CodeSegmenter

Inherits:
Object
  • Object
show all
Includes:
CommentStyles
Defined in:
lib/polytrix/documentation/code_segmenter.rb

Overview

This class was extracted from the [Rocco](http://rtomayko.github.com/rocco/) project, which was in turn based on [Docco](http://jashkenas.github.com/docco/).

Constant Summary collapse

# Baseline options; the constructor merges caller-supplied options over
# these. Frozen so the shared constant cannot be mutated by accident (the
# constructor uses the non-mutating `merge`, which returns a new hash).
DEFAULT_OPTIONS = {
  language: 'rb',
  comment_chars: '#'
}.freeze

Constants included from CommentStyles

Polytrix::Documentation::CommentStyles::COMMENT_STYLES, Polytrix::Documentation::CommentStyles::C_STYLE_COMMENTS

Instance Method Summary collapse

Methods included from CommentStyles

infer

Constructor Details

#initialize(options = {}) ⇒ CodeSegmenter

Returns a new instance of CodeSegmenter.



16
17
18
19
# File 'lib/polytrix/documentation/code_segmenter.rb', line 16

# Builds the effective option set by layering +options+ over
# DEFAULT_OPTIONS.
#
# When the resulting :comment_chars entry is a String it is replaced by the
# value returned from +generate_comment_chars+ (defined outside this
# listing; presumably it expands the marker into the hash form that the
# other methods index with :single / :multi / :heredoc -- confirm there).
#
# @param options [Hash] overrides for DEFAULT_OPTIONS
def initialize(options = {})
  @options = DEFAULT_OPTIONS.merge(options)
  return unless @options[:comment_chars].is_a?(String)

  @options[:comment_chars] = generate_comment_chars
end

Instance Method Details

#comment(lines) ⇒ Object



150
151
152
153
154
# File 'lib/polytrix/documentation/code_segmenter.rb', line 150

# Turns each entry of +lines+ into a single-line comment by prefixing it
# with the configured single-line comment marker and one space, then joins
# the results with newlines.
#
# @param lines [Array<String>] the text lines to comment out
# @return [String] the commented lines joined by "\n" ("" for no lines)
def comment(lines)
  commented = lines.map { |text| "#{@options[:comment_chars][:single]} #{text}" }
  commented.join("\n")
end

#normalize_leading_spaces(sections) ⇒ Object

Normalizes documentation whitespace by checking for leading whitespace, removing it, and then removing the same amount of whitespace from each succeeding line. That is:

def func():
  """
    Comment 1
    Comment 2
  """
  print "omg!"

should yield a comment block of `Comment 1\nComment 2` and code of `def func():\n  print "omg!"`.



137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/polytrix/documentation/code_segmenter.rb', line 137

# Strips the indentation of the first documentation line from every
# documentation line of each section.
#
# Each section is a +[docs, code]+ pair. The leading run of spaces/tabs on
# +docs[0]+ is taken as the indent, and that exact prefix is removed from
# each docs line that starts with it; lines that are indented less are left
# untouched. The code half of the pair is never modified.
#
# Note: the docs array of a section is replaced in place (+section[0]+ is
# reassigned), so the section objects passed in are modified.
#
# @param sections [Array<Array>] list of +[docs, code]+ pairs
# @return [Array<Array>] a new outer array holding the same section objects
def normalize_leading_spaces(sections)
  sections.map do |section|
    docs = section[0]
    if docs && docs.any?
      # A regexp literal is required here: inside a double-quoted string
      # "\s" is just a space character, which made tab indentation
      # invisible to the previous string-based pattern.
      indent = docs[0][/\A[ \t]+/]
      if indent
        # Escape the captured indent and anchor at the start of the string
        # so only a genuine leading prefix is removed.
        prefix = /\A#{Regexp.escape(indent)}/
        section[0] = docs.map { |line| line.sub(prefix, '') }
      end
    end
    section
  end
end

#segment(source_code) ⇒ Object

Parse the raw file source_code into a list of two-tuples. Each tuple has the form `[docs, code]` where both elements are arrays containing the raw lines parsed from the input file, comment characters stripped.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/polytrix/documentation/code_segmenter.rb', line 26

# Splits +source_code+ into sections, each a +[docs, code]+ pair of line
# arrays: +docs+ holds comment text with the comment markers removed and
# +code+ holds the lines that follow it. A new section is started whenever a
# comment line is encountered after some code has already been collected.
#
# Comment syntax is read from +@options[:comment_chars]+ using the keys
# +:single+, +:multi+ (+:start+, +:end+, optional +:middle+) and +:heredoc+;
# any of them may be nil, in which case that kind of line is not recognised.
#
# @param source_code [String] the raw text of a source file
# @return [Array<Array>] the sections after being passed through
#   +normalize_leading_spaces+
def segment(source_code) # rubocop:disable all
  sections, docs, code = [], [], []
  lines = source_code.split("\n")

  # The first line is ignored if it is a shebang line.  We also ignore the
  # PEP 263 encoding information in python sourcefiles, and the similar ruby
  # 1.9 syntax.
  lines.shift if lines[0] =~ /^\#\!/
  lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ &&
                 %w(python rb).include?(@options[:language])

  # To detect both block comments and single-line comments, we'll set
  # up a tiny state machine, and loop through each line of the file.
  # This requires an `in_comment_block` boolean, and a few regular
  # expressions for line tests.  We'll do the same for fake heredoc parsing.
  #
  # `in_heredoc` is false while outside a heredoc and holds the captured
  # terminator string while inside one.
  in_comment_block = false
  in_heredoc = false
  single_line_comment, block_comment_start, block_comment_mid, block_comment_end =
    nil, nil, nil, nil
  # Single-line comment: optional indentation, the marker, and at most one
  # whitespace character after it.
  unless @options[:comment_chars][:single].nil?
    single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\\s?")
  end
  # Block comments. The plain start/end patterns match a line holding only
  # the delimiter; the `_one_liner`, `_start_with` and `_end_with` variants
  # capture comment text that shares a line with the delimiter(s).
  # The patterns assigned only inside this `unless` (and `heredoc_start`
  # below) stay nil when the branch is skipped, which is why every use in
  # the loop is guarded with `pattern &&`.
  unless @options[:comment_chars][:multi].nil?
    block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*$")
    block_comment_end   = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$")
    block_comment_one_liner = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*(.*?)\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$") # rubocop:disable Metrics/LineLength
    block_comment_start_with = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*(.*?)$")
    block_comment_end_with = Regexp.new("\\s*(.*?)\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$")
    if @options[:comment_chars][:multi][:middle]
      block_comment_mid = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:middle])}\\s?")
    end
  end
  # Heredoc opener: the heredoc marker followed by a run of non-whitespace
  # characters at the end of the line; the captured run is the terminator.
  unless @options[:comment_chars][:heredoc].nil?
    heredoc_start = Regexp.new("#{Regexp.escape(@options[:comment_chars][:heredoc])}(\\S+)$")
  end
  lines.each do |line|
    # If we're currently in a comment block, check whether the line matches
    # the _end_ of a comment block or the _end_ of a comment block with a
    # comment.
    if in_comment_block
      if block_comment_end && line.match(block_comment_end)
        in_comment_block = false
      elsif block_comment_end_with && line.match(block_comment_end_with)
        in_comment_block = false
        # Keep the text that precedes the end delimiter, minus any
        # "middle" marker.
        docs << line.match(block_comment_end_with).captures.first
                      .sub(block_comment_mid || '', '')
      else
        # An ordinary line inside the block: strip the "middle" marker if
        # one is configured (substituting '' for '' is a no-op otherwise).
        docs << line.sub(block_comment_mid || '', '')
      end
    # If we're currently in a heredoc, we're looking for the end of the
    # heredoc, and everything it contains is code.
    elsif in_heredoc
      # The terminator must be the entire line; the terminator line itself
      # is still recorded as code.
      if line.match(Regexp.new("^#{Regexp.escape(in_heredoc)}$"))
        in_heredoc = false
      end
      code << line
    # Otherwise, check whether the line starts a heredoc. If so, note the end
    # pattern, and the line is code.  Otherwise check whether the line matches
    # the beginning of a block, or a single-line comment all on its lonesome.
    # In either case, if there's code, start a new section.
    #
    # The order of the tests matters: a one-line block comment is tried
    # before a bare block start, which is tried before a block start that
    # carries text, which is tried before a single-line comment.
    else
      if heredoc_start && line.match(heredoc_start)
        in_heredoc = Regexp.last_match[1]
        code << line
      elsif block_comment_one_liner && line.match(block_comment_one_liner)
        if code.any?
          sections << [docs, code]
          docs, code = [], []
        end
        docs << line.match(block_comment_one_liner).captures.first
      elsif block_comment_start && line.match(block_comment_start)
        in_comment_block = true
        if code.any?
          sections << [docs, code]
          docs, code = [], []
        end
      elsif block_comment_start_with && line.match(block_comment_start_with)
        in_comment_block = true
        if code.any?
          sections << [docs, code]
          docs, code = [], []
        end
        docs << line.match(block_comment_start_with).captures.first
      elsif single_line_comment && line.match(single_line_comment)
        if code.any?
          sections << [docs, code]
          docs, code = [], []
        end
        docs << line.sub(single_line_comment || '', '')
      else
        code << line
      end
    end
  end
  # Flush whatever is still pending as the final section.
  sections << [docs, code] if docs.any? || code.any?
  normalize_leading_spaces(sections)
end