Class: LineReader

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/textutils/reader/line_reader.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(arg) ⇒ LineReader

Returns a new instance of LineReader.



70
71
72
73
74
75
76
77
78
79
# File 'lib/textutils/reader/line_reader.rb', line 70

# Sets up the reader with the text to iterate over.
#
# arg - either an options hash holding the text under the :text key,
#       or (deprecated) a String with the path of a file to read.
def initialize( arg )
  unless arg.is_a?( String )
    ## anything that is not a string is treated as an options hash
    @text = arg[:text]
    return
  end

  ## old style (deprecated) - filepath passed in as a string
  logger.info "LineReader.new - deprecated API - use LineReader.from_file() instead"
  @text = File.read_utf8( arg )
end

Class Method Details

.from_file(path) ⇒ Object



58
59
60
61
62
63
# File 'lib/textutils/reader/line_reader.rb', line 58

# Builds a reader from the content of the file at path.
#
# nb: the file is read via File.read_utf8, which assumes/enforces utf-8
#     encoding (with or without BOM - byte order mark)
#     - see textutils/utils.rb
def self.from_file( path )
  from_string( File.read_utf8( path ) )
end

.from_string(text) ⇒ Object



65
66
67
# File 'lib/textutils/reader/line_reader.rb', line 65

# Wraps an already loaded text in a new LineReader.
def self.from_string( text )
  opts = { text: text }
  LineReader.new( opts )
end

.from_zip(zip_file, entry_path) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/textutils/reader/line_reader.rb', line 31

# Builds a reader from the content of a single entry inside a zip archive.
#
# zip_file   - archive object; must respond to find_entry( path )
#              (presumably a rubyzip archive - confirm against callers)
# entry_path - path of the entry inside the archive
#
# The raw bytes are tagged as utf-8 (no transcoding) and handed to from_string.
#
# Raises ArgumentError if the archive has no entry at entry_path.
def self.from_zip( zip_file, entry_path )
  entry = zip_file.find_entry( entry_path )
  ## fail with a clear message instead of a NoMethodError on nil further down
  raise ArgumentError, "zip entry not found: #{entry_path}" unless entry

  ## todo/fix:
  ##  clean/preprocess lines
  ##  e.g. CR/LF (\r\n) to LF (e.g. \n)
  stream = entry.get_input_stream
  begin
    text = stream.read
  ensure
    ## always release the stream once we are done with it (do not wait for GC)
    stream.close if stream.respond_to?( :close )
  end

  ## NOTE: needs logger ref; only available in instance methods; use global logger for now
  logger = LogUtils::Logger.root
  logger.debug "text.encoding.name (before): #{text.encoding.name}"
  #####
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  # for now "hardcoded" to utf8 - what else can we do?
  # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
  text = text.force_encoding( Encoding::UTF_8 )
  logger.debug "text.encoding.name (after): #{text.encoding.name}"

  ## todo:
  # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
  ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

  self.from_string( text )
end

Instance Method Details

#each_line ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/textutils/reader/line_reader.rb', line 81

# Yields every content line of the text, cleaned up.
#
# Comment-only lines and blank lines are skipped. For all other lines a
# trailing end-of-line comment is cut off and leading/trailing whitespace
# is removed before the line is passed to the block.
#
# Returns an Enumerator over the cleaned lines when called without a block.
def each_line
  ## without a block the yield below would raise a LocalJumpError
  return enum_for( :each_line ) unless block_given?

  @text.each_line do |line|

    # comments allow:
    # 1) #####  (shell/ruby style)
    # 2) --  comment here (haskell/?? style)
    # 3) % comment here (tex/latex style)

    if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
      # skip comments and do NOT copy to result (keep comments secret!)
      logger.debug 'skipping comment line'
      next
    end

    if line =~ /^\s*$/
      # skip blank line
      logger.debug 'skipping blank line'
      next
    end

    # pass 1) remove possible trailing eol comment
    ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
    ## becomes -> nyc, New York
    ## note: the '#' must be preceded by whitespace to count as an eol comment

    line = line.sub( /\s+#.+$/, '' )

    # pass 2) remove leading and trailing whitespace

    line = line.strip

    yield( line )
  end # each lines
end