Class: Sinew::Main

Inherits:
Object
  • Object
show all
Defined in:
lib/sinew/main.rb

Constant Summary collapse

CODER =
HTMLEntities.new

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ Main

Returns a new instance of Main.



14
15
16
17
# File 'lib/sinew/main.rb', line 14

# Build a Main from +options+.
#
# A shallow copy of +options+ is stored in @options. Unless the :test option
# is truthy, #_run (defined elsewhere) is invoked right away.
def initialize(options)
  @options = options.dup
  return if @options[:test]

  _run
end

Instance Attribute Details

#raw ⇒ Object

Returns the value of attribute raw.



12
13
14
# File 'lib/sinew/main.rb', line 12

# Reader for @raw, the text that #html hands to TextUtil.html_tidy.
# Returns nil if @raw has not been set.
def raw
  @raw
end

#uri ⇒ Object

Returns the value of attribute uri.



12
13
14
# File 'lib/sinew/main.rb', line 12

# Reader for @uri. Returns nil if @uri has not been set.
# NOTE(review): presumably the parsed form of the current request URL;
# it is assigned outside this view -- confirm.
def uri
  @uri
end

#url ⇒ Object

Returns the value of attribute url.



12
13
14
# File 'lib/sinew/main.rb', line 12

# Reader for @url. Returns nil if @url has not been set.
# NOTE(review): presumably the URL of the current request as a string;
# it is assigned outside this view -- confirm.
def url
  @url
end

Instance Method Details

#clean ⇒ Object



54
55
56
# File 'lib/sinew/main.rb', line 54

# Memoized result of passing #html through TextUtil.html_clean_from_tidy.
# The value is computed on first use and cached in @clean.
def clean
  return @clean if @clean

  @clean = TextUtil.html_clean_from_tidy(html)
end

#csv_emit(row, options = {}) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/sinew/main.rb', line 86

# Append one row to the CSV output and flush it.
#
# +row+ is indexed by the column keys. If no CSV file is open yet, the header
# is written first via #csv_header using the row's sorted keys. Each column
# value is passed through #_normalize; when @options[:verbose] is set, the
# non-empty normalized values are echoed to $stderr.
#
# +options+ is accepted but not used by this method.
# Returns the result of flushing the CSV.
def csv_emit(row, options = {})
  csv_header(row.keys.sort) unless @csv

  shown = {}
  values = @csv_keys.map do |key|
    _normalize(row[key], key).tap do |text|
      shown[key] = text unless text.empty?
    end
  end
  $stderr.puts shown.ai if @options[:verbose]
  @csv << values
  @csv.flush
end

#csv_header(*args) ⇒ Object

csv



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/sinew/main.rb', line 66

# Open the CSV output file and write the header row.
#
# Arguments are flattened first. If the first one is a String it is taken as
# the output file name, resolved against the directory of @options[:file]
# unless it starts with "/"; otherwise @options[:file] itself is used. A
# trailing extension is replaced with ".csv", and a name without an extension
# gets ".csv" appended. All remaining arguments become the column keys.
#
# Sets @path, @csv and @csv_keys, writes the keys as the first row, and
# announces the path through #_banner.
def csv_header(*args)
  args = args.flatten
  if args.first.is_a?(String)
    file = args.shift
    # Only a leading slash marks an absolute path (not one after a newline).
    file = "#{File.dirname(@options[:file])}/#{file}" unless file.start_with?("/")
  else
    file = @options[:file]
  end

  # Swap only the trailing extension. Replacing every occurrence of the
  # extension text would also rewrite directory names such as "job.rb/".
  ext = File.extname(file)
  file = ext.empty? ? "#{file}.csv" : "#{file.chomp(ext)}.csv"

  @path = file
  @csv = CSV.open(file, "wb")
  @csv_keys = args
  @csv << @csv_keys
  _banner("Writing to #{@path}...")
end

#get(url, params = nil) ⇒ Object



19
20
21
# File 'lib/sinew/main.rb', line 19

# Issue a GET request by delegating to #_http (defined elsewhere).
#
# +url+ and +params+ are passed through unchanged; +params+ defaults to nil.
# Returns whatever #_http returns.
def get(url, params = nil)
  _http(url, params, :get)
end

#html ⇒ Object

lazy accessors for cleaned up version



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/sinew/main.rb', line 31

# Memoized, tidied version of @raw.
#
# @raw is run through TextUtil.html_tidy. As a sanity check, the number of
# "<" characters before and after is compared: if fewer than 80% survive,
# tidy is retried on a copy of @raw with XML processing instructions
# ("<?...>") stripped. The retry is kept only if it preserves more than 80%;
# otherwise the first result is kept and a warning is issued.
def html
  return @html if @html

  tidied = TextUtil.html_tidy(@raw)
  tag_count = @raw.count("<")
  if tag_count > 1
    # is there a problem with tidy?
    kept = 100 * tidied.count("<") / tag_count
    if kept < 80
      # bad xml processing instruction? Try fixing it.
      retried = TextUtil.html_tidy(@raw.gsub(/<\?[^>]*?>/, ""))
      if 100 * retried.count("<") / tag_count > 80
        # yes!
        tidied = retried
      else
        # kept is always below 80 here, so the warning is unconditional.
        Util.warning "Hm - it looks like tidy ate some of your file (#{kept}%)"
      end
    end
  end
  @html = tidied
end

#noko ⇒ Object



58
59
60
# File 'lib/sinew/main.rb', line 58

# Memoized result of passing #html to Nokogiri::HTML.
# The value is computed on first use and cached in @noko.
def noko
  return @noko if @noko

  @noko = Nokogiri::HTML(html)
end

#post(url, params = nil) ⇒ Object



23
24
25
# File 'lib/sinew/main.rb', line 23

# Issue a POST request by delegating to #_http (defined elsewhere).
#
# +url+ and +params+ are passed through unchanged; +params+ defaults to nil.
# Returns whatever #_http returns.
def post(url, params = nil)
  _http(url, params, :post)
end