Module: Awestruct::ContextHelper

Defined in:
lib/awestruct/context_helper.rb

Instance Method Summary

Instance Method Details

#clean_html(str) ⇒ Object



10
11
12
# File 'lib/awestruct/context_helper.rb', line 10

# Replaces every HTML non-breaking-space entity (+&nbsp;+) in +str+ with an
# ordinary space and returns the result as a new string.
#
# The pattern used to be a single whitespace character replaced by the same
# character, which left the input unchanged. Matching the entity literal is
# consistent with how html_to_text treats +&nbsp;+.
# NOTE(review): presumably the entity literal was lost somewhere upstream --
# confirm the intended replacement against the project history.
#
# @param str [String] HTML fragment to clean
# @return [String] copy of +str+ with +&nbsp;+ entities turned into spaces
def clean_html(str)
  str.gsub( /&nbsp;/, ' ' )
end

#close_tags(s) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/awestruct/context_helper.rb', line 18

# Appends closing tags for every element that is still open at the end of
# the HTML fragment +s+, innermost element first.
#
# Tags are found with a simple regular expression, not a real parser.
# Comments, doctypes and processing instructions are ignored, and so are
# self-closing tags (+<br/>+) and HTML void elements (+<br>+, +<img ...>+),
# because none of them take a closing tag.
#
# @param s [String] HTML fragment, possibly truncated
# @return [String] +s+ followed by the missing closing tags (+s+ itself when
#   nothing is left open)
# @raise [RuntimeError] when a closing tag does not match the innermost open
#   element
def close_tags(s)
  void_elements = %w[area base br col embed hr img input link meta param source track wbr]
  open_tags = []

  s.scan(/<\/?[^>]+>/).each do |tag|
    # <!-- ... -->, <!DOCTYPE ...> and <?xml ...?> never need closing.
    next if tag.start_with?('<!', '<?')

    if tag.start_with?('</')
      name = tag[2..-1][/\w+/]
      expected = open_tags.last
      unless expected == name
        raise "Malformed HTML expected #{expected} but got #{name} '#{s}'"
      end
      open_tags.pop
    else
      name = tag[1..-1][/\w+/]
      # Elements that cannot have content must not be tracked as open.
      next if name.nil? || tag.end_with?('/>') || void_elements.include?(name.downcase)
      open_tags.push(name)
    end
  end

  # The last element opened is the first one to close.
  open_tags.reverse.inject(s) { |html, name| html + "</#{name}>" }
end

#fix_url(base_url, url) ⇒ Object



62
63
64
65
# File 'lib/awestruct/context_helper.rb', line 62

# Turns a root-relative URL into an absolute one by prefixing +base_url+.
#
# Only URLs that start with a single "/" are rewritten. Everything else is
# returned untouched: absolute URLs, document-relative paths, +nil+, and
# protocol-relative URLs ("//host/path"), which already name their host and
# would be corrupted by a prefix.
#
# @param base_url [String] site base URL, e.g. "http://example.com"
# @param url [String, nil] URL taken from an attribute value
# @return [String, nil] the qualified URL, or +url+ unchanged
def fix_url(base_url, url)
  # \A anchors at the start of the string (^ would also match after a newline);
  # the lookahead leaves protocol-relative "//..." URLs alone.
  return url unless url =~ %r{\A/(?!/)}
  "#{base_url}#{url}"
end

#fully_qualify_urls(base_url, text) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/awestruct/context_helper.rb', line 40

# Parses +text+ with Hpricot and passes the link-carrying attribute of every
# <a> (href), <link> (href) and <img> (src) element through fix_url with
# +base_url+, then returns the re-serialised document.
#
# @param base_url [String] value handed to fix_url as the base URL
# @param text [String] HTML to process
# @return [String] serialised document; on Rubies other than 1.8 it is
#   tagged with the encoding of +text+
def fully_qualify_urls(base_url, text)
  doc = Hpricot( text )

  # Element selector paired with the attribute that holds its URL.
  [ [ "//a", 'href' ], [ "//link", 'href' ], [ "//img", 'src' ] ].each do |xpath, attribute|
    doc.search( xpath ).each do |element|
      element[attribute] = fix_url( base_url, element[attribute] )
    end
  end

  html = doc.to_s
  return html if RUBY_VERSION.start_with? '1.8'

  # Hpricot::Doc#to_s output encoding is not necessarily the same as the encoding of text
  html.force_encoding(text.encoding) if html.encoding != text.encoding
  html
end

#html_to_text(str) ⇒ Object



6
7
8
# File 'lib/awestruct/context_helper.rb', line 6

# Produces a plain-text rendering of an HTML fragment: anything that looks
# like a tag (+<...>+) is removed, then each +&nbsp;+ entity becomes a space.
#
# @param str [String] HTML fragment
# @return [String] text with tags removed and +&nbsp;+ replaced by spaces
def html_to_text(str)
  without_markup = str.gsub( /<[^>]+>/, '' )
  without_markup.gsub( /&nbsp;/, ' ' )
end

#summarize(text, numwords = 20, ellipsis = '...') ⇒ Object



36
37
38
# File 'lib/awestruct/context_helper.rb', line 36

# Builds a short excerpt of +text+: keeps at most +numwords+ space-separated
# words, appends +ellipsis+ (always, even when nothing was cut off) and runs
# the result through close_tags.
#
# @param text [String] text or HTML to shorten
# @param numwords [Integer] maximum number of words to keep
# @param ellipsis [String] suffix appended after the kept words
# @return [String] whatever close_tags returns for the excerpt
def summarize(text, numwords=20, ellipsis='...')
  words = text.split(/ /)
  excerpt = words.slice(0, numwords).join(' ')
  close_tags(excerpt + ellipsis)
end

#without_images(str) ⇒ Object



14
15
16
# File 'lib/awestruct/context_helper.rb', line 14

# Strips +<img ...>+ tags from +str+, then replaces every +<a ...>...</a>+
# whose content contains no "<" with just that content.
#
# @param str [String] HTML fragment
# @return [String] fragment without image tags and without such link wrappers
def without_images(str)
  imageless = str.gsub(/<img[^>]+>/,'')
  # Anchors wrapping only text (or nothing) are reduced to their content.
  imageless.gsub(/<a[^>]+>([^<]*)<\/a>/) { Regexp.last_match(1) }
end