Class: PDF::Extractor::Reader

Inherits:
Object
  • Object
show all
Includes:
REXML::StreamListener
Defined in:
lib/pdf/extractor.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Reader

Returns a new instance of Reader.



70
71
72
# File 'lib/pdf/extractor.rb', line 70

# Creates a reader that has collected no pages or fonts yet.
#
# @return [Reader] a new instance of Reader
def initialize
	@pages = []
	@fonts = []
	# Set explicitly so #text reads a defined flag even if it is called
	# before the first #tag_start.
	@in_text = false
end

Instance Attribute Details

#fonts ⇒ Object (readonly)

Returns the value of attribute fonts.



68
69
70
# File 'lib/pdf/extractor.rb', line 68

# Read-only accessor for the fonts collected so far.
#
# @return [Array] the array created in #initialize, to which #tag_start
#   appends one PDF::Extractor::Font per 'fontspec' tag
def fonts
  @fonts
end

#pages ⇒ Object (readonly)

Returns the value of attribute pages.



68
69
70
# File 'lib/pdf/extractor.rb', line 68

# Read-only accessor for the pages collected so far.
#
# @return [Array] the array created in #initialize, to which #tag_start
#   appends one PDF::Extractor::Page per 'page' tag
def pages
  @pages
end

Instance Method Details

#tag_start(name, attributes) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/pdf/extractor.rb', line 74

# Stream-listener callback invoked for every opening tag.
#
# Builds the page/font/element model from the tags it recognises:
# * 'page'     - appends a new Page to @pages
# * 'fontspec' - appends a new Font to @fonts
# * 'text'     - appends a new Element to the current (last) page
# * 'b' / 'i'  - marks the current (last) element's font bold / italic
# Any other tag only clears the in-text flag.
#
# @param name [String] the tag name
# @param attributes [Hash] tag attributes, looked up by string key
def tag_start(name, attributes)
	# #text only stores character data while this flag is set; it is
	# re-armed below for 'text', 'b' and 'i'.
	@in_text = false
	case name
	when 'page'
		@pages << PDF::Extractor::Page.new(
			:width => attributes['width'].to_f,
			:height => attributes['height'].to_f
		)
	when 'fontspec'
		@fonts << PDF::Extractor::Font.new(
			:id => attributes['id'],
			# NOTE(review): the +2 offset was already questioned by the
			# original author ("is this right?") - confirm before relying on it.
			:size => attributes['size'].to_f + 2,
			:name => attributes['family']
		)
	when 'text'
		@in_text = true
		spec = @fonts.find { |font| font.id == attributes['font'] }
		@pages.last.elements << PDF::Extractor::Element.new(
			:top => attributes['top'].to_f,
			:left => attributes['left'].to_f,
			:width => attributes['width'].to_f,
			:height => attributes['height'].to_f,
			# Give the element its own copy: a later 'b'/'i' sets the style on
			# this font, and the fontspec in @fonts is shared by every element
			# with the same font id.
			:font => (spec.dup if spec)
		)
	when 'b', 'i'
		@in_text = true
		font = @pages.last.elements.last.font
		# An element whose font id matched no fontspec has no font to restyle.
		font.style = (name == 'b' ? :bold : :italic) if font
	end
end

#text(str) ⇒ Object



106
107
108
# File 'lib/pdf/extractor.rb', line 106

# Stream-listener callback invoked for character data.
#
# Adds the fragment to the content of the most recently added element of
# the last page, but only while the in-text flag set by #tag_start is on
# and the fragment contains at least one non-whitespace character.
#
# @param str [String] the character data fragment
def text(str)
	return unless @in_text && str =~ /\S/

	element = @pages.last.elements.last
	# Append rather than assign: one element can receive several fragments
	# (before, inside and after a nested 'b'/'i' tag), and assigning kept
	# only the last one. Interpolation turns unset (nil) content into "".
	element.content = "#{element.content}#{str}"
end