Class: PDF::Reader::AdvancedTextRunFilter

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/advanced_text_run_filter.rb

Overview

Filter a collection of TextRun objects based on a set of conditions on their attributes. The filter can return either the text runs that match the conditions (only) or the text runs that do not match the conditions (exclude).

You can filter the text runs on any of their attributes using the operators listed in VALID_OPERATORS. Filters can be nested with ‘or’ and ‘and’ conditions.

Examples:

  1. Single condition

AdvancedTextRunFilter.exclude(text_runs, text: { include: 'sample' })

  2. Multiple conditions (and)

AdvancedTextRunFilter.exclude(text_runs, {

font_size: { greater_than: 10, less_than: 15 }

})

  3. Multiple possible values (or)

AdvancedTextRunFilter.exclude(text_runs, {

font_size: { equal: [10, 12] }

})

  4. Complex AND/OR filter

AdvancedTextRunFilter.exclude(text_runs, {

and: [
  { font_size: { greater_than: 10 } },
  { or: [
    { text: { include: "sample" } },
    { width: { greater_than: 100 } }
  ]}
]

})

Constant Summary collapse

# Operator names that may be used in a filter condition.
# Frozen so the list cannot be modified at runtime.
VALID_OPERATORS = %i[
  equal
  not_equal
  greater_than
  less_than
  greater_than_or_equal
  less_than_or_equal
  include
  exclude
].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text_runs, filter_hash) ⇒ AdvancedTextRunFilter

: (Array, Hash[Symbol, untyped]) -> void



68
69
70
71
# File 'lib/pdf/reader/advanced_text_run_filter.rb', line 68

# Stores the collection to filter and the conditions to filter it by.
#
# @param text_runs [Array] the text runs to be filtered
# @param filter_hash [Hash{Symbol => Object}] the filter conditions
def initialize(text_runs, filter_hash)
  @filter_hash = filter_hash
  @text_runs = text_runs
end

Instance Attribute Details

#filter_hash ⇒ Object (readonly)

: Hash[Symbol, untyped]



65
66
67
# File 'lib/pdf/reader/advanced_text_run_filter.rb', line 65

# Reader for the filter conditions held in @filter_hash.
#
# @return [Hash{Symbol => Object}]
def filter_hash
  @filter_hash
end

#text_runs ⇒ Object (readonly)

: Array



62
63
64
# File 'lib/pdf/reader/advanced_text_run_filter.rb', line 62

# Reader for the collection held in @text_runs.
#
# @return [Array]
def text_runs
  @text_runs
end

Class Method Details

.exclude(text_runs, filter_hash) ⇒ Object

: (Array, Hash[Symbol, untyped]) -> Array



57
58
59
# File 'lib/pdf/reader/advanced_text_run_filter.rb', line 57

# Convenience entry point: builds a filter instance for the given
# arguments and returns the result of calling #exclude on it.
#
# @param text_runs [Array] the text runs to be filtered
# @param filter_hash [Hash{Symbol => Object}] the filter conditions
# @return [Array]
def self.exclude(text_runs, filter_hash)
  filter = new(text_runs, filter_hash)
  filter.exclude
end

.only(text_runs, filter_hash) ⇒ Object

: (Array, Hash[Symbol, untyped]) -> Array



52
53
54
# File 'lib/pdf/reader/advanced_text_run_filter.rb', line 52

# Convenience entry point: builds a filter instance for the given
# arguments and returns the result of calling #only on it.
#
# @param text_runs [Array] the text runs to be filtered
# @param filter_hash [Hash{Symbol => Object}] the filter conditions
# @return [Array]
def self.only(text_runs, filter_hash)
  filter = new(text_runs, filter_hash)
  filter.only
end

Instance Method Details

#exclude ⇒ Object

: () -> Array



80
81
82
83
# File 'lib/pdf/reader/advanced_text_run_filter.rb', line 80

# Drops every text run for which evaluate_filter returns a truthy value.
# When filter_hash is empty there is nothing to evaluate, so text_runs is
# returned as-is (the same object, not a copy).
#
# @return [Array]
def exclude
  if filter_hash.empty?
    text_runs
  else
    text_runs.reject { |run| evaluate_filter(run) }
  end
end

#only ⇒ Object

: () -> Array



74
75
76
77
# File 'lib/pdf/reader/advanced_text_run_filter.rb', line 74

# Keeps only the text runs for which evaluate_filter returns a truthy value.
# When filter_hash is empty there is nothing to evaluate, so text_runs is
# returned as-is (the same object, not a copy).
#
# @return [Array]
def only
  if filter_hash.empty?
    text_runs
  else
    text_runs.select { |run| evaluate_filter(run) }
  end
end