Class: MicroformatParser::Selector

Inherits:
Object
  • Object
show all
Defined in:
lib/uformatparser.rb

Overview

Implements a selector using a CSS-style expression.

For more information see MicroformatParser.selector.

Constant Summary collapse

REGEX =

Parse each selector into five parts: $1 element name or * (optional); $2 ID name (including leading #, optional); $3 class names (each including its leading ., zero or more); $4 attribute expressions (zero or more); $5 anything else (no leading spaces).

/^(\*|[A-Za-z][A-Za-z0-9_\-:]*)?(#[A-Za-z][A-Za-z0-9_\-:]*)?((?:\.[A-Za-z][A-Za-z0-9_\-:]*){0,})((?:\[[A-Za-z][A-Za-z0-9_\-:]*(?:(?:~|\|)?=.*)?\]){0,})\s*(.*)$/
ATTR_REGEX =

Parse each attribute expression into three parts: $1 attribute name; $2 matching operation; $3 matched value. The matching operation may be =, ~= or |=. The value may be empty.

/^([A-Za-z][A-Za-z0-9_\-:]*)((?:~|\|)?=)?(.*)$/

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(tag_name, attrs, alt = nil, &depends) ⇒ Selector

:startdoc:



463
464
465
466
467
468
# File 'lib/uformatparser.rb', line 463

# Stores the pieces that make up a selector.
#
# tag_name:: element name to compare against node.name, or nil for any element
# attrs::    array of [attribute name, Regexp] pairs tested by #match
# alt::      optional alternative selector consulted by #match (default nil)
# depends::  optional block that #match calls with the node
def initialize(tag_name, attrs, alt = nil, &depends)
    @tag_name, @attrs = tag_name, attrs
    @alt, @depends = alt, depends
end

Class Method Details

.create(statement, alt = nil, &depends) ⇒ Object

Creates a new selector.



473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
# File 'lib/uformatparser.rb', line 473

# Creates a new selector from a CSS-style statement.
#
# The first simple selector (element name, #id, .class names and [attribute]
# expressions) is parsed with REGEX. Anything left over is treated as a
# combinator (",", "+", ">" or plain whitespace for descendant) followed by
# another statement, which is handled by calling this factory recursively.
#
# statement:: the selector text; surrounding whitespace is ignored and the
#             caller's string is not modified
# alt::       optional alternative selector handed to Selector.new
# depends::   optional block handed to Selector.new
#
# Raises InvalidSelectorException when the statement is empty, when an
# attribute expression cannot be parsed, or when the statement starts with
# text that is neither a simple selector nor a combinator.
def Selector.create(statement, alt = nil, &depends)
    # Work on a stripped copy: strip! would mutate the caller's string and
    # raise on frozen strings.
    statement = statement.strip
    # Parse the first selector expression into $1-$4, anything else goes in $5
    parts = REGEX.match(statement)
    if parts.nil? || parts[0].empty?
        raise InvalidSelectorException, "Invalid (empty) selector statement"
    end

    # Set tag_name to the element name if specified and not *
    tag_name = parts[1] if parts[1] && !parts[1].empty? && parts[1] != '*'
    # This array holds the regular expressions for matching attributes.
    # We use an array since we allow multiple expressions on the same attribute,
    # e.g. to find an element with both class 'foo' and class 'bar'.
    attrs = []
    # Match the ID attribute if specified. The capture includes the leading
    # '#', which is not part of the attribute value, so drop it.
    attrs << ['id', Regexp.new('^' + parts[2][1..-1] + '$')] if parts[2]
    # The third part is a collection of class names, each prefixed with a dot.
    # The class attribute is a set of space-separated names, so match each
    # name as a word within the attribute value.
    parts[3].split('.').each do |cls|
        attrs << ['class', Regexp.new('\b' + cls + '\b')] unless cls.empty?
    end
    # Process the remaining attribute expressions. Each expression is enclosed
    # within square brackets, so split the expressions into anything between the
    # square brackets. The result may include empty elements, skip those.
    parts[4].split(/\[|\]/).each do |expr|
        next if expr.empty?
        # Parse the attribute expression and create a regular expression
        # for matching the attribute value, based on the operation.
        attr_parts = ATTR_REGEX.match(expr)
        unless attr_parts
            raise InvalidSelectorException, "Invalid attribute expression: [#{expr}]"
        end
        name, type, value = attr_parts[1..3]
        pattern =
            case type
            when '='
                # Match the attribute value in full
                Regexp.new('^' + value + '$')
            when '~='
                # Match a space-separated word within the attribute value
                Regexp.new('\b' + value + '\b')
            when '|='
                # Match the beginning of the attribute value
                Regexp.new('^' + value)
            else
                # Match all attribute values (existence check)
                Regexp.new('')
            end
        attrs << [name, pattern]
    end
    # If there's nothing else in the statement, return this selector.
    selector = Selector.new(tag_name, attrs, alt, &depends)
    rest = parts[5]
    return selector if rest.empty?

    # Create a compound selector based on the remainder of the statement.
    # This is also why we need the factory and can't call new directly.
    case rest[0, 1]
    when ','
        # Alternative selector: second statement is alternative to the first one
        Selector.create(rest[1..-1], selector)
    when '+'
        # Sibling selector: second statement is returned that will match node
        # followed by previous sibling node based on first statement
        Selector.create(rest[1..-1]) do |node|
            node.previous_element && selector.match(node.previous_element)
        end
    when '>'
        # Child selector: second statement is returned that will match node
        # followed by parent node based on the first statement
        Selector.create(rest[1..-1]) do |node|
            node.parent? && selector.match(node.parent)
        end
    else
        # If the first expression consumed nothing, the remainder is the whole
        # statement and recursing on it would never terminate.
        if rest.length == statement.length
            raise InvalidSelectorException, "Invalid selector statement: #{statement}"
        end
        # Descendant selector: second statement is returned that will match node
        # followed by ascendant node based on the first statement
        Selector.create(rest) do |node|
            ancestor = node.parent
            found = false
            while ancestor
                break if (found = selector.match(ancestor))
                ancestor = ancestor.parent
            end
            found
        end
    end
end

.for_class(cls) ⇒ Object

Creates a new selector for the given class name.



556
557
558
# File 'lib/uformatparser.rb', line 556

# Builds a selector with no element-name restriction and a single attribute
# test: the class attribute must contain +cls+ between word boundaries.
def Selector.for_class(cls)
    word_pattern = Regexp.new('\b' + cls + '\b')
    class_test = ['class', word_pattern]
    Selector.new(nil, [class_test])
end

Instance Method Details

#inspect ⇒ Object



580
581
582
583
584
585
586
587
588
589
590
591
# File 'lib/uformatparser.rb', line 580

# Returns a selector-like string describing this selector: the element name
# (if any), one bracketed expression per attribute test, and, when an
# alternative selector is set, a comma followed by its own inspect output.
#
# The operation shown for each attribute is recovered from the shape of the
# stored Regexp source: \b...\b is shown as ~=, ^...$ as =, ^... as |=, and
# anything else as a bare existence check.
def inspect
    stmt = @tag_name ? @tag_name : ''
    @attrs.each do |name, pattern|
        operation =
            case pattern.source
            when /\A\\b(.*)\\b\z/ then "~=#{Regexp.last_match(1)}"
            when /\A\^(.*)\$\z/ then "=#{Regexp.last_match(1)}"
            when /\A\^([^$]*)\z/ then "|=#{Regexp.last_match(1)}"
            else ''
            end
        # Close the bracket per attribute so several attributes (or none)
        # produce balanced output.
        stmt += "[#{name}#{operation}]"
    end
    stmt += ',' + @alt.inspect if @alt
    stmt
end

#match(node) ⇒ Object

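Tests whether the given node matches this selector, deferring to the alternative selector when the node does not match and to the dependent check when it does.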



561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
# File 'lib/uformatparser.rb', line 561

# Tests a single node against this selector.
#
# The node passes when the element name is unrestricted or equal to
# node.name, and every stored attribute pattern matches the corresponding
# entry of node.attributes. On failure the alternative selector (if any)
# decides the result; on success the dependent block (if any) decides it.
def match(node)
    tag_ok = (!@tag_name || @tag_name == node.name)
    # Attribute tests run regardless of the tag comparison result.
    attrs_ok = @attrs.all? { |attr_name, pattern| pattern =~ node.attributes[attr_name] }
    matched = attrs_ok ? tag_ok : false

    if matched
        # A matching node may still depend on a related node (parent,
        # sibling, ...); let the dependent check have the final word.
        @depends ? @depends.call(node) : matched
    else
        # A non-matching node gets a second chance through the alternative
        # selector (x,y), when one was supplied.
        @alt ? @alt.match(node) : matched
    end
end