Class: RDF::Microdata::RdfaReader

Inherits:
RDFa::Reader
  • Object
show all
Defined in:
lib/rdf/microdata/rdfa_reader.rb

Overview

Update DOM to turn Microdata into RDFa and parse using the RDFa Reader

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input = $stdin, **options) {|reader| ... } ⇒ reader

Initializes the RdfaReader instance.

Parameters:

  • input (IO, File, String) (defaults to: $stdin)

    the input stream to read

  • options (Hash{Symbol => Object})

    any additional options (see RDF::Reader#initialize)

Yields:

  • (reader)

    self

Yield Parameters:

  • reader (RDF::Reader)

Yield Returns:

  • (void)

    ignored

Raises:

  • (RDF::ReaderError)

    if validate



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/rdf/microdata/rdfa_reader.rb', line 31

# Initializes the RdfaReader instance.
#
# Parses the input into a Nokogiri document (unless one was passed in),
# rewrites Microdata attributes (@itemscope, @itemtype, @itemid, @itemprop,
# @itemref) into their RDFa counterparts in place, stores the transformed
# DOM in @rdfa, and then delegates to the RDFa reader via `super`.
#
# @param input [IO, File, String, Nokogiri::XML::Document, Nokogiri::HTML::Document]
#   the input stream to read, or an already parsed document (used as-is and
#   modified in place)
# @param options [Hash{Symbol => Object}]
#   any additional options (see RDF::Reader#initialize); `:encoding` is
#   consulted when parsing and defaults to the input's charset or 'utf-8'
# @yield [reader] `self`
# @yieldparam reader [RDF::Reader]
# @yieldreturn [void] ignored
# @raise [RDF::ReaderError] documented as raised when validating; nothing in
#   this method raises it directly, so presumably it comes from `super`
def initialize(input = $stdin, **options, &block)
  @options = options
  log_debug('', "using RDFa transformation reader")

  input = case input
  when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
  else
    # Try to detect charset from input
    options[:encoding] ||= input.charset if input.respond_to?(:charset)

    # Otherwise, default is utf-8
    options[:encoding] ||= 'utf-8'
    options[:encoding] = options[:encoding].to_s if options[:encoding]
    begin
      input = input.read if input.respond_to?(:read)
      # Re-tag a copy: force_encoding mutates its receiver, which would alter
      # the caller's String and raise FrozenError on a frozen one.
      ::Nokogiri::HTML5(input.dup.force_encoding(options[:encoding]), max_parse_errors: 1000)
    rescue LoadError, NoMethodError
      # Fall back to the HTML4 parser when the HTML5 entry point is unusable
      ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding])
    end
  end

  # For all members having @itemscope
  input.css("[itemscope]").each do |item|
    # Get @itemtypes to create @type and @vocab
    item.attribute('itemscope').remove
    if item['itemtype']
      # Only absolute URLs
      types = item.attribute('itemtype').
        remove.
        to_s.
        split(/\s+/).
        select {|t| RDF::URI(t).absolute?}

      item['typeof'] = types.join(' ') unless types.empty?
      if vocab = types.first
        vocab = begin
          # Strip the final path segment or fragment, keeping the trailing '/' or '#'
          type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
          Registry.new(type_vocab) if type_vocab
        end
        item['vocab'] = vocab.uri.to_s if vocab
      end
    end
    # Every item gets a @typeof, even if empty
    item['typeof'] ||= ''

    # Change each itemid attribute to a resource attribute with the same value
    if item['itemid']
      id = item.attribute('itemid').remove
      item['resource'] = id
    end
  end

  # Add @resource for all itemprop values of object based on a @data value
  input.css("object[itemprop][data]").each do |item|
    item['resource'] ||= item['data']
  end

  # Replace all @itemprop values with @property
  input.css("[itemprop]").each {|item| item['property'] = item.attribute('itemprop').remove}

  # Wrap all @itemref properties
  input.css("[itemref]").each do |item|
    # Use the item's own @vocab, otherwise the @vocab *value* of the closest
    # ancestor having one (detect yields the ancestor node, not the value).
    item_vocab = item['vocab']
    unless item_vocab
      vocab_ancestor = item.ancestors.detect {|a| a.attribute('vocab')}
      item_vocab = vocab_ancestor['vocab'] if vocab_ancestor
    end
    item_vocab = item_vocab.to_s if item_vocab

    item.attribute('itemref').remove.to_s.split(/\s+/).each do |ref|
      referenced = input.css("##{ref}")
      # css always returns a NodeSet (truthy even when empty), so check for
      # emptiness explicitly and skip references to ids that do not exist.
      next if referenced.empty?

      # Add @vocab to referenced using the closest ancestor having @vocab of item.
      # If the element with id reference has no resource attribute, add a resource attribute whose value is a NUMBER SIGN U+0023 followed by reference to the element.
      # If the element with id reference has no typeof attribute, add a typeof="rdfa:Pattern" attribute to the element.
      referenced.wrap(%(<div vocab="#{item_vocab}" resource="##{ref}" typeof="rdfa:Pattern" />))

      # Add a link child element to the element that represents the item, with a rel="rdfa:copy" attribute and an href attribute whose value is a NUMBER SIGN U+0023 followed by reference
      link = ::Nokogiri::XML::Node.new('link', input)
      link['rel'] = 'rdfa:copy'
      link['href'] = "##{ref}"
      item << link
    end
  end

  @rdfa = input
  log_debug('', "Transformed document: #{input.to_html}")

  options = options.merge(
    library: :nokogiri,
    reference_folding: true,
    host_language: :html5,
    version: :"rdfa1.1")

  # Rely on RDFa reader
  super(input, **options, &block)
end

Instance Attribute Details

#rdfa ⇒ RDF::HTML::Document (readonly)

The transformed DOM using RDFa

Returns:

  • (RDF::HTML::Document)


9
10
11
# File 'lib/rdf/microdata/rdfa_reader.rb', line 9

# The transformed DOM using RDFa.
#
# Plain reader for the @rdfa instance variable; returns nil if it has not
# been assigned.
#
# @return [RDF::HTML::Document] as documented; NOTE(review): the constructor
#   appears to store a Nokogiri document here — confirm the actual type
def rdfa
  @rdfa
end

Class Method Details

.format(klass = nil) ⇒ Object



11
12
13
14
15
16
17
# File 'lib/rdf/microdata/rdfa_reader.rb', line 11

# Returns the format class associated with this reader.
#
# When called without an argument (or with nil) this answers
# RDF::Microdata::Format; any other argument is passed unchanged to the
# superclass implementation.
#
# @param klass [Object, nil] forwarded to `super` when not nil
# @return [Object] RDF::Microdata::Format, or whatever `super` returns
def self.format(klass = nil)
  return RDF::Microdata::Format if klass.nil?

  super
end