Class: RDF::Turtle::FreebaseReader

Inherits:
NTriples::Reader
  • Object
show all
Includes:
Terminals
Defined in:
lib/rdf/turtle/freebase_reader.rb

Overview

Parser specifically for Freebase, which has a very regular form.

Constant Summary

Constants included from Terminals

Terminals::ANON, Terminals::BASE, Terminals::BLANK_NODE_LABEL, Terminals::DECIMAL, Terminals::DOUBLE, Terminals::ECHAR, Terminals::EXPONENT, Terminals::INTEGER, Terminals::IRIREF, Terminals::IRI_RANGE, Terminals::LANG_DIR, Terminals::PERCENT, Terminals::PLX, Terminals::PNAME_LN, Terminals::PNAME_NS, Terminals::PN_CHARS, Terminals::PN_CHARS_BASE, Terminals::PN_CHARS_BODY, Terminals::PN_CHARS_U, Terminals::PN_LOCAL, Terminals::PN_LOCAL_BODY, Terminals::PN_LOCAL_ESC, Terminals::PN_PREFIX, Terminals::PREFIX, Terminals::STRING_LITERAL_LONG_QUOTE, Terminals::STRING_LITERAL_LONG_SINGLE_QUOTE, Terminals::STRING_LITERAL_QUOTE, Terminals::STRING_LITERAL_SINGLE_QUOTE, Terminals::UCHAR, Terminals::U_CHARS1, Terminals::U_CHARS2, Terminals::WS

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.formatObject



12
# File 'lib/rdf/turtle/freebase_reader.rb', line 12

# Format associated with this reader.
#
# @return [Object] the RDF::Turtle::Format constant
def self.format
  RDF::Turtle::Format
end

Instance Method Details

#read_booleanRDF::Literal::Boolean

Read a boolean value

Returns:

  • (RDF::Literal::Boolean)


123
124
125
126
127
# File 'lib/rdf/turtle/freebase_reader.rb', line 123

# Read a boolean value (+true+ or +false+) from the start of the current input.
#
# @return [RDF::Literal::Boolean, nil] nil when the pattern does not match
def read_boolean
  bool_str = match(/^(true|false)/)
  return unless bool_str

  RDF::Literal::Boolean.new(bool_str, canonicalize: canonicalize?)
end

#read_literalRDF::Literal

Returns:

  • (RDF::Literal)

See Also:



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/rdf/turtle/freebase_reader.rb', line 86

# Read a quoted literal, optionally followed by a language/direction tag or a
# `^^` datatype marker.
#
# NOTE(review): relies on `match` handing back the captured text of the given
# pattern; `match`, `LITERAL_PLAIN` and `LANG_DIR` are defined elsewhere -- confirm.
#
# @return [RDF::Literal, nil] nil when the input does not match LITERAL_PLAIN
def read_literal
  literal_str = match(LITERAL_PLAIN)
  return unless literal_str

  literal_str = self.class.unescape(literal_str)
  literal =
    if (lang_dir = match(RDF::Turtle::Reader::LANG_DIR))
      # The tag is split on "--" into a language part and an optional direction part
      language, direction = lang_dir.split('--')
      RDF::Literal.new(literal_str, language: language, direction: direction)
    elsif match(/^(\^\^)/)
      # Datatype is given either as a pname or as a URI reference
      RDF::Literal.new(literal_str, datatype: read_pname(intern: true) || read_uriref || fail_object)
    else
      # Plain string literal
      RDF::Literal.new(literal_str)
    end

  literal.validate!     if validate?
  literal.canonicalize! if canonicalize?
  literal
end

#read_numericRDF::Literal::Integer, ...

Read a numeric value

Returns:

  • (RDF::Literal::Integer, RDF::Literal::Decimal, RDF::Literal::Double)


107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/rdf/turtle/freebase_reader.rb', line 107

# Read a numeric value, trying the DOUBLE, then DECIMAL, then INTEGER pattern.
#
# @return [RDF::Literal::Double, RDF::Literal::Decimal, RDF::Literal::Integer, nil]
#   nil when none of the numeric patterns match
def read_numeric
  if (double_str = match(/^(#{DOUBLE})/))
    # Insert a fractional digit when "." is directly followed by the exponent, e.g. "1.e5" -> "1.0e5"
    normalized = double_str.sub(/\.([eE])/, '.0\1')
    RDF::Literal::Double.new(normalized, canonicalize: canonicalize?)
  elsif (decimal_str = match(/^(#{DECIMAL})/))
    # Supply the leading zero for forms such as ".5"
    normalized = decimal_str.start_with?(".") ? "0#{decimal_str}" : decimal_str
    RDF::Literal::Decimal.new(normalized, canonicalize: canonicalize?)
  elsif (integer_str = match(/^(#{INTEGER})/))
    RDF::Literal::Integer.new(integer_str, canonicalize: canonicalize?)
  end
end

#read_pname(**options) ⇒ RDF::URI

Read a PNAME of the form prefix:suffix.

Returns:

  • (RDF::URI)


58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/rdf/turtle/freebase_reader.rb', line 58

# Read a PNAME of the form prefix:suffix and expand it against the defined prefix.
#
# @param options [Hash] accepted for call-site compatibility; not referenced in this method body
# @return [RDF::URI, nil] nil when the input does not start with a pname
# @raise [RDF::ReaderError] when the prefix is not defined, or when an
#   ArgumentError is raised while building the URI
def read_pname(**options)
  pname_str = match(/^(\w+:\S+)/)
  return unless pname_str

  ns, suffix = pname_str.split(':', 2)
  if suffix.end_with?(".")
    # A trailing "." is treated as the end of statement: strip it from the
    # suffix and put it back at the front of the remaining line
    suffix.chop!
    @line.insert(0, ".")
  end

  pfx_iri = prefix(ns)
  unless pfx_iri
    raise RDF::ReaderError.new("ERROR [line #{lineno}] prefix #{ns.inspect} is not defined", lineno: lineno)
  end

  # Unescape PN_LOCAL_ESC: keep only the character following the backslash
  suffix = suffix.gsub(PN_LOCAL_ESC) { |esc| esc[1] } if suffix.match?(PN_LOCAL_ESC)

  # Remove any redundant leading hash from suffix when the prefix IRI already contains one
  suffix = suffix.sub(/^\#/, "") if pfx_iri.to_s.index("#")

  uri = RDF::URI(pfx_iri + suffix)
  uri.validate! if validate?
  uri
rescue ArgumentError
  raise RDF::ReaderError.new("ERROR [line #{lineno}] invalid PName", lineno: lineno)
end

#read_prefixRDF::URI

Read a prefix of the form `@prefix pfx: <uri> .`

Add prefix definition to prefixes

Returns:

  • (RDF::URI)


47
48
49
50
51
52
53
# File 'lib/rdf/turtle/freebase_reader.rb', line 47

# Read a prefix declaration of the form `@prefix pfx: <uri> .` and register it
# through the `prefix` method.
#
# The terminating "." is matched literally, so a line ending in any other
# character is not treated as a prefix declaration.
#
# @return [Object, nil] whatever `prefix(name, iri)` returns (documented as
#   RDF::URI), or nil when the line is not a prefix declaration
def read_prefix
  if prefix_str = match(/^@prefix\s+(\w+:\s+#{IRIREF})\s*\.$/)
    # Split only on the first ":<whitespace>", leaving the IRI part intact
    pfx, iri = prefix_str.split(/:\s+/, 2)
    return nil unless iri
    # iri[1..-2] drops the first and last characters, i.e. the surrounding "<" and ">"
    prefix(pfx, iri[1..-2])
  end
end

#read_tripleObject

Extension to N-Triples reader, includes reading pnames and prefixes



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/rdf/turtle/freebase_reader.rb', line 17

# Extension to the N-Triples reader: reads lines until one yields a triple,
# handling pnames and prefix declarations along the way.
#
# Blank lines and prefix declarations are skipped. When a RDF::ReaderError
# occurs and validation is off, its message is recorded (in @options[:errors]
# if present, otherwise on $stderr) and reading continues with the next line.
#
# @return [Array] a three-element array [subject, predicate, object]
# @raise [RDF::ReaderError] re-raised when validate? is true
def read_triple
  loop do
    begin
      readline.strip!
      next if blank? || read_prefix

      subject   = read_pname(intern: true) || fail_subject
      predicate = read_pname(intern: true) || fail_predicate
      object    = read_pname || read_uriref || read_boolean || read_numeric || read_literal || fail_object

      # End of statement is only checked when validating
      if validate? && !read_eos
        log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
      end

      return [subject, predicate, object]
    rescue RDF::ReaderError => e
      raise e if validate?

      error_sink = @options[:errors]
      if error_sink
        error_sink << e.message
      else
        $stderr.puts e.message
      end
    end
  end
end