Class: RDF::Normalize::RDFC10::NormalizationState

Inherits:
Object
  • Object
show all
Includes:
Util::Logger
Defined in:
lib/rdf/normalize/rdfc10.rb

Direct Known Subclasses

URGNA2012::NormalizationState

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(**options) ⇒ NormalizationState

Returns a new instance of NormalizationState.



173
174
175
176
177
178
# File 'lib/rdf/normalize/rdfc10.rb', line 173

# Creates an empty normalization state.
#
# @param [Hash{Symbol => Object}] options
#   kept verbatim in `@options`
# @option options [Symbol, String] :hash_algorithm (:SHA256)
#   name of the constant looked up under `Digest` and used for hashing
def initialize(**options)
  @options = options

  algorithm_name = options.fetch(:hash_algorithm, :SHA256)
  @hash_algorithm = Digest.const_get(algorithm_name)

  # Both lookup tables start empty and are filled through
  # add_statement / add_bnode_hash.
  @bnode_to_statements = {}
  @hash_to_bnodes = {}
  @canonical_issuer = IdentifierIssuer.new("c14n")

  # A nil max_calls makes hash_n_degree_quads skip its call-limit check.
  @max_calls, @total_calls = nil, 0
end

Instance Attribute Details

#bnode_to_statements ⇒ Object

Returns the value of attribute bnode_to_statements.



166
167
168
# File 'lib/rdf/normalize/rdfc10.rb', line 166

# Reader for the table filled by add_statement: each key is a node and each
# value is the list of statements recorded for it.
#
# @return [Hash] the current value of `@bnode_to_statements`
def bnode_to_statements; @bnode_to_statements; end

#canonical_issuer ⇒ Object

Returns the value of attribute canonical_issuer.



169
170
171
# File 'lib/rdf/normalize/rdfc10.rb', line 169

# Reader for the issuer created in the constructor with the "c14n" prefix.
#
# @return [IdentifierIssuer] the current value of `@canonical_issuer`
def canonical_issuer; @canonical_issuer; end

#hash_algorithm ⇒ Object

Returns the value of attribute hash_algorithm.



167
168
169
# File 'lib/rdf/normalize/rdfc10.rb', line 167

# Reader for the digest constant resolved from the :hash_algorithm option
# (SHA256 when the option is absent).
#
# @return [Object] the current value of `@hash_algorithm`
def hash_algorithm; @hash_algorithm; end

#hash_to_bnodes ⇒ Object

Returns the value of attribute hash_to_bnodes.



168
169
170
# File 'lib/rdf/normalize/rdfc10.rb', line 168

# Reader for the table filled by add_bnode_hash: each key is a hash value and
# each value is the list of nodes recorded under it.
#
# @return [Hash] the current value of `@hash_to_bnodes`
def hash_to_bnodes; @hash_to_bnodes; end

#max_calls ⇒ Object

Returns the value of attribute max_calls.



170
171
172
# File 'lib/rdf/normalize/rdfc10.rb', line 170

# Reader for the call limit checked by hash_n_degree_quads; the constructor
# leaves it nil, which disables that check.
#
# @return [Integer, nil] the current value of `@max_calls`
def max_calls; @max_calls; end

#total_calls ⇒ Object

Returns the value of attribute total_calls.



171
172
173
# File 'lib/rdf/normalize/rdfc10.rb', line 171

# Reader for the counter that starts at 0 and is incremented on every
# hash_n_degree_quads invocation.
#
# @return [Integer] the current value of `@total_calls`
def total_calls; @total_calls; end

Instance Method Details

#add_bnode_hash(node, hash) ⇒ Object



185
186
187
188
189
# File 'lib/rdf/normalize/rdfc10.rb', line 185

# Records `node` under `hash` in hash_to_bnodes, creating the bucket on first
# use and leaving it untouched when an `eql?` entry is already present.
#
# @param node [Object] the node to record
# @param hash [Object] the key the node is filed under
# @return [Array, nil] the bucket after appending, or nil when nothing was added
def add_bnode_hash(node, hash)
  bucket = (hash_to_bnodes[hash] ||= [])
  # Membership is decided with eql? (not ==), so it follows whatever
  # identity rule the node class gives eql?.
  return if bucket.any? { |existing| existing.eql?(node) }

  bucket << node
end

#add_statement(node, statement) ⇒ Object



180
181
182
183
# File 'lib/rdf/normalize/rdfc10.rb', line 180

# Records `statement` for `node` in bnode_to_statements, creating the list on
# first use and skipping statements that are already present per `eql?`.
#
# @param node [Object] the node the statement is filed under
# @param statement [Object] the statement to record
# @return [Array, nil] the list after appending, or nil when nothing was added
def add_statement(node, statement)
  known = (bnode_to_statements[node] ||= [])
  return if known.any? { |existing| existing.eql?(statement) }

  known << statement
end

#hash_first_degree_quads(node) ⇒ String

This algorithm calculates a hash for a given blank node across the quads in a dataset in which that blank node is a component. If the hash uniquely identifies that blank node, no further examination is necessary. Otherwise, a hash will be created for the blank node using the algorithm in 4.9 Hash N-Degree Quads invoked via 4.5 Canonicalization Algorithm.

Parameters:

  • node (RDF::Node)

    The reference blank node identifier

Returns:

  • (String)

    the hexdigest (SHA256 unless a different hash_algorithm option was given) of the statements using this node, with blank nodes replaced by placeholder identifiers



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/rdf/normalize/rdfc10.rb', line 195

# Hashes the statements recorded for `node` after masking blank nodes: the
# reference node becomes `_:a`, every other blank node becomes `_:z`. The
# resulting N-Quads lines are sorted, concatenated and digested.
#
# @param node [RDF::Node] the reference blank node
# @return [String] hexdigest of the sorted, masked N-Quads
def hash_first_degree_quads(node)
  # Maps one term of a quad to its masked form; the `when node` branch is
  # tried first so the reference node is never treated as "some other bnode".
  mask = lambda do |term|
    case term
    when node then RDF::Node("a")
    when RDF::Node then RDF::Node("z")
    else term
    end
  end

  nquads = bnode_to_statements[node].map do |statement|
    masked_quad = statement.to_quad.map { |term| mask.call(term) }
    RDF::Statement.from(masked_quad).to_nquads
  end

  log_debug("log point", "Hash First Degree Quads function (4.7.3).")
  log_debug("nquads:")
  nquads.each { |line| log_debug { "  - #{line.strip}" } }

  digest = hexdigest(nquads.sort.join)
  log_debug("hash") { digest }
  digest
end

#hash_n_degree_quads(node, issuer) ⇒ Array<String,IdentifierIssuer>

Returns the Hash and issuer.

Parameters:

Returns:

Raises:

  • (MaxCallsExceeded)

    If total number of calls has exceeded max_calls times the number of blank nodes in the dataset.



245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'lib/rdf/normalize/rdfc10.rb', line 245

# Computes a hash for `node` from the hashes of the blank nodes related to it
# through the statements recorded in `bnode_to_statements`. Related nodes are
# grouped by hash in `hn`; the groups are visited in sorted hash order, every
# permutation of a group is turned into a path string (recursing into this
# method for related nodes that have no identifier yet), and the
# lowest-sorting path of each group is appended to the data that is digested
# at the end.
#
# @param node [RDF::Node] blank node being hashed (only `id` and its entry in
#   `bnode_to_statements` are used here)
# @param issuer [IdentifierIssuer] issuer consulted for already issued
#   identifiers; a `dup` of it is used for each permutation
# @return [Array<String,IdentifierIssuer>] the hexdigest of the accumulated
#   data and the issuer selected for the last hash group (the `issuer`
#   argument itself when `hn` stays empty)
# @raise [MaxCallsExceeded] when `max_calls` is set and `total_calls` has
#   already reached it on entry
def hash_n_degree_quads(node, issuer)
  log_debug("hndq:")
  log_debug("  log point", "Hash N-Degree Quads function (4.9.3).")
  log_debug("  identifier") {node.id}
  log_debug("  issuer") {issuer.inspect}

  # Guard against runaway recursion; a nil max_calls disables the check.
  if max_calls && total_calls >= max_calls
    raise MaxCallsExceeded, "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
  end
  @total_calls += 1

  # Map from related-node hash to the list of related blank nodes having that
  # hash; filled by hash_related_statement below.
  hn = {}

  log_debug("  hndq.2:")
  log_debug("    log point", "Quads for identifier (4.9.3 (2)).")
  log_debug("    quads:")
  bnode_to_statements[node].each do |s|
    log_debug {"    - #{s.to_nquads.strip}"}
  end

  # Step 3: populate `hn` from every statement that mentions `node`.
  log_debug("  hndq.3:")
  log_debug("    log point", "Hash N-Degree Quads function (4.9.3 (3)).")
  log_debug("    with:") unless bnode_to_statements[node].empty?
  bnode_to_statements[node].each do |statement|
    log_debug {"      - quad: #{statement.to_nquads.strip}"}
    log_debug("        hndq.3.1:")
    log_debug("          log point", "Hash related bnode component (4.9.3 (3.1))")
    log_depth(depth: 10) {hash_related_statement(node, statement, issuer, hn)}
  end
  log_debug("    Hash to bnodes:")
  hn.each do |k,v|
    log_debug("      #{k}:")
    v.each do |vv|
      log_debug("        - #{vv.id}")
    end
  end

  # Accumulates, per hash group, the group hash followed by its chosen path.
  data_to_hash = ""

  # Step 5: visit the hash groups in sorted order so the result does not
  # depend on insertion order of `hn`.
  log_debug("  hndq.5:")
  log_debug("    log point", "Hash N-Degree Quads function (4.9.3 (5)), entering loop.")
  log_debug("    with:")
  hn.keys.sort.each do |hash|
    log_debug("      - related hash", hash)
    log_debug("        data to hash") {data_to_hash.to_json}
    list = hn[hash]
    # Best path found so far for this group and the issuer state that
    # produced it; an empty chosen_path means "nothing chosen yet".
    chosen_path, chosen_issuer = "", nil
    data_to_hash += hash

    log_debug("        hndq.5.4:")
    log_debug("          log point", "Hash N-Degree Quads function (4.9.3 (5.4)), entering loop.")
    log_debug("          with:") unless list.empty?
    list.permutation do |permutation|
      log_debug("          - perm") {permutation.map(&:id).to_json(indent: ' ', space: ' ')}
      # Every permutation starts from a fresh copy of the incoming issuer, an
      # empty path, and an empty list of nodes that still need recursion.
      issuer_copy, path, recursion_list = issuer.dup, "", []

      log_debug("            hndq.5.4.4:")
      log_debug("              log point", "Hash N-Degree Quads function (4.9.3 (5.4.4)), entering loop.")
      log_debug("              with:")
      permutation.each do |related|
        log_debug("                - related") {related.id}
        log_debug("                  path") {path.to_json}
        if canonical_issuer.identifier(related)
          # Already canonically labelled: use that label.
          path << '_:' + canonical_issuer.issue_identifier(related)
        else
          # Not labelled canonically: remember it for recursion if the copy
          # has not seen it yet, then use the copy's identifier for it.
          recursion_list << related if !issuer_copy.identifier(related)
          path << '_:' + issuer_copy.issue_identifier(related)
        end

        # Early exit once the path is at least as long as the chosen path.
        # NOTE(review): this `break` only leaves the inner `each`; the
        # permutation is still processed by the code below, and unlike the
        # check in the recursion loop it does not compare `path > chosen_path`.
        # Confirm against step 5.4.4.3 of the algorithm whether a real
        # "skip to next permutation" with the full condition is intended.
        break if !chosen_path.empty? && path.length >= chosen_path.length
      end

      log_debug("            hndq.5.4.5:")
      log_debug("              log point", "Hash N-Degree Quads function (4.9.3 (5.4.5)), before possible recursion.")
      log_debug("              recursion list") {recursion_list.map(&:id).to_json(indent: ' ')}
      log_debug("              path") {path.to_json}
      log_debug("              with:") unless recursion_list.empty?
      recursion_list.each do |related|
        log_debug("                - related") {related.id}
        # Recurse with the permutation's issuer copy; the result is
        # [hash, issuer] for the related node.
        result = log_depth(depth: 18) do
          hash_n_degree_quads(related, issuer_copy)
        end
        path << '_:' + issuer_copy.issue_identifier(related)
        path << "<#{result.first}>"
        # Continue with the issuer returned by the recursion.
        issuer_copy = result.last
        log_debug("                  hndq.5.4.5.4:") 
        log_debug("                    log point", "Hash N-Degree Quads function (4.9.3 (5.4.5.4)), combine result of recursion.")
        log_debug("                    path") {path.to_json}
        log_debug("                    issuer copy") {issuer_copy.inspect}
        # Stop recursing once this path can no longer beat the chosen one; the
        # comparison below then leaves chosen_path unchanged.
        break if !chosen_path.empty? && path.length >= chosen_path.length && path > chosen_path
      end

      # Keep the lowest-sorting path seen so far together with its issuer.
      if chosen_path.empty? || path < chosen_path
        chosen_path, chosen_issuer = path, issuer_copy
      end
    end

    data_to_hash += chosen_path
    log_debug("        hndq.5.5:")
    log_debug("          log point", "Hash N-Degree Quads function (4.9.3 (5.5). End of current loop with Hn hashes.")
    log_debug("          chosen path") {chosen_path.to_json}
    log_debug("          data to hash") {data_to_hash.to_json}
    # The next hash group continues from the issuer of the chosen path.
    issuer = chosen_issuer
  end

  log_debug("  hndq.6:")
  log_debug("    log point", "Leaving Hash N-Degree Quads function (4.9.3).")
  log_debug("    hash") {hexdigest(data_to_hash)}
  log_depth(depth: 4) {log_debug("issuer") {issuer.inspect}}
  return [hexdigest(data_to_hash), issuer]
end

#hash_related_node(related, statement, issuer, position) ⇒ String

Returns the SHA256 hexdigest hash.

Parameters:

  • related (RDF::Node)
  • statement (RDF::Statement)
  • issuer (IdentifierIssuer)
  • position (Symbol)

    one of :s, :o, or :g

Returns:

  • (String)

    the SHA256 hexdigest hash



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/rdf/normalize/rdfc10.rb', line 223

# Builds the hash for a blank node related to the one being processed. The
# digested input is the position, then the statement's predicate in
# N-Triples form (omitted for position :g), then either `_:` plus an already
# issued identifier or the first-degree hash of `related`.
#
# @param related [RDF::Node] the related blank node
# @param statement [RDF::Statement] statement whose predicate is included
# @param issuer [IdentifierIssuer] consulted when the canonical issuer has no
#   identifier for `related`
# @param position [Symbol] position of `related` in the statement; compared
#   against :g
# @return [String] hexdigest of the assembled input
def hash_related_node(related, statement, issuer, position)
  log_debug("related") { related.id }

  input = "#{position}"
  if position != :g
    input << statement.predicate.to_ntriples
  end

  # The canonical identifier wins; the temporary issuer is only asked when
  # the canonical issuer knows nothing about the node.
  issued = canonical_issuer.identifier(related)
  issued ||= issuer.identifier(related)

  if issued
    input << "_:#{issued}"
  else
    log_debug("h1dq:")
    first_degree = log_depth(depth: 2) { hash_first_degree_quads(related) }
    input << first_degree
  end

  log_debug("input") { input.inspect }
  log_debug("hash") { hexdigest(input) }
  hexdigest(input)
end

#hash_related_statement(node, statement, issuer, map) ⇒ Object

Group adjacent bnodes by hash



382
383
384
385
386
387
388
389
390
391
392
# File 'lib/rdf/normalize/rdfc10.rb', line 382

# Groups the blank nodes adjacent to `node` in `statement` by their related
# hash. Each term of the statement that is an RDF::Node other than `node` is
# hashed with hash_related_node and appended to `map[hash]` unless an `eql?`
# entry is already there.
#
# @param node [RDF::Node] the node whose neighbours are collected
# @param statement [RDF::Statement] statement to scan
# @param issuer [IdentifierIssuer] passed through to hash_related_node
# @param map [Hash{String => Array<RDF::Node>}] accumulator, mutated in place
# @return [Object] whatever `each` returns for the statement's term hash
def hash_related_statement(node, statement, issuer, map)
  mentions_bnode = statement.to_h.values.any? { |term| term.is_a?(RDF::Node) }
  log_debug("with:") if mentions_bnode

  statement.to_h(:s, :p, :o, :g).each do |pos, term|
    next unless term.is_a?(RDF::Node)
    next if term == node

    log_debug("  - position", pos)
    related_hash = log_depth(depth: 4) { hash_related_node(term, statement, issuer, pos) }
    bucket = (map[related_hash] ||= [])
    bucket << term if bucket.none? { |seen| seen.eql?(term) }
  end
end

#hexdigest(val) ⇒ Object (protected)



377
378
379
# File 'lib/rdf/normalize/rdfc10.rb', line 377

# Digests `val` with the configured hash algorithm.
#
# @param val [String] data to hash
# @return [String] whatever `hash_algorithm.hexdigest` returns for `val`
def hexdigest(val)
  digester = hash_algorithm
  digester.hexdigest(val)
end

#inspect ⇒ Object



362
363
364
# File 'lib/rdf/normalize/rdfc10.rb', line 362

# Multi-line, human-readable dump of the state: a header line followed by
# the statements table, the hash table and the canonical issuer.
#
# @return [String]
def inspect
  [
    "NormalizationState:",
    "bnode_to_statements: #{inspect_bnode_to_statements}",
    "hash_to_bnodes: #{inspect_hash_to_bnodes}",
    "canonical_issuer: #{canonical_issuer.inspect}"
  ].join("\n")
end

#inspect_bnode_to_statements ⇒ Object



366
367
368
369
370
# File 'lib/rdf/normalize/rdfc10.rb', line 366

# Renders bnode_to_statements as "id: [nquads, ...]" entries joined by ", ".
#
# @return [String]
def inspect_bnode_to_statements
  entries = bnode_to_statements.each_with_object([]) do |(bnode, quads), acc|
    serialized = quads.map { |quad| quad.to_nquads.strip }
    # Interpolating the array uses its inspect form, e.g. ["...", "..."].
    acc << "#{bnode.id}: #{serialized}"
  end
  entries.join(", ")
end

#inspect_hash_to_bnodes ⇒ Object



372
373
# File 'lib/rdf/normalize/rdfc10.rb', line 372

# Renders hash_to_bnodes as "hash: [ids, ...]" entries joined by ", ",
# mirroring the format of inspect_bnode_to_statements.
#
# The method body used to be empty, so it returned nil and `inspect` always
# printed an empty "hash_to_bnodes:" section.
#
# @return [String] empty string when no hashes have been recorded
def inspect_hash_to_bnodes
  hash_to_bnodes.map do |hash, nodes|
    "#{hash}: #{nodes.map(&:id)}"
  end.join(", ")
end