Class: RDF::Normalize::RDFC10
- Includes:
- Enumerable, Util::Logger
- Defined in:
- lib/rdf/normalize/rdfc10.rb
Direct Known Subclasses
Defined Under Namespace
Classes: IdentifierIssuer, NormalizationState
Instance Attribute Summary
Attributes inherited from Base
Instance Method Summary collapse
-
#each(&block) ⇒ Object
Yields each normalized statement.
-
#initialize(enumerable, **options) ⇒ RDF::Enumerable
constructor
Create an enumerable with grounded nodes.
- #normalize_statements(ns, &block) ⇒ Object protected
-
#to_hash ⇒ Hash{String => String}
Returns a map from input blank node identifiers to canonical blank node identifiers.
Methods included from Enumerable
Constructor Details
#initialize(enumerable, **options) ⇒ RDF::Enumerable
Create an enumerable with grounded nodes
Raises [RuntimeError] if the maximum number of levels of recursion is exceeded.
24 25 26 27 28 29 30 |
# File 'lib/rdf/normalize/rdfc10.rb', line 24
# Create an enumerable with grounded nodes.
#
# Stores the input and the options, defaulting the hash algorithm to
# :SHA256 when the caller did not choose one, and rejects any algorithm
# outside the supported list.
#
# @param enumerable [Object] the input to canonicalize; kept as @dataset
# @param options [Hash{Symbol => Object}] keyword options, kept as @options
# @option options [Symbol] :hash_algorithm (:SHA256)
#   one of :MD5, :SHA1, :SHA2, :SHA256, :SHA384 or :SHA512
# @option options [Integer] :max_calls (40)
#   multiplier read later by normalize_statements to bound the number of
#   hash_n_degree_quads calls
# @raise [UnknownHashAlgorithm] if :hash_algorithm is not in the supported list
def initialize(enumerable, **options)
  @dataset, @options = enumerable, options
  # Only fills in the default when the key is missing or nil/false.
  @options[:hash_algorithm] ||= :SHA256
  unless %i[MD5 SHA1 SHA2 SHA256 SHA384 SHA512].include?(@options[:hash_algorithm])
    raise UnknownHashAlgorithm, "UnknownHashAlgorithm: #{@options[:hash_algorithm].inspect}. Use one of MD5, SHA1, SHA2, SHA256, SHA384, or SHA512"
  end
end
Instance Method Details
#each(&block) ⇒ Object
Yields each normalized statement
33 34 35 36 37 38 |
# File 'lib/rdf/normalize/rdfc10.rb', line 33
# Yields each normalized statement.
#
# Builds a fresh NormalizationState from the stored options, logs entry
# into the canonicalization function, then runs normalize_statements with
# the caller's block inside a nested log depth.
#
# @yield [statement] each statement produced by normalize_statements
# @return [Object] the value returned by the log_depth call
def each(&block)
  state = NormalizationState.new(**@options)
  log_debug("ca:")
  log_debug(" log point", "Entering the canonicalization function (4.5.3).")
  log_depth(depth: 2) do
    normalize_statements(state, &block)
  end
end
#normalize_statements(ns, &block) ⇒ Object (protected)
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/rdf/normalize/rdfc10.rb', line 53
# Runs the canonicalization steps (2 through 6) against `dataset`, recording
# all working state in `ns`, and optionally yields the relabelled statements.
#
# @param ns [NormalizationState] state object that is mutated throughout:
#   bnode-to-statement map, hash-to-bnodes map, canonical issuer, max_calls
# @yield [statement] when a block is given, every statement of `dataset`,
#   with blank nodes replaced by nodes carrying their canonical identifier
# @return [Object] `dataset` (accessor not shown in this listing; presumably
#   the enumerable stored by the constructor — confirm against Base)
def normalize_statements(ns, &block)
  # Step 2: Map BNodes to the statements they are used by
  dataset.each_statement do |statement|
    # compact drops nil quad positions before the blank-node filter.
    statement.to_quad.compact.select(&:node?).each do |node|
      ns.add_statement(node, statement)
    end
  end
  log_debug("ca.2:")
  log_debug(" log point", "Extract quads for each bnode (4.5.3 (2)).")
  log_debug(" Bnode to quads:")
  # Skip the per-statement dump entirely unless the logger level is 0
  # (presumably DEBUG, as in Ruby's standard Logger — confirm).
  if logger && logger.level == 0
    ns.bnode_to_statements.each do |bn, statements|
      log_debug(" #{bn.id}:")
      statements.each do |s|
        log_debug {" - #{s.to_nquads.strip}"}
      end
    end
  end

  ns.hash_to_bnodes = {}

  # Step 3: Calculate hashes for first degree nodes
  log_debug("ca.3:")
  log_debug(" log point", "Calculated first degree hashes (4.5.3 (3)).")
  log_debug(" with:")
  ns.bnode_to_statements.each_key do |node|
    log_debug(" - identifier") {node.id}
    log_debug(" h1dq:")
    hash = log_depth(depth: 8) {ns.hash_first_degree_quads(node)}
    ns.add_bnode_hash(node, hash)
  end

  # Step 4: Create canonical replacements for hashes mapping to a single node
  log_debug("ca.4:")
  log_debug(" log point", "Create canonical replacements for hashes mapping to a single node (4.5.3 (4)).")
  log_debug(" with:") unless ns.hash_to_bnodes.empty?
  # keys.sort yields a separate array, so deleting from hash_to_bnodes
  # inside the loop does not disturb the iteration.
  ns.hash_to_bnodes.keys.sort.each do |hash|
    identifier_list = ns.hash_to_bnodes[hash]
    # Hashes shared by several nodes are left for step 5.
    next if identifier_list.length > 1
    node = identifier_list.first
    id = ns.canonical_issuer.issue_identifier(node)
    log_debug(" - identifier") {node.id}
    log_debug(" hash", hash)
    log_debug(" canonical label", id)
    ns.hash_to_bnodes.delete(hash)
  end

  # Step 5: Iterate over hashes having more than one node
  log_debug("ca.5:") unless ns.hash_to_bnodes.empty?
  # NOTE(review): this log point is emitted unconditionally, unlike the
  # two neighbouring lines — confirm that is intended.
  log_debug(" log point", "Calculate hashes for identifiers with shared hashes (4.5.3 (5)).")
  log_debug(" with:") unless ns.hash_to_bnodes.empty?

  # Initialize the number of calls allowed to hash_n_degree_quads
  # as a multiple of the total number of blank nodes in the dataset.
  # The multiplier comes from the :max_calls option and defaults to 40.
  ns.max_calls = ns.bnode_to_statements.keys.length * @options.fetch(:max_calls, 40)

  ns.hash_to_bnodes.keys.sort.each do |hash|
    identifier_list = ns.hash_to_bnodes[hash]

    log_debug(" - hash", hash)
    log_debug(" identifier list") {identifier_list.map(&:id).to_json(indent: ' ')}
    hash_path_list = []

    # Create a hash_path_list for all bnodes using a temporary identifier used to create canonical replacements
    log_debug(" ca.5.2:")
    log_debug(" log point", "Calculate hashes for identifiers with shared hashes (4.5.3 (5.2)).")
    log_debug(" with:") unless identifier_list.empty?
    identifier_list.each do |identifier|
      # Already has a canonical identifier; nothing to compute.
      next if ns.canonical_issuer.issued.include?(identifier)
      # Each candidate gets its own temporary issuer with prefix "b".
      temporary_issuer = IdentifierIssuer.new("b")
      temporary_issuer.issue_identifier(identifier)
      log_debug(" - identifier") {identifier.id}
      hash_path_list << log_depth(depth: 12) {ns.hash_n_degree_quads(identifier, temporary_issuer)}
    end

    # Create canonical replacements for nodes
    log_debug(" ca.5.3:") unless hash_path_list.empty?
    log_debug(" log point", "Canonical identifiers for temporary identifiers (4.5.3 (5.3)).")
    log_debug(" issuer:") unless hash_path_list.empty?
    # Entries are ordered by their first element; `result` itself is not
    # used inside the block, only the paired issuer.
    hash_path_list.sort_by(&:first).each do |result, issuer|
      issuer.issued.each do |node|
        id = ns.canonical_issuer.issue_identifier(node)
        log_debug(" - blank node") {node.id}
        log_debug(" canonical identifier", id)
      end
    end
  end

  # Step 6: Yield statements using BNodes from canonical replacements
  if block_given?
    dataset.each_statement do |statement|
      if statement.has_blank_nodes?
        # Rebuild the quad, swapping every blank node for a node interned
        # under its canonical identifier; other terms pass through as-is.
        quad = statement.to_quad.compact.map do |term|
          term.node? ? RDF::Node.intern(ns.canonical_issuer.identifier(term)) : term
        end
        block.call RDF::Statement.from(quad)
      else
        block.call statement
      end
    end
  end

  log_debug("ca.6:")
  log_debug(" log point", "Issued identifiers map (4.4.3 (6)).")
  log_debug(" issued identifiers map: #{ns.canonical_issuer.inspect}")
  dataset
end
#to_hash ⇒ Hash{String => String}
Returns a map from input blank node identifiers to canonical blank node identifiers.
43 44 45 46 47 48 49 |
# File 'lib/rdf/normalize/rdfc10.rb', line 43
# Returns a map from input blank node identifiers to canonical blank node
# identifiers.
#
# Runs normalize_statements without a block, so no statements are yielded,
# then converts the state's canonical issuer to a hash.
#
# @return [Hash{String => String}]
def to_hash
  state = NormalizationState.new(**@options)
  log_debug("ca:")
  log_debug(" log point", "Entering the canonicalization function (4.5.3).")
  log_depth(depth: 2) do
    normalize_statements(state)
  end
  issuer = state.canonical_issuer
  issuer.to_hash
end