diff --git a/src/knowledge-collection-tools.js b/src/knowledge-collection-tools.js index e279641..ff12f38 100644 --- a/src/knowledge-collection-tools.js +++ b/src/knowledge-collection-tools.js @@ -234,7 +234,7 @@ export function groupNquadsBySubject(nquadsArray, sort = false) { const grouped = {}; parser.parse(nquadsArray.join("")).forEach((quad) => { - const { subject, predicate, object } = quad; + const { subject } = quad; let subjectKey; if (subject.termType === "Quad") { @@ -242,8 +242,8 @@ export function groupNquadsBySubject(nquadsArray, sort = false) { const nestedPredicate = subject.predicate.value; const nestedObject = subject.object.termType === "Literal" - ? `"${escapeLiteral(subject.object.value)}"` - : `<${escapeLiteral(subject.object.value)}>`; + ? `"${subject.object.value}"` + : `<${subject.object.value}>`; subjectKey = `<<<${nestedSubject}> <${nestedPredicate}> ${nestedObject}>>`; } else { subjectKey = `<${subject.value}>`; @@ -253,12 +253,14 @@ export function groupNquadsBySubject(nquadsArray, sort = false) { grouped[subjectKey] = []; } - const objectValue = - object.termType === "Literal" - ? `"${escapeLiteral(object.value)}"` - : `<${escapeLiteral(object.value)}>`; + const writer = new N3.Writer({ format: "N-Quads" }); + let quadString = ""; + writer.addQuad(quad); + writer.end((error, result) => { + if (error) throw error; + quadString = result.trim(); + }); - const quadString = `${subjectKey} <${predicate.value}> ${objectValue} .`; grouped[subjectKey].push(quadString); }); @@ -401,17 +403,3 @@ ${nquadsArray.join('\n')} function isEmptyObject(obj) { return Object.keys(obj).length === 0 && obj.constructor === Object; } - -function escapeLiteral(value) { - const ESCAPE_MAP = { - '"': '\\"', - "\\": "\\\\", - "\b": "\\b", - "\f": "\\f", - "\n": "\\n", - "\r": "\\r", - "\t": "\\t", - }; - - return value.replace(/["\\\b\f\n\r\t]/g, (char) => ESCAPE_MAP[char]); -} diff --git a/test/knowledge-collection-tools.test.js b/test/knowledge-collection-tools.test.js index 4e59f88..b24cdd1 100644 --- a/test/knowledge-collection-tools.test.js +++ b/test/knowledge-collection-tools.test.js @@ -1,4 +1,6 @@ import { describe, it } from "mocha"; +import N3 from 'n3'; +import { v4 as uuidv4 } from "uuid"; import { expect } from "chai"; import { formatDataset, @@ -126,19 +128,202 @@ describe("calculateMerkleProof", () => { }); describe("groupNquadsBySubject", () => { - it("should group quads by a single subject", () => { + it("should group quads where the object is a resource", () => { + /* JSON-LD + { + "@context": "http://schema.org", + "@id": "http://example.org/book1", + "type": "Book", + "author": { + "@id": "http://example.org/author1" + } + } + */ const quads = [ - " .", - ' "Literal" .', + " .", + " .", ]; + const grouped = groupNquadsBySubject(quads); expect(grouped).to.have.lengthOf(1); - expect(grouped[0]).to.deep.include( - " ." - ); - expect(grouped[0]).to.deep.include( - ' "Literal" .' - ); + expect(grouped[0]).to.deep.include(quads[0]); + expect(grouped[0]).to.deep.include(quads[1]); + }); + + it("should group quads where the object is a literal", () => { + /* JSON-LD + { + "@context": "http://schema.org", + "@id": "http://example.org/book1", + "type": "Book", + "title": "The Great Book" + } + */ + const quads = [ + ' "The Great Book" .', + ' .', + ]; + + const grouped = groupNquadsBySubject(quads); + expect(grouped).to.have.lengthOf(1); + expect(grouped[0]).to.deep.include(quads[0]); + expect(grouped[0]).to.deep.include(quads[1]); + }); + + it("should group quads where the object is a literal containing an escape character", () => { + /* JSON-LD + { + "@context": "http://schema.org", + "@id": "http://example.org/book1", + "type": "Book", + "title": "The Great Book \n" + } + */ + const quads = [ + // \n is represented as \\n in code + ' "The Great Book \\n" .', + ' .', + ]; + + const grouped = groupNquadsBySubject(quads); + expect(grouped).to.have.lengthOf(1); + expect(grouped[0]).to.deep.include(quads[0]); + expect(grouped[0]).to.deep.include(quads[1]); + }); + + it("should group quads where the object is a typed literal", () => { + /* JSON-LD + { + "@context": "http://schema.org", + "@id": "http://example.org/book1", + "type": "Book", + "publicationDate": { + "@value": "2025-05-28", + "@type": "http://www.w3.org/2001/XMLSchema#date" + } + } + */ + const quads = [ + ' "2025-05-28"^^ .', + ' .', + ]; + + const grouped = groupNquadsBySubject(quads); + expect(grouped).to.have.lengthOf(1); + expect(grouped[0]).to.deep.include(quads[0]); + expect(grouped[0]).to.deep.include(quads[1]); + }); + + it("should group quads where the object is a typed literal that includes an escape character", () => { + /* JSON-LD + { + "@context": "http://schema.org", + "@id": "http://example.org/book1", + "type": "Book", + "publicationDate": { + "@value": "2025-05-28 \n", + "@type": "http://www.w3.org/2001/XMLSchema#date" + } + } + */ + const quads = [ + // \n is represented as \\n in code + ' "2025-05-28 \\n"^^ .', + ' .', + ]; + + const grouped = groupNquadsBySubject(quads); + expect(grouped).to.have.lengthOf(1); + expect(grouped[0]).to.deep.include(quads[0]); + expect(grouped[0]).to.deep.include(quads[1]); + }); + + it("should group quads where the object is a literal with language defined", () => { + /* JSON-LD + { + "@context": "http://schema.org", + "@id": "http://example.org/book1", + "type": "Book", + "description": [ + { + "@value": "A thrilling adventure novel.", + "@language": "en" + }, + { + "@value": "Napeta pustolovska novela.", + "@language": "sl" + } + ] + } + */ + const quads = [ + ' "A thrilling adventure novel."@en .', + ' "Napeta pustolovska novela."@sl .', + ' .', + ]; + + const grouped = groupNquadsBySubject(quads); + expect(grouped).to.have.lengthOf(1); + expect(grouped[0]).to.deep.include(quads[0]); + expect(grouped[0]).to.deep.include(quads[1]); + expect(grouped[0]).to.deep.include(quads[2]); + }); + + it("should group quads where the object is a literal with language defined, containing an escape character", () => { + /* JSON-LD + { + "@context": "http://schema.org", + "@id": "http://example.org/book1", + "type": "Book", + "description": [ + { + "@value": "A thrilling adventure novel. \n", + "@language": "en" + }, + { + "@value": "Napeta pustolovska novela. \n", + "@language": "sl" + } + ] + } + */ + const quads = [ + // \n is represented as \\n in code + ' "A thrilling adventure novel. \\n"@en .', + ' "Napeta pustolovska novela. \\n"@sl .', + ' .', + ]; + + const grouped = groupNquadsBySubject(quads); + expect(grouped).to.have.lengthOf(1); + expect(grouped[0]).to.deep.include(quads[0]); + expect(grouped[0]).to.deep.include(quads[1]); + expect(grouped[0]).to.deep.include(quads[2]); + }); + + it("should group quads where the object is a literal with language defined, while subject is a blank node", () => { + /* JSON-LD + { + "@context": { + "predicate": "http://example.org/predicate" + }, + "@graph": [ + { + "predicate": { + "@value": "something", + "@language": "en" + } + } + ] + } + */ + const quads = [ + `<${uuidv4()}> "something"@en .`, + ]; + + const grouped = groupNquadsBySubject(quads); + expect(grouped).to.have.lengthOf(1); + expect(grouped[0]).to.deep.include(quads[0]); }); it("should group quads by multiple subjects", () => { @@ -352,4 +537,340 @@ describe("generateMissingIdsForBlankNodes", () => { ) ); }); + + it("should replace an object blank node", () => { + /* + JSON-LD + { + "@context": { + "relatedTo": "http://example.org/relatedTo" + }, + "@id": "http://example.org/document/1", + "relatedTo": {} + } + */ + const nquadsArray = [ + " _:c14n0 .", + ]; + + const updatedQuads = generateMissingIdsForBlankNodes(nquadsArray); + + const parser = new N3.Parser(); + const quads = parser.parse(updatedQuads.join('\n')); + + expect(quads[0]._subject.id).equals('http://example.org/document/1'); + + expect(quads[0]._predicate.id).equals('http://example.org/relatedTo'); + + const uuidRegex = /^uuid:[0-9a-fA-F\-]{36}$/; + expect(quads[0]._object.id.match(uuidRegex)); + + expect(quads[0]._graph.id).equals(''); + }); + + it("should replace an occuring object blank node", () => { + /* + JSON-LD + { + "@context": { + "is": {"@id": "http://example.org/is"} + }, + "@graph": [ + { + "@id": "http://example.org/subject1", + "is": {"@id": "_:sharedBlank"} + }, + { + "@id": "http://example.org/subject2", + "is": {"@id": "_:sharedBlank"} + } + ] + } + */ + const nquadsArray = [ + " _:c14n0 .", + " _:c14n0 .", + ]; + + const updatedQuads = generateMissingIdsForBlankNodes(nquadsArray); + + const parser = new N3.Parser(); + const quads = parser.parse(updatedQuads.join('\n')); + + expect(quads[0]._subject.id).equals('http://example.org/subject1'); + expect(quads[1]._subject.id).equals('http://example.org/subject2'); + + expect(quads[0]._predicate.id).equals('http://example.org/is'); + expect(quads[1]._predicate.id).equals('http://example.org/is'); + + const uuidRegex = /^uuid:[0-9a-fA-F\-]{36}$/; + + expect(quads[0]._object.id.match(uuidRegex)); + expect(quads[1]._object.id.match(uuidRegex)); + + expect(quads[0]._graph.id).equals(''); + expect(quads[1]._graph.id).equals(''); + + expect(quads[0]._object.id).equals(quads[1]._object.id); + }); + + it("should replace a subject blank node", () => { + /* + JSON-LD + { + "@context": { + "ex": "http://example.org/" + }, + "@graph": [ + { + "ex:name": "John Doe" + } + ] + } + */ + const nquadsArray = [ + '_:c14n0 "John Doe" .', + ]; + + const updatedQuads = generateMissingIdsForBlankNodes(nquadsArray); + + const parser = new N3.Parser(); + const quads = parser.parse(updatedQuads.join('\n')); + + const uuidRegex = /^uuid:[0-9a-fA-F\-]{36}$/; + expect(quads[0]._subject.id.match(uuidRegex)); + + expect(quads[0]._predicate.id).equals('http://example.org/name'); + + expect(quads[0]._object.id).equals('"John Doe"'); + + expect(quads[0]._graph.id).equals(''); + }); + + it("should replace an occuring subject blank node", () => { + /* + JSON-LD + { + "@context": { + "ex": "http://example.org/" + }, + "@graph": [ + { + "ex:name": "John Doe", + "ex:sex": "male" + } + ] + } + */ + const nquadsArray = [ + '_:c14n0 "John Doe" .', + '_:c14n0 "male" .', + ]; + + const updatedQuads = generateMissingIdsForBlankNodes(nquadsArray); + + const parser = new N3.Parser(); + const quads = parser.parse(updatedQuads.join('\n')); + + const uuidRegex = /^uuid:[0-9a-fA-F\-]{36}$/; + expect(quads[0]._subject.id.match(uuidRegex)); + expect(quads[1]._subject.id.match(uuidRegex)); + + expect(quads[0]._predicate.id).equals('http://example.org/name'); + expect(quads[1]._predicate.id).equals('http://example.org/sex'); + + expect(quads[0]._object.id).equals('"John Doe"'); + expect(quads[1]._object.id).equals('"male"'); + + expect(quads[0]._graph.id).equals(''); + expect(quads[1]._graph.id).equals(''); + + expect(quads[0]._subject.id).equals(quads[1]._subject.id); + }); + + it("should not replace two different subject blank node with the same UUID", () => { + /* + JSON-LD + { + "@context": { + "ex": "http://example.org/" + }, + "@graph": [ + { + "ex:hasName": "Alice", + "ex:sex": "male" + }, + { + "ex:hasName": "Bob", + "ex:sex": "female" + } + ] + } + */ + const nquadsArray = [ + '_:c14n0 "male" .', + '_:c14n0 "Bob" .', + '_:c14n1 "female" .', + '_:c14n1 "Alice" .', + ]; + + const updatedQuads = generateMissingIdsForBlankNodes(nquadsArray); + + const parser = new N3.Parser(); + const quads = parser.parse(updatedQuads.join('\n')); + + const uuidRegex = /^uuid:[0-9a-fA-F\-]{36}$/; + expect(quads[0]._subject.id.match(uuidRegex)); + expect(quads[1]._subject.id.match(uuidRegex)); + expect(quads[2]._subject.id.match(uuidRegex)); + expect(quads[3]._subject.id.match(uuidRegex)); + + expect(quads[0]._predicate.id).equals('http://example.org/sex'); + expect(quads[1]._predicate.id).equals('http://example.org/hasName'); + expect(quads[2]._predicate.id).equals('http://example.org/sex'); + expect(quads[3]._predicate.id).equals('http://example.org/hasName'); + + expect(quads[0]._object.id).equals('"male"'); + expect(quads[1]._object.id).equals('"Bob"'); + expect(quads[2]._object.id).equals('"female"'); + expect(quads[3]._object.id).equals('"Alice"'); + + expect(quads[0]._graph.id).equals(''); + expect(quads[1]._graph.id).equals(''); + expect(quads[2]._graph.id).equals(''); + expect(quads[3]._graph.id).equals(''); + + expect(quads[0]._subject.id).equals(quads[1]._subject.id); + expect(quads[2]._subject.id).equals(quads[3]._subject.id); + expect(quads[0]._subject.id).not.equals(quads[2]._subject.id); + }); + + it("should replace an object blank node, that occurs as a subject, with the same UUID", () => { + /* + JSON-LD + { + "@context": "http://schema.org", + "review": { + "reviewBody": "Excellent book!" + } + } + */ + const nquadsArray = [ + '_:c14n0 "Excellent book!" .', + "_:c14n1 _:c14n0 .", + ]; + + const updatedQuads = generateMissingIdsForBlankNodes(nquadsArray); + + const parser = new N3.Parser(); + const quads = parser.parse(updatedQuads.join('\n')); + + const uuidRegex = /^uuid:[0-9a-fA-F\-]{36}$/; + expect(quads[0]._subject.id.match(uuidRegex)); + expect(quads[1]._subject.id.match(uuidRegex)); + + expect(quads[0]._predicate.id).equals('http://schema.org/reviewBody'); + expect(quads[1]._predicate.id).equals('http://schema.org/review'); + + expect(quads[0]._object.id).equals('"Excellent book!"'); + expect(quads[1]._object.id.match(uuidRegex)); + + expect(quads[0]._graph.id).equals(''); + expect(quads[1]._graph.id).equals(''); + + + expect(quads[0]._subject.id).not.equals(quads[1]._subject.id); + expect(quads[0]._subject.id).equals(quads[1]._object.id); + }); + + it("should fail since graphs aren't supported at this stage", () => { + /* + JSON-LD + { + "@context": { + "@base": "https://example.org/", + "name": "http://schema.org/name", + "knows": { + "@id": "http://schema.org/knows", + "@type": "@id" + }, + "Person": "http://schema.org/Person" + }, + "@graph": [ + { + "@type": "Person", + "name": "Alice", + "knows": [ + { + "@id": "_:bob" + }, + { + "@id": "_:carol" + } + ] + }, + { + "@id": "_:bob", + "@graph": [ + { + "@type": "Person", + "name": "Bob" + } + ] + }, + { + "@id": "_:carol", + "@graph": [ + { + "@type": "Person", + "name": "Carol" + } + ] + } + ] + } + */ + const nquadsArray = [ + '_:c14n2 "Carol" _:c14n0 .', + '_:c14n2 _:c14n0 .', + '_:c14n3 _:c14n0 .', + '_:c14n3 _:c14n1 .', + '_:c14n3 "Alice" .', + '_:c14n3 .', + '_:c14n4 "Bob" _:c14n1 .', + '_:c14n4 _:c14n1 .' + ]; + + try { + generateMissingIdsForBlankNodes(nquadsArray); + } catch (error) { + expect(error.message).equals(` +------------------------------------------------------------------------------------------------ +Unsupported JSON-LD input detected + +After parsing the JSON-LD input, the parser detected creation of new named graphs. +The DKG does not support custom named graphs. + +Problematic Quads: + 1. "Carol" "Carol" _:b31_c14n0 . + + 2. _:b31_c14n0 . + + 3. "Bob" "Bob" _:b31_c14n1 . + + 4. _:b31_c14n1 . + + +Full Parsed N-Quads Array: +_:c14n2 "Carol" _:c14n0 . +_:c14n2 _:c14n0 . +_:c14n3 _:c14n0 . +_:c14n3 _:c14n1 . +_:c14n3 "Alice" . +_:c14n3 . +_:c14n4 "Bob" _:c14n1 . +_:c14n4 _:c14n1 . +`); + } + }); });