Skip to content

Commit

Permalink
Added "dataClassRegex" param to ingestion templates
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Cuddihy committed Jul 28, 2023
1 parent 3de598c commit e2360af
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ public JSONObject fromCsvUsingClassTemplate(@RequestBody IngestionFromStringsAnd
SparqlGraphJson sgjson = null;
try {

IngestionNodegroupBuilder builder = this.buildTemplate(requestBody.buildConnection(), requestBody.getClassURI(), requestBody.getIdRegex());
IngestionNodegroupBuilder builder = this.buildTemplate(requestBody.buildConnection(), requestBody.getClassURI(), requestBody.getIdRegex(), requestBody.getDataClassRegex());
sgjson = builder.getSgjson();

} catch (Exception e) {
Expand Down Expand Up @@ -281,7 +281,7 @@ public JSONObject getClassTemplateAndCsv(@RequestBody GetClassTemplateRequestBod

try {

IngestionNodegroupBuilder builder = this.buildTemplate(requestBody.buildConnection(), requestBody.getClassURI(), requestBody.getIdRegex());
IngestionNodegroupBuilder builder = this.buildTemplate(requestBody.buildConnection(), requestBody.getClassURI(), requestBody.getIdRegex(), requestBody.getDataClassRegex());
SimpleResultSet result = new SimpleResultSet(true);
result.addResult("sgjson", builder.getSgjson().toJson());
result.addResult("csv", builder.getCsvTemplate());
Expand All @@ -303,12 +303,13 @@ public JSONObject getClassTemplateAndCsv(@RequestBody GetClassTemplateRequestBod
* @return
* @throws Exception
*/
private IngestionNodegroupBuilder buildTemplate(SparqlConnection conn, String classURI, String idRegex) throws Exception {
private IngestionNodegroupBuilder buildTemplate(SparqlConnection conn, String classURI, String idRegex, String dataClassRegex) throws Exception {
    // fetch the ontology info for this connection through the oInfo service client (hopefully cached)
    final OntologyInfo ontology = oinfo_props.getClient().getOntologyInfo(conn);

    final IngestionNodegroupBuilder templateBuilder = new IngestionNodegroupBuilder(classURI, conn, ontology);

    // build() reads both regex settings, so they are applied first
    templateBuilder.setIdRegex(idRegex);
    templateBuilder.setDataClassRegex(dataClassRegex);
    templateBuilder.build();

    return templateBuilder;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,12 @@ public class IngestionNodegroupBuilder {
private StringBuilder csvTemplate;
private StringBuilder csvTypes;
private String idRegex = null;
private String dataClassRegex = null;

/**
 * Sets the regex that build() uses to recognise "data classes".
 *
 * When this value is non-null and the pattern is found (Matcher.find(), so a
 * partial match is enough) in the range URI of an object property, build()
 * adds that range node with LOOKUP_MODE_CREATE_IF_MISSING and gives each of
 * its data properties its own CSV column.  Ranges that do not match are added
 * with LOOKUP_MODE_ERR_IF_MISSING instead.
 *
 * @param dataClassRegex regex applied to range URIs; null (the field's
 *                       default) turns the data-class handling off
 */
public void setDataClassRegex(String dataClassRegex) {
this.dataClassRegex = dataClassRegex;
}

/**
*
* @param className
Expand Down Expand Up @@ -129,6 +134,12 @@ public void build() throws Exception {
ispecBuilder.addTypeRestriction(node.getSparqlID());
ispecBuilder.addColumn(colName);
ispecBuilder.addMappingToTypeRestriction(node.getSparqlID(), ispecBuilder.buildMappingWithCol(colName, new String [] {transformId}));
// add to csvTemplate
csvTemplate.append(colName + ",");
csvTypes.append("string,");

// wow: URI lookup by type as the identifier seems very unusual.
// Leaving this in case it was put here for a reason.
if (this.idRegex != null && Pattern.compile(this.idRegex).matcher(colName).find()) {
ispecBuilder.addURILookupToTypeRestriction(node.getSparqlID(), node.getSparqlID());
ispecBuilder.addLookupMode(node.getSparqlID(), ImportSpec.LOOKUP_MODE_CREATE_IF_MISSING);
Expand Down Expand Up @@ -166,62 +177,132 @@ public void build() throws Exception {
for (NodeItem nItem : node.getNodeItemList()) {

for (String rangeUri : nItem.getRangeUris()) {
// Add object property node to nodegroup (optional) and importSpec

// Add object property range node to nodegroup (optional), and to importSpec
String rangeKeyname = new OntologyName(rangeUri).getLocalName();
Node objNode = nodegroup.addNode(rangeUri, node, null, nItem.getUriConnectBy());
nItem.setOptionalMinus(objNode, NodeItem.OPTIONAL_TRUE);

ispecBuilder.addNode(objNode.getSparqlID(), objNode.getUri(), ImportSpec.LOOKUP_MODE_ERR_IF_MISSING);

// we might want to re-add this for a different "flavor" of auto-generated nodegroups
//
// if (oInfo.hasSubclass(className)) {
// // If node has subclasses then NO_CREATE ("error if missing")
// // This will create the need for ingestion order to matter: linked items must be ingested first.
// ispecBuilder.addNode(objNode.getSparqlID(), objNode.getUri(), ImportSpec.LOOKUP_MODE_NO_CREATE);
// } else {
// // If node has NO subclasses then we may create it.
// ispecBuilder.addNode(objNode.getSparqlID(), objNode.getUri(), ImportSpec.LOOKUP_MODE_CREATE);
// }

// give it a name, e.g.: verifies_ENTITY
String objNodeName = nItem.getKeyName() + "_" + rangeKeyname;
nodegroup.setBinding(objNode, objNodeName);

// set data property matching ID_REGEX returned
for (PropertyItem pItem : objNode.getPropertyItems()) {
if (this.idRegex != null && Pattern.compile(this.idRegex).matcher(pItem.getKeyName()).find()) {
// set the lookup ID to be returned
// but not optional (link to node is optional instead)
nodegroup.setIsReturned(pItem, true);

// give ID_REGEX property a meaningful sparqlID
String sparqlID;
if (nItem.getRangeUris().size() > 1) {
// complex range: include the class
sparqlID = nItem.getKeyName() + "_" + rangeKeyname + "_" + pItem.getKeyName();

} else {
// 'default'
sparqlID = nItem.getKeyName() + "_" + pItem.getKeyName();
}
String propId = nodegroup.changeSparqlID(pItem, sparqlID);

if (this.dataClassRegex != null && Pattern.compile(this.dataClassRegex).matcher(rangeUri).find()) {
// range of this object property is a data class

ispecBuilder.addNode(objNode.getSparqlID(), objNode.getUri(), ImportSpec.LOOKUP_MODE_CREATE_IF_MISSING);

// give it a name, e.g.: thickness_value
for (PropertyItem pItem : objNode.getPropertyItems()) {
String propName = nItem.getKeyName() + "_" + pItem.getKeyName();

// set up prop in nodegroup
propName = nodegroup.changeSparqlID(pItem, propName);
nodegroup.setIsReturned(pItem, true);
pItem.setOptMinus(PropertyItem.OPT_MINUS_OPTIONAL);

// add to importspec, using it to look up parent node
// add to import spec
ispecBuilder.addProp(objNode.getSparqlID(), pItem.getUriRelationship());
ispecBuilder.addURILookup(objNode.getSparqlID(), pItem.getUriRelationship(), objNode.getSparqlID());
String colName = buildColName(propName);
ispecBuilder.addColumn(colName);
ispecBuilder.addMapping(objNode.getSparqlID(), pItem.getUriRelationship(), ispecBuilder.buildMappingWithCol(colName, new String [] {transformId}));

// add to csvTemplate
csvTemplate.append(colName + ",");
csvTypes.append(pItem.getValueTypesString(" ") + ",");
}

for (NodeItem measNItem: objNode.getNodeItemList()) {

if (measNItem.getRangeUris().size() == 1) {
String measNItemRange = "";
for (String s : measNItem.getRangeUris()) {
measNItemRange = s;
}
if (this.oInfo.classIsEnumeration(measNItemRange)) {
// found enumerated nodeItem in a Measurement

String measObjName = nItem.getKeyName() + "_" + measNItem.getKeyName();

// add yet another node off the Measurement

// add to nodegroup
Node measObjNode = nodegroup.addNode(measNItemRange, objNode, null, measNItem.getUriConnectBy());
measNItem.setOptionalMinus(measObjNode, NodeItem.OPTIONAL_TRUE);
nodegroup.changeSparqlID(measObjNode, measObjName);
nodegroup.setIsReturned(measObjNode, true);

// add to import spec
ispecBuilder.addNode(measObjNode.getSparqlID(), measNItemRange, ImportSpec.LOOKUP_MODE_CREATE_IF_MISSING);
String colName = buildColName(measObjName);
ispecBuilder.addColumn(colName);
ispecBuilder.addMapping(measObjNode.getSparqlID(), ispecBuilder.buildMappingWithCol(colName, new String [] {transformId}));

// add to csvTemplate
csvTemplate.append(colName + ",");
csvTypes.append("string,");
}
}

}

} else {
// normal object property with id


// give node a name, e.g.: verifies_ENTITY
String objNodeName = nItem.getKeyName() + "_" + rangeKeyname;
nodegroup.changeSparqlID(objNode, objNodeName);

// add node to importSpec
ispecBuilder.addNode(objNode.getSparqlID(), objNode.getUri(), ImportSpec.LOOKUP_MODE_ERR_IF_MISSING);


if (this.oInfo.classIsEnumeration(rangeUri)) {
// node is already in importSpec
// add the column and mapping to the importspec
String colName = buildColName(propId);
String colName = buildColName(objNodeName);
ispecBuilder.addColumn(colName);
ispecBuilder.addMapping(objNode.getSparqlID(), pItem.getUriRelationship(), ispecBuilder.buildMappingWithCol(colName, new String [] {transformId}));
ispecBuilder.addMapping(objNode.getSparqlID(), ispecBuilder.buildMappingWithCol(colName, new String [] {transformId}));

// add to csvTemplate and csvTypes
csvTemplate.append(colName + ",");
csvTypes.append(pItem.getValueTypesString(" ") + ",");
break;
csvTypes.append("uri,");

} else {
// set first data property matching ID_REGEX returned
boolean foundId = false;
for (PropertyItem pItem : objNode.getPropertyItems()) {
if (this.idRegex != null && Pattern.compile(this.idRegex).matcher(pItem.getKeyName()).find()) {
// set the lookup ID to be returned
// but not optional (link to node is optional instead)
nodegroup.setIsReturned(pItem, true);

// give ID_REGEX property a meaningful sparqlID
String sparqlID;
if (nItem.getRangeUris().size() > 1) {
// complex range: include the class
sparqlID = nItem.getKeyName() + "_" + rangeKeyname + "_" + pItem.getKeyName();

} else {
// 'default'
sparqlID = nItem.getKeyName() + "_" + pItem.getKeyName();
}
String propId = nodegroup.changeSparqlID(pItem, sparqlID);

// add to importspec, using it to look up parent node
ispecBuilder.addProp(objNode.getSparqlID(), pItem.getUriRelationship());
ispecBuilder.addURILookup(objNode.getSparqlID(), pItem.getUriRelationship(), objNode.getSparqlID());

// add the column and mapping to the importspec
String colName = buildColName(propId);
ispecBuilder.addColumn(colName);
ispecBuilder.addMapping(objNode.getSparqlID(), pItem.getUriRelationship(), ispecBuilder.buildMappingWithCol(colName, new String [] {transformId}));

// add to csvTemplate and csvTypes
csvTemplate.append(colName + ",");
csvTypes.append(pItem.getValueTypesString(" ") + ",");
foundId = true;
break;
}
}
if (!foundId) throw new Exception("Can't add " + nItem.getKeyName() + " " + rangeKeyname + " node: it must either match the data class regex, or contain a data property that matches id regex");
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public void basicTest() throws Exception {
assertTrue(csv.contains("name"));
assertTrue(csv.contains("cell_cellId"));

// ingest some data without cells: error
// ingest some data when cells don't exist yet: error
String data = "birthday,name,cell_cellId\n" +
"03/23/1966, batt1, cell1\n" +
"03/23/1966, batt2, cell2\n" +
Expand All @@ -73,7 +73,7 @@ public void basicTest() throws Exception {
Dataset ds = new CSVDataset(data, true);
DataLoader dl = new DataLoader(batterySGJson, ds, TestGraph.getUsername(), TestGraph.getPassword());
int records = dl.importData(true);
assertEquals("Error if missing did not occur as expected on cells", 0, records);
assertEquals("URI lookup error on cells did not occur as expected", 0, records);

// ingest the cells
String cellData = "cellId\n" +
Expand All @@ -86,6 +86,10 @@ public void basicTest() throws Exception {
cellBuilder.build();
SparqlGraphJson cellSGJson = cellBuilder.getSgjson();

assertTrue("Missing cellId column", cellBuilder.getCsvTemplate().contains("cellId"));
assertTrue("Missing color_Color column", cellBuilder.getCsvTemplate().contains("color_Color"));


csv = cellBuilder.getCsvTemplate();

Dataset cellDs = new CSVDataset(cellData, true);
Expand All @@ -111,12 +115,83 @@ public void basicTest() throws Exception {
assertTrue(roundTrip.getCellAsString(0, 2).equals("cell1"));
assertTrue(roundTrip.getCellAsString(1, 2).equals("cell2"));
assertTrue(roundTrip.getCellAsString(2, 2).equals("cell3"));



}

/**
 * Builds an ingestion template for Battery and checks that its CSV template
 * offers a Battery_type column, then ingests rows that use that column.
 */
@Test
public void classTypeTest() throws Exception {
    // load test data
    TestGraph.clearGraph();
    TestGraph.uploadOwlResource(this, "loadTestDuraBattery.owl");

    // build the ingestion template
    IngestionNodegroupBuilder batteryBuilder = new IngestionNodegroupBuilder("http://kdl.ge.com/durabattery#Battery", TestGraph.getSparqlConn(), TestGraph.getOInfo());
    batteryBuilder.setIdRegex("(Id$)");
    batteryBuilder.build();

    // check the CSV: superclass should contain _type column
    String csv = batteryBuilder.getCsvTemplate();
    assertTrue("Missing assemblyDate column", csv.contains("assemblyDate"));
    assertTrue("Missing batteryDesc column", csv.contains("batteryDesc"));
    assertTrue("Missing batteryId column", csv.contains("batteryId"));
    assertTrue("Missing Battery_type column", csv.contains("Battery_type"));

    // ingest one row typed Battery and one typed DuraBattery via the
    // Battery_type column: both rows are expected to load
    String data = "assemblyDate,batteryId,Battery_type,batteryDesc\n" +
            "03/23/1966, batt1, Battery,\n" +
            "03/23/1966, batt2, DuraBattery,\n";

    SparqlGraphJson batterySGJson = batteryBuilder.getSgjson();

    Dataset ds = new CSVDataset(data, true);
    DataLoader dl = new DataLoader(batterySGJson, ds, TestGraph.getUsername(), TestGraph.getPassword());
    int records = dl.importData(true);
    assertEquals("Did not ingest proper number of rows", 2, records);
}

/**
 * Builds a DuraBattery template with Cell matching the data class regex,
 * checks that the cells' values show up as plain columns (cellN_cellId,
 * cellN_color), then ingests two rows and reads them back.
 */
@Test
public void dataClassTest() throws Exception {
    // load test data
    TestGraph.clearGraph();
    TestGraph.uploadOwlResource(this, "loadTestDuraBattery.owl");

    // build the ingestion template
    IngestionNodegroupBuilder batteryBuilder = new IngestionNodegroupBuilder("http://kdl.ge.com/durabattery#DuraBattery", TestGraph.getSparqlConn(), TestGraph.getOInfo());
    batteryBuilder.setIdRegex("(Id$)");
    batteryBuilder.setDataClassRegex("#Cell$");
    batteryBuilder.build();

    // check the CSV: should have data columns including enumerated type color
    String csv = batteryBuilder.getCsvTemplate();
    for (String colName : "assemblyDate,batteryDesc,batteryId,cell1_cellId,cell1_color,cell2_cellId,cell2_color,cell3_cellId,cell3_color,cell4_cellId,cell4_color".split(",")) {
        // name the column in the message so a failure says which one is absent
        assertTrue("Missing " + colName + " column", csv.contains(colName));
    }

    // ingest data
    String data = "batteryId,cell1_cellId,cell1_color\n" +
            "batt01,cell0A,blue\n" +
            "batt02,cell0B,red\n";

    SparqlGraphJson batterySGJson = batteryBuilder.getSgjson();

    Dataset ds = new CSVDataset(data, true);
    DataLoader dl = new DataLoader(batterySGJson, ds, TestGraph.getUsername(), TestGraph.getPassword());
    int records = dl.importData(true);
    assertEquals("Did not ingest proper number of rows", 2, records);

    // order the select so the row indexes checked below are deterministic
    batterySGJson.getNodeGroup().orderByAll();
    Table roundTrip = TestGraph.execTableSelect(batterySGJson);
    assertEquals("wrong number of results", 2, roundTrip.getNumRows());

    // assertEquals reports the actual cell value on failure (and copes with null)
    assertEquals("wrong batteryId in row 0", "batt01", roundTrip.getCellAsString(0, "batteryId"));
    assertEquals("wrong cell1_cellId in row 0", "cell0A", roundTrip.getCellAsString(0, "cell1_cellId"));
    // color comes back as a URI, so only its ending is compared
    assertTrue("wrong cell1_color in row 0", roundTrip.getCellAsString(0, "cell1_color").endsWith("blue"));
    assertEquals("wrong batteryId in row 1", "batt02", roundTrip.getCellAsString(1, "batteryId"));
    assertEquals("wrong cell1_cellId in row 1", "cell0B", roundTrip.getCellAsString(1, "cell1_cellId"));
    assertTrue("wrong cell1_color in row 1", roundTrip.getCellAsString(1, "cell1_color").endsWith("red"));

}
}


Expand Down
7 changes: 6 additions & 1 deletion sparqlGraphWeb/sparqlGraph/js/modalgettemplatedialog.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ define([ // properly require.config'ed
this.classSelect = IIDXHelper.createSelect(noId, textVals, [], false, "input-xlarge");
this.classSelect.size = 4;
this.idRegexInput = IIDXHelper.createTextInput(noId, "input-xlarge");
this.idRegexInput.value = "identifier"; // default REST param value: making it explicit
this.dataClassRegexInput = IIDXHelper.createTextInput(noId, "input-xlarge");
this.dataClassRegexInput.value = "#Measurement$"; // default REST param value: making it explicit
this.okCallback = function(){throw new Error("this.okCallback is not set");};
};

Expand All @@ -65,10 +68,11 @@ define([ // properly require.config'ed
// already confirmed by validateCallback()
var classUri = IIDXHelper.getSelectValues(this.classSelect)[0];
var idRegex = this.idRegexInput.value;
var dataClassRegex = this.dataClassRegexInput.value;
var okCallback2 = function(sgJsonJson) {
this.okCallback(classUri, sgJsonJson);
}.bind(this);
this.iClient.execGetClassTemplate(classUri, this.conn, idRegex, okCallback2);
this.iClient.execGetClassTemplate(classUri, this.conn, idRegex, dataClassRegex, okCallback2);
},

validateCallback : function() {
Expand All @@ -94,6 +98,7 @@ define([ // properly require.config'ed
fieldset.appendChild(IIDXHelper.buildControlGroup("class: ", this.classSelect, "Retrieve this class' template nodegroup"));
fieldset.appendChild(document.createElement("br"));
fieldset.appendChild(IIDXHelper.buildControlGroup("id regex: ", this.idRegexInput, "Regex to identify unique id properties"));
fieldset.appendChild(IIDXHelper.buildControlGroup("data class regex: ", this.dataClassRegexInput, "Regex to identify classes to treat as data"));

div.appendChild(form);

Expand Down
5 changes: 4 additions & 1 deletion sparqlGraphWeb/sparqlGraph/js/msiclientingestion.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,16 @@ define([ // properly require.config'ed bootstrap-modal


MsiClientIngestion.prototype = {
execGetClassTemplate(uri, conn, idRegex, successJsonCallback) {
execGetClassTemplate(uri, conn, idRegex, dataClassRegex, successJsonCallback) {
var data = {}
data.connection = JSON.stringify(conn.toJson());
data.classURI = uri;
if (idRegex && idRegex.length > 0) {
data.idRegex = idRegex;
}
if (dataClassRegex && dataClassRegex.length > 0) {
data.dataClassRegex = dataClassRegex;
}
callback = function(msiRes) {
successJsonCallback(msiRes.getSimpleResultField("sgjson"));
}
Expand Down
Loading

0 comments on commit e2360af

Please sign in to comment.