Skip to content

Commit

Permalink
Stabilized one-hop check-in-able predicate stats path finding
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Cuddihy committed Aug 24, 2021
1 parent 566dd90 commit ba23a17
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import com.ge.research.semtk.resultSet.Table;
import com.ge.research.semtk.sparqlX.SparqlConnection;
import com.ge.research.semtk.sparqlX.SparqlToXUtils;
import com.ge.research.semtk.utility.LocalLogger;

/**
* Stores a list of counts of [subject_class pred object_class] from instance data
Expand All @@ -25,7 +26,6 @@ public class PredicateStats {
// exactHash : exact count with no inheritance. subject_class optional_predicate optional_object_class
private Hashtable<String, Long> exactHash = new Hashtable<String, Long>();


/**
* Get stats from the data sei's in a connection.
* Use a direct call to the triplestore.
Expand All @@ -36,7 +36,7 @@ public PredicateStats(SparqlConnection conn, OntologyInfo oInfo) throws Exceptio
dataConn.clearModelInterfaces();
String sparql = SparqlToXUtils.generatePredicateStatsQuery(dataConn, oInfo);
Table statsTab = conn.getDefaultQueryInterface().executeQueryToTable(sparql);
this.storeStats(statsTab, oInfo, null, null, 0, 0);
this.storeStats(statsTab, oInfo, dataConn, null, null, 0, 0);
}

/**
Expand All @@ -58,7 +58,7 @@ public PredicateStats(SparqlConnection conn, OntologyInfo oInfo, JobTracker trac
Table statsTab = conn.getDefaultQueryInterface().executeQueryToTable(sparql);

int queryDonePercent = startPercent + (endPercent - startPercent) / 2;
this.storeStats(statsTab, oInfo, tracker, jobId, queryDonePercent, endPercent);
this.storeStats(statsTab, oInfo, conn, tracker, jobId, queryDonePercent, endPercent);
}

/**
Expand Down Expand Up @@ -93,15 +93,23 @@ public JSONObject toJson() {
* @param predicate
* @param objectClass
* @return
* @throws PathException
*/
public long getExact(String subjectClass, String predicate, String objectClass) {
public long getExact(String subjectClass, String predicate, String objectClass) throws PathException {
String key = this.buildKey(subjectClass, predicate, objectClass);
Long ret = this.exactHash.get(key);
return (ret == null) ? 0 : ret;
}

private String buildKey(String subjectClass, String predicate, String objectClass) {
return subjectClass + "|" + predicate + "|" + objectClass;
private String buildKey(String subjectClass, String predicate, String objectClass) throws PathException {

OntologyPath path = new OntologyPath(subjectClass);
path.addTriple(subjectClass, predicate, objectClass);
return this.buildKey(path);
}

private String buildKey(OntologyPath path) {
return path.toJson().toJSONString();
}

/**
Expand All @@ -127,27 +135,119 @@ private String buildKey(String subjectClass, String predicate, String objectClas
* @throws AuthorizationException
* @throws Exception
*/
private void storeStats(Table t, OntologyInfo oInfo, JobTracker tracker, String jobId, int startPercent, int endPercent) throws AuthorizationException, Exception {
private void storeStats(Table tab, OntologyInfo oInfo, SparqlConnection conn, JobTracker tracker, String jobId, int startPercent, int endPercent) throws AuthorizationException, Exception {
final String STATUS = "calculating stats";
// final String W3 = "http://www.w3.org";

// hash start class or endclass to a list of one-hop triples
Hashtable<String, HashSet<Triple>> tripleHash = new Hashtable<String, HashSet<Triple>>();
// hash an exactHash key to endClass
Hashtable<String, OntologyPath> pathHash = new Hashtable<String, OntologyPath>();


double rows = t.getNumRows();
for (int i=0; i < t.getNumRows(); i++) {
double rows = tab.getNumRows();
for (int i=0; i < tab.getNumRows(); i++) {
if (tracker != null && i % 2000 == 0) {
int percent = (int) (startPercent + (endPercent - startPercent) * (i / rows));
tracker.setJobPercentComplete(jobId, percent, STATUS);
}
// extract info from query result row
String sclass = t.getCell(i, 0);
String pred = t.getCell(i, 1);
String oclass = t.getCell(i, 2);
long count = t.getCellAsInt(i, 3);
String sclass = tab.getCell(i, 0);
String pred = tab.getCell(i, 1);
String oclass = tab.getCell(i, 2);
long count = tab.getCellAsInt(i, 3);

// Need #type for Explore tab, so consider this a failed experiment
// if (sclass.startsWith(W3) || pred.startsWith(W3) || oclass.startsWith(W3))
// continue;

// build one-hop Triple and add to tripleHash twice: once for subject, once for object
// do not include data properties (oclass is empty)
if (!oclass.isEmpty()) {
Triple t = new Triple(sclass, pred, oclass);

if (! tripleHash.containsKey(sclass)) {
tripleHash.put(sclass, new HashSet<Triple>());
}
tripleHash.get(sclass).add(t);
if (! tripleHash.containsKey(oclass)) {
tripleHash.put(oclass, new HashSet<Triple>());
}
tripleHash.get(oclass).add(t);
}


// add one-hops to exactHash
OntologyPath p = new OntologyPath(sclass);
p.addTriple(sclass, pred, oclass);
String key = this.buildKey(p);
this.exactHash.put(key, count);

// hash the other details
pathHash.put(key, p);

// exactHash gets everything
this.exactHash.put(this.buildKey(sclass, pred, oclass), count);
}



//-------- build additional hops ---------//
// Seems to have insurmountable performance problems //
// TODO
// needs to be used by path-finding: check the end of the new path
// smarter setJobPercentComplete()
// needs return only one-hops to SPARQLgraph
// MAX_HOPS would be higher, and probably accessible for findExactPaths()
final int MAX_HOPS = 1;
HashSet<String> lastLenKeys = new HashSet<String>();
lastLenKeys.addAll(this.exactHash.keySet());

// for each length of path up the the max we want to compute
for (int thisLen=2; thisLen <= MAX_HOPS; thisLen++) {
HashSet<String> thisLenKeys = new HashSet<String>();
// for each path of the previous length
for (String key : lastLenKeys) {
OntologyPath p = pathHash.get(key);
// for each triple that could be added to the end
HashSet<Triple> triples = tripleHash.get(p.getEndClassName());
if (triples != null) {
for (Triple t : tripleHash.get(p.getEndClassName())) {
// if new triple wouldn't cause a loop
if (!p.containsClass(t.getSubject()) || !p.containsClass(t.getObject())) {
// add triple to new path
OntologyPath pathCopy = p.deepCopy();
pathCopy.addTriple(t.getSubject(), t.getPredicate(), t.getObject());

long instanceCount = this.countInstanceData(pathCopy, conn, oInfo);
if (instanceCount > 0) {
String k = this.buildKey(pathCopy);
this.exactHash.put(k, instanceCount);
thisLenKeys.add(k);
pathHash.put(k, pathCopy);
}
}
}
}
}

lastLenKeys = thisLenKeys;
}



///////// DEBUG ///////////
//for (OntologyPath p : pathHash.values()) {
// LocalLogger.logToStdOut(p.debugString());
//}
///////////////////////////

if (tracker != null) {
tracker.setJobPercentComplete(jobId, endPercent, STATUS);
}
}

/**
 * Count the instances of the given path in the connection's data.
 * Generates a path-instance-count SPARQL query and runs it on the default
 * query interface, returning the single cell of the one-row result.
 * NOTE(review): returns int although callers store the value into a long;
 * assumes the instance count fits in an int — confirm for large datasets.
 * @param path  the ontology path whose instance data is counted
 * @param conn  connection whose default query interface executes the count query
 * @param oInfo ontology info used to generate the query
 * @return number of instances of the path found in the data
 * @throws Exception on query generation or execution failure
 */
private int countInstanceData(OntologyPath path, SparqlConnection conn, OntologyInfo oInfo) throws Exception {
String query = SparqlToXUtils.generatePathInstanceCountQuery(path, conn, oInfo);
Table tab = conn.getDefaultQueryInterface().executeQueryToTable(query);
return tab.getCellAsInt(0, 0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public class PathFindingTests_IT {

private final static String REQUIREMENT = "http://arcos.rack/REQUIREMENTS#REQUIREMENT";
private final static String TEST_RESULT = "http://arcos.rack/TESTING#TEST_RESULT";
private final static String TEST = "http://arcos.rack/TESTING#TEST";
private final static String THING = "http://arcos.rack/PROV-S#THING";
private final static String WAS_DERIVED_FROM = "http://arcos.rack/PROV-S#wasDerivedFrom";
private final static String WAS_IMPACTED_BY = "http://arcos.rack/PROV-S#wasImpactedBy";
Expand Down Expand Up @@ -96,14 +97,16 @@ public void testWithPredicateStats() throws Exception {
PredicateStats stats = new PredicateStats(TestGraph.getSparqlConn(), oInfo);

ArrayList<OntologyPath> paths2 = oInfo.findAllPaths(TEST_RESULT, REQUIREMENT, stats);
for (int i=0; i < paths2.size(); i++) {
System.out.println(paths2.get(i).asString());
}

assertTrue(true);
assertEquals("Number of paths found", 1, paths2.size());
OntologyPath path = paths2.get(0);
assertEquals("Path start", TEST_RESULT, path.getStartClassName());
assertEquals("Path end", REQUIREMENT, path.getEndClassName());
assertTrue(path.containsClass(TEST));

}


@Test
/**
* The RACK ontology has lots of super/sub stuff and it is used heavily by path-finding
Expand Down
72 changes: 39 additions & 33 deletions sparqlGraphWeb/sparqlGraph/js/exploretab.js
Original file line number Diff line number Diff line change
Expand Up @@ -563,45 +563,51 @@ define([ // properly require.config'ed

// first pass: add nodes for each type with count
for (var key in blob.exactTab) {
var triple = key.split('|');
var count = blob.exactTab[key];

var oSubjectClass = new OntologyName(triple[0]);
var oPredicate = new OntologyName(triple[1]);
var oObjectClass = new OntologyName(triple[2]);

// skipping Type since w already have oSubjectClass
if ( oPredicate.getLocalName() == "type") {
var myLabel = oSubjectClass.getLocalName() + " " + count;
nodeData.push({id: oSubjectClass.getFullName(), label: myLabel, title: oSubjectClass.getFullName(), group: oSubjectClass.getNamespace() });
var jObj = JSON.parse(key);

// only visualize one-hops
if (jObj.triples.length == 1) {
var count = blob.exactTab[key];
var oSubjectClass = new OntologyName(jObj.triples[0].s);
var oPredicate = new OntologyName(jObj.triples[0].p);
var oObjectClass = new OntologyName(jObj.triples[0].o);

// skipping Type since we already have oSubjectClass
if ( oPredicate.getLocalName() == "type") {
var myLabel = oSubjectClass.getLocalName() + " " + count;
nodeData.push({id: oSubjectClass.getFullName(), label: myLabel, title: oSubjectClass.getFullName(), group: oSubjectClass.getNamespace() });
}
}
}

// second pass: add edges
for (var key in blob.exactTab) {
var triple = key.split('|');
var count = blob.exactTab[key];

var oSubjectClass = new OntologyName(triple[0]);
var oPredicate = new OntologyName(triple[1]);
var oObjectClass = new OntologyName(triple[2]);

// skipping Type since w already have oSubjectClass
if ( oPredicate.getLocalName() != "type") {
var width = Math.ceil(Math.log10(count));

if (oObjectClass.getFullName() == "") {
// connection to data, not a class
if (SHOW_DATA) {
// data: separate each into it's own node
var dataId = triple[0] + "|" + triple[1] + "|data";
nodeData.push({id: dataId, label: " ", group: "data" });
edgeData.push({from: oSubjectClass.getFullName(), to: dataId, label: oPredicate.getLocalName() + " " + count, arrows: 'to', width: width});
}
var jObj = JSON.parse(key);

// only visualize one-hops
if (jObj.triples.length == 1) {
var count = blob.exactTab[key];
var oSubjectClass = new OntologyName(jObj.triples[0].s);
var oPredicate = new OntologyName(jObj.triples[0].p);
var oObjectClass = new OntologyName(jObj.triples[0].o);

// skipping Type since we already have oSubjectClass
if ( oPredicate.getLocalName() != "type") {
var width = Math.ceil(Math.log10(count));

if (oObjectClass.getFullName() == "") {
// connection to data, not a class
if (SHOW_DATA) {
// data: separate each into its own node
var dataId = oSubjectClass.getFullName() + "|" + oPredicate.getFullName() + "|data";
nodeData.push({id: dataId, label: " ", group: "data" });
edgeData.push({from: oSubjectClass.getFullName(), to: dataId, label: oPredicate.getLocalName() + " " + count, arrows: 'to', width: width});
}

} else {
// normal class-to-class (all class nodes already added by pass1)
edgeData.push({from: oSubjectClass.getFullName(), to: oObjectClass.getFullName(), label: oPredicate.getLocalName() + " " + count, arrows: 'to', width: width});
} else {
// normal class-to-class (all class nodes already added by pass1)
edgeData.push({from: oSubjectClass.getFullName(), to: oObjectClass.getFullName(), label: oPredicate.getLocalName() + " " + count, arrows: 'to', width: width});
}
}
}
}
Expand Down

0 comments on commit ba23a17

Please sign in to comment.