Skip to content

Commit 6dc3473

Browse files
authored
feat: add --datatypes option to make output easier to SPARQL (#4)
1 parent d9320d8 commit 6dc3473

File tree

2 files changed

+60
-5
lines changed

2 files changed

+60
-5
lines changed

src/cli.ts

+5
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ export async function cli() {
2323
.choices("mode", ["facade-x", "csv"])
2424
.option("base", { description: "Base IRI for the Facade-X generated data" })
2525
.alias("base", "base-iri")
26+
.option("datatypes", { desc: "Change column datatypes for easier SPARQL", type: "string" })
27+
.choices("datatypes", ["original", "easy-sparql"])
28+
.default("datatypes", "original")
29+
.strictOptions()
2630
.help()
2731
.parse();
2832

@@ -36,6 +40,7 @@ export async function cli() {
3640
password: argv.password as string,
3741
quadMode: argv.mode as unknown as any,
3842
baseIRI: (argv.base as string) ?? pathToFileURL(argv.input).href + "#",
43+
datatypeMode: argv.datatypes as unknown as any,
3944
});
4045
const writer = new N3.StreamWriter({ format: "nquads" });
4146

src/msaccess.ts

+55-5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ export interface MSAccessConstructorOptions extends MDBOptions {
1717
* `"csv"` generates a quad per table per column per row value.
1818
*/
1919
quadMode: "facade-x" | "csv";
20+
/**
21+
* Datatypes can simplify querying.
22+
*
23+
* `"original"` (default) are most similar to the Access column datatypes
24+
* `"easy-sparql"`: maps to xsd:integer, xsd:decimal (with .) and xsd:double (with E)
25+
*/
26+
datatypeMode: "original" | "easy-sparql";
2027
/** Base URI, required for the Façade-X ontology. */
2128
baseIRI: string;
2229
/** Used to create all the data model instances */
@@ -26,6 +33,7 @@ export interface MSAccessConstructorOptions extends MDBOptions {
2633
export class MSAccess extends Readable implements RDF.Stream {
2734
#db: MDBReader;
2835
#quadMode: MSAccessConstructorOptions["quadMode"];
36+
#datatypeMode: MSAccessConstructorOptions["datatypeMode"];
2937
#baseURI: string;
3038
#df: RDF.DataFactory;
3139
shouldRead: boolean;
@@ -47,6 +55,7 @@ export class MSAccess extends Readable implements RDF.Stream {
4755

4856
// Default mode is facade-x
4957
this.#quadMode = options.quadMode ?? "facade-x";
58+
this.#datatypeMode = options.datatypeMode ?? "original";
5059
this.#baseURI =
5160
options.baseIRI ?? database instanceof Buffer
5261
? "http://example.org/data#"
@@ -90,6 +99,8 @@ export class MSAccess extends Readable implements RDF.Stream {
9099

91100
/** Generate quads with a model akin to Facade-X. */
92101
private *facadeXQuads() {
102+
const valueFunc = this.#datatypeMode == "original" ? this.originalValue : this.easySparqlValue;
103+
93104
const TABLE = this.#baseURI;
94105

95106
for (const tableName of this.#db.getTableNames()) {
@@ -111,7 +122,7 @@ export class MSAccess extends Readable implements RDF.Stream {
111122
const columnType = tableData.getColumn(column).type;
112123

113124
const predicate = XYZ(encodeURI(column));
114-
const object = this.mdbValueToObject(value, columnType);
125+
const object = this.mdbValueToObject(value, columnType, valueFunc);
115126

116127
yield this.#df.quad(row, predicate, object, graph);
117128
}
@@ -122,6 +133,8 @@ export class MSAccess extends Readable implements RDF.Stream {
122133

123134
/** Generate <csv:> quads. */
124135
private *csvQuads() {
136+
const valueFunc = this.#datatypeMode == "original" ? this.originalValue : this.easySparqlValue;
137+
125138
for (const tableName of this.#db.getTableNames()) {
126139
const table = this.#db.getTable(tableName);
127140
// Each table is a used as a graph
@@ -137,7 +150,7 @@ export class MSAccess extends Readable implements RDF.Stream {
137150

138151
const subject = CSVNS(`table/${encodeURI(tableName)}/row/${i_row}`);
139152
const predicate = CSVNS(encodeURI(column));
140-
const object = this.mdbValueToObject(value, columnType);
153+
const object = this.mdbValueToObject(value, columnType, valueFunc);
141154

142155
yield this.#df.quad(subject, predicate, object, context);
143156
}
@@ -146,9 +159,38 @@ export class MSAccess extends Readable implements RDF.Stream {
146159
}
147160
}
148161

162+
/**
 * Convert an Access value into a `[lexical form, datatype]` pair for the
 * `"easy-sparql"` datatype mode.
 *
 * Mapping by column type:
 * - Binary, OLE: base64 text typed `xsd:base64Binary`
 * - Boolean: `"true"` / `"false"` typed `xsd:boolean`
 * - DateTime: ISO 8601 text typed `xsd:dateTime`
 * - Double: `xsd:double`
 * - Float: `xsd:decimal` when the text is a plain decimal number,
 *   otherwise `xsd:double`
 * - BigInt, Byte, Integer, Complex, Long: `xsd:integer`
 * - Currency, DateTimeExtended, Memo, Numeric, RepID, Text: `xsd:string`
 *
 * @param value - The cell value; it is handed unchanged to the converter
 *   selected by `columnType`.
 * @param columnType - The column type used to pick the converter.
 * @returns The lexical form and the XSD datatype node for the literal.
 * @throws TypeError when a converter calls a method on a value that does not
 *   have it (for example `null`), or when `columnType` has no converter.
 */
private easySparqlValue(value: Value, columnType: ColumnType): [string, N3.NamedNode] {
  // xsd:decimal has no exponent notation, but Number#toString() emits one for
  // very small or very large magnitudes ("1e-7", "1e+21"). Only text matching
  // this pattern may be tagged xsd:decimal.
  const plainDecimal = /^-?\d+(\.\d+)?$/;

  const conv: Record<ColumnType, (v: Value) => [string, N3.NamedNode]> = {
    [ColumnType.Binary]: (v: Buffer) => [v.toString("base64"), XSD("base64Binary")],
    [ColumnType.OLE]: (v: Buffer) => [v.toString("base64"), XSD("base64Binary")],

    [ColumnType.Boolean]: (v: boolean) => [v ? "true" : "false", XSD("boolean")],

    [ColumnType.DateTime]: (v: Date) => [v.toISOString(), XSD("dateTime")],

    [ColumnType.Double]: (v: number) => [v.toString(), XSD("double")],

    [ColumnType.Float]: (v: number) => {
      const lexical = v.toString();
      // Fall back to xsd:double when the text is not a valid xsd:decimal
      // (exponent form or a non-finite value).
      return [lexical, XSD(plainDecimal.test(lexical) ? "decimal" : "double")];
    },

    [ColumnType.BigInt]: (v: bigint) => [v.toString(), XSD("integer")],
    [ColumnType.Byte]: (v: number) => [v.toString(), XSD("integer")],
    [ColumnType.Integer]: (v: number) => [v.toFixed(0), XSD("integer")],
    [ColumnType.Complex]: (v: number) => [v.toString(), XSD("integer")],
    [ColumnType.Long]: (v: number) => [v.toFixed(0), XSD("integer")],

    [ColumnType.Currency]: (v: string) => [v, XSD("string")],
    [ColumnType.DateTimeExtended]: (v: string) => [v, XSD("string")],
    [ColumnType.Memo]: (v: string) => [v, XSD("string")],
    [ColumnType.Numeric]: (v: string) => [v, XSD("string")],
    [ColumnType.RepID]: (v: string) => [v, XSD("string")],
    [ColumnType.Text]: (v: string) => [v, XSD("string")],
  };
  return conv[columnType](value);
}
190+
149191
/** Convert a MDB value to a RDF value with a specific datatype */
150-
mdbValueToObject(value: Value, columnType: ColumnType): RDF.Literal {
151-
// TODO: Not all datatypes have been checked with what Access produces
192+
private originalValue(value: Value, columnType: ColumnType): [string, N3.NamedNode] {
193+
// Alphabetical order, ColumnTypes from mdb-reader
152194
const conv: Record<ColumnType, (v: Value) => [string, N3.NamedNode]> = {
153195
[ColumnType.BigInt]: (v: bigint) => [v.toString(), XSD("integer")],
154196
[ColumnType.Binary]: (v: Buffer) => [v.toString("base64"), XSD("base64Binary")],
@@ -168,9 +210,17 @@ export class MSAccess extends Readable implements RDF.Stream {
168210
[ColumnType.RepID]: (v: string) => [v, XSD("string")],
169211
[ColumnType.Text]: (v: string) => [v, XSD("string")],
170212
};
213+
return conv[columnType](value);
214+
}
171215

216+
mdbValueToObject(
217+
value: Value,
218+
columnType: ColumnType,
219+
valueFunc: (value: Value, columnType: ColumnType) => [string, N3.NamedNode] | undefined
220+
): RDF.Literal {
221+
valueFunc = valueFunc ?? this.originalValue;
172222
try {
173-
const [nativeValue, languageOrDatatype] = conv[columnType](value);
223+
const [nativeValue, languageOrDatatype] = valueFunc(columnType, value);
174224
return this.#df.literal(nativeValue, languageOrDatatype);
175225
} catch (e) {
176226
return this.#df.literal(value as string);

0 commit comments

Comments
 (0)