Skip to content

Commit

Permalink
Fixes works filtering
Browse files Browse the repository at this point in the history
Details:
- Fixes indices, aggregations, wording and filtering for work nodes
- Improves Graph typing
- Removes TODO list
  • Loading branch information
jacomyal committed Nov 26, 2024
1 parent ca16b4f commit 72131ba
Show file tree
Hide file tree
Showing 10 changed files with 96 additions and 34 deletions.
8 changes: 0 additions & 8 deletions TODO.md

This file was deleted.

6 changes: 4 additions & 2 deletions src/components/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { getDefaultFilters, getFilteredGraph } from "../lib/filters";
import { aggregateFieldIndices } from "../lib/getAggregations";
import { prepareGraph } from "../lib/prepareGraph";
import { Aggregations, FieldIndices, FiltersType, Work } from "../lib/types";
import { wait } from "../lib/utils";
import "./App.css";
import Filters from "./Filters";
import Home from "./Home";
Expand All @@ -24,10 +25,11 @@ const App: FC = () => {
const [loaderMessage, setLoaderMessage] = useState<string | null>(null);

const prepareData = useCallback(
async (works: Work[]) => {
async (promise: Promise<Work[]>) => {
setIsLoading(true);
const works = await promise;
const indices = await indexWorks(works);
const aggregations = aggregateFieldIndices(indices);
const aggregations = aggregateFieldIndices(indices, works);

setIsLoading(false);
setData({
Expand Down
7 changes: 5 additions & 2 deletions src/components/Filters.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,14 @@ const Filters: FC<{
const maxCount = max(agg.values.map((v) => v.count));
const count = aggValue ? aggValue.count : 0;

const intro = field === "records" ? "with at least" : "occurring in at least";
const unity = field === "records" ? "citation" : "record";

return (
<div key={label}>
<div>
Keep the <strong>{count}</strong> <span className="hg">{label.toLowerCase()}</span> occurring in at
least <strong>{value}</strong> record
Keep the <strong>{count}</strong> <span className="hg">{label.toLowerCase()}</span> {intro}{" "}
<strong>{value}</strong> {unity}
{value > 1 ? "s" : ""}
</div>
<div>
Expand Down
6 changes: 3 additions & 3 deletions src/components/Home.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import "./Home.css";
const STORAGE_LAST_QUERY_KEY = "lastQueryKey";

const Home: FC<{
onSubmit(dataset: Work[]): void;
onSubmit(promise: Promise<Work[]>): void;
}> = ({ onSubmit }) => {
const [initialQueryURL, setInitialQueryURL] = useLocalStorage(STORAGE_LAST_QUERY_KEY);
const [queryURL, setQueryURL] = useState<string | null | undefined>(initialQueryURL);
Expand Down Expand Up @@ -93,10 +93,10 @@ const Home: FC<{
disabled={!files.length && !queryURL}
onClick={() => {
if (files.length) {
fetchFiles(files).then((results) => onSubmit(results));
onSubmit(fetchFiles(files));
} else if (queryURL) {
setInitialQueryURL(queryURL);
fetchQuery(queryURL).then((results) => onSubmit(results));
onSubmit(fetchQuery(queryURL));
}
}}
>
Expand Down
19 changes: 19 additions & 0 deletions src/lib/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,25 @@ export async function fetchQuery(
url.searchParams.set("sort", "cited_by_count:desc");
url.searchParams.set("mailto", DEFAULT_MAILTO);
url.searchParams.set("per-page", `${PER_PAGE}`);
// Set the `select` query parameter to a comma-separated list of work fields
// (presumably so that the API only returns these fields — confirm against the
// OpenAlex `select` documentation). Each field must be listed only once.
url.searchParams.set(
  "select",
  [
    "id",
    "display_name",
    "authorships",
    "cited_by_count",
    "grants",
    "keywords",
    "locations",
    "primary_location",
    "primary_topic",
    "publication_year",
    "referenced_works",
    "title",
    "topics",
  ].join(","),
);

let works = await Promise.all(
[...Array(numReq).keys()].map(async (i) => {
Expand Down
4 changes: 2 additions & 2 deletions src/lib/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,10 @@ export const FIELDS_META: Record<
})),
},
records: {
label: "Records",
label: "Works",
color: "#A6A6A6",
threshold: 50,
getValues: (work: Work) => ({ id: work.id, label: work.display_name || work.title || work.id }),
getValues: () => [],
},
refs: {
label: "References",
Expand Down
44 changes: 34 additions & 10 deletions src/lib/filters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,17 @@ import { sortBy, zipObject } from "lodash";
import { combinations } from "obliterator";

import { FIELDS_META, cleanFieldValues } from "./consts";
import { Aggregations, FIELD_IDS, FieldID, FieldIndices, FiltersType, Work } from "./types";
import {
Aggregations,
BiblioGraph,
EdgeAttributes,
FIELD_IDS,
FieldID,
FieldIndices,
FiltersType,
NodeAttributes,
Work,
} from "./types";

export function getDefaultFilters(aggregations: Aggregations): FiltersType {
return zipObject(
Expand All @@ -26,15 +36,28 @@ export function isValueOK(field: FieldID, value: string, indices: FieldIndices,
return (indices[field][value]?.count || 0) >= filters[field];
}

export async function getFilteredGraph(works: Work[], indices: FieldIndices, filters: FiltersType): Promise<Graph> {
const graph = new Graph();
export async function getFilteredGraph(
works: Work[],
indices: FieldIndices,
filters: FiltersType,
): Promise<BiblioGraph> {
const graph = new Graph<NodeAttributes, EdgeAttributes>();

for (let workIndex = 0; workIndex < works.length; workIndex++) {
const work = works[workIndex];

// Index nodes:
const referenceNodes: string[] = [];
const metadataNodes: string[] = [];

const work = works[workIndex];
if ((work.cited_by_count || 0) >= filters.records) {
const [workNode] = graph.mergeNode(`records::${work.id}`, {
label: work.display_name,
dataType: "records",
color: FIELDS_META.records.color,
});
metadataNodes.push(workNode);
}

for (let fieldIndex = 0; fieldIndex < FIELD_IDS.length; fieldIndex++) {
const field = FIELD_IDS[fieldIndex];
const { getValues, color } = FIELDS_META[field];
Expand All @@ -43,12 +66,13 @@ export async function getFilteredGraph(works: Work[], indices: FieldIndices, fil
values
.filter((v) => isValueOK(field, v.id, indices, filters))
.forEach(({ id, label }) => {
const [n] = graph.mergeNode(id, {
const [n] = graph.mergeNode(`${field}::${id}`, {
entityId: id,
label,
dataType: field,
color,
});
const nbArticles = (graph.getNodeAttribute(id, "nbArticles") || 0) + 1;
const nbArticles = (graph.getNodeAttribute(n, "nbArticles") || 0) + 1;
graph.mergeNodeAttributes(n, {
nbArticles,
size: Math.sqrt(nbArticles),
Expand All @@ -74,14 +98,14 @@ export async function getFilteredGraph(works: Work[], indices: FieldIndices, fil
}

// Add edges between refs and metadata
referenceNodes.forEach((ref) =>
referenceNodes.forEach((ref) => {
metadataNodes.forEach((m) => {
graph.mergeEdge(ref, m);
graph.mergeEdgeAttributes(ref, m, {
weight: (graph.getEdgeAttribute(ref, m, "weight") || 0) + 1,
});
}),
);
});
});
}

// Remove orphans
Expand Down
10 changes: 7 additions & 3 deletions src/lib/getAggregations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {
zipObject,
} from "lodash";

import { Aggregation, Aggregations, FIELD_IDS, FieldIndices } from "./types";
import { Aggregation, Aggregations, FIELD_IDS, FieldIndices, Work } from "./types";

function aggregateCumulativeNumbers(values: number[]): Aggregation {
// count cumulative number of occurrences
Expand Down Expand Up @@ -43,7 +43,7 @@ function aggregateCumulativeNumbers(values: number[]): Aggregation {
};
}

export function aggregateFieldIndices(fieldIndices: FieldIndices): Aggregations {
export function aggregateFieldIndices(fieldIndices: FieldIndices, works: Work[]): Aggregations {
const aggregations = zipObject(
FIELD_IDS,
FIELD_IDS.map(() => ({})),
Expand All @@ -52,7 +52,11 @@ export function aggregateFieldIndices(fieldIndices: FieldIndices): Aggregations
// Aggregate the indices:
FIELD_IDS.forEach((field) => {
// Calculate cumulative buckets
aggregations[field] = aggregateCumulativeNumbers(map(fieldIndices[field], ({ count }) => count));
if (field === "records") {
aggregations[field] = aggregateCumulativeNumbers(works.map((work) => work.cited_by_count || 0));
} else {
aggregations[field] = aggregateCumulativeNumbers(map(fieldIndices[field], ({ count }) => count));
}
});

return aggregations;
Expand Down
8 changes: 4 additions & 4 deletions src/lib/prepareGraph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@ import { largestConnectedComponent } from "graphology-components";
import { circular } from "graphology-layout";
import forceAtlas2 from "graphology-layout-forceatlas2";
import { subgraph } from "graphology-operators";
import { zipObject } from "lodash";
import { Coordinates } from "sigma/types";

import { fetchRefsLabels } from "./api";
import { sampleKPoints } from "./kMeans";
import { BiblioGraph } from "./types";
import { wait } from "./utils";

const maxNodeSizes = {
references: 30,
metadata: 50,
};

export async function prepareGraph(graph: Graph): Promise<Graph> {
export async function prepareGraph(graph: BiblioGraph): Promise<BiblioGraph> {
const largest = largestConnectedComponent(graph);
const mainGraph = subgraph(graph, largest);

Expand Down Expand Up @@ -121,11 +121,11 @@ export async function prepareGraph(graph: Graph): Promise<Graph> {
const { x, y } = mainGraph.getNodeAttributes(node) as Coordinates;
return { id: node, coordinates: { x, y } };
});
const refsWithLabels = sampleKPoints(allRefs, 15, 5).map((p) => p.id);
const refsWithLabels = (allRefs.length > 20 ? sampleKPoints(allRefs, 15, 5) : allRefs).map((p) => p.id);

const labels = await fetchRefsLabels(refsWithLabels);
mainGraph.forEachNode((node, attributes) => {
if (attributes.dataType === "refs") mainGraph.setNodeAttribute(node, "label", labels[node] || null);
if (attributes.dataType === "refs") mainGraph.setNodeAttribute(node, "label", labels[attributes.entityId] || null);
});

return Promise.resolve(mainGraph);
Expand Down
18 changes: 18 additions & 0 deletions src/lib/types.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import Graph from "graphology";

export const FIELD_IDS = [
"authors",
"countries",
Expand Down Expand Up @@ -28,6 +30,22 @@ export type Aggregation = {
};
export type Aggregations = Record<FieldID, Aggregation>;

/**
 * Attributes stored on the nodes of the bibliographic graph.
 */
export type NodeAttributes = {
// Raw identifier of the entity (getFilteredGraph uses `${field}::${id}` as the node key):
entityId: string;
// Display label; prepareGraph sets it to null for "refs" nodes with no fetched label:
label: string | null;
// Field the node belongs to:
dataType: FieldID;
color: string;
// Incremented by one each time getFilteredGraph merges the node for a work:
nbArticles: number;
// getFilteredGraph sets it to Math.sqrt(nbArticles):
size?: number;
// Classic sigma attributes:
x: number;
y: number;
fixed?: boolean;
};

/**
 * Attributes stored on the edges of the bibliographic graph. getFilteredGraph
 * increments `weight` by one each time it merges the same reference/metadata edge.
 */
export type EdgeAttributes = { weight: number };
/** Graphology graph typed with the node and edge attributes declared in this file. */
export type BiblioGraph = Graph<NodeAttributes, EdgeAttributes>;

/**
* OPEN ALEX DATA TYPES:
* *********************
Expand Down

0 comments on commit 72131ba

Please sign in to comment.