Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Each stage is a single function in its own module. They communicate through plai
| `cache.py` | `check_semantic_cache / save_semantic_cache` | files → (cached, uncached) split |
| `security.py` | validation helpers | URL / path / label → validated or raises |
| `validate.py` | `validate_extraction(data)` | extraction dict → raises on schema errors |
| `storage.py` | `init_db / ingest_extraction / ingest_communities` | extraction dict → NeuG `graph.db` (optional, requires `neug`) |
| `serve.py` | `start_server(graph_path)` | graph file path → MCP stdio server |
| `watch.py` | `watch(root, flag_path)` | directory → writes flag file on change |
| `benchmark.py` | `run_benchmark(graph_path)` | graph file → corpus vs subgraph token comparison |
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ Install only what you need:
| `video` | Video/audio transcription (faster-whisper + yt-dlp) | `uv tool install "graphifyy[video]"` |
| `mcp` | MCP stdio server | `uv tool install "graphifyy[mcp]"` |
| `neo4j` | Neo4j push support | `uv tool install "graphifyy[neo4j]"` |
| `neug` | [NeuG](https://github.com/alibaba/neug) embedded graph database — Cypher queries on your graph | `uv tool install "graphifyy[neug]"` |
| `svg` | SVG graph export | `uv tool install "graphifyy[svg]"` |
| `leiden` | Leiden community detection (Python < 3.13 only) | `uv tool install "graphifyy[leiden]"` |
| `ollama` | Ollama local inference | `uv tool install "graphifyy[ollama]"` |
Expand Down Expand Up @@ -449,6 +450,9 @@ graphify install # overwrites the skill file
/graphify ./raw --graphml # export for Gephi / yEd
/graphify ./raw --neo4j # generate cypher.txt for Neo4j
/graphify ./raw --neo4j-push bolt://localhost:7687

graphify cypher "MATCH (n) RETURN n LIMIT 10" # query graph.db with Cypher (requires neug)
graphify cypher "MATCH (n:code)-[e]->(m) RETURN n.id, e, m.id LIMIT 10" --db path/to/graph.db # --db is optional; it defaults to graphify-out/graph.db
/graphify ./raw --watch # auto-sync as files change
/graphify ./raw --mcp # start MCP stdio server

Expand Down
58 changes: 58 additions & 0 deletions graphify/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1761,6 +1761,8 @@ def main() -> None:
print(" --backend=<name> backend to use for community naming (default: auto-detect)")
print(" label <path> (re)name communities with the configured LLM backend, regenerate report")
print(" --backend=<name> backend to use (default: auto-detect from API keys)")
print(" cypher \"MATCH ...\" execute a Cypher query against graph.db (requires neug)")
print(" --db <path> path to graph.db (default graphify-out/graph.db)")
print(" query \"<question>\" BFS traversal of graph.json for a question")
print(" --dfs use depth-first instead of breadth-first")
print(" --context C explicit edge-context filter (repeatable)")
Expand Down Expand Up @@ -2240,6 +2242,31 @@ def main() -> None:
else:
print("Usage: graphify hook [install|uninstall|status]", file=sys.stderr)
sys.exit(1)
elif cmd == "cypher":
if len(sys.argv) < 3:
print('Usage: graphify cypher "MATCH ..." [--db path]', file=sys.stderr)
sys.exit(1)
query_str = sys.argv[2]
db_path = str(Path(_GRAPHIFY_OUT) / "graph.db")
args = sys.argv[3:]
for i, a in enumerate(args):
if a == "--db" and i + 1 < len(args):
db_path = args[i + 1]
try:
from graphify.storage import init_db, execute_cypher, close_db
except ImportError:
print("error: neug is not installed. Run: pip install neug", file=sys.stderr)
sys.exit(1)
if not Path(db_path).exists():
print(f"error: database not found: {db_path}", file=sys.stderr)
sys.exit(1)
db, conn = init_db(db_path)
try:
results = execute_cypher(conn, query_str)
for row in results:
print("\t".join(str(v) for v in row))
finally:
close_db(db, conn)
elif cmd == "query":
if len(sys.argv) < 3:
print("Usage: graphify query \"<question>\" [--dfs] [--context C] [--budget N] [--graph path]", file=sys.stderr)
Expand Down Expand Up @@ -3829,6 +3856,21 @@ def _progress(idx: int, total: int, _result: dict) -> None:
graph_json_path.write_text(
json.dumps(merged, indent=2), encoding="utf-8"
)
try:
from graphify.storage import init_db as _init_db, ensure_schema as _ensure_schema, ingest_extraction as _ingest, close_db as _close_db
_db_path = str(graphify_out / "graph.db")
_is_inc = Path(_db_path).exists()
_db, _conn = _init_db(_db_path)
_known = _ensure_schema(_conn, create_tables=not _is_inc)
_ingest(_conn, merged, incremental=_is_inc,
prune_sources=deleted_files or None, root=target,
known_tables=_known)
_close_db(_db, _conn)
print("[graphify extract] graph.db written (powered by NeuG)")
except ImportError:
pass
except Exception as _exc:
print(f"[graphify extract] warning: NeuG write failed: {_exc}", file=sys.stderr)
cost = _estimate_cost(
backend, merged["input_tokens"], merged["output_tokens"]
)
Expand Down Expand Up @@ -3906,6 +3948,22 @@ def _progress(idx: int, total: int, _result: dict) -> None:
from graphify.export import backup_if_protected as _backup
_backup(graphify_out)
_to_json(G, communities, str(graph_json_path), force=True)
try:
from graphify.storage import init_db as _init_db, ensure_schema as _ensure_schema, ingest_extraction as _ingest, ingest_communities as _ingest_comm, close_db as _close_db
_db_path = str(graphify_out / "graph.db")
_is_inc = Path(_db_path).exists()
_db, _conn = _init_db(_db_path)
_known = _ensure_schema(_conn, create_tables=not _is_inc)
_ntypes = _ingest(_conn, merged, incremental=_is_inc,
prune_sources=deleted_files or None, root=target,
known_tables=_known)
_ingest_comm(_conn, communities, node_types=_ntypes)
_close_db(_db, _conn)
print("[graphify extract] graph.db written (powered by NeuG)")
except ImportError:
pass
except Exception as _exc:
print(f"[graphify extract] warning: NeuG write failed: {_exc}", file=sys.stderr)
if merged.get("output_tokens", 0) > 0:
(graphify_out / ".graphify_semantic_marker").write_text(
json.dumps({"output_tokens": merged["output_tokens"]}), encoding="utf-8"
Expand Down
45 changes: 45 additions & 0 deletions graphify/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,20 @@ def serve(graph_path: str = "graphify-out/graph.json") -> None:
G = _load_graph(graph_path)
communities = _communities_from_graph(G)

_neug_conn = None
_neug_db = None
_neug_execute = None
try:
from graphify.storage import init_db as _neug_init, execute_cypher as _neug_exec, close_db as _neug_close
_neug_db_path = str(Path(graph_path).parent / "graph.db")
if Path(_neug_db_path).exists():
_neug_db, _neug_conn = _neug_init(_neug_db_path)
_neug_execute = _neug_exec
except ImportError:
pass
except Exception:
pass

# Hot-reload state: mtime+size key lets us detect graph.json changes without
# polling. Initialised from the file stat at startup so the first tool call
# never triggers a redundant reload.
Expand Down Expand Up @@ -646,6 +660,20 @@ async def list_tools() -> list[types.Tool]:
},
},
),
types.Tool(
name="cypher_query",
description=(
"Execute a Cypher query against the NeuG graph database. "
"Returns tabular results. Requires neug to be installed and graph.db to exist."
),
inputSchema={
"type": "object",
"properties": {
"query": {"type": "string", "description": "Cypher query string"},
},
"required": ["query"],
},
),
]

def _tool_query_graph(arguments: dict) -> str:
Expand Down Expand Up @@ -882,6 +910,22 @@ def _tool_triage_prs(arguments: dict) -> str:
)
return "\n\n".join(lines)

def _tool_cypher_query(arguments: dict) -> str:
    """Run a Cypher query on the NeuG connection and format the rows as text.

    Args:
        arguments: Tool-call arguments; ``arguments["query"]`` holds the
            Cypher query string.

    Returns:
        One line per result row, with each value converted via ``str`` and
        joined by tabs. A short message is returned instead when the NeuG
        connection is unavailable, the query argument is missing or blank,
        the query fails with ``RuntimeError``, or no rows come back.
    """
    if _neug_conn is None:
        return "NeuG not available (not installed or graph.db not found)."
    query = arguments.get("query")
    if not isinstance(query, str) or not query.strip():
        # Answer with a message, like the other failure paths in this
        # handler, instead of letting a KeyError escape on a malformed call.
        return "Cypher error: 'query' must be a non-empty string."
    from graphify.storage import execute_cypher as _exec_cypher
    try:
        results = _exec_cypher(_neug_conn, query)
    except RuntimeError as exc:
        return f"Cypher error: {exc}"
    if not results:
        return "No results."
    return "\n".join("\t".join(str(v) for v in row) for row in results)

_handlers = {
"query_graph": _tool_query_graph,
"get_node": _tool_get_node,
Expand All @@ -893,6 +937,7 @@ def _tool_triage_prs(arguments: dict) -> str:
"list_prs": _tool_list_prs,
"get_pr_impact": _tool_get_pr_impact,
"triage_prs": _tool_triage_prs,
"cypher_query": _tool_cypher_query,
}

def _load_community_labels() -> dict[int, str]:
Expand Down
Loading