Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,8 @@ praxis = "praxis.cli:main"
[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.package-data]
praxis = ["demo_data/stackoverflow_developer_survey/*"]

[tool.setuptools.packages.find]
where = ["src"]
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,7 @@
package_dir={"": "src"},
packages=find_packages(where="src"),
python_requires=">=3.9",
include_package_data=True,
package_data={"praxis": ["demo_data/stackoverflow_developer_survey/*"]},
entry_points={"console_scripts": ["praxis=praxis.cli:main"]},
)
59 changes: 26 additions & 33 deletions src/praxis/commands/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,23 @@
from __future__ import annotations

import argparse
import shutil
from importlib import resources
from pathlib import Path

from praxis.cli import main as praxis_main
from praxis.paths import default_root, research_dir


MODULES = {
"core": "Capture one local source, index it, and search it with explanations.",
"core": "Ingest a bundled Stack Overflow survey aggregate, index it, and search it with explanations.",
"reach": "Create one fixture GTM client, produce evidence, and build a context pack.",
"agency": "Create two fixture clients and run one workflow across both.",
"all": "Run the Core, Reach, and Agency demos.",
}

CORE_SOURCE = """# Praxis Core Demo Source

Praxis Core turns source material into searchable, source-traceable agent knowledge.

The important idea is not just retrieval. Praxis keeps raw evidence, summaries,
chunks, source IDs, hashes, graph links, confidence metadata, conflict warnings,
and rollback records attached to the knowledge agents use.

This demo source exists so a new user can see the Core loop without needing a
web page, API key, or private corpus.

## What An Agent Can Reuse

- Capture useful sources during real work.
- Search them later with semantic, keyword, and graph signals.
- Inspect why a result matched before trusting it.
- Promote selected knowledge into reusable instructions or references.
"""
CORE_DEMO_DATA = ("demo_data", "stackoverflow_developer_survey")
CORE_DEMO_SOURCE_ID = "src:stackoverflow-dev-survey-ai-tooling-mini"


def run_step(root: Path, label: str, args: list[str]) -> int:
Expand All @@ -46,17 +32,24 @@ def run_step(root: Path, label: str, args: list[str]) -> int:
return int(code)


def write_core_demo_source(root: Path) -> Path:
    """Write the built-in Core demo markdown source and return its path.

    The file is created (or overwritten) at
    ``<research_dir(root)>/demo_sources/praxis-core-demo.md`` with the
    contents of ``CORE_SOURCE``, encoded as UTF-8. Missing parent
    directories are created.
    """
    demo_dir = research_dir(root) / "demo_sources"
    demo_dir.mkdir(parents=True, exist_ok=True)
    destination = demo_dir / "praxis-core-demo.md"
    destination.write_text(CORE_SOURCE, encoding="utf-8")
    return destination
def copy_core_demo_dataset(root: Path) -> Path:
    """Copy the bundled Stack Overflow survey demo files into the research dir.

    Any existing copy at the target location is removed first, so the
    directory ends up holding only the files shipped inside the ``praxis``
    package. Only regular files directly inside the bundled data directory
    are copied; nested directories are skipped.

    Args:
        root: Praxis root, passed to ``research_dir`` to locate the target.

    Returns:
        The directory the dataset files were written to.
    """
    target = research_dir(root) / "demo_sources" / "stackoverflow_developer_survey"
    if target.exists():
        shutil.rmtree(target)
    target.mkdir(parents=True, exist_ok=True)
    # Descend one segment at a time: before Python 3.11 the Traversable
    # protocol only guarantees joinpath() with a single child, so passing
    # several segments at once can fail for zip/namespace packages on the
    # 3.9 and 3.10 interpreters this package still supports.
    data_root = resources.files("praxis")
    for segment in CORE_DEMO_DATA:
        data_root = data_root.joinpath(segment)
    for item in data_root.iterdir():
        if item.is_file():
            # Read/write bytes so the copy works for any Traversable,
            # not just real filesystem paths.
            (target / item.name).write_bytes(item.read_bytes())
    return target


def demo_core(root: Path) -> int:
print("# Praxis Core demo")
print("\nThis demo captures one local source, writes provisional SkillGraph memory, chunks it, embeds it, and searches it.")
source_path = write_core_demo_source(root)
print(
"\nThis demo ingests a bundled Stack Overflow Developer Survey aggregate, writes provisional SkillGraph memory, chunks it, embeds it, and searches it."
)
source_path = copy_core_demo_dataset(root)
steps = [
("Initialize relational DB", ["init-db"]),
("Initialize SkillGraph", ["init-graph"]),
Expand All @@ -66,22 +59,22 @@ def demo_core(root: Path) -> int:
"ingest",
str(source_path),
"--title",
"Praxis Core Demo Source",
"Stack Overflow Developer Survey AI Tooling Mini Dataset",
"--source-type",
"docs",
"survey",
"--source-id",
"src:praxis-core-demo",
CORE_DEMO_SOURCE_ID,
"--freshness-window-days",
"90",
"365",
"--notes",
"Generated by praxis demo core.",
"Bundled aggregate demo dataset derived from official Stack Overflow Developer Survey 2024 and 2025 archive files.",
],
),
("Chunk changed sources", ["chunk", "--changed-only", "--no-runtimes", "--no-skills"]),
("Embed chunks locally", ["embed", "--provider", "local-hash"]),
(
"Search with explanations",
["search", "how does Praxis keep agent knowledge reusable and traceable?", "--explain", "--limit", "3"],
["search", "Stack Overflow developer survey AI tool adoption trust accuracy developer segments", "--explain", "--limit", "3"],
),
]
for label, args in steps:
Expand All @@ -93,7 +86,7 @@ def demo_core(root: Path) -> int:
Core demo complete.

You saw the full source-to-search path:
source -> evidence archive -> provisional SkillGraph memory -> chunks -> embeddings -> explained search.
bundled aggregate survey data -> evidence archive -> provisional SkillGraph memory -> chunks -> embeddings -> explained search.

Try next:
praxis changes list
Expand Down
2 changes: 1 addition & 1 deletion src/praxis/commands/research_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def infer_source_type(source: str, text: str) -> str:


def credibility_score(source_type: str, metadata: dict[str, Any]) -> int:
if source_type in {"repo", "package", "paper", "docs"}:
if source_type in {"repo", "package", "paper", "docs", "survey"}:
return 4
if source_type == "local":
return 3
Expand Down
42 changes: 42 additions & 0 deletions src/praxis/demo_data/stackoverflow_developer_survey/BUILD.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Build Notes

This demo dataset is an aggregate-only excerpt derived from the official Stack Overflow Developer Survey archives.

## Source Inputs

- 2024 schema: `https://media.githubusercontent.com/media/StackExchange/Survey/main/packages/archive/2024/schema.csv`
- 2024 results: `https://media.githubusercontent.com/media/StackExchange/Survey/main/packages/archive/2024/results.csv`
- 2025 schema: `https://media.githubusercontent.com/media/StackExchange/Survey/main/packages/archive/2025/schema.csv`
- 2025 results: `https://media.githubusercontent.com/media/StackExchange/Survey/main/packages/archive/2025/results.csv`

The raw source files are not committed to this repository. Their downloaded byte counts and SHA-256 checksums are recorded in `source_manifest.json`.

## Aggregation

The bundled CSVs are generated from the following fields, which are shared by the 2024 and 2025 survey schemas:

- `AISelect`
- `AISent`
- `AIAcc`
- `AIComplex`
- `AIThreat`
- `DevType`
- `LanguageHaveWorkedWith`
- `DatabaseHaveWorkedWith`
- `PlatformHaveWorkedWith`
- `WebframeHaveWorkedWith`

Generated files include:

- `ai_attitudes_by_year.csv`: response counts and percentages for shared AI attitude fields.
- `ai_developer_segments.csv`: developer-segment rollups for adoption, favorability, trust, complex-task handling, and job-threat perception.
- `developer_tooling_top_items.csv`: top shared tooling items by year.
- `shared_schema_fields.csv`: selected schema fields and 2024/2025 result-column mappings.

## Privacy Boundary

Only aggregate rows are bundled. Raw respondent-level rows, free-text answers, and survey PDFs are excluded from the repository.

## License

The source survey database is distributed by Stack Overflow under ODbL 1.0, with database contents under DbCL 1.0. Preserve attribution and license terms if redistributing or modifying this derived dataset.
34 changes: 34 additions & 0 deletions src/praxis/demo_data/stackoverflow_developer_survey/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Stack Overflow Developer Survey AI Tooling Mini Dataset

This bundled Praxis Core demo source is a small aggregate excerpt derived from the official Stack Overflow Developer Survey archives for 2024 and 2025.

It is designed for a first-run Praxis demo: no credentials, no large download, no respondent-level records. The raw survey files stay outside the repo; only aggregate counts, percentages, schema field notes, and source provenance are bundled.

## What Praxis Can Retrieve

- AI adoption by year from `AISelect`.
- Developer favorability toward AI tools from `AISent`.
- Trust in AI output accuracy from `AIAcc`.
- Perceived ability to handle complex tasks from `AIComplex`.
- Job-threat perception from `AIThreat`.
- Developer-segment rollups by `DevType`.
- Top shared tooling items from language, database, platform, and web framework columns.

## Example Aggregate Findings

- In the 2024 survey excerpt, 37,662 of 65,437 respondents answered that they used AI tools in their development process.
- In the 2025 survey excerpt, 26,469 of 49,191 respondents reported daily, weekly, monthly, or infrequent AI tool use.
- The segment file lets Praxis compare AI use, favorability, trust, complex-task handling, and job-threat perception across common developer types.
- The tooling file lets Praxis retrieve common language, database, platform, and web framework signals alongside AI attitudes.

## Files

- `ai_attitudes_by_year.csv`: response counts and percentages for shared AI attitude fields.
- `ai_developer_segments.csv`: aggregate AI metrics by developer segment.
- `developer_tooling_top_items.csv`: top tooling items from shared HaveWorkedWith columns.
- `shared_schema_fields.csv`: selected shared schema fields and result-column mappings.
- `source_manifest.json`: source URLs, access dates, licenses, downloaded byte counts, and SHA-256 checksums.

## License And Attribution

This aggregate excerpt is derived from Stack Overflow Developer Survey data. The source survey database is distributed by Stack Overflow under ODbL 1.0, with database contents under DbCL 1.0. Preserve attribution and source-license terms if redistributing or modifying this derived dataset.
Loading
Loading