Skip to content

Commit dddf5b4

Browse files
committed
Add test filesection indexer
This is a test implementation of storing the section data in a similar way to filetreediff. The goal would be to build a command palette that initially lets users do some basic filtering of just the filenames.
1 parent e4d23c3 commit dddf5b4

File tree

5 files changed

+165
-0
lines changed

5 files changed

+165
-0
lines changed

readthedocs/filesections/__init__.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Module for the file sections feature.
3+
4+
This feature stores, for each page in the index, its path and the id and title of each of its sections.
5+
"""
6+
7+
import json
8+
import logging
9+
10+
from readthedocs.builds.models import Version
11+
from readthedocs.filesections.dataclasses import FileSectionManifest
12+
from readthedocs.projects.constants import MEDIA_TYPE_SECTIONS
13+
from readthedocs.storage import build_media_storage
14+
15+
SECTION_MANIFEST_FILE_NAME = "sections_manifest.json"
16+
17+
log = logging.getLogger(__name__)
18+
19+
20+
def get_section_manifest(version: Version) -> FileSectionManifest | None:
    """
    Load the section manifest stored for ``version``.

    The manifest is read as JSON from ``SECTION_MANIFEST_FILE_NAME`` inside
    the version's ``MEDIA_TYPE_SECTIONS`` storage path.

    :param version: version whose manifest should be loaded.
    :returns: the parsed manifest, or ``None`` when the manifest file does
        not exist in build media storage.
    """
    manifest_path = build_media_storage.join(
        version.project.get_storage_path(
            type_=MEDIA_TYPE_SECTIONS,
            version_slug=version.slug,
            include_file=False,
            version_type=version.type,
        ),
        SECTION_MANIFEST_FILE_NAME,
    )

    try:
        with build_media_storage.open(manifest_path) as stored_file:
            raw_manifest = json.load(stored_file)
            log.info(f"Loaded section manifest from {manifest_path}")
    except FileNotFoundError:
        # A missing manifest is reported to the caller as ``None``.
        log.warning(f"Section manifest not found at {manifest_path}")
        return None
    else:
        return FileSectionManifest.from_dict(raw_manifest)
37+
38+
39+
def write_section_manifest(version: Version, manifest: FileSectionManifest):
    """
    Write ``manifest`` as JSON to the sections storage path of ``version``.

    The file is named ``SECTION_MANIFEST_FILE_NAME`` and lives inside the
    version's ``MEDIA_TYPE_SECTIONS`` storage path.

    :param version: version the manifest belongs to.
    :param manifest: manifest to serialize.
    """
    # Serialize first, so a failure in ``as_dict()`` happens before the
    # storage file is opened for writing.
    manifest_dict = manifest.as_dict()

    storage_path = version.project.get_storage_path(
        type_=MEDIA_TYPE_SECTIONS,
        version_slug=version.slug,
        include_file=False,
        version_type=version.type,
    )
    manifest_path = build_media_storage.join(storage_path, SECTION_MANIFEST_FILE_NAME)

    # Log only the destination and the page count: the full manifest holds
    # every page and section, so dumping it would bloat the logs.
    log.info(
        "Writing section manifest to %s (%d pages)",
        manifest_path,
        len(manifest.pages),
    )
    with build_media_storage.open(manifest_path, "w") as f:
        json.dump(manifest_dict, f)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from dataclasses import asdict, dataclass
2+
3+
4+
@dataclass(slots=True)
5+
class FileSection:
6+
id: str
7+
title: str
8+
9+
10+
@dataclass(slots=True)
11+
class Page:
12+
path: str
13+
sections: list[FileSection]
14+
15+
16+
@dataclass(slots=True)
17+
class FileSectionManifest:
18+
build: int
19+
pages: list[Page]
20+
21+
def __init__(self, build_id: int, pages: list[Page]):
22+
self.build = build_id
23+
self.pages = pages
24+
25+
@classmethod
26+
def from_dict(cls, data: dict) -> "FileSectionManifest":
27+
build_id = data["build"]
28+
pages = [
29+
Page(
30+
path=page["path"],
31+
sections=[FileSection(**section) for section in page["sections"]],
32+
)
33+
for page in data["pages"]
34+
]
35+
return cls(build_id, pages)
36+
37+
def as_dict(self) -> dict:
38+
return asdict(self)

readthedocs/projects/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
MEDIA_TYPE_HTMLZIP = "htmlzip"
3636
MEDIA_TYPE_JSON = "json"
3737
MEDIA_TYPE_DIFF = "diff"
38+
MEDIA_TYPE_SECTIONS = "sections"
3839
DOWNLOADABLE_MEDIA_TYPES = (
3940
MEDIA_TYPE_PDF,
4041
MEDIA_TYPE_EPUB,
@@ -47,6 +48,7 @@
4748
MEDIA_TYPE_HTMLZIP,
4849
MEDIA_TYPE_JSON,
4950
MEDIA_TYPE_DIFF,
51+
MEDIA_TYPE_SECTIONS,
5052
)
5153

5254
BUILD_COMMANDS_OUTPUT_PATH = "_readthedocs/"

readthedocs/projects/tasks/search.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
from readthedocs.builds.constants import BUILD_STATE_FINISHED, INTERNAL, LATEST
66
from readthedocs.builds.models import Build, Version
7+
from readthedocs.filesections import write_section_manifest
8+
from readthedocs.filesections.dataclasses import FileSection, FileSectionManifest, Page
79
from readthedocs.filetreediff import write_manifest
810
from readthedocs.filetreediff.dataclasses import FileTreeDiffFile, FileTreeDiffManifest
911
from readthedocs.projects.models import Feature, HTMLFile, Project
@@ -142,6 +144,36 @@ def collect(self, sync_id: int):
142144
write_manifest(self.version, manifest)
143145

144146

147+
class FileSectionIndexer(Indexer):
    """
    Indexer that gathers the sections of each HTML file into a manifest.

    Pages are accumulated by ``process`` and handed to
    ``write_section_manifest`` by ``collect``.
    """

    def __init__(self, version: Version, build: Build):
        self.version = version
        self.build = build
        # One ``Page`` per processed file whose ``processed_json`` was truthy.
        self.pages = []

    def process(self, html_file: HTMLFile, sync_id: int):
        """Record the id and title of every section found in ``html_file``."""
        log.debug("Processing file for sections", path=html_file.path)
        parsed = html_file.processed_json
        if parsed:
            page_sections = []
            for entry in parsed.get("sections", []):
                page_sections.append(
                    FileSection(id=entry["id"], title=entry["title"])
                )
            self.pages.append(Page(path=html_file.path, sections=page_sections))
        log.debug("Finished processing file for sections", path=html_file.path)

    def collect(self, sync_id: int):
        """Write the accumulated pages as the section manifest of this build."""
        build_id = self.build.id
        log.debug("Collecting sections for manifest", build_id=build_id)
        write_section_manifest(
            self.version,
            FileSectionManifest(build_id=build_id, pages=self.pages),
        )
        log.debug("Finished collecting sections for manifest", build_id=build_id)
175+
176+
145177
def _get_indexers(*, version: Version, build: Build, search_index_name=None):
146178
build_config = build.config or {}
147179
search_config = build_config.get("search", {})
@@ -182,6 +214,13 @@ def _get_indexers(*, version: Version, build: Build, search_index_name=None):
182214
version=version,
183215
)
184216
indexers.append(index_file_indexer)
217+
218+
file_section_indexer = FileSectionIndexer(
219+
version=version,
220+
build=build,
221+
)
222+
indexers.append(file_section_indexer)
223+
185224
return indexers
186225

187226

readthedocs/proxito/views/hosting.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from readthedocs.core.resolver import Resolver
2424
from readthedocs.core.unresolver import UnresolverError, unresolver
2525
from readthedocs.core.utils.extend import SettingsOverrideObject
26+
from readthedocs.filesections import get_section_manifest
2627
from readthedocs.filetreediff import get_diff
2728
from readthedocs.projects.constants import (
2829
ADDONS_FLYOUT_SORTING_CALVER,
@@ -535,6 +536,9 @@ def _v1(self, project, version, build, filename, url, request):
535536
"filetreediff": {
536537
"enabled": False,
537538
},
539+
"filesections": {
540+
"enabled": False,
541+
},
538542
},
539543
}
540544

@@ -548,6 +552,13 @@ def _v1(self, project, version, build, filename, url, request):
548552
if response:
549553
data["addons"]["filetreediff"].update(response)
550554

555+
sections_response = self._get_filesections_response(
556+
project=project,
557+
version=version,
558+
)
559+
if sections_response:
560+
data["addons"]["filesections"].update(sections_response)
561+
551562
# Show the subprojects filter on the parent project and subproject
552563
# TODO: Remove these queries and try to find a way to get this data
553564
# from the resolver, which has already done these queries.
@@ -711,6 +722,31 @@ def _get_filetreediff_response(self, *, request, project, version, resolver):
711722
},
712723
}
713724

725+
def _get_filesections_response(self, *, project, version):
    """
    Build the ``filesections`` addon payload for the given version.

    Returns ``None`` when ``get_section_manifest`` yields no manifest.
    Otherwise returns a dict with ``enabled`` set to ``True`` and one entry
    per manifest page under ``sections``, each carrying the page path and
    the id and title of its sections.
    """
    manifest = get_section_manifest(version)
    if not manifest:
        return None

    pages_payload = []
    for page in manifest.pages:
        page_entry = {"path": page.path}
        page_entry["sections"] = [
            {"id": section.id, "title": section.title}
            for section in page.sections
        ]
        pages_payload.append(page_entry)

    return {"enabled": True, "sections": pages_payload}
749+
714750
def _v2(self, project, version, build, filename, url, user):
715751
return {
716752
"api_version": "2",

0 commit comments

Comments
 (0)