Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to
- ✅(export) add PDF regression tests #1762
- 📝(docs) Add language configuration documentation #1757
- 🔒(helm) Set default security context #1750
- ✨ Import of documents #7765
- ✨(backend) use langfuse to monitor AI actions #1776

### Changed
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ logs: ## display app-dev logs (follow mode)
.PHONY: logs

run-backend: ## Start only the backend application and all needed services
@$(COMPOSE) up --force-recreate -d docspec
@$(COMPOSE) up --force-recreate -d celery-dev
@$(COMPOSE) up --force-recreate -d y-provider-development
@$(COMPOSE) up --force-recreate -d nginx
Expand Down
3 changes: 3 additions & 0 deletions bin/Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ docker_build(
dockerfile='../Dockerfile',
only=['./src/backend', './src/mail', './docker'],
target = 'backend-production',
build_args={'DOCKER_USER': '1000:1000'},
live_update=[
sync('../src/backend', '/app'),
run(
Expand All @@ -23,6 +24,7 @@ docker_build(
dockerfile='../src/frontend/servers/y-provider/Dockerfile',
only=['./src/frontend/', './docker/', './.dockerignore'],
target = 'y-provider',
build_args={'DOCKER_USER': '1000:1000'},
live_update=[
sync('../src/frontend/servers/y-provider/src', '/home/frontend/servers/y-provider/src'),
]
Expand All @@ -34,6 +36,7 @@ docker_build(
dockerfile='../src/frontend/Dockerfile',
only=['./src/frontend', './docker', './.dockerignore'],
target = 'impress',
build_args={'DOCKER_USER': '1000:1000'},
live_update=[
sync('../src/frontend', '/home/frontend'),
]
Expand Down
5 changes: 5 additions & 0 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,11 @@ services:
condition: service_healthy
restart: true

docspec:
image: ghcr.io/docspecio/api:2.6.3
ports:
- "4000:4000"

networks:
lasuite:
name: lasuite-network
Expand Down
1 change: 1 addition & 0 deletions docs/env.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ These are the environment variables you can set for the `impress-backend` contai
| DJANGO_EMAIL_USE_TLS | Use tls for email host connection | false |
| DJANGO_SECRET_KEY | Secret key | |
| DJANGO_SERVER_TO_SERVER_API_TOKENS | | [] |
| DOCSPEC_API_URL | URL of the DocSpec conversion API endpoint | |
| DOCUMENT_IMAGE_MAX_SIZE | Maximum size of document in bytes | 10485760 |
| FRONTEND_CSS_URL | To add an external CSS file to the app | |
| FRONTEND_JS_URL | To add an external JS file to the app | |
Expand Down
2 changes: 2 additions & 0 deletions env.d/development/common
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ DJANGO_SERVER_TO_SERVER_API_TOKENS=server-api-token
Y_PROVIDER_API_BASE_URL=http://y-provider-development:4444/api/
Y_PROVIDER_API_KEY=yprovider-api-key

DOCSPEC_API_URL=http://docspec:4000/conversion

# Theme customization
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15

Expand Down
2 changes: 1 addition & 1 deletion env.d/development/common.e2e
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Y_PROVIDER_API_BASE_URL=http://y-provider:4444/api/

# Throttle
API_DOCUMENT_THROTTLE_RATE=1000/min
API_CONFIG_THROTTLE_RATE=1000/min
API_CONFIG_THROTTLE_RATE=1000/min
9 changes: 7 additions & 2 deletions src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
from rest_framework import serializers

from core import choices, enums, models, utils, validators
from core.services import mime_types
from core.services.ai_services import AI_ACTIONS
from core.services.converter_services import (
ConversionError,
YdocConverter,
Converter,
)


Expand Down Expand Up @@ -188,6 +189,7 @@ class DocumentSerializer(ListDocumentSerializer):

content = serializers.CharField(required=False)
websocket = serializers.BooleanField(required=False, write_only=True)
file = serializers.FileField(required=False, write_only=True, allow_null=True)

class Meta:
model = models.Document
Expand All @@ -204,6 +206,7 @@ class Meta:
"deleted_at",
"depth",
"excerpt",
"file",
"is_favorite",
"link_role",
"link_reach",
Expand Down Expand Up @@ -461,7 +464,9 @@ def create(self, validated_data):
language = user.language or language

try:
document_content = YdocConverter().convert(validated_data["content"])
document_content = Converter().convert(
validated_data["content"], mime_types.MARKDOWN, mime_types.YJS
)
except ConversionError as err:
raise serializers.ValidationError(
{"content": ["Could not convert content"]}
Expand Down
40 changes: 32 additions & 8 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,18 @@

from core import authentication, choices, enums, models
from core.api.filters import remove_accents
from core.services import mime_types
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.converter_services import (
ServiceUnavailableError as YProviderServiceUnavailableError,
ConversionError,
Converter,
)
from core.services.converter_services import (
ValidationError as YProviderValidationError,
ServiceUnavailableError as YProviderServiceUnavailableError,
)
from core.services.converter_services import (
YdocConverter,
ValidationError as YProviderValidationError,
)
from core.services.search_indexers import (
get_document_indexer,
Expand Down Expand Up @@ -527,6 +529,28 @@ def perform_create(self, serializer):
"IN SHARE ROW EXCLUSIVE MODE;"
)

# Remove file from validated_data as it's not a model field
# Process it if present
uploaded_file = serializer.validated_data.pop("file", None)

# If a file is uploaded, convert it to Yjs format and set as content
if uploaded_file:
try:
file_content = uploaded_file.read()

converter = Converter()
converted_content = converter.convert(
file_content,
content_type=uploaded_file.content_type,
accept=mime_types.YJS,
)
serializer.validated_data["content"] = converted_content
serializer.validated_data["title"] = uploaded_file.name
except ConversionError as err:
raise drf.exceptions.ValidationError(
{"file": ["Could not convert file content"]}
) from err

obj = models.Document.add_root(
creator=self.request.user,
**serializer.validated_data,
Expand Down Expand Up @@ -1864,14 +1888,14 @@ def content(self, request, pk=None):
if base64_content is not None:
# Convert using the y-provider service
try:
yprovider = YdocConverter()
yprovider = Converter()
result = yprovider.convert(
base64.b64decode(base64_content),
"application/vnd.yjs.doc",
mime_types.YJS,
{
"markdown": "text/markdown",
"html": "text/html",
"json": "application/json",
"markdown": mime_types.MARKDOWN,
"html": mime_types.HTML,
"json": mime_types.JSON,
}[content_format],
)
content = result
Expand Down
113 changes: 97 additions & 16 deletions src/backend/core/services/converter_services.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""Document conversion services (Y-Provider and DocSpec APIs)."""

import logging
import typing
from base64 import b64encode

from django.conf import settings

import requests

from core.services import mime_types

logger = logging.getLogger(__name__)


class ConversionError(Exception):
"""Base exception for conversion-related errors."""
Expand All @@ -19,8 +25,81 @@ class ServiceUnavailableError(ConversionError):
"""Raised when the conversion service is unavailable."""


class ConverterProtocol(typing.Protocol):
    """Structural interface implemented by the conversion backends.

    Any object exposing a compatible ``convert`` method satisfies this
    protocol; inheriting from it is not required.
    """

    def convert(
        self,
        data: typing.Union[bytes, str],
        content_type: str,
        accept: str,
    ) -> typing.Any:
        """Convert ``data`` from ``content_type`` into the ``accept`` format.

        Args:
            data: Payload to convert (raw bytes or text).
            content_type: MIME type describing ``data``.
            accept: MIME type of the requested output.

        Returns:
            The converted payload. Its concrete type depends on the
            implementation and on ``accept``.
        """


class Converter:
    """Orchestrates conversion between different formats using specialized converters."""

    # Backend turning DOCX documents into BlockNote JSON.
    docspec: ConverterProtocol
    # Backend handling every other conversion (Y-Provider).
    ydoc: ConverterProtocol

    def __init__(self):
        self.docspec = DocSpecConverter()
        self.ydoc = YdocConverter()

    def convert(self, data, content_type=mime_types.MARKDOWN, accept=mime_types.YJS):
        """Convert input into other formats using external microservices.

        The defaults mirror ``YdocConverter.convert`` so that this class can be
        used as a drop-in replacement for it.

        Args:
            data: Payload to convert.
            content_type: MIME type of ``data``.
            accept: MIME type of the requested output.

        Returns:
            Whatever the last converter in the chain returns for ``accept``.

        Raises:
            ConversionError: propagated from the underlying converters.
        """
        # DOCX cannot be sent to the ydoc converter directly: go through
        # DocSpec first to obtain BlockNote JSON, then convert that to Yjs.
        if content_type == mime_types.DOCX and accept == mime_types.YJS:
            blocknote_data = self.docspec.convert(
                data, mime_types.DOCX, mime_types.BLOCKNOTE
            )
            return self.ydoc.convert(
                blocknote_data, mime_types.BLOCKNOTE, mime_types.YJS
            )

        # Every other pair is delegated unchanged to the ydoc converter.
        return self.ydoc.convert(data, content_type, accept)


class DocSpecConverter:
    """Client for the DocSpec API, used to turn DOCX files into BlockNote JSON."""

    def _request(self, url, data, content_type):
        """POST ``data`` to ``url`` as a multipart file upload.

        When ``response.ok`` is false, the status code and the first 200
        characters of the body are logged before ``raise_for_status`` is called.
        Returns the ``requests`` response object.
        """
        upload = {"file": ("document.docx", data, content_type)}
        response = requests.post(
            url,
            headers={"Accept": mime_types.BLOCKNOTE},
            files=upload,
            timeout=settings.CONVERSION_API_TIMEOUT,
            verify=settings.CONVERSION_API_SECURE,
        )

        if not response.ok:
            # Keep the log line short: only a preview of the body is recorded.
            body_preview = response.text[:200] or "empty"
            logger.error(
                "DocSpec API error: url=%s, status=%d, response=%s",
                url,
                response.status_code,
                body_preview,
            )

        response.raise_for_status()
        return response

    def convert(self, data, content_type, accept):
        """Convert a DOCX document into BlockNote and return the response body.

        Raises:
            ValidationError: if ``data`` is empty or the requested conversion
                is anything other than DOCX to BlockNote.
            ServiceUnavailableError: if the HTTP call to DocSpec fails.
        """
        if not data:
            raise ValidationError("Input data cannot be empty")

        is_supported = (
            content_type == mime_types.DOCX and accept == mime_types.BLOCKNOTE
        )
        if not is_supported:
            raise ValidationError(
                f"Conversion from {content_type} to {accept} is not supported."
            )

        endpoint = settings.DOCSPEC_API_URL
        try:
            return self._request(endpoint, data, content_type).content
        except requests.RequestException as err:
            logger.exception("DocSpec service error: url=%s", endpoint)
            raise ServiceUnavailableError(
                "Failed to connect to DocSpec conversion service",
            ) from err


class YdocConverter:
"""Service class for conversion-related operations."""
"""Service class for YDoc conversion-related operations."""

@property
def auth_header(self):
Expand All @@ -41,32 +120,34 @@ def _request(self, url, data, content_type, accept):
timeout=settings.CONVERSION_API_TIMEOUT,
verify=settings.CONVERSION_API_SECURE,
)
if not response.ok:
logger.error(
"Y-Provider API error: url=%s, status=%d, response=%s",
url,
response.status_code,
response.text[:200] if response.text else "empty",
)
response.raise_for_status()
return response

def convert(
self, text, content_type="text/markdown", accept="application/vnd.yjs.doc"
):
def convert(self, data, content_type=mime_types.MARKDOWN, accept=mime_types.YJS):
"""Convert a Markdown text into our internal format using an external microservice."""

if not text:
raise ValidationError("Input text cannot be empty")
if not data:
raise ValidationError("Input data cannot be empty")

url = f"{settings.Y_PROVIDER_API_BASE_URL}{settings.CONVERSION_API_ENDPOINT}/"
try:
response = self._request(
f"{settings.Y_PROVIDER_API_BASE_URL}{settings.CONVERSION_API_ENDPOINT}/",
text,
content_type,
accept,
)
if accept == "application/vnd.yjs.doc":
response = self._request(url, data, content_type, accept)
if accept == mime_types.YJS:
return b64encode(response.content).decode("utf-8")
if accept in {"text/markdown", "text/html"}:
if accept in {mime_types.MARKDOWN, "text/html"}:
return response.text
if accept == "application/json":
if accept == mime_types.JSON:
return response.json()
raise ValidationError("Unsupported format")
except requests.RequestException as err:
logger.exception("Y-Provider service error: url=%s", url)
raise ServiceUnavailableError(
"Failed to connect to conversion service",
f"Failed to connect to YDoc conversion service {content_type}, {accept}",
) from err
8 changes: 8 additions & 0 deletions src/backend/core/services/mime_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Shared MIME type identifiers used by the document conversion services."""

# Plain-text formats.
MARKDOWN = "text/markdown"
HTML = "text/html"

# Structured and binary document formats.
JSON = "application/json"
BLOCKNOTE = "application/vnd.blocknote+json"
YJS = "application/vnd.yjs.doc"
DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
Loading
Loading