Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add streaming find endpoint #87

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/app/AppComponents.scala
Original file line number Diff line number Diff line change
@@ -176,7 +176,7 @@ class AppComponents(context: Context, config: Config)
val workspacesController = new Workspaces(authControllerComponents, annotations, esResources, manifest)
val commentsController = new Comments(authControllerComponents, manifest, esResources, annotations)
val usersController = new Users(authControllerComponents, userProvider)
val pagesController = new PagesController(authControllerComponents, manifest, esResources, pages2, annotations, previewStorage)
val pagesController = new PagesController(authControllerComponents, manifest, esResources, pages2, annotations, previewStorage, materializer)

val workerControl = config.aws match {
case Some(awsDiscoveryConfig) =>
26 changes: 26 additions & 0 deletions backend/app/controllers/api/Authentication.scala
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
package controllers.api

import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.{Materializer, OverflowStrategy}
import akka.stream.scaladsl.{Source, SourceQueueWithComplete}
import model.Uri
import model.index.FrontendPage
import pdi.jwt.JwtSession._
import pdi.jwt.JwtTime
import play.api.libs.json.Json
@@ -11,6 +17,7 @@ import utils.attempt._
import utils.auth._
import utils.auth.providers.UserProvider
import utils.controller.{AuthControllerComponents, OptionalAuthApiController}

import java.time.Clock
import play.api.Configuration

@@ -23,6 +30,25 @@ class Authentication(override val controllerComponents: AuthControllerComponents
Right(Ok("Ok"))
}

/**
 * Server-sent-events endpoint that streams the server's current time.
 *
 * Built on `noAuth.ApiAction`. A tick is emitted immediately and then every
 * 100 milliseconds; each tick is replaced by the current time formatted as
 * "HH mm ss" and sent to the client as one event of an event stream.
 */
def liveClock() = noAuth.ApiAction {
  import java.time.ZonedDateTime
  import java.time.format.DateTimeFormatter

  import akka.stream.scaladsl.Source
  import play.api.http.ContentTypes
  import play.api.libs.EventSource

  import scala.concurrent.duration._

  val timeFormat: DateTimeFormatter = DateTimeFormatter.ofPattern("HH mm ss")

  // The tick payload itself is never used: it only paces the stream, and every
  // tick is mapped to a freshly formatted timestamp.
  val clockSource = Source
    .tick(0.millis, 100.millis, "TICK")
    .map(_ => timeFormat.format(ZonedDateTime.now()))

  Right(Ok.chunked(clockSource via EventSource.flow).as(ContentTypes.EVENT_STREAM))
}

def token() = noAuth.ApiAction.attempt { implicit request:Request[AnyContent] =>
val time = Epoch.now
for {
50 changes: 49 additions & 1 deletion backend/app/controllers/api/PagesController.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
package controllers.api

import akka.NotUsed
import akka.actor.{ActorSystem, ClassicActorSystemProvider}
import akka.stream.scaladsl.SourceQueueWithComplete
import akka.stream.{Materializer, OverflowStrategy}
import akka.stream.scaladsl.Source
import commands.{GetPagePreview, GetResource, ResourceFetchMode}
import model.frontend.{Chips, HighlightableText, TextHighlight}
import model.index.{FrontendPage, HighlightForSearchNavigation, PageHighlight}
import model.{Language, Languages, Uri}
import org.apache.pdfbox.pdmodel.PDDocument
import play.api.http.ContentTypes
import play.api.libs.EventSource
import play.api.libs.json.Json
import play.api.mvc.{ResponseHeader, Result}
import services.ObjectStorage
@@ -16,8 +23,10 @@ import utils.PDFUtil
import utils.attempt.Attempt
import utils.controller.{AuthApiController, AuthControllerComponents}

import scala.concurrent.Future

class PagesController(val controllerComponents: AuthControllerComponents, manifest: Manifest,
index: Index, pagesService: Pages2, annotations: Annotations, previewStorage: ObjectStorage) extends AuthApiController {
index: Index, pagesService: Pages2, annotations: Annotations, previewStorage: ObjectStorage, materializer: Materializer) extends AuthApiController {

def getPageCount(uri: Uri) = ApiAction.attempt { req =>

@@ -118,6 +127,11 @@ class PagesController(val controllerComponents: AuthControllerComponents, manife
}
}

/**
 * Creates a pre-materialized queue-backed source of [[FrontendPage]]s.
 *
 * @return a pair of (the queue that producers offer pages to, the source that
 *         emits the offered pages). The queue buffers at most 100 elements and
 *         uses `OverflowStrategy.dropHead` when that buffer is full.
 */
private def createSourceOfHighlights(): (SourceQueueWithComplete[FrontendPage], Source[FrontendPage, NotUsed]) =
  Source
    .queue[FrontendPage](100, OverflowStrategy.dropHead)
    .preMaterialize()(materializer)

private def getHighlights(uri: Uri, query: String, username: String, isSearch: Boolean): Attempt[Seq[HighlightForSearchNavigation]] = {
val searchQuery = if (isSearch) Some(query) else None
val findQuery = if (isSearch) None else Some(query)
@@ -138,13 +152,47 @@ class PagesController(val controllerComponents: AuthControllerComponents, manife
}
}

/**
 * Looks up the pages of `uri` matching `query` and offers a [[FrontendPage]]
 * for each of them to `sourceQueue`.
 *
 * @param sourceQueue queue that receives one element per page with hits
 * @param uri         document to look in
 * @param query       query text; passed as the search query when `isSearch` is
 *                    true and as the find query otherwise
 * @param username    user on whose behalf each page is built
 * @param isSearch    selects which of the two query slots `query` fills
 * @return an attempt that yields Unit once the page lookups have been started
 */
private def getHighlightsStream(sourceQueue: SourceQueueWithComplete[FrontendPage], uri: Uri, query: String, username: String, isSearch: Boolean): Attempt[Unit] = {
  // Exactly one of the two slots carries the query text.
  val (searchQuery, findQuery): (Option[String], Option[String]) =
    if (isSearch) (Some(query), None) else (None, Some(query))

  pagesService.findInPages(uri, query).map { pagesWithHits =>
    // NOTE(review): the value produced for each page (the mapped attempt and the
    // result of `offer`) is discarded, so a failed page lookup or a rejected
    // offer is not reported here — confirm this is intended.
    // NOTE(review): nothing in this method completes or fails `sourceQueue`;
    // presumably the consumer's stream stays open until the client disconnects.
    pagesWithHits.foreach { page =>
      frontendPageFromQuery(uri, page, username, searchQuery, findQuery)
        .map(sourceQueue.offer(_))
    }
  }
}

/**
 * Returns the highlights for an on-demand "find in document" query.
 *
 * @param uri document to look in
 * @param q   text to find
 * @return the highlights serialised as a JSON response
 */
def findInDocument(uri: Uri, q: String) = ApiAction.attempt { req =>
  for {
    highlights <- getHighlights(uri, q, req.user.username, isSearch = false)
  } yield Ok(Json.toJson(highlights))
}

/**
 * Streaming variant of `findInDocument`: highlights are pushed to the client as
 * server-sent events, one event per page with hits.
 *
 * Each event's data is the JSON array of that page's highlights, serialised the
 * same way `findInDocument` serialises its response.
 *
 * @param uri document to look in
 * @param q   text to find
 */
def findInDocumentStream(uri: Uri, q: String) = ApiAction { req =>
  val (sourceQueue, sourceOfFrontendPages) = createSourceOfHighlights()

  // Starts the lookup that feeds the queue.
  // NOTE(review): the returned attempt is not observed, so a failed lookup is
  // not surfaced to the client — confirm whether the stream should be failed or
  // completed in that case.
  getHighlightsStream(sourceQueue, uri, q, req.user.username, isSearch = false)

  val sourceOfHighlights = sourceOfFrontendPages.map { frontendPage =>
    val highlightsForPage = frontendPage.highlights.map { highlight =>
      HighlightForSearchNavigation.fromPageHighlight(frontendPage.page, highlight.index, highlight)
    }
    // Emit JSON rather than the collection's toString so clients can parse the event data.
    Json.stringify(Json.toJson(highlightsForPage))
  }

  Right(Ok.chunked(sourceOfHighlights via EventSource.flow)
    .as(ContentTypes.EVENT_STREAM)
    .withHeaders("Cache-Control" -> "no-cache", "Connection" -> "keep-alive"))
}

// This endpoint is used to get highlights for the "search across documents" query which
// should be fixed for the lifetime of the page viewer of a given document.
// It behaves identically to the findInDocument endpoint, except that it expects its query to be in
5 changes: 5 additions & 0 deletions backend/conf/routes
Original file line number Diff line number Diff line change
@@ -36,6 +36,7 @@ GET /api/pages/preview/:language/:uri/:pageNumber cont

GET /api/pages2/:uri/pageCount controllers.api.PagesController.getPageCount(uri: model.Uri)
GET /api/pages2/:uri/find controllers.api.PagesController.findInDocument(uri: model.Uri, q: String)
GET /api/pages2/:uri/find-stream controllers.api.PagesController.findInDocumentStream(uri: model.Uri, q: String)
GET /api/pages2/:uri/search controllers.api.PagesController.searchInDocument(uri: model.Uri, q: String)
GET /api/pages2/:uri/:pageNumber/preview controllers.api.PagesController.getPagePreview(uri: model.Uri, pageNumber: Int)
GET /api/pages2/:uri/:pageNumber/text controllers.api.PagesController.getPageData(uri: model.Uri, pageNumber: Int, sq: Option[String], fq: Option[String])
@@ -92,6 +93,8 @@ GET /api/keepalive cont

GET /api/config controllers.frontend.App.configuration()



+ NOCSRF
GET /healthcheck controllers.api.Authentication.healthcheck()

@@ -102,6 +105,8 @@ GET /setup cont
+ NOCSRF
PUT /setup controllers.genesis.Genesis.doSetup()

GET /clock controllers.api.Authentication.liveClock()

# === Web Client ===
GET / controllers.frontend.App.index()
GET /third-party/*file controllers.Assets.at(path="/public/third-party", file)