diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7ddd4b8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,23 @@ +logs +project/project +project/target +target +/.target +/.ensime +/.cache +tmp +.history +dist +/.idea +/*.iml +/out +/.idea_modules +/.classpath +/.project +/RUNNING_PID +/.settings +/project/*-shim.sbt +activator.bat +activator +activator-launch-*.jar +activator-*-shim.sbt diff --git a/README.md b/README.md new file mode 100644 index 0000000..944f392 --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +seco-lexicalanalysis-play +========================= + +SeCo lexical analysis services published as a web service using the Scala Play framework. + diff --git a/app/Application.scala b/app/Application.scala new file mode 100644 index 0000000..ce6edf1 --- /dev/null +++ b/app/Application.scala @@ -0,0 +1,10 @@ +import controllers.LexicalAnalysisController + +import com.softwaremill.macwire.MacwireMacros._ +import services.lexicalanalysis.LexicalAnalysisModule + +object Application extends LexicalAnalysisModule { + + val lexicalAnalysisController = wire[LexicalAnalysisController] + +} diff --git a/app/Global.scala b/app/Global.scala new file mode 100644 index 0000000..285e6e7 --- /dev/null +++ b/app/Global.scala @@ -0,0 +1,11 @@ +import com.softwaremill.macwire.{InstanceLookup, Macwire} +import java.util.Locale +import play.api.GlobalSettings +import play.api.mvc.QueryStringBindable.Parsing + +object Global extends GlobalSettings with Macwire { + val instanceLookup = InstanceLookup(valsByClass(Application)) + + override def getControllerInstance[A](controllerClass: Class[A]) = instanceLookup.lookupSingleOrThrow(controllerClass) + +} diff --git a/app/assets/javascripts/index.coffee b/app/assets/javascripts/index.coffee new file mode 100644 index 0000000..31d1965 --- /dev/null +++ b/app/assets/javascripts/index.coffee @@ -0,0 +1,92 @@ +'use strict' + +angular.module('index',['play.routing']) + .controller('IdentifyCtrl', ($scope, playRoutes) -> + 
$scope.text = "The quick brown fox jumps over the lazy dog" + $scope.$watch('text', _.throttle((text) -> + playRoutes.controllers.LexicalAnalysisController.identifyGET(text).get().success((data) -> + $scope.errorStatus = '' + $scope.guessedLang=data + ).error((data,status) -> + if (status==0) + $scope.errorStatus = 503 + $scope.error = "Service unavailable" + else + $scope.errorStatus = status + $scope.error = data + ) + ,1000)) + ) + .controller('LemmatizeCtrl', ($scope, playRoutes) -> + $scope.text = "Albert osti fagotin ja töräytti puhkuvan melodian." + $scope.$watchCollection('[text,locale]', _.throttle(() -> + locale = $scope.locale + if locale=='' then locale=null + playRoutes.controllers.LexicalAnalysisController.baseformGET($scope.text,locale).get().success((data) -> + $scope.errorStatus = '' + $scope.baseform=data + ).error((data,status) -> + if (status==0) + $scope.errorStatus = 503 + $scope.error = "Service unavailable" + else + $scope.errorStatus = status + $scope.error = data + ) + ,1000)) + ) + .controller('AnalyzeCtrl', ($scope, playRoutes) -> + $scope.text = "Albert osti" + $scope.locale = "fi" + $scope.$watchCollection('[text,locale]', _.throttle(() -> + locale = $scope.locale + if locale=='' then locale=null + playRoutes.controllers.LexicalAnalysisController.analyzeGET($scope.text,locale).get().success((data) -> + $scope.analysis=data + ).error((data,status) -> + if (status==0) + $scope.errorStatus = 503 + $scope.error = "Service unavailable" + else + $scope.errorStatus = status + $scope.error = data + ) + ,1000)) + ) + .controller('InflectionCtrl', ($scope, playRoutes) -> + $scope.text = "Albert osti fagotin ja töräytti puhkuvan melodian." 
+ $scope.locale = "fi" + $scope.baseform=true; + $scope.forms = "V N Nom Sg, N Nom Pl, A Pos Nom Pl" + $scope.$watchCollection('[text,locale,baseform,forms]', _.throttle(() -> + locale = $scope.locale + if locale=='' then locale=null + playRoutes.controllers.LexicalAnalysisController.inflectGET($scope.text, $scope.forms.split(/, */),$scope.baseform,locale).get().success((data) -> + $scope.inflection=data + ).error((data,status) -> + if (status==0) + $scope.errorStatus = 503 + $scope.error = "Service unavailable" + else + $scope.errorStatus = status + $scope.error = data + ) + ,1000)) + ) + .controller('HyphenationCtrl', ($scope, playRoutes) -> + $scope.text = "Albert osti fagotin ja töräytti puhkuvan melodian." + $scope.$watchCollection('[text,locale]', _.throttle(() -> + locale = $scope.locale + if locale=='' then locale=null + playRoutes.controllers.LexicalAnalysisController.hyphenateGET($scope.text,locale).get().success((data) -> + $scope.hyphenation=data + ).error((data,status) -> + if (status==0) + $scope.errorStatus = 503 + $scope.error = "Service unavailable" + else + $scope.errorStatus = status + $scope.error = data + ) + ,1000)) + ) \ No newline at end of file diff --git a/app/assets/javascripts/playroutes.js b/app/assets/javascripts/playroutes.js new file mode 100644 index 0000000..4722029 --- /dev/null +++ b/app/assets/javascripts/playroutes.js @@ -0,0 +1,58 @@ +"use strict"; + +// The service - will be used by controllers or other services, filters, etc. +angular.module("play.routing", []).factory("playRoutes", function($http) { + + /* + * Wrap a Play JS function with a new function that adds the appropriate $http method. + * Note that the url has been already applied to the $http method so you only have to pass in + * the data (if any). + * Note: This is not only easier on the eyes, but must be called in a separate function with its own + * set of arguments, because otherwise JavaScript's function scope will bite us. 
+ * @param playFunction The function from Play's jsRouter to be wrapped + */ + var wrapHttp = function(playFunction) { + return function(/*arguments*/) { + var routeObject = playFunction.apply(this, arguments); + var httpMethod = routeObject.method.toLowerCase(); + var url = routeObject.url; + var res = { + method : httpMethod, url : url, absoluteUrl : routeObject.absoluteURL, webSocketUrl : routeObject.webSocketURL + }; + res[httpMethod] = function(obj) { + return $http[httpMethod](url, obj); + }; + return res; + }; + }; + + // Add package object, in most cases "controllers" + var addPackageObject = function(packageName, service) { + if (!(packageName in playRoutes)) { + playRoutes[packageName] = {}; + } + }; + + // Add controller object, e.g. Application + var addControllerObject = function(packageName, controllerKey, service) { + if (!(controllerKey in playRoutes[packageName])) { + playRoutes[packageName][controllerKey] = {}; + } + }; + + var playRoutes = {}; + // Loop over all items in the jsRoutes generated by Play, wrap and add them to + // playRoutes + for ( var packageKey in jsRoutes) { + var packageObject = jsRoutes[packageKey]; + addPackageObject(packageKey, playRoutes); + for ( var controllerKey in packageObject) { + var controller = packageObject[controllerKey]; + addControllerObject(packageKey, controllerKey, playRoutes); + for ( var controllerMethodKey in controller) { + playRoutes[packageKey][controllerKey][controllerMethodKey] = wrapHttp(controller[controllerMethodKey]); + } + } + } + return playRoutes; +}); diff --git a/app/assets/stylesheets/index.less b/app/assets/stylesheets/index.less new file mode 100644 index 0000000..a2707c3 --- /dev/null +++ b/app/assets/stylesheets/index.less @@ -0,0 +1,14 @@ +body { + padding-top: 70px; + padding-bottom: 70px; +} + +.anchor { + position:relative; + top: -40px; + visibility: hidden; +} + +.affix,.navbar-fixed-top { + -webkit-transform: scale3d(1,1,1); +} diff --git a/app/binders/Binders.scala 
b/app/binders/Binders.scala new file mode 100644 index 0000000..2c93291 --- /dev/null +++ b/app/binders/Binders.scala @@ -0,0 +1,15 @@ +package binders + +import play.api.mvc.QueryStringBindable.Parsing +import play.api.i18n.Lang +import play.api.mvc.QueryStringBindable +import java.util.Locale + +/** + * Created by jiemakel on 24.10.2013. + */ +object Binders { + implicit object bindableLocale extends Parsing[Locale]( + new Locale(_), _.toString, (key: String, e: Exception) => "Cannot parse parameter %s as Locale: %s".format(key, e.getMessage) + ) +} diff --git a/app/controllers/LexicalAnalysisController.scala b/app/controllers/LexicalAnalysisController.scala new file mode 100644 index 0000000..6a66a66 --- /dev/null +++ b/app/controllers/LexicalAnalysisController.scala @@ -0,0 +1,407 @@ +/** + * + */ +package controllers + +import play.api.mvc._ +import play.api.libs.json.{JsValue, Json} +import play.api.Routes +import fi.seco.lexical.ILexicalAnalysisService +import fi.seco.lexical.LanguageRecognizer +import fi.seco.lexical.CompoundLexicalAnalysisService +import fi.seco.lexical.hfst.HFSTLexicalAnalysisService +import fi.seco.lexical.SnowballLexicalAnalysisService +import scala.collection.convert.WrapAsScala._ +import scala.collection.convert.WrapAsJava._ +import java.util.Locale + +import play.api.libs.json.Writes +import fi.seco.lexical.hfst.HFSTLexicalAnalysisService.WordToResults +import scala.Some +import scala.util.Try +import play.api.mvc.SimpleResult +import java.util +import services.lexicalanalysis.LanguageDetector +import play.api.libs.iteratee.{Iteratee, Concurrent} + +import scala.concurrent.ExecutionContext.Implicits.global + +/** + * @author jiemakel + * + */ +class LexicalAnalysisController(las: CompoundLexicalAnalysisService, hfstlas: HFSTLexicalAnalysisService, snowballlas: SnowballLexicalAnalysisService) extends Controller { + + def CORSAction(f: Request[AnyContent] => Result): Action[AnyContent] = { + Action { request => + 
f(request).withHeaders("Access-Control-Allow-Origin" -> "*")
    }
  }

  /**
   * Variant of `CORSAction` that takes an explicit body parser.
   *
   * @param bp body parser used to decode the request body
   * @param f  request handler; its result gets an `Access-Control-Allow-Origin: *` header added
   */
  def CORSAction[A](bp: BodyParser[A])(f: Request[A] => Result): Action[A] = {
    Action(bp) { request =>
      f(request).withHeaders("Access-Control-Allow-Origin" -> "*")
    }
  }

  /**
   * Answers CORS preflight requests with an empty body and the permissive CORS headers.
   *
   * `Access-Control-Allow-Credentials: true` is intentionally not sent: browsers refuse
   * credentialed responses whose `Access-Control-Allow-Origin` is the `*` wildcard, so the
   * combination is invalid and the wildcard origin is the one every other action here uses.
   */
  def options = Action {
    Ok("").withHeaders(
      "Access-Control-Allow-Origin" -> "*",
      "Access-Control-Allow-Methods" -> "POST, GET, OPTIONS, PUT, DELETE",
      "Access-Control-Max-Age" -> "3600",
      "Access-Control-Allow-Headers" -> "Origin, X-Requested-With, Content-Type, Accept")
  }

  /** Renders the documentation page, passing it the locale lists supported by each backend. */
  def index = Action {
    Ok(views.html.index(
      this,
      LanguageRecognizer.getAvailableLanguages,
      LanguageDetector.supportedLanguages,
      snowballlas.getSupportedBaseformLocales.map(_.toString),
      hfstlas.getSupportedBaseformLocales.map(_.toString),
      hfstlas.getSupportedAnalyzeLocales.map(_.toString),
      hfstlas.getSupportedInflectionLocales.map(_.toString),
      hfstlas.getSupportedHyphenationLocales.map(_.toString)))
  }

  /**
   * Converts a service outcome into an HTTP result.
   *
   *  - `Left((json, url))`: redirect to `url` when the client accepts HTML, otherwise 200 with `json`
   *  - `Right(Left(message))`: 501 Not Implemented carrying `message`
   *  - `Right(Right(json))`: 200 with `json`
   */
  implicit def toResponse(res : Either[(JsValue, String),Either[String,JsValue]])(implicit request : Request[AnyContent]) : SimpleResult = {
    res match {
      case Left((json, redirectUrl)) =>
        if (Accepts.Html.unapply(request)) Redirect(redirectUrl)
        else Ok(json)
      case Right(Left(message)) => NotImplemented(message)
      case Right(Right(json)) => Ok(json)
    }
  }

  /**
   * Tells whether the first HFST analysis of a word carries any information.
   *
   * A word counts as unrecognized when its first analysis has no global tags and the first
   * word part has either no tags or a "PUNCT" tag. Words without any analysis (or analyses
   * without parts) are handled instead of raising an index error.
   */
  private def isRecognized(word: WordToResults): Boolean =
    word.getAnalysis.headOption.exists { best =>
      val plainOrPunctuation = best.getParts.headOption.forall { part =>
        part.getTags.isEmpty || part.getTags.containsKey("PUNCT")
      }
      !(plainOrPunctuation && best.getGlobalTags.isEmpty)
    }

  /**
   * Scores the language of `text` using the three available sources.
   *
   * @param text    the text to classify
   * @param locales candidate locales; when empty, every locale of every source is considered
   * @return a 4-tuple of (LanguageRecognizer result, language detector results,
   *         HFST acceptor results, best guess as `(locale, certainty)` if any source produced a score)
   */
  private def scoreLanguages(text: String, locales: Seq[String]) = {
    val restricted = !locales.isEmpty

    // Source 1: LanguageRecognizer; a null answer becomes None.
    val recognized =
      if (restricted) LanguageRecognizer.getLanguageAsObject(text, locales: _*)
      else LanguageRecognizer.getLanguageAsObject(text)
    val lrResult = Option(recognized).map(r => Map(r.getLang() -> r.getIndex))

    // Source 2: the language detector, given equal priors for the requested locales (if any).
    val detector = LanguageDetector()
    if (restricted)
      detector.setPriorMap(new util.HashMap(mapAsJavaMap(locales.map((_,new java.lang.Double(1.0))).toMap)))
    detector.append(text)
    val ldResult = detector.getProbabilities().map(l => Map(l.lang -> l.prob))

    // Source 3: share of words each HFST analyzer recognizes, normalized over all analyzers.
    val candidates: Seq[Locale] =
      if (restricted) locales.map(new Locale(_)).intersect(hfstlas.getSupportedAnalyzeLocales.toSeq)
      else hfstlas.getSupportedAnalyzeLocales.toSeq
    val hfstCounts = candidates.map { lang =>
      // (recognized words, total words)
      val counts = hfstlas.analyze(text, lang).foldRight((0, 0)) { (ar, count) =>
        if (isRecognized(ar)) (count._1 + 1, count._2 + 1)
        else (count._1, count._2 + 1)
      }
      (lang.toString(), counts)
    }
    val hfstShares = hfstCounts
      .filter(_._2._1 != 0)
      .sortBy(_._2._1)
      .reverse
      .map(p => (p._1, p._2._1.toDouble / p._2._2))
    val tc = hfstShares.foldRight(0.0) {_._2+_}
    val hfstResult = hfstShares.map(p => Map(p._1 -> p._2/tc))

    // Per-locale sum of the scores, divided by the number of sources (three),
    // whether or not every source reported that locale.
    val combined = (ldResult ++ hfstResult ++ lrResult)
      .groupBy(_.keysIterator.next)
      .mapValues(_.foldRight(0.0){(p,r) => r+p.valuesIterator.next}/3.0)
    val bestGuess = if (combined.isEmpty) None else Some(combined.maxBy(_._2))

    (lrResult, ldResult, hfstResult, bestGuess)
  }

  /** Sorted, de-duplicated union of the locales known to the three identification sources. */
  private def identifiableLocales =
    (LanguageRecognizer.getAvailableLanguages ++ LanguageDetector.supportedLanguages ++ hfstlas.getSupportedAnalyzeLocales.map(_.toString)).sorted.distinct

  /**
   * Guesses the most likely language of `text`.
   *
   * @param locales candidate locales; empty means no restriction
   * @return the best-scoring locale, or None when no source produced a score
   */
  def getBestLang(text: String, locales: Seq[String]) : Option[String] =
    scoreLanguages(text, locales)._4.map(_._1)

  /**
   * Language identification service.
   *
   * @param text    text to identify; None yields the list of accepted locales plus the
   *                documentation anchor to redirect HTML clients to
   * @param locales candidate locales; empty means no restriction
   * @return best locale, its certainty and the per-source details as JSON, or an error message
   */
  def identify(text: Option[String], locales: Seq[String]) : Either[(JsValue, String),Either[String,JsValue]] = {
    text match {
      case Some(input) =>
        val (lrResult, ldResult, hfstResult, bestGuess) = scoreLanguages(input, locales)
        bestGuess match {
          case Some(lang) =>
            Right(Right(Json.toJson(Map(
              "locale" -> Json.toJson(lang._1),
              "certainty" -> Json.toJson(lang._2),
              "details" -> Json.toJson(Map(
                "languageRecognizerResults" -> Json.toJson(lrResult),
                "languageDetectorResults" -> Json.toJson(ldResult),
                "hfstAcceptorResults" -> Json.toJson(hfstResult)))))))
          case None =>
            if (!locales.isEmpty)
              Right(Left(s"Couldn't categorize $input into any of requested languages (${locales.mkString(", ")})"))
            else
              Right(Left(s"Couldn't categorize $input into any of the supported languages (${identifiableLocales.mkString(", ")})"))
        }
      case None =>
        Left((Json.toJson(Map( "acceptedLocales" -> identifiableLocales)),controllers.routes.LexicalAnalysisController.index + "#language_recognition"))
    }
  }

  /** Converts a service outcome into the JSON sent over a WebSocket; errors become `{"error": ...}`. */
  def toWSResponse(res : Either[(JsValue, String),Either[String,JsValue]]) : JsValue = {
    res match {
      case Left((json, _)) => json
      case Right(Left(message)) => Json.toJson(Map("error" -> message))
      case Right(Right(json)) => json
    }
  }

  /** First value submitted for `key` in a form body, if any. */
  private def firstValue(form: Map[String, Seq[String]], key: String): Option[String] =
    form.get(key).flatMap(_.headOption)

  /**
   * Dispatches on the request body: form-url-encoded bodies go to `fromForm`, JSON bodies to
   * `fromJson`, anything else is answered with 400 Bad Request.
   */
  private def withFormOrJson(request: Request[AnyContent])(fromForm: Map[String, Seq[String]] => SimpleResult)(fromJson: JsValue => SimpleResult): SimpleResult =
    request.body.asFormUrlEncoded.map(fromForm)
      .orElse(request.body.asJson.map(fromJson))
      .getOrElse(BadRequest("Expecting either a JSON or a form-url-encoded body"))

  /**
   * Builds a JSON WebSocket that answers every incoming message with `respond(message)`,
   * pushed onto the socket's outgoing channel.
   */
  private def jsonWebSocket(respond: JsValue => JsValue) = WebSocket.using[JsValue] { req =>
    // Concurrent.broadcast returns (Enumerator, Concurrent.Channel)
    val (out,channel) = Concurrent.broadcast[JsValue]
    val in = Iteratee.foreach[JsValue] { message =>
      channel push respond(message)
    }
    (in,out)
  }

  def identifyGET(text: Option[String], locales: List[String]) = CORSAction { implicit request =>
    identify(text,locales)
  }

  def identifyPOST = CORSAction { implicit request =>
    withFormOrJson(request) { form =>
      toResponse(identify(firstValue(form, "text"), form.get("locales").getOrElse(Seq.empty)))
    } { json =>
      toResponse(identify((json \ "text").asOpt[String],(json \ "locales").asOpt[Seq[String]].getOrElse(Seq.empty)))
    }
  }

  def identifyWS = jsonWebSocket { data =>
    toWSResponse(identify((data \ "text").asOpt[String],(data \ "locales").asOpt[Seq[String]].getOrElse(Seq.empty)))
  }

  /**
   * Lemmatization service.
   *
   * @param text   text to lemmatize; None yields the accepted locales plus the documentation anchor
   * @param locale locale to use; None makes the service guess among the supported baseform locales
   */
  def baseform(text: Option[String], locale: Option[Locale]) : Either[(JsValue,String),Either[String,JsValue]] = {
    (text, locale) match {
      case (None, _) =>
        Left((Json.toJson(Map( "acceptedLocales" -> las.getSupportedBaseformLocales.map(_.toString).toSeq.sorted)),controllers.routes.LexicalAnalysisController.index + "#lemmatization"))
      case (Some(input), Some(requested)) =>
        if (las.getSupportedBaseformLocales.contains(requested)) Right(Right(Json.toJson(las.baseform(input, requested))))
        else Right(Left(s"Locale $requested not in the supported locales (${las.getSupportedBaseformLocales.mkString(", ")})"))
      case (Some(input), None) =>
        getBestLang(input,las.getSupportedBaseformLocales.toSeq.map(_.toString)) match {
          case Some(lang) => Right(Right(Json.toJson(Map("locale" -> lang, "baseform" -> las.baseform(input, new Locale(lang))))))
          case None => Right(Left(s"Couldn't categorize $input into any of the supported languages (${las.getSupportedBaseformLocales.mkString(", ")})"))
        }
    }
  }

  def baseformGET(text: Option[String], locale: Option[Locale]) = CORSAction { implicit request =>
    baseform(text,locale)
  }

  def baseformPOST = CORSAction { implicit request =>
    withFormOrJson(request) { form =>
      toResponse(baseform(firstValue(form, "text"), firstValue(form, "locale").map(l => new Locale(l))))
    } { json =>
      toResponse(baseform((json \ "text").asOpt[String],(json \ "locale").asOpt[String].map(l => new Locale(l))))
    }
  }

  def baseformWS = jsonWebSocket { data =>
    toWSResponse(baseform((data \ "text").asOpt[String],(data \ "locale").asOpt[String].map(l => new Locale(l))))
  }

  /** JSON form of one word part: its lemma and its tags. */
  implicit val WordPartWrites: Writes[HFSTLexicalAnalysisService.Result.WordPart] = new Writes[HFSTLexicalAnalysisService.Result.WordPart] {
    def writes(r : HFSTLexicalAnalysisService.Result.WordPart) : JsValue = {
      Json.obj(
        "lemma" -> r.getLemma,
        "tags" -> Json.toJson(r.getTags.toMap.mapValues(iterableAsScalaIterable(_)))
      )
    }
  }

  /** JSON form of one analysis: weight, word parts and global tags. */
  implicit val ResultWrites: Writes[HFSTLexicalAnalysisService.Result] = new Writes[HFSTLexicalAnalysisService.Result] {
    def writes(r : HFSTLexicalAnalysisService.Result) : JsValue = {
      Json.obj(
        "weight" -> r.getWeight,
        "wordParts" -> Json.toJson(r.getParts.map(Json.toJson(_))),
        "globalTags" -> Json.toJson(r.getGlobalTags.toMap.mapValues(iterableAsScalaIterable(_)))
      )
    }
  }

  /** JSON form of a word together with all of its analyses. */
  implicit val wordToResultsWrites: Writes[WordToResults] = new Writes[WordToResults] {
    def writes(r: WordToResults) : JsValue = {
      Json.obj(
        "word" -> r.getWord,
        "analysis" -> Json.toJson(r.getAnalysis.map(Json.toJson(_)))
      )
    }
  }

  /**
   * Morphological analysis service.
   *
   * @param text   text to analyze; None yields the accepted locales plus the documentation anchor
   * @param locale locale to use; None makes the service guess among the supported analysis locales
   */
  def analyze(text: Option[String], locale: Option[Locale]) : Either[(JsValue,String),Either[String,JsValue]] = {
    (text, locale) match {
      case (None, _) =>
        Left((Json.toJson(Map( "acceptedLocales" -> hfstlas.getSupportedAnalyzeLocales.map(_.toString).toSeq.sorted)),controllers.routes.LexicalAnalysisController.index + "#morphological_analysis"))
      case (Some(input), Some(requested)) =>
        if (hfstlas.getSupportedAnalyzeLocales.contains(requested)) Right(Right(Json.toJson(hfstlas.analyze(input, requested).toList)))
        else Right(Left(s"Locale $requested not in the supported locales (${hfstlas.getSupportedAnalyzeLocales.mkString(", ")})"))
      case (Some(input), None) =>
        getBestLang(input,hfstlas.getSupportedAnalyzeLocales.toSeq.map(_.toString)) match {
          case Some(lang) => Right(Right(Json.toJson(Map("locale" -> Json.toJson(lang), "analysis" -> Json.toJson(hfstlas.analyze(input, new Locale(lang)).toList)))))
          case None => Right(Left(s"Couldn't categorize $input into any of the supported languages (${hfstlas.getSupportedAnalyzeLocales.mkString(", ")})"))
        }
    }
  }

  def analyzeGET(text: Option[String], locale: Option[Locale]) = CORSAction { implicit request =>
    analyze(text,locale)
  }

  def analyzePOST = CORSAction { implicit request =>
    withFormOrJson(request) { form =>
      toResponse(analyze(firstValue(form, "text"), firstValue(form, "locale").map(l => new Locale(l))))
    } { json =>
      toResponse(analyze((json \ "text").asOpt[String],(json \ "locale").asOpt[String].map(l => new Locale(l))))
    }
  }

  def analyzeWS = jsonWebSocket { data =>
    toWSResponse(analyze((data \ "text").asOpt[String],(data \ "locale").asOpt[String].map(l => new Locale(l))))
  }

  /**
   * Inflection service.
   *
   * @param text     text to inflect; None yields the accepted locales plus the documentation anchor
   * @param forms    target inflection forms, passed through to the HFST service
   * @param baseform flag passed through to the HFST service
   * @param locale   locale to use; None makes the service guess among the supported inflection locales
   */
  def inflect(text: Option[String], forms: Seq[String], baseform: Boolean, locale : Option[Locale]) : Either[(JsValue, String),Either[String,JsValue]] = {
    (text, locale) match {
      case (None, _) =>
        // NOTE(review): this redirects to the "#morphological_analysis" anchor, same as analyze —
        // possibly a copy-paste; confirm which anchor the inflection section of the index page uses.
        Left((Json.toJson(Map( "acceptedLocales" -> hfstlas.getSupportedInflectionLocales.map(_.toString).toSeq.sorted)),controllers.routes.LexicalAnalysisController.index + "#morphological_analysis"))
      case (Some(input), Some(requested)) =>
        if (hfstlas.getSupportedInflectionLocales.contains(requested)) Right(Right(Json.toJson(hfstlas.inflect(input, forms, baseform, requested))))
        else Right(Left(s"Locale $requested not in the supported locales (${hfstlas.getSupportedInflectionLocales.mkString(", ")})"))
      case (Some(input), None) =>
        getBestLang(input,hfstlas.getSupportedInflectionLocales.toSeq.map(_.toString)) match {
          case Some(lang) => Right(Right(Json.toJson(Map("locale" -> Json.toJson(lang), "inflection" -> Json.toJson(hfstlas.inflect(input, forms, baseform, new Locale(lang)))))))
          case None => Right(Left(s"Couldn't categorize $input into any of the supported languages (${hfstlas.getSupportedInflectionLocales.mkString(", ")})"))
        }
    }
  }


  def inflectGET(text: Option[String], forms: Seq[String], baseform: Boolean, locale : Option[Locale]) = CORSAction { implicit request =>
    inflect(text,forms,baseform,locale)
  }

  def inflectPOST = CORSAction { implicit request =>
    withFormOrJson(request) { form =>
      // A missing or unparsable "baseform" value defaults to true.
      val useBaseform = firstValue(form, "baseform").flatMap(s => Try(s.toBoolean).toOption).getOrElse(true)
      toResponse(inflect(firstValue(form, "text"), form.get("forms").getOrElse(Seq.empty), useBaseform, firstValue(form, "locale").map(l => new Locale(l))))
    } { json =>
      toResponse(inflect((json \ "text").asOpt[String],(json \ "forms").asOpt[Seq[String]].getOrElse(Seq.empty), (json \ "baseform").asOpt[Boolean].getOrElse(true), (json \ "locale").asOpt[String].map(l => new Locale(l))))
    }
  }

  def inflectWS = jsonWebSocket { data =>
    toWSResponse(inflect((data \ "text").asOpt[String],(data \ "forms").asOpt[Seq[String]].getOrElse(Seq.empty), (data \ "baseform").asOpt[Boolean].getOrElse(true), (data \ "locale").asOpt[String].map(l => new Locale(l))))
  }

  /**
   * Hyphenation service.
   *
   * @param text   text to hyphenate; None yields the accepted locales plus the documentation anchor
   * @param locale locale to use; None makes the service guess among the supported hyphenation locales
   */
  def hyphenate(text: Option[String], locale: Option[Locale]) : Either[(JsValue,String),Either[String,JsValue]] = {
    (text, locale) match {
      case (None, _) =>
        Left((Json.toJson(Map( "acceptedLocales" -> hfstlas.getSupportedHyphenationLocales.map(_.toString).toSeq.sorted)),controllers.routes.LexicalAnalysisController.index + "#hyphenation"))
      case (Some(input), Some(requested)) =>
        if (hfstlas.getSupportedHyphenationLocales.contains(requested)) Right(Right(Json.toJson(hfstlas.hyphenate(input, requested))))
        else Right(Left(s"Locale $requested not in the supported locales (${hfstlas.getSupportedHyphenationLocales.mkString(", ")})"))
      case (Some(input), None) =>
        getBestLang(input,hfstlas.getSupportedHyphenationLocales.toSeq.map(_.toString)) match {
          case Some(lang) => Right(Right(Json.toJson(Map("locale" -> Json.toJson(lang), "hyphenation" -> Json.toJson(hfstlas.hyphenate(input, new Locale(lang)))))))
          case None => Right(Left(s"Couldn't categorize $input into any of the supported languages (${hfstlas.getSupportedHyphenationLocales.mkString(", ")})"))
        }
    }
  }

  def hyphenateGET(text: Option[String], locale: Option[Locale]) = CORSAction { implicit request =>
    hyphenate(text,locale)
  }

  def hyphenatePOST = CORSAction { implicit request =>
    val formBody = request.body.asFormUrlEncoded;
    val jsonBody = request.body.asJson;
    formBody.map { data =>
      toResponse(hyphenate(data.get("text").map(_.head),data.get("locale").map(l => new
Locale(l.head)))) + }.getOrElse { + jsonBody.map { data => + toResponse(hyphenate((data \ "text").asOpt[String],(data \ "locale").asOpt[String].map(l => new Locale(l)))) + }.getOrElse { + BadRequest("Expecting either a JSON or a form-url-encoded body") + } + } + } + + def hyphenateWS = WebSocket.using[JsValue] { req => + + //Concurrent.broadcast returns (Enumerator, Concurrent.Channel) + val (out,channel) = Concurrent.broadcast[JsValue] + + //log the message to stdout and send response back to client + val in = Iteratee.foreach[JsValue] { + data => channel push toWSResponse(hyphenate((data \ "text").asOpt[String],(data \ "locale").asOpt[String].map(l => new Locale(l)))) + } + (in,out) + } + + def javascriptRoutes = Action { implicit request => + Ok(Routes.javascriptRouter("jsRoutes")(routes.javascript.LexicalAnalysisController.baseformGET, routes.javascript.LexicalAnalysisController.analyzeGET, routes.javascript.LexicalAnalysisController.identifyGET, routes.javascript.LexicalAnalysisController.hyphenateGET,routes.javascript.LexicalAnalysisController.inflectGET)).as(JAVASCRIPT) + } +} \ No newline at end of file diff --git a/app/services/lexicalanalysis/LexicalAnalysisModule.scala b/app/services/lexicalanalysis/LexicalAnalysisModule.scala new file mode 100644 index 0000000..6d89925 --- /dev/null +++ b/app/services/lexicalanalysis/LexicalAnalysisModule.scala @@ -0,0 +1,34 @@ +/** + * + */ +package services.lexicalanalysis + +import com.softwaremill.macwire.MacwireMacros._ +import fi.seco.lexical.CompoundLexicalAnalysisService +import fi.seco.lexical.LanguageRecognizer +import fi.seco.lexical.hfst.HFSTLexicalAnalysisService +import fi.seco.lexical.SnowballLexicalAnalysisService +import com.cybozu.labs.langdetect.Detector +import scala.util.Try +import com.typesafe.scalalogging.slf4j.Logging + +/** + * @author jiemakel + * + */ +trait LexicalAnalysisModule { + lazy val hfstlas = new HFSTLexicalAnalysisService() + lazy val snowballlas = new 
SnowballLexicalAnalysisService() + lazy val clas = new CompoundLexicalAnalysisService(hfstlas, snowballlas) + +} + +object LanguageDetector extends Logging { + def apply() = com.cybozu.labs.langdetect.DetectorFactory.create() + val supportedLanguages = Array("af","am","ar","az","be","bg","bn","bo","ca","cs","cy","da","de","dv","el","en","es","et","eu","fa","fi","fo","fr","ga","gn","gu","he","hi","hr","hu","hy","id","is","it","ja","jv","ka","kk","km","kn","ko","ky","lb","lij","ln","lt","lv","mi","mk","ml", "mn", "mr", "mt", "my", "ne", "nl", "no", "os", "pa", "pl", "pnb", "pt", "qu", "ro", "si", "sk", "so", "sq", "sr", "sv", "sw", "ta", "te", "th", "tk", "tl", "tr", "tt", "ug", "uk", "ur", "uz", "vi", "yi", "yo", "zh-cn", "zh-tw") + try { + com.cybozu.labs.langdetect.DetectorFactory.loadProfiles(supportedLanguages:_*) + } catch { + case e: Exception => logger.warn("Couldn't load language profiles",e) + } +} \ No newline at end of file diff --git a/app/views/assetMode.scala b/app/views/assetMode.scala new file mode 100644 index 0000000..0eb9a59 --- /dev/null +++ b/app/views/assetMode.scala @@ -0,0 +1,21 @@ +package views.html.helper + +import play.api.templates.Html +import play.api.mvc.{Call} +import play.api.{Play, Mode} +import controllers.routes + +/** Make the app explicit for testing */ +trait RequiresApp { + implicit val app = play.api.Play.current +} + +object assetMode extends RequiresApp { + def apply(scriptNameDev: String)(scriptNameProd: String = scriptNameDev)(scriptNameTest : String = scriptNameProd): String = { + app.mode match { + case Mode.Dev => scriptNameDev + case Mode.Test => scriptNameTest + case Mode.Prod => scriptNameProd + } + } +} diff --git a/app/views/index.scala.html b/app/views/index.scala.html new file mode 100644 index 0000000..a14298f --- /dev/null +++ b/app/views/index.scala.html @@ -0,0 +1,269 @@ +@(ctrl : LexicalAnalysisController, supportedLanguageRecognizerLocales: Iterable[String], supportedLanguageDetectorLocales: 
Iterable[String], supportedSnowballLocales: Iterable[String],supportedHFSTBaseformLocales: Iterable[String],supportedHFSTAnalyzeLocales: Iterable[String],supportedHFSTInflectionLocales: Iterable[String],supportedHFSTHyphenationLocales: Iterable[String]) + + + +
++ We provide the following JSON-returning Web Services: +
++ The tools backing these services are mostly not originally our own, but we've wrapped them for your convenience. For specifics, see the details of each service. + For general questions about this service, contact eetu.makela@@aalto.fi. +
+
+ Tries to recognize the language of an input. Call with e.g.
+ @controllers.routes.LexicalAnalysisController.identifyGET(Some("The quick brown fox jumps over the lazy dog"),List.empty)
+ or with a list of possible locales, e.g. @controllers.routes.LexicalAnalysisController.identifyGET(Some("The quick brown fox jumps over the lazy dog"),List("fi","en","sv"))
+ Also available using HTTP POST with parameters given either as form-urlencoded or JSON. For intensive use, there is also a JSON-understanding WebSocket-version at @controllers.routes.LexicalAnalysisController.identifyWS. All methods are CORS-enabled.
+
+ Returns results as JSON, e.g.:
+
@ctrl.identify(Some("The quick brown fox jumps over the lazy dog"),List.empty).right.get.right.get+ When called without parameters but with an Accept header other than text/html, returns the supported locales as JSON, e.g.: +
@ctrl.identify(None,Seq.empty).left.get._1+ +
+ In total, the service supports @((supportedLanguageDetectorLocales ++ supportedLanguageRecognizerLocales ++ supportedHFSTAnalyzeLocales.map(_.toString)).toSeq.sorted.distinct.size) locales, combining results from three sources: +
+ Lemmatizes the input into its base form.
+ Call with e.g. @controllers.routes.LexicalAnalysisController.baseformGET(Some("Albert osti fagotin ja töräytti puhkuvan melodian."),Some(new java.util.Locale("fi")))
+ or just @controllers.routes.LexicalAnalysisController.baseformGET(Some("The quick brown fox jumps over the lazy dog"),None) to guess locale.
+ Also available using HTTP POST with parameters given either as form-urlencoded or JSON. For intensive use, there is also a JSON-understanding WebSocket-version at @controllers.routes.LexicalAnalysisController.baseformWS. All methods are CORS-enabled.
+ Returns results as JSON (e.g. @ctrl.baseform(Some("Albert osti fagotin ja töräytti puhkuvan melodian."),Some(new java.util.Locale("fi"))).right.get.right.get
or @ctrl.baseform(Some("The quick brown fox jumps over the lazy dog"),None).right.get.right.get
)
+ When called without parameters but with an Accept header other than text/html, returns the @((supportedSnowballLocales++supportedHFSTBaseformLocales).toSeq.distinct.size) supported locales as JSON.
+
+ Uses finite state transducers provided by the HFST, Omorfi and Giellatekno projects where available (locales @supportedHFSTBaseformLocales.toSeq.sorted.mkString(", ")). Note that the quality and scope of the lemmatization vary wildly between languages.
+ Snowball stemmers are used for locales @((supportedSnowballLocales.toSeq diff supportedHFSTBaseformLocales.toSeq).sorted.mkString(", ")) (not used: @((supportedSnowballLocales.toSeq intersect supportedHFSTBaseformLocales.toSeq).sorted.mkString(", ")))
+
+ Gives a morphological analysis of the text. Call with e.g. @controllers.routes.LexicalAnalysisController.analyzeGET(Some("Albert osti"),Some(new java.util.Locale("fi")))
+ or just @controllers.routes.LexicalAnalysisController.analyzeGET(Some("Bier bitte"),None) to guess locale.
+ Also available using HTTP POST with parameters given either as form-urlencoded or JSON. For intensive use, there is also a JSON-understanding WebSocket-version at @controllers.routes.LexicalAnalysisController.analyzeWS. All methods are CORS-enabled.
+ Returns results as JSON, e.g.:
+
@play.api.libs.json.Json.prettyPrint(ctrl.analyze(Some("Albert osti"),Some(new java.util.Locale("fi"))).right.get.right.get)or +
@play.api.libs.json.Json.prettyPrint(ctrl.analyze(Some("Bier bitte"),None).right.get.right.get)+ When called without parameters but with an Accept header other than text/html, returns the @(supportedHFSTAnalyzeLocales.size) supported locales as JSON (e.g.
@ctrl.analyze(None,None).left.get._1
).
+
+ + Uses finite state transducers provided by the HFST, Omorfi and Giellatekno projects. Note that the quality and scope of analysis as well as tags returned vary wildly between languages. +
+
+ Transforms the text given a set of inflection forms, by default also converting words not matching the inflection forms to their base form. Call with e.g. @controllers.routes.LexicalAnalysisController.inflectGET(Some("Albert osti fagotin"),List("V N Nom Sg","N Nom Pl"),true,Some(new java.util.Locale("fi")))
+ or @controllers.routes.LexicalAnalysisController.inflectGET(Some("Albert osti fagotin"),List("V N Nom Sg","N Nom Pl"),false,Some(new java.util.Locale("fi")))
+ Also available using HTTP POST with parameters given either as form-urlencoded or JSON. For intensive use, there is also a JSON-understanding WebSocket-version at @controllers.routes.LexicalAnalysisController.inflectWS. All methods are CORS-enabled.
+ Returns results as JSON (e.g. @ctrl.inflect(Some("Albert osti fagotin"),List("V N Nom Sg","N Nom Pl"),true,Some(new java.util.Locale("fi"))).right.get.right.get
)
+ When called without parameters but with an Accept header other than text/html, returns the @(supportedHFSTInflectionLocales.size) supported locales as JSON (e.g. @ctrl.inflect(None,Seq.empty,false,None).left.get._1
).
+
+ Uses finite state transducers provided by the HFST, Omorfi and Giellatekno projects. Note that the inflection form syntaxes differ wildly between languages. +
+
+ Hyphenates the given text. Call with e.g. @controllers.routes.LexicalAnalysisController.hyphenateGET(Some("Albert osti fagotin ja töräytti puhkuvan melodian."),Some(new java.util.Locale("fi")))
+ or just @controllers.routes.LexicalAnalysisController.hyphenateGET(Some("ein Bier bitte"),None) to guess locale.
+ Also available using HTTP POST with parameters given either as form-urlencoded or JSON. For intensive use, there is also a JSON-understanding WebSocket-version at @controllers.routes.LexicalAnalysisController.hyphenateWS. All methods are CORS-enabled.
+ Returns results as JSON (e.g. @ctrl.hyphenate(Some("Albert osti fagotin ja töräytti puhkuvan melodian."),Some(new java.util.Locale("fi"))).right.get.right.get
or @ctrl.hyphenate(Some("ein Bier bitte"),None).right.get.right.get
)
+ When called without parameters but with an Accept header other than text/html, returns the @(supportedHFSTHyphenationLocales.size) supported locales as JSON, e.g.:
+
@ctrl.hyphenate(None,None).left.get._1+ +
+ Uses finite state transducers provided by the HFST, Omorfi and Giellatekno projects. Those provided by HFST have been automatically translated from the TeX CTAN distribution's hyphenation rulesets. +
+