Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[add] 全文搜索 & 修复空间收藏BUG #341

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions app/FragmentFormatter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package app

import (
"html"

"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
)

const (
	// Name is the registry key under which this package registers its custom
	// bleve fragment formatter and highlighter.
	Name = "mm-wiki"

	// defaultHTMLHighlightBefore is the markup emitted in front of every
	// highlighted term when the configuration does not supply "before".
	// To choose a different color code, see
	// https://html-color-codes.info/chinese/
	defaultHTMLHighlightBefore = "<span style=\"color:#F03F3F\">"

	// defaultHTMLHighlightAfter is the markup emitted after every highlighted
	// term when the configuration does not supply "after".
	defaultHTMLHighlightAfter = "</span>"
)

// FragmentFormatter renders search-result fragments as HTML, surrounding each
// matched term with a configurable pair of markers.
type FragmentFormatter struct {
	// before is written immediately ahead of a matched term and after is
	// written immediately behind it; both are emitted without escaping.
	before, after string
}

// NewFragmentFormatter returns a FragmentFormatter that wraps every matched
// term with the given before and after strings.
func NewFragmentFormatter(before, after string) *FragmentFormatter {
	formatter := new(FragmentFormatter)
	formatter.before = before
	formatter.after = after
	return formatter
}

// Format renders fragment f as HTML-escaped text in which every usable term
// location is wrapped in the formatter's before/after markers.
//
// Term locations are visited in the order supplied. A location is skipped when
// it is nil, when its ArrayPositions differ from the fragment's, or when it
// starts before the current write position (which begins at f.Start). The scan
// stops at the first remaining location that ends beyond f.End. Fragment text
// is passed through html.EscapeString; the markers are emitted verbatim.
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
	var out []byte
	pos := f.Start
	for _, loc := range orderedTermLocations {
		// Skip entries that are missing, belong to another array element, or
		// overlap text that has already been written.
		if loc == nil || !loc.ArrayPositions.Equals(f.ArrayPositions) || loc.Start < pos {
			continue
		}
		// A location reaching past the fragment ends the scan.
		if loc.End > f.End {
			break
		}
		// Plain text between the previous position and this term.
		out = append(out, html.EscapeString(string(f.Orig[pos:loc.Start]))...)
		// The term itself, enclosed in the configured markers.
		out = append(out, a.before...)
		out = append(out, html.EscapeString(string(f.Orig[loc.Start:loc.End]))...)
		out = append(out, a.after...)
		pos = loc.End
	}
	// Whatever text remains after the last highlighted term.
	out = append(out, html.EscapeString(string(f.Orig[pos:f.End]))...)
	return string(out)
}

// Constructor builds a FragmentFormatter from a registry configuration map.
//
// The optional string entries "before" and "after" override the default
// highlight markers; entries that are absent or not strings leave the
// corresponding default in place. The cache argument is not used, and the
// returned error is always nil.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
	opening, closing := defaultHTMLHighlightBefore, defaultHTMLHighlightAfter
	if custom, isString := config["before"].(string); isString {
		opening = custom
	}
	if custom, isString := config["after"].(string); isString {
		closing = custom
	}
	return NewFragmentFormatter(opening, closing), nil
}

// initFragmentFormatter registers Constructor with the bleve registry as the
// fragment formatter named Name.
func initFragmentFormatter() {
registry.RegisterFragmentFormatter(Name, Constructor)
}
30 changes: 30 additions & 0 deletions app/analyzer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package app

import (
"errors"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)

// JiebaAnalyzer is an empty type with no fields or methods in this file.
// NOTE(review): nothing in the visible code references it; confirm whether it
// is still needed.
type JiebaAnalyzer struct{}

// analyzerConstructor builds an analyzer whose only configured component is
// the tokenizer named by the "tokenizer" entry of config.
//
// It returns an error when "tokenizer" is missing or not a string, and passes
// through any error from looking the tokenizer up in cache.
func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	name, isString := config["tokenizer"].(string)
	if !isString {
		return nil, errors.New("must specify tokenizer")
	}

	tok, lookupErr := cache.TokenizerNamed(name)
	if lookupErr != nil {
		return nil, lookupErr
	}

	return &analysis.Analyzer{Tokenizer: tok}, nil
}

// initAnalyzer registers analyzerConstructor with the bleve registry under the
// analyzer name "gojieba".
func initAnalyzer() {
registry.RegisterAnalyzer("gojieba", analyzerConstructor)
}
74 changes: 43 additions & 31 deletions app/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,22 @@ package app
import (
"flag"
"fmt"
"log"
"os"
"path"
"path/filepath"
"time"

"github.com/astaxie/beego"
"github.com/astaxie/beego/logs"
"github.com/blevesearch/bleve/v2"
"github.com/fatih/color"
"github.com/phachon/mm-wiki/app/models"
"github.com/phachon/mm-wiki/app/services"
"github.com/phachon/mm-wiki/app/utils"
"github.com/phachon/mm-wiki/app/work"
"github.com/phachon/mm-wiki/global"
"github.com/snail007/go-activerecord/mysql"
"log"
"os"
"path"
"path/filepath"
"time"
"github.com/yanyiwu/gojieba"
)

var (
Expand Down Expand Up @@ -53,8 +56,13 @@ func init() {
initDB()
checkUpgrade()
initDocumentDir()
//initSearch()
//initWork()
// initTokenFilter()
initTokenizer()
initAnalyzer()
initSearch()
initFragmentFormatter()
initHighlighter()
initWork()
StartTime = time.Now().Unix()
}

Expand Down Expand Up @@ -240,32 +248,36 @@ func checkUpgrade() {
}

func initSearch() {

gseFile := filepath.Join(RootDir, "docs/search_dict/dictionary.txt")
stopFile := filepath.Join(RootDir, "docs/search_dict/stop_tokens.txt")
ok, _ := utils.File.PathIsExists(gseFile)
if !ok {
logs.Error("search dict file " + gseFile + " is not exists!")
os.Exit(1)
os.RemoveAll("mm-wiki.bleve")
//选择搜索引擎
err := global.SearchMap.AddCustomTokenizer("gojieba",
map[string]interface{}{
"dictpath": gojieba.DICT_PATH,
"hmmpath": gojieba.HMM_PATH,
"userdictpath": "./docs/search_dict/dictionary.txt",
"idf": gojieba.IDF_PATH,
"stop_words": "./docs/search_dict/stop_tokens.txt",
"type": "gojieba",
},
)
if err != nil {
panic(err)
}
ok, _ = utils.File.PathIsExists(stopFile)
if !ok {
logs.Error("search stop dict file " + stopFile + " is not exists!")
os.Exit(1)
err = global.SearchMap.AddCustomAnalyzer("gojieba",
map[string]interface{}{
"type": "gojieba",
"tokenizer": "gojieba",
},
)
if err != nil {
panic(err)
}
//global.DocSearcher.Init(types.EngineOpts{
// UseStore: true,
// StoreFolder: SearchIndexAbsDir,
// Using: 3,
// //GseDict: "zh",
// GseDict: gseFile,
// StopTokenFile: stopFile,
// IndexerOpts: &types.IndexerOpts{
// IndexType: types.LocsIndex,
// },
//})
global.SearchMap.DefaultAnalyzer = "gojieba"

global.SearchIndex, err = bleve.New("mm-wiki.bleve", global.SearchMap)
services.DocIndexService.UpdateAllDocIndex(50)
}

func initWork() {
work.DocSearchWorker.Start()
go services.DocIndexService.CheckDocIndexs()
}
71 changes: 45 additions & 26 deletions app/controllers/main.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
package controllers

import (
"math"
"sort"
"strings"

"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/phachon/mm-wiki/app/models"
"github.com/phachon/mm-wiki/global"
)

type MainController struct {
Expand Down Expand Up @@ -43,7 +48,6 @@ func (this *MainController) Default() {
limit := (page - 1) * number

userId := this.UserId

logDocuments, err := models.LogDocumentModel.GetLogDocumentsByLimit(userId, limit, number)
if err != nil {
this.ErrorLog("查找更新文档列表失败:" + err.Error())
Expand Down Expand Up @@ -128,7 +132,6 @@ func (this *MainController) Search() {

keyword := strings.TrimSpace(this.GetString("keyword", ""))
searchType := this.GetString("search_type", "content")

this.Data["search_type"] = searchType
this.Data["keyword"] = keyword
this.Data["count"] = 0
Expand Down Expand Up @@ -159,31 +162,45 @@ func (this *MainController) Search() {
}
}
searchDocContents := make(map[string]string)
DocScore := make(map[string]float64)
var searchDocIds []string
// 默认根据内容搜索
// v0.2.1 下线全文搜索功能
searchType = "title"
//if searchType == "title" {
// documents, err = models.DocumentModel.GetDocumentsByLikeName(keyword)
//} else {
// searchRes := global.DocSearcher.SearchDoc(types.SearchReq{Text: keyword})
// searchDocIds := []string{}
// for _, searchDoc := range searchRes.Docs {
// if len(searchDoc.TokenSnippetLocs) == 0 {
// continue
// }
// docId := searchDoc.DocId
// content := searchDoc.Content
// locIndex := searchDoc.TokenSnippetLocs[0]
// searchContent := utils.Misc.SubStrUnicodeBySubStrIndex(content, keyword, locIndex, 30, 30)
// searchDocContents[docId] = searchContent
// searchDocIds = append(searchDocIds, docId)
// }
// documents, err = models.DocumentModel.GetDocumentsByDocumentIds(searchDocIds)
//}
documents, err = models.DocumentModel.GetDocumentsByLikeName(keyword)
if err != nil {
this.ErrorLog("搜索文档出错:" + err.Error())
this.ViewError("搜索文档错误!")
if searchType == "title" {
documents, err = models.DocumentModel.GetDocumentsByLikeName(keyword)
if err != nil {
this.ErrorLog("搜索文档出错:" + err.Error())
this.ViewError("搜索文档错误!")
}
} else {
// 提取关键词,支持多关键词搜索
keyList := strings.Split(keyword, " ")
queryList := []query.Query{}
for _, key := range keyList {
keyQuery := bleve.NewMatchQuery(key)
queryList = append(queryList, keyQuery)
}
query := bleve.NewConjunctionQuery(queryList...)
// 开始全文搜索
req := bleve.NewSearchRequestOptions(query, math.MaxInt32, 0, true)
req.Highlight = bleve.NewHighlightWithStyle("mm-wiki")
searchDoc, err := global.SearchIndex.Search(req)
if err != nil {
this.ErrorLog("fail to Search file, err:" + err.Error())
this.ViewError("搜索文档错误!")
}
// 规范化返回结果
for _, searchDoc := range searchDoc.Hits {
resultText := searchDoc.Fragments["Content"][0]
searchDocContents[searchDoc.ID] = resultText
searchDocIds = append(searchDocIds, searchDoc.ID)
DocScore[searchDoc.ID] = searchDoc.Score
}
documents, err = models.DocumentModel.GetDocumentsByDocumentIds(searchDocIds)
if err != nil {
this.ErrorLog("搜索文档出错:" + err.Error())
this.ViewError("搜索文档错误!")
}
}
// 过滤一下没权限的空间
realDocuments := []map[string]string{}
Expand All @@ -202,7 +219,9 @@ func (this *MainController) Search() {
}
realDocuments = append(realDocuments, document)
}

sort.Slice(realDocuments, func(i, j int) bool {
return DocScore[realDocuments[i]["document_id"]] > DocScore[realDocuments[j]["document_id"]]
})
this.Data["search_type"] = searchType
this.Data["keyword"] = keyword
this.Data["documents"] = realDocuments
Expand Down
33 changes: 33 additions & 0 deletions app/highlight.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package app

import (
"fmt"

"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
simpleFragmenter "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
simpleHighlighter "github.com/blevesearch/bleve/v2/search/highlight/highlighter/simple"
)

// HighlighterConstructor assembles a simple highlighter from the stock simple
// fragmenter and the fragment formatter registered under Name, joined with the
// simple highlighter's default separator.
//
// It returns an error when either component cannot be obtained from cache.
// The config argument is not used.
func HighlighterConstructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	frag, fragErr := cache.FragmenterNamed(simpleFragmenter.Name)
	if fragErr != nil {
		return nil, fmt.Errorf("error building fragmenter: %v", fragErr)
	}

	fragFormatter, fmtErr := cache.FragmentFormatterNamed(Name)
	if fmtErr != nil {
		return nil, fmt.Errorf("error building fragment formatter: %v", fmtErr)
	}

	hl := simpleHighlighter.NewHighlighter(frag, fragFormatter, simpleHighlighter.DefaultSeparator)
	return hl, nil
}

// initHighlighter registers HighlighterConstructor with the bleve registry as
// the highlighter named Name.
func initHighlighter() {
registry.RegisterHighlighter(Name, HighlighterConstructor)
}
Loading