forked from ironsweet/golucene
-
Notifications
You must be signed in to change notification settings - Fork 1
/
gl.go
106 lines (95 loc) · 3.51 KB
/
gl.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
package main
import (
"fmt"
"log"
"os"
"runtime"
std "github.com/jtejido/golucene/analysis/standard"
_ "github.com/jtejido/golucene/core/codec/lucene410"
"github.com/jtejido/golucene/core/document"
"github.com/jtejido/golucene/core/index"
"github.com/jtejido/golucene/core/index/model"
"github.com/jtejido/golucene/core/search"
"github.com/jtejido/golucene/core/search/similarities"
"github.com/jtejido/golucene/core/store"
"github.com/jtejido/golucene/core/util"
"github.com/jtejido/golucene/queryparser/classic"
)
// main is a small end-to-end smoke test: it writes three documents into the
// on-disk index "test_index", reopens that index for reading, runs one query
// built by the classic query parser against the "body" field, and prints every
// hit together with its stored "body" value.
//
// Any failure along the way is fatal and terminates the program via log.Fatalf.
func main() {
	n := runtime.NumCPU()

	if err := util.SetDefaultInfoStream(util.NewPrintStreamInfoStream(os.Stdout)); err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	// Score with BM25 instead of whatever similarity the index package defaults to.
	index.DefaultSimilarity = func() index.Similarity {
		return similarities.NewDefaultBM25Similarity()
	}

	directory, err := store.OpenFSDirectory("test_index")
	if err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	analyzer := std.NewStandardAnalyzer()
	conf := index.NewIndexWriterConfig(util.VERSION_LATEST, analyzer)

	// Size the merge scheduler to the CPU count. Use the checked form of the
	// type assertion so a different scheduler implementation is skipped
	// instead of panicking.
	if cms, ok := conf.MergeScheduler().(*index.ConcurrentMergeScheduler); ok {
		cms.SetMaxMergesAndRoutines(n, n)
	} else {
		log.Printf("merge scheduler is not a ConcurrentMergeScheduler; leaving its limits unchanged")
	}

	writer, err := index.NewIndexWriter(directory, conf)
	if err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	// Custom text field type used to exercise term vector indexing.
	ft := document.NewFieldType()
	ft.SetIndexed(true)
	ft.SetIndexOptions(model.INDEX_OPT_DOCS_ONLY)
	ft.SetTokenized(true)
	ft.SetStoreTermVectors(true)
	ft.SetStoreTermVectorPositions(true)
	ft.SetStored(true)

	d := document.NewDocument()
	d.Add(document.NewFieldFromString("body", "test 1 Lorem", ft))
	if err = writer.AddDocument(d.Fields()); err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	d2 := document.NewDocument()
	ft2 := document.NewFieldType()
	ft2.SetIndexed(true)
	ft2.SetTokenized(true)
	ft2.SetStored(true)
	ft2.SetStoreTermVectors(true)
	ft2.SetStoreTermVectorPositions(true)
	ft2.SetOmitNorms(true)
	ft2.SetIndexOptions(model.INDEX_OPT_DOCS_ONLY)
	ft2.SetNumericType(document.FIELD_TYPE_NUMERIC_INT)
	ft2.SetNumericPrecisionStep(util.NUMERIC_PRECISION_STEP_DEFAULT_32)
	// The commented-out line below is the shorter alternative. Reranking
	// requires stored term vectors, so a custom field type is created here in
	// order to set StoreTermVectors and StoreTermVectorPositions to true.
	// d2.Add(document.NewIntField("body", 2, STORE_YES))
	d2.Add(document.NewIntFieldFromType("body", 2, ft2))
	if err = writer.AddDocument(d2.Fields()); err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	d3 := document.NewDocument()
	d3.Add(document.NewFieldFromString("body", "test 2 Lorem", ft))
	if err = writer.AddDocument(d3.Fields()); err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	// writer.Commit()
	// Close the writer so the index is written before it is reopened for
	// reading; a failure here means the search below would see a bad index.
	if err = writer.Close(); err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	reader, err := index.OpenDirectoryReader(directory)
	if err != nil {
		log.Fatalf("error: %s", err.Error())
	}
	searcher := search.NewIndexSearcher(reader)

	parser := classic.NewQueryParser(util.VERSION_LATEST, "body", analyzer)
	// parser := simple.NewSimpleQueryParser(analyzer, "body")
	// parser.SetDefaultOperator(search.MUST)

	// Test query using the parser's NOT operator on the default "body" field.
	var q search.Query
	if q, err = parser.Parse(`2 NOT test`); err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	// Reranking requires STORE_YES because it iterates terms; it will panic on
	// terms that are not stored.
	// exp := expansion.NewDefaultRocchioReranker(analyzer, "body")
	// ctx := search.NewExpanderContext(searcher, q, "test")
	// res, err := searcher.SearchExpand(q, nil, 1000, exp, ctx)
	// if err != nil {
	// 	log.Fatalf("error: %s", err.Error())
	// }

	res, err := searcher.Search(q, nil, 1000)
	if err != nil {
		log.Fatalf("error: %s", err.Error())
	}

	fmt.Printf("Found %v hit(s).\n", res.TotalHits)
	for _, hit := range res.ScoreDocs {
		fmt.Printf("Doc %v score: %v\n", hit.Doc, hit.Score)
		doc, err := reader.Document(hit.Doc)
		if err != nil {
			log.Fatalf("error: %s", err.Error())
		}
		fmt.Printf("body -> %v\n", doc.Get("body"))
	}
}