Skip to content

Commit 430fe7a

Browse files
Lars Maierneunhoef
Lars Maier
authored and committed
Feature/custom analyzers (#216)
* Added test for views in backup. * Added internal support for custom analyzers.
1 parent d66ef1f commit 430fe7a

File tree

3 files changed

+153
-1
lines changed

3 files changed

+153
-1
lines changed

go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ require (
66
github.com/arangodb/go-velocypack v0.0.0-20190129082528-7896a965b4ad
77
github.com/coreos/go-iptables v0.4.3
88
github.com/dgrijalva/jwt-go v3.2.0+incompatible
9+
github.com/stretchr/testify v1.2.2
910
github.com/pkg/errors v0.8.1
1011
)

test/view_test.go

+71
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ import (
2828
"testing"
2929

3030
driver "github.com/arangodb/go-driver"
31+
32+
"github.com/stretchr/testify/require"
3133
)
3234

3335
// ensureArangoSearchView is a helper to check if an arangosearch view exists and create it if needed.
@@ -694,3 +696,72 @@ func TestArangoSearchPrimarySort(t *testing.T) {
694696
})
695697
}
696698
}
699+
700+
func newBool(v bool) *bool {
701+
return &v
702+
}
703+
704+
// TestArangoSearchViewProperties353 tests for custom analyzers.
705+
func TestArangoSearchViewProperties353(t *testing.T) {
706+
ctx := context.Background()
707+
c := createClientFromEnv(t, true)
708+
skipNoCluster(c, t) // analyzers can only be read from the cluster inventory
709+
skipBelowVersion(c, "3.5.3", t)
710+
db := ensureDatabase(ctx, c, "view_test", nil, t)
711+
colname := "someCol"
712+
ensureCollection(ctx, db, colname, nil, t)
713+
name := "test_get_asview_353"
714+
analyzerName := "myanalyzer"
715+
opts := &driver.ArangoSearchViewProperties{
716+
Links: driver.ArangoSearchLinks{
717+
colname: driver.ArangoSearchElementProperties{
718+
AnalyzerDefinitions: []driver.ArangoSearchAnalyzerDefinition{
719+
driver.ArangoSearchAnalyzerDefinition{
720+
Name: analyzerName,
721+
Type: driver.ArangoSearchAnalyzerTypeNorm,
722+
Properties: driver.ArangoSearchAnalyzerProperties{
723+
Locale: "en_US.utf-8",
724+
Case: driver.ArangoSearchCaseLower,
725+
},
726+
Features: []driver.ArangoSearchAnalyzerFeature{
727+
driver.ArangoSearchAnalyzerFeaturePosition,
728+
driver.ArangoSearchAnalyzerFeatureFrequency,
729+
},
730+
},
731+
},
732+
IncludeAllFields: newBool(true),
733+
},
734+
},
735+
}
736+
_, err := db.CreateArangoSearchView(ctx, name, opts)
737+
require.NoError(t, err)
738+
// Get view
739+
v, err := db.View(ctx, name)
740+
require.NoError(t, err)
741+
asv, err := v.ArangoSearchView()
742+
require.NoError(t, err)
743+
// Check asv properties
744+
p, err := asv.Properties(ctx)
745+
require.NoError(t, err)
746+
require.Contains(t, p.Links, colname)
747+
748+
// get cluster inventory
749+
cluster, err := c.Cluster(ctx)
750+
require.NoError(t, err)
751+
inv, err := cluster.DatabaseInventory(ctx, db)
752+
require.NoError(t, err)
753+
p2, found := inv.ViewByName(name)
754+
require.True(t, found)
755+
756+
require.Contains(t, p2.Links, colname)
757+
link := p2.Links[colname]
758+
require.Len(t, link.AnalyzerDefinitions, 2)
759+
analyzer := &link.AnalyzerDefinitions[1]
760+
require.EqualValues(t, analyzer.Name, analyzerName)
761+
require.EqualValues(t, analyzer.Type, driver.ArangoSearchAnalyzerTypeNorm)
762+
require.Len(t, analyzer.Features, 2)
763+
require.EqualValues(t, analyzer.Features[0], driver.ArangoSearchAnalyzerFeaturePosition)
764+
require.EqualValues(t, analyzer.Features[1], driver.ArangoSearchAnalyzerFeatureFrequency)
765+
require.EqualValues(t, analyzer.Properties.Locale, "en_US.utf-8")
766+
require.EqualValues(t, analyzer.Properties.Case, driver.ArangoSearchCaseLower)
767+
}

view_arangosearch.go

+81-1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,83 @@ type ArangoSearchView interface {
3939
SetProperties(ctx context.Context, options ArangoSearchViewProperties) error
4040
}
4141

42+
// ArangoSearchAnalyzerType specifies type of an analyzer
43+
type ArangoSearchAnalyzerType string
44+
45+
const (
46+
// ArangoSearchAnalyzerTypeIdentity treat value as atom (no transformation)
47+
ArangoSearchAnalyzerTypeIdentity ArangoSearchAnalyzerType = "identity"
48+
// ArangoSearchAnalyzerTypeDelimiter split into tokens at user-defined character
49+
ArangoSearchAnalyzerTypeDelimiter ArangoSearchAnalyzerType = "delimiter"
50+
// ArangoSearchAnalyzerTypeStem apply stemming to the value as a whole
51+
ArangoSearchAnalyzerTypeStem ArangoSearchAnalyzerType = "stem"
52+
// ArangoSearchAnalyzerTypeNorm apply normalization to the value as a whole
53+
ArangoSearchAnalyzerTypeNorm ArangoSearchAnalyzerType = "norm"
54+
// ArangoSearchAnalyzerTypeNGram create n-grams from value with user-defined lengths
55+
ArangoSearchAnalyzerTypeNGram ArangoSearchAnalyzerType = "ngram"
56+
// ArangoSearchAnalyzerTypeText tokenize into words, optionally with stemming, normalization and stop-word filtering
57+
ArangoSearchAnalyzerTypeText ArangoSearchAnalyzerType = "text"
58+
)
59+
60+
// ArangoSearchAnalyzerFeature specifies a feature to an analyzer
61+
type ArangoSearchAnalyzerFeature string
62+
63+
const (
64+
// ArangoSearchAnalyzerFeatureFrequency how often a term is seen, required for PHRASE()
65+
ArangoSearchAnalyzerFeatureFrequency ArangoSearchAnalyzerFeature = "frequency"
66+
// ArangoSearchAnalyzerFeatureNorm the field normalization factor
67+
ArangoSearchAnalyzerFeatureNorm ArangoSearchAnalyzerFeature = "norm"
68+
// ArangoSearchAnalyzerFeaturePosition sequentially increasing term position, required for PHRASE(). If present then the frequency feature is also required
69+
ArangoSearchAnalyzerFeaturePosition ArangoSearchAnalyzerFeature = "position"
70+
)
71+
72+
// ArangoSearchCaseType specifies the character case conversion applied by an analyzer
// (see the Case field of ArangoSearchAnalyzerProperties).
type ArangoSearchCaseType string
73+
74+
const (
75+
// ArangoSearchCaseUpper to convert to all upper-case characters
76+
ArangoSearchCaseUpper ArangoSearchCaseType = "upper"
77+
// ArangoSearchCaseLower to convert to all lower-case characters
78+
ArangoSearchCaseLower ArangoSearchCaseType = "lower"
79+
// ArangoSearchCaseNone to not change character case (default)
80+
ArangoSearchCaseNone ArangoSearchCaseType = "none"
81+
)
82+
83+
// ArangoSearchAnalyzerProperties specifies options for the analyzer. Which fields are required and
84+
// respected depends on the analyzer type.
85+
// more information can be found here: https://www.arangodb.com/docs/stable/arangosearch-analyzers.html#analyzer-properties
86+
type ArangoSearchAnalyzerProperties struct {
87+
// Locale used by Stem, Norm, Text
88+
Locale string `json:"locale,omitempty"`
89+
// Delimiter used by Delimiter
90+
Delimiter string `json:"delimiter,omitempty"`
91+
// Accent used by Norm, Text
92+
Accent *bool `json:"accent,omitempty"`
93+
// Case used by Norm, Text
94+
Case ArangoSearchCaseType `json:"case,omitempty"`
95+
96+
// Min used by NGram
97+
Min *int64 `json:"min,omitempty"`
98+
// Max used by NGram
99+
Max *int64 `json:"max,omitempty"`
100+
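// PreserveOriginal used by NGram. NOTE(review): the ArangoDB analyzer docs describe preserveOriginal as a boolean; confirm that *int64 is the intended type.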
101+
PreserveOriginal *int64 `json:"preserveOriginal,omitempty"`
102+
103+
// Stemming used by Text
104+
Stemming *bool `json:"stemming,omitempty"`
105+
// Stopwords used by Text
106+
Stopwords []string `json:"stopwords,omitempty"`
107+
// StopwordsPath used by Text
108+
StopwordsPath []string `json:"stopwordsPath,omitempty"`
109+
}
110+
111+
// ArangoSearchAnalyzerDefinition provides definition of an analyzer
112+
type ArangoSearchAnalyzerDefinition struct {
113+
Name string `json:"name,omitempty"`
114+
Type ArangoSearchAnalyzerType `json:"type,omitempty"`
115+
Properties ArangoSearchAnalyzerProperties `json:"properties,omitempty"`
116+
Features []ArangoSearchAnalyzerFeature `json:"features,omitempty"`
117+
}
118+
42119
// ArangoSearchViewProperties contains properties of an ArangoSearch view.
43120
type ArangoSearchViewProperties struct {
44121
// CleanupIntervalStep specifies the minimum number of commits to wait between
@@ -96,7 +173,9 @@ type ArangoSearchViewProperties struct {
96173
type ArangoSearchSortDirection string
97174

98175
const (
99-
ArangoSearchSortDirectionAsc ArangoSearchSortDirection = "ASC"
176+
// ArangoSearchSortDirectionAsc sort ascending
177+
ArangoSearchSortDirectionAsc ArangoSearchSortDirection = "ASC"
178+
// ArangoSearchSortDirectionDesc sort descending
100179
ArangoSearchSortDirectionDesc ArangoSearchSortDirection = "DESC"
101180
)
102181

@@ -184,6 +263,7 @@ type ArangoSearchFields map[string]ArangoSearchElementProperties
184263
// Note that this structure is recursive. Settings not specified (nil)
185264
// at a given level will inherit their setting from a lower level.
186265
type ArangoSearchElementProperties struct {
266+
AnalyzerDefinitions []ArangoSearchAnalyzerDefinition `json:"analyzerDefinitions,omitempty"`
187267
// The list of analyzers to be used for indexing of string values. Defaults to ["identity"].
188268
Analyzers []string `json:"analyzers,omitempty"`
189269
// If set to true, all fields of this element will be indexed. Defaults to false.

0 commit comments

Comments
 (0)