Skip to content

Commit 870fe73

Browse files
sandeepsukhani and tomwilkie
authored and committed
Refactored limits to be able to define them dynamically when Cortex is vendored in another project (#1549)
* Refactored limits to be able to define them dynamically when vendoring in Cortex. Added OverridesManager, which stores default limits and per-tenant overrides as an interface. The factory method for creating OverridesManager accepts the overrides reload config, a method to load overrides from a YAML file, and the default limits. Sample and Labels validation methods now accept any type which implements the methods required to get the limits needed for validation. Signed-off-by: Sandeep Sukhani <[email protected]>
* Renamed receiver for overrides manager methods. Signed-off-by: Sandeep Sukhani <[email protected]>
* Added comment regarding loading YAML. Signed-off-by: Sandeep Sukhani <[email protected]>
* Exported OverridesLoader for godoc and removed an unused interface. Signed-off-by: Sandeep Sukhani <[email protected]>
* Added struct to hold config for overrides manager. Signed-off-by: Sandeep Sukhani <[email protected]>
* Added a TODO for moving the overrides loader to OverridesManager when a bug in the yamlv3 decoder is fixed. Signed-off-by: Sandeep Sukhani <[email protected]>
1 parent 0460dad commit 870fe73

File tree

7 files changed

+393
-288
lines changed

7 files changed

+393
-288
lines changed

pkg/distributor/distributor.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,15 +331,15 @@ func (d *Distributor) Push(ctx context.Context, req *client.WriteRequest) (*clie
331331
}
332332

333333
labelsHistogram.Observe(float64(len(ts.Labels)))
334-
if err := d.limits.ValidateLabels(userID, ts.Labels); err != nil {
334+
if err := validation.ValidateLabels(d.limits, userID, ts.Labels); err != nil {
335335
lastPartialErr = err
336336
continue
337337
}
338338

339339
metricName, _ := extract.MetricNameFromLabelAdapters(ts.Labels)
340340
samples := make([]client.Sample, 0, len(ts.Samples))
341341
for _, s := range ts.Samples {
342-
if err := d.limits.ValidateSample(userID, metricName, s); err != nil {
342+
if err := validation.ValidateSample(d.limits, userID, metricName, s); err != nil {
343343
lastPartialErr = err
344344
continue
345345
}

pkg/distributor/distributor_test.go

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ func TestDistributorPush(t *testing.T) {
9797
} {
9898
for _, shardByAllLabels := range []bool{true, false} {
9999
t.Run(fmt.Sprintf("[%d](shardByAllLabels=%v)", i, shardByAllLabels), func(t *testing.T) {
100-
d := prepare(t, tc.numIngesters, tc.happyIngesters, 0, shardByAllLabels)
100+
d := prepare(t, tc.numIngesters, tc.happyIngesters, 0, shardByAllLabels, nil)
101101
defer d.Stop()
102102

103103
request := makeWriteRequest(tc.samples)
@@ -150,8 +150,11 @@ func TestDistributorPushHAInstances(t *testing.T) {
150150
} {
151151
for _, shardByAllLabels := range []bool{true, false} {
152152
t.Run(fmt.Sprintf("[%d](shardByAllLabels=%v)", i, shardByAllLabels), func(t *testing.T) {
153-
d := prepare(t, 1, 1, 0, shardByAllLabels)
154-
d.limits.Defaults.AcceptHASamples = true
153+
var limits validation.Limits
154+
flagext.DefaultValues(&limits)
155+
limits.AcceptHASamples = true
156+
157+
d := prepare(t, 1, 1, 0, shardByAllLabels, &limits)
155158
codec := codec.Proto{Factory: ProtoReplicaDescFactory}
156159
mock := kv.PrefixClient(consul.NewInMemoryClient(codec), "prefix")
157160

@@ -273,7 +276,7 @@ func TestDistributorPushQuery(t *testing.T) {
273276

274277
for _, tc := range testcases {
275278
t.Run(tc.name, func(t *testing.T) {
276-
d := prepare(t, tc.numIngesters, tc.happyIngesters, 0, tc.shardByAllLabels)
279+
d := prepare(t, tc.numIngesters, tc.happyIngesters, 0, tc.shardByAllLabels, nil)
277280
defer d.Stop()
278281

279282
request := makeWriteRequest(tc.samples)
@@ -305,7 +308,7 @@ func TestSlowQueries(t *testing.T) {
305308
if nIngesters-happy > 1 {
306309
expectedErr = promql.ErrStorage{Err: errFail}
307310
}
308-
d := prepare(t, nIngesters, happy, 100*time.Millisecond, shardByAllLabels)
311+
d := prepare(t, nIngesters, happy, 100*time.Millisecond, shardByAllLabels, nil)
309312
defer d.Stop()
310313

311314
_, err := d.Query(ctx, 0, 10, nameMatcher)
@@ -317,7 +320,7 @@ func TestSlowQueries(t *testing.T) {
317320
}
318321
}
319322

320-
func prepare(t *testing.T, numIngesters, happyIngesters int, queryDelay time.Duration, shardByAllLabels bool) *Distributor {
323+
func prepare(t *testing.T, numIngesters, happyIngesters int, queryDelay time.Duration, shardByAllLabels bool, limits *validation.Limits) *Distributor {
321324
ingesters := []mockIngester{}
322325
for i := 0; i < happyIngesters; i++ {
323326
ingesters = append(ingesters, mockIngester{
@@ -355,16 +358,20 @@ func prepare(t *testing.T, numIngesters, happyIngesters int, queryDelay time.Dur
355358
}
356359

357360
var cfg Config
358-
var limits validation.Limits
359361
var clientConfig client.Config
360-
flagext.DefaultValues(&cfg, &limits, &clientConfig)
362+
flagext.DefaultValues(&cfg, &clientConfig)
363+
364+
if limits == nil {
365+
limits = &validation.Limits{}
366+
flagext.DefaultValues(limits)
367+
}
361368
limits.IngestionRate = 20
362369
limits.IngestionBurstSize = 20
363370
cfg.ingesterClientFactory = factory
364371
cfg.ShardByAllLabels = shardByAllLabels
365372
cfg.ExtraQueryDelay = 50 * time.Millisecond
366373

367-
overrides, err := validation.NewOverrides(limits)
374+
overrides, err := validation.NewOverrides(*limits)
368375
require.NoError(t, err)
369376

370377
d, err := New(cfg, clientConfig, overrides, ring)
@@ -694,13 +701,16 @@ func TestDistributorValidation(t *testing.T) {
694701
},
695702
} {
696703
t.Run(strconv.Itoa(i), func(t *testing.T) {
697-
d := prepare(t, 3, 3, 0, true)
698-
defer d.Stop()
704+
var limits validation.Limits
705+
flagext.DefaultValues(&limits)
706+
707+
limits.CreationGracePeriod = 2 * time.Hour
708+
limits.RejectOldSamples = true
709+
limits.RejectOldSamplesMaxAge = 24 * time.Hour
710+
limits.MaxLabelNamesPerSeries = 2
699711

700-
d.limits.Defaults.CreationGracePeriod = 2 * time.Hour
701-
d.limits.Defaults.RejectOldSamples = true
702-
d.limits.Defaults.RejectOldSamplesMaxAge = 24 * time.Hour
703-
d.limits.Defaults.MaxLabelNamesPerSeries = 2
712+
d := prepare(t, 3, 3, 0, true, &limits)
713+
defer d.Stop()
704714

705715
_, err := d.Push(ctx, client.ToWriteRequest(tc.samples, client.API))
706716
require.Equal(t, tc.err, err)

pkg/util/validation/limits.go

Lines changed: 181 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ package validation
22

33
import (
44
"flag"
5+
"os"
56
"time"
7+
8+
"gopkg.in/yaml.v2"
69
)
710

811
// Limits describe all the limits for users; can be used to describe global default
@@ -73,7 +76,184 @@ func (l *Limits) UnmarshalYAML(unmarshal func(interface{}) error) error {
7376
// We want to set c to the defaults and then overwrite it with the input.
7477
// To make unmarshal fill the plain data struct rather than calling UnmarshalYAML
7578
// again, we have to hide it using a type indirection. See prometheus/config.
76-
*l = defaultLimits
79+
80+
// During startup defaultLimits is not set yet, so only reset l to the defaults once they exist.
81+
if defaultLimits != nil {
82+
*l = *defaultLimits
83+
}
7784
type plain Limits
7885
return unmarshal((*plain)(l))
7986
}
87+
88+
// When we load YAML from disk, we want the various per-customer limits
89+
// to default to any values specified on the command line, not default
90+
// command line values. This global contains those values. I (Tom) cannot
91+
// find a nicer way I'm afraid.
92+
var defaultLimits *Limits
93+
94+
// Overrides periodically fetch a set of per-user overrides, and provides convenience
95+
// functions for fetching the correct value.
96+
type Overrides struct {
97+
overridesManager *OverridesManager
98+
}
99+
100+
// NewOverrides makes a new Overrides.
101+
// We store the supplied limits in a global variable to ensure per-tenant limits
102+
// are defaulted to those values. As such, the last call to NewOverrides will
103+
// become the new global defaults.
104+
func NewOverrides(defaults Limits) (*Overrides, error) {
105+
defaultLimits = &defaults
106+
overridesManagerConfig := OverridesManagerConfig{
107+
OverridesReloadPeriod: defaults.PerTenantOverridePeriod,
108+
OverridesLoadPath: defaults.PerTenantOverrideConfig,
109+
OverridesLoader: loadOverrides,
110+
Defaults: &defaults,
111+
}
112+
113+
overridesManager, err := NewOverridesManager(overridesManagerConfig)
114+
if err != nil {
115+
return nil, err
116+
}
117+
118+
return &Overrides{
119+
overridesManager: overridesManager,
120+
}, nil
121+
}
122+
123+
// Stop background reloading of overrides.
124+
func (o *Overrides) Stop() {
125+
o.overridesManager.Stop()
126+
}
127+
128+
// IngestionRate returns the limit on ingester rate (samples per second).
129+
func (o *Overrides) IngestionRate(userID string) float64 {
130+
return o.overridesManager.GetLimits(userID).(*Limits).IngestionRate
131+
}
132+
133+
// IngestionBurstSize returns the burst size for ingestion rate.
134+
func (o *Overrides) IngestionBurstSize(userID string) int {
135+
return o.overridesManager.GetLimits(userID).(*Limits).IngestionBurstSize
136+
}
137+
138+
// AcceptHASamples returns whether the distributor should track and accept samples from HA replicas for this user.
139+
func (o *Overrides) AcceptHASamples(userID string) bool {
140+
return o.overridesManager.GetLimits(userID).(*Limits).AcceptHASamples
141+
}
142+
143+
// HAReplicaLabel returns the replica label to look for when deciding whether to accept a sample from a Prometheus HA replica.
144+
func (o *Overrides) HAReplicaLabel(userID string) string {
145+
return o.overridesManager.GetLimits(userID).(*Limits).HAReplicaLabel
146+
}
147+
148+
// HAClusterLabel returns the cluster label to look for when deciding whether to accept a sample from a Prometheus HA replica.
149+
func (o *Overrides) HAClusterLabel(userID string) string {
150+
return o.overridesManager.GetLimits(userID).(*Limits).HAClusterLabel
151+
}
152+
153+
// MaxLabelNameLength returns maximum length a label name can be.
154+
func (o *Overrides) MaxLabelNameLength(userID string) int {
155+
return o.overridesManager.GetLimits(userID).(*Limits).MaxLabelNameLength
156+
}
157+
158+
// MaxLabelValueLength returns maximum length a label value can be. This also is
159+
// the maximum length of a metric name.
160+
func (o *Overrides) MaxLabelValueLength(userID string) int {
161+
return o.overridesManager.GetLimits(userID).(*Limits).MaxLabelValueLength
162+
}
163+
164+
// MaxLabelNamesPerSeries returns maximum number of label/value pairs timeseries.
165+
func (o *Overrides) MaxLabelNamesPerSeries(userID string) int {
166+
return o.overridesManager.GetLimits(userID).(*Limits).MaxLabelNamesPerSeries
167+
}
168+
169+
// RejectOldSamples returns true when we should reject samples older than certain
170+
// age.
171+
func (o *Overrides) RejectOldSamples(userID string) bool {
172+
return o.overridesManager.GetLimits(userID).(*Limits).RejectOldSamples
173+
}
174+
175+
// RejectOldSamplesMaxAge returns the age at which samples should be rejected.
176+
func (o *Overrides) RejectOldSamplesMaxAge(userID string) time.Duration {
177+
return o.overridesManager.GetLimits(userID).(*Limits).RejectOldSamplesMaxAge
178+
}
179+
180+
// CreationGracePeriod is misnamed, and actually returns how far into the future
181+
// we should accept samples.
182+
func (o *Overrides) CreationGracePeriod(userID string) time.Duration {
183+
return o.overridesManager.GetLimits(userID).(*Limits).CreationGracePeriod
184+
}
185+
186+
// MaxSeriesPerQuery returns the maximum number of series a query is allowed to hit.
187+
func (o *Overrides) MaxSeriesPerQuery(userID string) int {
188+
return o.overridesManager.GetLimits(userID).(*Limits).MaxSeriesPerQuery
189+
}
190+
191+
// MaxSamplesPerQuery returns the maximum number of samples in a query (from the ingester).
192+
func (o *Overrides) MaxSamplesPerQuery(userID string) int {
193+
return o.overridesManager.GetLimits(userID).(*Limits).MaxSamplesPerQuery
194+
}
195+
196+
// MaxSeriesPerUser returns the maximum number of series a user is allowed to store.
197+
func (o *Overrides) MaxSeriesPerUser(userID string) int {
198+
return o.overridesManager.GetLimits(userID).(*Limits).MaxSeriesPerUser
199+
}
200+
201+
// MaxSeriesPerMetric returns the maximum number of series allowed per metric.
202+
func (o *Overrides) MaxSeriesPerMetric(userID string) int {
203+
return o.overridesManager.GetLimits(userID).(*Limits).MaxSeriesPerMetric
204+
}
205+
206+
// MaxChunksPerQuery returns the maximum number of chunks allowed per query.
207+
func (o *Overrides) MaxChunksPerQuery(userID string) int {
208+
return o.overridesManager.GetLimits(userID).(*Limits).MaxChunksPerQuery
209+
}
210+
211+
// MaxQueryLength returns the limit of the length (in time) of a query.
212+
func (o *Overrides) MaxQueryLength(userID string) time.Duration {
213+
return o.overridesManager.GetLimits(userID).(*Limits).MaxQueryLength
214+
}
215+
216+
// MaxQueryParallelism returns the limit to the number of sub-queries the
217+
// frontend will process in parallel.
218+
func (o *Overrides) MaxQueryParallelism(userID string) int {
219+
return o.overridesManager.GetLimits(userID).(*Limits).MaxQueryParallelism
220+
}
221+
222+
// EnforceMetricName whether to enforce the presence of a metric name.
223+
func (o *Overrides) EnforceMetricName(userID string) bool {
224+
return o.overridesManager.GetLimits(userID).(*Limits).EnforceMetricName
225+
}
226+
227+
// CardinalityLimit whether to enforce the presence of a metric name.
228+
func (o *Overrides) CardinalityLimit(userID string) int {
229+
return o.overridesManager.GetLimits(userID).(*Limits).CardinalityLimit
230+
}
231+
232+
// Loads overrides and returns the limits as an interface to store them in OverridesManager.
233+
// We need to implement it here since OverridesManager must store type Limits in an interface but
234+
// it doesn't know its definition to initialize it.
235+
// We could have used yamlv3.Node for this but there is no way to enforce strict decoding due to a bug in it
236+
// TODO: Use yamlv3.Node to move this to OverridesManager after https://github.com/go-yaml/yaml/issues/460 is fixed
237+
func loadOverrides(filename string) (map[string]interface{}, error) {
238+
f, err := os.Open(filename)
239+
if err != nil {
240+
return nil, err
241+
}
242+
243+
var overrides struct {
244+
Overrides map[string]*Limits `yaml:"overrides"`
245+
}
246+
247+
decoder := yaml.NewDecoder(f)
248+
decoder.SetStrict(true)
249+
if err := decoder.Decode(&overrides); err != nil {
250+
return nil, err
251+
}
252+
253+
overridesAsInterface := map[string]interface{}{}
254+
for userID := range overrides.Overrides {
255+
overridesAsInterface[userID] = overrides.Overrides[userID]
256+
}
257+
258+
return overridesAsInterface, nil
259+
}

0 commit comments

Comments
 (0)