@@ -40,6 +40,8 @@ type BulkHandler struct {
 	writer       BulkWriter
 	indexRender  IndexRenderFn
 	transformers map[string]etl.TransformFn
+
+	fields *define.ETLRecordFields
 }
 
 func (b *BulkHandler) makeRecordID(values map[string]interface{}) string {
@@ -100,6 +102,9 @@ func (b *BulkHandler) Handle(ctx context.Context, payload define.Payload, killCh
 		}
 	}
 
+	if b.fields != nil {
+		etlRecord = b.fields.Filter(etlRecord)
+	}
 	return &etlRecord, utils.ParseTimeStamp(*etlRecord.Time), true
 }
 
@@ -170,6 +175,24 @@ func (b *BulkHandler) flush(ctx context.Context, index string, records Records)
 	return count, errs.AsError()
 }
 
+func (b *BulkHandler) grouping(records Records) Records {
+	if b.fields == nil || len(b.fields.GroupKeys) <= 0 {
+		return records
+	}
+
+	uniq := make(map[uint64]*Record)
+	for _, record := range records {
+		uid := b.fields.GroupID(record.Document)
+		uniq[uid] = record
+	}
+
+	dst := make(Records, 0, len(uniq))
+	for _, item := range uniq {
+		dst = append(dst, item)
+	}
+	return dst
+}
+
 // Flush :
 func (b *BulkHandler) Flush(ctx context.Context, results []interface{}) (count int, err error) {
 	lastIndex := ""
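
The grouping helper added above deduplicates a batch by a group ID derived from the configured group keys, keeping only the last record seen per ID before the batch is flushed. Below is a minimal, self-contained sketch of that idea; the recordFields type and its FNV-64 hashing are hypothetical stand-ins for define.ETLRecordFields, whose real GroupID implementation is not shown in this diff.

package main

import (
	"fmt"
	"hash/fnv"
)

// recordFields is a hypothetical stand-in for define.ETLRecordFields,
// reduced to the members the grouping logic relies on.
type recordFields struct {
	GroupKeys []string
}

// GroupID hashes the values of the group keys into a uint64 bucket,
// mirroring how grouping() keys its uniq map.
func (f *recordFields) GroupID(doc map[string]interface{}) uint64 {
	h := fnv.New64a()
	for _, key := range f.GroupKeys {
		fmt.Fprintf(h, "%s=%v;", key, doc[key])
	}
	return h.Sum64()
}

func main() {
	fields := &recordFields{GroupKeys: []string{"dimension", "target"}}

	docs := []map[string]interface{}{
		{"dimension": "cpu", "target": "host-1", "value": 1},
		{"dimension": "cpu", "target": "host-1", "value": 2}, // same group: overwrites the first
		{"dimension": "mem", "target": "host-1", "value": 3},
	}

	// Keep only the last document per group ID, just as uniq[uid] = record does.
	uniq := make(map[uint64]map[string]interface{})
	for _, doc := range docs {
		uniq[fields.GroupID(doc)] = doc
	}
	fmt.Println(len(uniq)) // 2
}

Rebuilding the slice from a map also makes the flushed order nondeterministic, and, as the TODO in the next hunk notes, the count returned by flush can be smaller than the number of results without any write having failed.
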
@@ -193,9 +216,12 @@ func (b *BulkHandler) Flush(ctx context.Context, results []interface{}) (count i
 
 		logging.Debugf("backend %v ready to flush record %#v to index %s", b, record, index)
 
+		// TODO(mando): grouping can make the number of records actually written
+		// lower than the number of results, but that does not mean the writes failed
+
 		// handle records that cross time-interval boundaries
 		if index != lastIndex && lastIndex != "" {
-			cnt, err := b.flush(ctx, lastIndex, records)
+			cnt, err := b.flush(ctx, lastIndex, b.grouping(records))
 			records = records[:0]
 			count += cnt
 			errs.Add(err)
@@ -205,14 +231,18 @@ func (b *BulkHandler) Flush(ctx context.Context, results []interface{}) (count i
 	}
 
 	if len(records) > 0 {
-		cnt, err := b.flush(ctx, lastIndex, records)
+		cnt, err := b.flush(ctx, lastIndex, b.grouping(records))
 		count += cnt
 		errs.Add(err)
 	}
 
 	return count, errs.AsError()
 }
 
+func (b *BulkHandler) SetETLRecordFields(f *define.ETLRecordFields) {
+	b.fields = f
+}
+
 // Close :
 func (b *BulkHandler) Close() error {
 	return b.writer.Close()
@@ -268,7 +298,7 @@ func NewBulkHandler(cluster *config.ElasticSearchMetaClusterInfo, table *config.
 }
 
 // NewBackend :
-func NewBackend(ctx context.Context, name string, maxQps int) (define.Backend, error) {
+func NewBackend(ctx context.Context, name string, options *utils.MapHelper) (define.Backend, error) {
 	conf := config.FromContext(ctx)
 	resultTable := config.ResultTableConfigFromContext(ctx)
 
@@ -300,6 +330,7 @@ func NewBackend(ctx context.Context, name string, maxQps int) (define.Backend, e
 		return nil, err
 	}
 
+	maxQps, _ := options.GetInt(config.PipelineConfigOptMaxQps)
 	return pipeline.NewBulkBackendDefaultAdapter(ctx, name, bulk, maxQps), nil
 }
 
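
With this change NewBackend receives the whole result-table option map and resolves the QPS limit itself, instead of having the caller in init() extract it. The diff shows utils.MapHelper.GetInt returning a value plus a second result that the caller discards; the sketch below is a hypothetical, simplified version of such a lookup (the key "max_qps" stands in for config.PipelineConfigOptMaxQps), assuming missing or mistyped options fall back to zero.

package main

import "fmt"

// optionMap is a hypothetical stand-in for utils.MapHelper.
type optionMap struct {
	data map[string]interface{}
}

// GetInt returns the option as an int plus an ok flag; missing or mistyped
// options fall back to zero, which is why a caller that accepts a zero
// default can ignore the flag, as NewBackend does with maxQps, _ := ...
func (m *optionMap) GetInt(key string) (int, bool) {
	switch n := m.data[key].(type) {
	case int:
		return n, true
	case float64: // JSON-decoded numbers typically arrive as float64
		return int(n), true
	default:
		return 0, false
	}
}

func main() {
	opts := &optionMap{data: map[string]interface{}{"max_qps": 500}}
	maxQps, _ := opts.GetInt("max_qps") // a missing key would yield 0
	fmt.Println(maxQps)                 // 500
}

A side effect of passing the full option map down is that future backend-specific options can be read inside NewBackend without changing the registration callback again.
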
@@ -318,7 +349,6 @@ func init() {
 		}
 
 		options := utils.NewMapHelper(rt.Option)
-		maxQps, _ := options.GetInt(config.PipelineConfigOptMaxQps)
-		return NewBackend(ctx, rt.FormatName(name), maxQps)
+		return NewBackend(ctx, rt.FormatName(name), options)
 	})
}