5
5
6
6
"github.com/pilosa/go-pilosa"
7
7
"github.com/pilosa/go-pilosa/egpool"
8
+ "github.com/pilosa/pilosa/logger"
8
9
"github.com/pilosa/pilosa/roaring"
9
10
"github.com/pkg/errors"
10
11
)
@@ -102,16 +103,13 @@ type Batch struct {
102
103
// those keys map to.
103
104
toTranslateSets map [string ]map [string ][]int
104
105
105
- // for string ids which we weren't able to immediately translate,
106
- // keep a map of which record(s) each string id maps to.
107
- //
108
- // TODO:
109
- // this is probably super inefficient in the (common) case where
110
- // each record has a different string ID. In that case, a simple
111
- // slice of strings would probably work better.
112
- toTranslateID map [string ][]int
106
+ // toTranslateID holds, at each record index, the string column key
107
+ // that still needs translating into Batch.ids ("" means none)
108
+ toTranslateID []string
113
109
114
110
transCache Translator
111
+
112
+ log logger.Logger
115
113
}
116
114
117
115
// Len returns the current length of the batch's column ID slice
// (b.ids), which Add appends to as records are added.
func (b *Batch) Len() int {
	n := len(b.ids)
	return n
}
@@ -128,6 +126,13 @@ func OptTranslator(t Translator) BatchOption {
128
126
}
129
127
}
130
128
129
+ func OptLogger (l logger.Logger ) BatchOption {
130
+ return func (b * Batch ) error {
131
+ b .log = l
132
+ return nil
133
+ }
134
+ }
135
+
131
136
// NewBatch initializes a new Batch object which will use the given
132
137
// Pilosa client, index, set of fields, and will take "size" records
133
138
// before returning ErrBatchNowFull. The positions of the Fields in
@@ -180,8 +185,9 @@ func NewBatch(client *pilosa.Client, size int, index *pilosa.Index, fields []*pi
180
185
toTranslate : tt ,
181
186
toTranslateClear : make (map [int ]map [string ][]int ),
182
187
toTranslateSets : ttSets ,
183
- toTranslateID : make (map [string ][]int ),
184
188
transCache : NewMapTranslator (),
189
+
190
+ log : logger .NopLogger ,
185
191
}
186
192
if hasTime {
187
193
b .times = make ([]QuantizedTime , 0 , size )
@@ -314,12 +320,10 @@ func (b *Batch) Add(rec Row) error {
314
320
} else if ok {
315
321
b .ids = append (b .ids , colID )
316
322
} else {
317
- ints , ok := b .toTranslateID [rid ]
318
- if ! ok {
319
- ints = make ([]int , 0 )
323
+ if b .toTranslateID == nil {
324
+ b .toTranslateID = make ([]string , cap (b .ids ))
320
325
}
321
- ints = append (ints , len (b .ids ))
322
- b .toTranslateID [rid ] = ints
326
+ b .toTranslateID [len (b .ids )] = rid
323
327
b .ids = append (b .ids , 0 )
324
328
}
325
329
return nil
@@ -484,6 +488,13 @@ func (b *Batch) Add(rec Row) error {
484
488
default :
485
489
return errors .Errorf ("Clearing a value '%v' Type %[1]T is not currently supported (field '%s')" , val , field .Name ())
486
490
}
491
+ // nil extend b.rowIDs so we don't run into a horrible bug
492
+ // where we skip doing clears because b.rowIDs doesn't have a
493
+ // value for this field
494
+ for len (b .rowIDs [i ]) <= curPos {
495
+ b .rowIDs [i ] = append (b .rowIDs [i ], nilSentinel )
496
+ }
497
+
487
498
}
488
499
489
500
if len (b .ids ) == cap (b .ids ) {
@@ -524,29 +535,29 @@ func (b *Batch) Import() error {
524
535
}
525
536
526
537
func (b * Batch ) doTranslation () error {
527
- var keys []string
538
+ keys := make ([]string , 0 )
539
+ var indexes []int
528
540
529
541
// translate column keys if there are any
530
- if len (b .toTranslateID ) > 0 {
531
- keys = make ([]string , 0 , len (b .toTranslateID ))
532
- for k := range b .toTranslateID {
542
+ for i , k := range b .toTranslateID {
543
+ if k != "" {
533
544
keys = append (keys , k )
545
+ indexes = append (indexes , i )
534
546
}
547
+ }
548
+ if len (keys ) > 0 {
549
+ start := time .Now ()
535
550
ids , err := b .client .TranslateColumnKeys (b .index , keys )
536
551
if err != nil {
537
552
return errors .Wrap (err , "translating col keys" )
538
553
}
554
+ b .log .Debugf ("translating %d column keys took %v" , len (keys ), time .Since (start ))
539
555
if err := b .transCache .AddCols (b .index .Name (), keys , ids ); err != nil {
540
556
return errors .Wrap (err , "adding cols to cache" )
541
557
}
542
- for j , key := range keys {
543
- id := ids [j ]
544
- for _ , recordIdx := range b .toTranslateID [key ] {
545
- b .ids [recordIdx ] = id
546
- }
558
+ for j , id := range ids {
559
+ b.ids [indexes [j ]] = id
547
560
}
548
- } else {
549
- keys = make ([]string , 0 )
550
561
}
551
562
552
563
// translate row keys
@@ -572,10 +583,12 @@ func (b *Batch) doTranslation() error {
572
583
}
573
584
574
585
// translate keys from Pilosa
586
+ start := time .Now ()
575
587
ids , err := b .client .TranslateRowKeys (b .headerMap [fieldName ], keys )
576
588
if err != nil {
577
589
return errors .Wrap (err , "translating row keys" )
578
590
}
591
+ b .log .Debugf ("translating %d row keys for %s took %v" , len (keys ), fieldName , time .Since (start ))
579
592
if err := b .transCache .AddRows (b .index .Name (), fieldName , keys , ids ); err != nil {
580
593
return errors .Wrap (err , "adding rows to cache" )
581
594
}
@@ -611,10 +624,12 @@ func (b *Batch) doTranslation() error {
611
624
continue
612
625
}
613
626
// translate keys from Pilosa
627
+ start := time .Now ()
614
628
ids , err := b .client .TranslateRowKeys (b .headerMap [fieldName ], keys )
615
629
if err != nil {
616
630
return errors .Wrap (err , "translating row keys (sets)" )
617
631
}
632
+ b .log .Debugf ("translating %d row keys(sets) for %s took %v" , len (keys ), fieldName , time .Since (start ))
618
633
if err := b .transCache .AddRows (b .index .Name (), fieldName , keys , ids ); err != nil {
619
634
return errors .Wrap (err , "adding rows to cache" )
620
635
}
@@ -645,15 +660,20 @@ func (b *Batch) doImport() error {
645
660
field := field
646
661
viewMap := viewMap
647
662
shard := shard
663
+
648
664
eg .Go (func () error {
649
665
clearViewMap := clearFrags .GetViewMap (shard , field )
650
666
if len (clearViewMap ) > 0 {
667
+ start := time .Now ()
651
668
err := b .client .ImportRoaringBitmap (b .index .Field (field ), shard , clearViewMap , true )
652
669
if err != nil {
653
670
return errors .Wrapf (err , "import clearing clearing data for %s" , field )
654
671
}
672
+ b .log .Debugf ("imp-roar-clr %s,shard:%d,views:%d %v" , field , shard , len (clearViewMap ), time .Since (start ))
655
673
}
674
+ start := time .Now ()
656
675
err := b .client .ImportRoaringBitmap (b .index .Field (field ), shard , viewMap , false )
676
+ b .log .Debugf ("imp-roar %s,shard:%d,views:%d %v" , field , shard , len (clearViewMap ), time .Since (start ))
657
677
return errors .Wrapf (err , "importing data for %s" , field )
658
678
})
659
679
}
@@ -845,7 +865,9 @@ func (b *Batch) importValueData() error {
845
865
return errors .Wrap (err , "encoding import values" )
846
866
}
847
867
eg .Go (func () error {
868
+ start := time .Now ()
848
869
err := b .client .DoImportValues (b .index .Name (), shard , path , data )
870
+ b .log .Debugf ("imp-vals %s,shard:%d,data:%d %v" , field , shard , len (data ), time .Since (start ))
849
871
return errors .Wrapf (err , "importing values for %s" , field )
850
872
})
851
873
startIdx = i
@@ -910,7 +932,9 @@ func (b *Batch) importMutexData() error {
910
932
return errors .Wrap (err , "encoding mutex import" )
911
933
}
912
934
eg .Go (func () error {
935
+ start := time .Now ()
913
936
err := b .client .DoImport (b .index .Name (), shard , path , data )
937
+ b .log .Debugf ("imp-vals %s,shard:%d,data:%d %v" , field , shard , len (data ), time .Since (start ))
914
938
return errors .Wrapf (err , "importing values for %s" , field )
915
939
})
916
940
startIdx = i
@@ -952,8 +976,8 @@ func (b *Batch) reset() {
952
976
delete (clearMap , k )
953
977
}
954
978
}
955
- for k := range b .toTranslateID {
956
- delete ( b .toTranslateID , k ) // TODO pool these slices
979
+ for i := range b .toTranslateID {
980
+ b .toTranslateID [ i ] = ""
957
981
}
958
982
for k := range b .values {
959
983
delete (b .values , k ) // TODO pool these slices
0 commit comments