-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathbert_embedding.go
More file actions
114 lines (91 loc) · 2.49 KB
/
bert_embedding.go
File metadata and controls
114 lines (91 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package embedding
import (
"fmt"
"log"
"github.com/buckhx/gobert/tokenize"
"github.com/buckhx/gobert/tokenize/vocab"
tf "github.com/tensorflow/tensorflow/tensorflow/go"
"github.com/jack139/go-infer/helper"
"github.com/jack139/go-infer/types"
)
// Trained model state, populated by initModel and read by Infer.
var (
// m is the TensorFlow SavedModel loaded from the configured "BertModelPath".
m *tf.SavedModel
// voc is the vocabulary loaded from the configured "BertVocabPath".
voc vocab.Dict
)
/* 初始化模型 */
func initModel() error {
var err error
voc, err = vocab.FromFile(helper.Settings.Customer["BertVocabPath"])
if err != nil {
return err
}
m, err = tf.LoadSavedModel(helper.Settings.Customer["BertModelPath"], []string{"train"}, nil)
if err != nil {
return err
}
return nil
}
// isAlpha reports whether c is an ASCII English letter (A-Z or a-z).
func isAlpha(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z':
		return true
	case 'a' <= c && c <= 'z':
		return true
	default:
		return false
	}
}
// BertEMB is the BERT embedding model type. It embeds types.Base and defines
// the Init, ApiPath, ApiEntry and Infer methods below.
type BertEMB struct{ types.Base }
// Init loads the vocabulary and SavedModel by delegating to initModel and
// returns whatever error initModel reports.
func (x *BertEMB) Init() error {
return initModel()
}
// ApiPath returns the fixed path string "/api/embedding".
// NOTE(review): presumably the route under which the go-infer framework
// exposes this model — confirm against the framework.
func (x *BertEMB) ApiPath() string {
return "/api/embedding"
}
// ApiEntry validates an incoming request and builds the parameter map that is
// handed on for inference.
//
// The request must carry a string value under the "text" key. If it does not,
// a map containing {"code": 1001} is returned together with a "need text"
// error. On success the returned map contains only the validated "text" value.
func (x *BertEMB) ApiEntry(reqData *map[string]interface{}) (*map[string]interface{}, error) {
	log.Println("ApiEntry_BertEMB")

	// Validate the parameters: "text" must be present and must be a string.
	rawText := (*reqData)["text"]
	text, isString := rawText.(string)
	if !isString {
		failure := map[string]interface{}{"code": 1001}
		return &failure, fmt.Errorf("need text")
	}

	// Build the request parameters, forwarding only the validated field.
	payload := make(map[string]interface{}, 1)
	payload["text"] = text
	return &payload, nil
}
// Infer runs BERT inference on the request's "text" value and returns the
// output of the "bert/pooler/Squeeze" graph operation for that text.
//
// reqData must contain a string under "text". The text is tokenized with the
// package vocabulary into features of length MaxSeqLength, fed to the
// "input_ids", "input_mask" and "segment_ids" graph inputs, and the first row
// of the fetched output is returned under the "embeddings" key.
//
// On failure a map holding an error code is returned together with the error:
//
//	1001  "text" is missing or not a string
//	2001  building the token-ID tensor failed
//	2002  building the mask tensor failed
//	2003  building the segment-ID tensor failed
//	2004  a required graph operation is missing, or running the session failed
//	2005  the model output is empty or not a [][]float32
//
// Every failure is reported as an error rather than a panic, so one malformed
// request or a mismatched model cannot take down the serving process.
func (x *BertEMB) Infer(requestId string, reqData *map[string]interface{}) (*map[string]interface{}, error) {
	log.Println("Infer_BertEMB")

	const MaxSeqLength = 512

	// Checked assertion: a missing or non-string "text" must not panic here.
	text, ok := (*reqData)["text"].(string)
	if !ok {
		return &map[string]interface{}{"code": 1001}, fmt.Errorf("need text")
	}

	tkz := tokenize.NewTokenizer(voc)
	ff := tokenize.FeatureFactory{Tokenizer: tkz, SeqLen: MaxSeqLength}

	// Build the token features and wrap each one in a batch of size one.
	f := ff.Feature(text)

	tids, err := tf.NewTensor([][]int32{f.TokenIDs})
	if err != nil {
		return &map[string]interface{}{"code": 2001}, err
	}
	mask, err := tf.NewTensor([][]int32{f.Mask})
	if err != nil {
		return &map[string]interface{}{"code": 2002}, err
	}
	sids, err := tf.NewTensor([][]int32{f.TypeIDs})
	if err != nil {
		return &map[string]interface{}{"code": 2003}, err
	}

	// Graph.Operation returns nil for an unknown name; using such a result in
	// Session.Run would dereference a nil operation, so resolve names up front.
	graphOutput := func(name string) (tf.Output, error) {
		op := m.Graph.Operation(name)
		if op == nil {
			return tf.Output{}, fmt.Errorf("operation %q not found in model graph", name)
		}
		return op.Output(0), nil
	}

	feeds := make(map[tf.Output]*tf.Tensor, 3)
	for _, in := range []struct {
		name   string
		tensor *tf.Tensor
	}{
		{"input_ids", tids},
		{"input_mask", mask},
		{"segment_ids", sids},
	} {
		out, err := graphOutput(in.name)
		if err != nil {
			return &map[string]interface{}{"code": 2004}, err
		}
		feeds[out] = in.tensor
	}

	pooled, err := graphOutput("bert/pooler/Squeeze")
	if err != nil {
		return &map[string]interface{}{"code": 2004}, err
	}

	res, err := m.Session.Run(feeds, []tf.Output{pooled}, nil)
	if err != nil {
		return &map[string]interface{}{"code": 2004}, err
	}
	if len(res) == 0 {
		return &map[string]interface{}{"code": 2005}, fmt.Errorf("model returned no output tensors")
	}

	// Checked assertion and length guard replace the panicking originals.
	ret, ok := res[0].Value().([][]float32)
	if !ok {
		return &map[string]interface{}{"code": 2005}, fmt.Errorf("unexpected model output type %T", res[0].Value())
	}
	if len(ret) == 0 {
		return &map[string]interface{}{"code": 2005}, fmt.Errorf("model returned an empty batch")
	}

	return &map[string]interface{}{"embeddings": ret[0]}, nil
}