-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresult.txt
322 lines (322 loc) · 55.1 KB
/
result.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
{"epoch": 0, "iteration": 1, "train_loss": 2.3221497535705566, "trained_tokens": 50064, "valid_loss": null, "perplexity": null, "time_since_job_start": 227.3044822216034}
{"epoch": 0, "iteration": 2, "train_loss": 2.314403533935547, "trained_tokens": 98032, "valid_loss": null, "perplexity": null, "time_since_job_start": 231.6294596195221}
{"epoch": 0, "iteration": 3, "train_loss": 2.3292670249938965, "trained_tokens": 144456, "valid_loss": null, "perplexity": null, "time_since_job_start": 235.4908800125122}
{"epoch": 0, "iteration": 4, "train_loss": 2.3253674507141113, "trained_tokens": 189192, "valid_loss": null, "perplexity": null, "time_since_job_start": 239.8278090953827}
{"epoch": 0, "iteration": 5, "train_loss": 2.3216190338134766, "trained_tokens": 235144, "valid_loss": null, "perplexity": null, "time_since_job_start": 244.1573703289032}
{"epoch": 0, "iteration": 6, "train_loss": 2.2805304527282715, "trained_tokens": 282144, "valid_loss": null, "perplexity": null, "time_since_job_start": 248.17816472053528}
{"epoch": 0, "iteration": 7, "train_loss": 2.2882590293884277, "trained_tokens": 328048, "valid_loss": null, "perplexity": null, "time_since_job_start": 252.1560423374176}
{"epoch": 0, "iteration": 8, "train_loss": 2.2308297157287598, "trained_tokens": 376504, "valid_loss": null, "perplexity": null, "time_since_job_start": 256.4896879196167}
{"epoch": 0, "iteration": 9, "train_loss": 2.2046663761138916, "trained_tokens": 423016, "valid_loss": null, "perplexity": null, "time_since_job_start": 260.8578734397888}
{"epoch": 0, "iteration": 10, "train_loss": 2.1150949001312256, "trained_tokens": 469112, "valid_loss": null, "perplexity": null, "time_since_job_start": 265.2015211582184}
{"epoch": 0, "iteration": 11, "train_loss": 2.0186944007873535, "trained_tokens": 513392, "valid_loss": null, "perplexity": null, "time_since_job_start": 269.2160704135895}
{"epoch": 0, "iteration": 12, "train_loss": 1.90696120262146, "trained_tokens": 559960, "valid_loss": null, "perplexity": null, "time_since_job_start": 273.6083879470825}
{"epoch": 0, "iteration": 13, "train_loss": 1.8363640308380127, "trained_tokens": 605832, "valid_loss": null, "perplexity": null, "time_since_job_start": 278.0064387321472}
{"epoch": 0, "iteration": 14, "train_loss": 1.7868220806121826, "trained_tokens": 651664, "valid_loss": null, "perplexity": null, "time_since_job_start": 282.4057810306549}
{"epoch": 0, "iteration": 15, "train_loss": 1.7175672054290771, "trained_tokens": 695776, "valid_loss": null, "perplexity": null, "time_since_job_start": 286.37082266807556}
{"epoch": 0, "iteration": 16, "train_loss": 1.679566740989685, "trained_tokens": 741304, "valid_loss": null, "perplexity": null, "time_since_job_start": 290.75806617736816}
{"epoch": 0, "iteration": 17, "train_loss": 1.6113966703414917, "trained_tokens": 787728, "valid_loss": null, "perplexity": null, "time_since_job_start": 295.1485722064972}
{"epoch": 0, "iteration": 18, "train_loss": 1.564872145652771, "trained_tokens": 832896, "valid_loss": null, "perplexity": null, "time_since_job_start": 299.1861400604248}
{"epoch": 0, "iteration": 19, "train_loss": 1.4939286708831787, "trained_tokens": 879496, "valid_loss": null, "perplexity": null, "time_since_job_start": 303.57525420188904}
{"epoch": 0, "iteration": 20, "train_loss": 1.4264986515045166, "trained_tokens": 924752, "valid_loss": null, "perplexity": null, "time_since_job_start": 307.78436183929443}
{"epoch": 0, "iteration": 21, "train_loss": 1.361718773841858, "trained_tokens": 970240, "valid_loss": null, "perplexity": null, "time_since_job_start": 312.20223331451416}
{"epoch": 0, "iteration": 22, "train_loss": 1.3481742143630981, "trained_tokens": 1017064, "valid_loss": null, "perplexity": null, "time_since_job_start": 316.5586416721344}
{"epoch": 0, "iteration": 23, "train_loss": 1.2914012670516968, "trained_tokens": 1063632, "valid_loss": null, "perplexity": null, "time_since_job_start": 320.9642245769501}
{"epoch": 0, "iteration": 24, "train_loss": 1.2511438131332397, "trained_tokens": 1108256, "valid_loss": null, "perplexity": null, "time_since_job_start": 325.24815583229065}
{"epoch": 0, "iteration": 25, "train_loss": 1.2180511951446533, "trained_tokens": 1153832, "valid_loss": null, "perplexity": null, "time_since_job_start": 329.3821585178375}
{"epoch": 0, "iteration": 26, "train_loss": 1.1920355558395386, "trained_tokens": 1197720, "valid_loss": null, "perplexity": null, "time_since_job_start": 333.1500644683838}
{"epoch": 0, "iteration": 27, "train_loss": 1.1774650812149048, "trained_tokens": 1240936, "valid_loss": null, "perplexity": null, "time_since_job_start": 336.7402226924896}
{"epoch": 0, "iteration": 28, "train_loss": 1.1575583219528198, "trained_tokens": 1287000, "valid_loss": null, "perplexity": null, "time_since_job_start": 341.12455558776855}
{"epoch": 0, "iteration": 29, "train_loss": 1.1464130878448486, "trained_tokens": 1332824, "valid_loss": null, "perplexity": null, "time_since_job_start": 345.49801206588745}
{"epoch": 0, "iteration": 30, "train_loss": 1.1310076713562012, "trained_tokens": 1377088, "valid_loss": null, "perplexity": null, "time_since_job_start": 349.88462710380554}
{"epoch": 0, "iteration": 31, "train_loss": 1.1302618980407715, "trained_tokens": 1421992, "valid_loss": null, "perplexity": null, "time_since_job_start": 354.28960704803467}
{"epoch": 0, "iteration": 32, "train_loss": 1.130478858947754, "trained_tokens": 1468816, "valid_loss": null, "perplexity": null, "time_since_job_start": 358.37178111076355}
{"epoch": 0, "iteration": 33, "train_loss": 1.142732858657837, "trained_tokens": 1515760, "valid_loss": null, "perplexity": null, "time_since_job_start": 363.10231256484985}
{"epoch": 0, "iteration": 34, "train_loss": 1.1018273830413818, "trained_tokens": 1563392, "valid_loss": null, "perplexity": null, "time_since_job_start": 367.47786498069763}
{"epoch": 0, "iteration": 35, "train_loss": 1.1228976249694824, "trained_tokens": 1608976, "valid_loss": null, "perplexity": null, "time_since_job_start": 371.41330194473267}
{"epoch": 0, "iteration": 36, "train_loss": 1.1167864799499512, "trained_tokens": 1655328, "valid_loss": null, "perplexity": null, "time_since_job_start": 375.55662202835083}
{"epoch": 0, "iteration": 37, "train_loss": 1.0793828964233398, "trained_tokens": 1699984, "valid_loss": null, "perplexity": null, "time_since_job_start": 379.28686809539795}
{"epoch": 0, "iteration": 38, "train_loss": 1.078770637512207, "trained_tokens": 1743224, "valid_loss": null, "perplexity": null, "time_since_job_start": 383.3239154815674}
{"epoch": 0, "iteration": 39, "train_loss": 1.0842297077178955, "trained_tokens": 1787616, "valid_loss": null, "perplexity": null, "time_since_job_start": 387.3022196292877}
{"epoch": 0, "iteration": 40, "train_loss": 1.0992119312286377, "trained_tokens": 1833384, "valid_loss": null, "perplexity": null, "time_since_job_start": 391.3608958721161}
{"epoch": 0, "iteration": 41, "train_loss": 1.0779606103897095, "trained_tokens": 1878496, "valid_loss": null, "perplexity": null, "time_since_job_start": 395.7342984676361}
{"epoch": 0, "iteration": 42, "train_loss": 1.0883615016937256, "trained_tokens": 1925112, "valid_loss": null, "perplexity": null, "time_since_job_start": 400.1397080421448}
{"epoch": 0, "iteration": 43, "train_loss": 1.0793874263763428, "trained_tokens": 1971136, "valid_loss": null, "perplexity": null, "time_since_job_start": 404.33832359313965}
{"epoch": 0, "iteration": 44, "train_loss": 1.0719950199127197, "trained_tokens": 2018208, "valid_loss": null, "perplexity": null, "time_since_job_start": 408.76080799102783}
{"epoch": 0, "iteration": 45, "train_loss": 1.0630919933319092, "trained_tokens": 2064408, "valid_loss": null, "perplexity": null, "time_since_job_start": 412.8440520763397}
{"epoch": 0, "iteration": 46, "train_loss": 1.0666850805282593, "trained_tokens": 2108792, "valid_loss": null, "perplexity": null, "time_since_job_start": 416.9499309062958}
{"epoch": 0, "iteration": 47, "train_loss": 1.068665623664856, "trained_tokens": 2154960, "valid_loss": null, "perplexity": null, "time_since_job_start": 421.2203335762024}
{"epoch": 0, "iteration": 48, "train_loss": 1.0669662952423096, "trained_tokens": 2202736, "valid_loss": null, "perplexity": null, "time_since_job_start": 425.61326265335083}
{"epoch": 0, "iteration": 49, "train_loss": 1.051154375076294, "trained_tokens": 2248168, "valid_loss": null, "perplexity": null, "time_since_job_start": 429.5363976955414}
{"epoch": 0, "iteration": 50, "train_loss": 1.0404902696609497, "trained_tokens": 2293352, "valid_loss": null, "perplexity": null, "time_since_job_start": 433.91592931747437}
{"epoch": 0, "iteration": 51, "train_loss": 1.0381357669830322, "trained_tokens": 2341000, "valid_loss": null, "perplexity": null, "time_since_job_start": 438.3441364765167}
{"epoch": 0, "iteration": 52, "train_loss": 1.0624972581863403, "trained_tokens": 2384656, "valid_loss": null, "perplexity": null, "time_since_job_start": 442.13081312179565}
{"epoch": 0, "iteration": 53, "train_loss": 1.0346792936325073, "trained_tokens": 2431592, "valid_loss": null, "perplexity": null, "time_since_job_start": 446.5398328304291}
{"epoch": 0, "iteration": 54, "train_loss": 1.0402268171310425, "trained_tokens": 2478576, "valid_loss": null, "perplexity": null, "time_since_job_start": 450.77350425720215}
{"epoch": 0, "iteration": 55, "train_loss": 1.02407968044281, "trained_tokens": 2522672, "valid_loss": null, "perplexity": null, "time_since_job_start": 454.9757385253906}
{"epoch": 0, "iteration": 56, "train_loss": 1.0268598794937134, "trained_tokens": 2569736, "valid_loss": null, "perplexity": null, "time_since_job_start": 459.3805069923401}
{"epoch": 0, "iteration": 57, "train_loss": 0.9858168959617615, "trained_tokens": 2613768, "valid_loss": null, "perplexity": null, "time_since_job_start": 463.0746419429779}
{"epoch": 0, "iteration": 58, "train_loss": 0.9989595413208008, "trained_tokens": 2659152, "valid_loss": null, "perplexity": null, "time_since_job_start": 467.3598828315735}
{"epoch": 0, "iteration": 59, "train_loss": 1.0021169185638428, "trained_tokens": 2704016, "valid_loss": null, "perplexity": null, "time_since_job_start": 471.6286780834198}
{"epoch": 0, "iteration": 60, "train_loss": 1.0054867267608643, "trained_tokens": 2751008, "valid_loss": null, "perplexity": null, "time_since_job_start": 476.02969241142273}
{"epoch": 0, "iteration": 61, "train_loss": 0.978866696357727, "trained_tokens": 2797184, "valid_loss": null, "perplexity": null, "time_since_job_start": 480.9772307872772}
{"epoch": 0, "iteration": 62, "train_loss": 1.0080490112304688, "trained_tokens": 2845104, "valid_loss": null, "perplexity": null, "time_since_job_start": 485.3528411388397}
{"epoch": 0, "iteration": 63, "train_loss": 1.013481855392456, "trained_tokens": 2891800, "valid_loss": null, "perplexity": null, "time_since_job_start": 489.65836453437805}
{"epoch": 0, "iteration": 64, "train_loss": 0.9985435605049133, "trained_tokens": 2937536, "valid_loss": null, "perplexity": null, "time_since_job_start": 493.8209364414215}
{"epoch": 0, "iteration": 65, "train_loss": 1.000836730003357, "trained_tokens": 2983776, "valid_loss": null, "perplexity": null, "time_since_job_start": 498.0207235813141}
{"epoch": 0, "iteration": 66, "train_loss": 0.9992470741271973, "trained_tokens": 3032040, "valid_loss": null, "perplexity": null, "time_since_job_start": 502.04200410842896}
{"epoch": 0, "iteration": 67, "train_loss": 0.999751091003418, "trained_tokens": 3080008, "valid_loss": null, "perplexity": null, "time_since_job_start": 506.4174978733063}
{"epoch": 0, "iteration": 68, "train_loss": 0.9904032945632935, "trained_tokens": 3126720, "valid_loss": null, "perplexity": null, "time_since_job_start": 510.7900538444519}
{"epoch": 0, "iteration": 69, "train_loss": 0.9738575220108032, "trained_tokens": 3173408, "valid_loss": null, "perplexity": null, "time_since_job_start": 515.0822601318359}
{"epoch": 0, "iteration": 70, "train_loss": 0.9910653829574585, "trained_tokens": 3219736, "valid_loss": null, "perplexity": null, "time_since_job_start": 519.4411315917969}
{"epoch": 0, "iteration": 71, "train_loss": 0.9857555627822876, "trained_tokens": 3270496, "valid_loss": null, "perplexity": null, "time_since_job_start": 523.8758280277252}
{"epoch": 0, "iteration": 72, "train_loss": 1.008411169052124, "trained_tokens": 3315144, "valid_loss": null, "perplexity": null, "time_since_job_start": 527.62282538414}
{"epoch": 0, "iteration": 73, "train_loss": 0.993482232093811, "trained_tokens": 3364704, "valid_loss": null, "perplexity": null, "time_since_job_start": 532.0642006397247}
{"epoch": 0, "iteration": 74, "train_loss": 0.9936149716377258, "trained_tokens": 3414560, "valid_loss": null, "perplexity": null, "time_since_job_start": 536.4477393627167}
{"epoch": 0, "iteration": 75, "train_loss": 0.980871856212616, "trained_tokens": 3462048, "valid_loss": null, "perplexity": null, "time_since_job_start": 540.8208794593811}
{"epoch": 0, "iteration": 76, "train_loss": 0.9877785444259644, "trained_tokens": 3509928, "valid_loss": null, "perplexity": null, "time_since_job_start": 545.2238321304321}
{"epoch": 0, "iteration": 77, "train_loss": 0.9846124053001404, "trained_tokens": 3558304, "valid_loss": null, "perplexity": null, "time_since_job_start": 549.6036777496338}
{"epoch": 0, "iteration": 78, "train_loss": 0.9843792915344238, "trained_tokens": 3606208, "valid_loss": null, "perplexity": null, "time_since_job_start": 553.800290107727}
{"epoch": 0, "iteration": 79, "train_loss": 0.9917116761207581, "trained_tokens": 3653520, "valid_loss": null, "perplexity": null, "time_since_job_start": 558.1746530532837}
{"epoch": 0, "iteration": 80, "train_loss": 0.9651538729667664, "trained_tokens": 3701016, "valid_loss": null, "perplexity": null, "time_since_job_start": 562.4397225379944}
{"epoch": 0, "iteration": 81, "train_loss": 0.9715399146080017, "trained_tokens": 3748760, "valid_loss": null, "perplexity": null, "time_since_job_start": 566.8511271476746}
{"epoch": 0, "iteration": 82, "train_loss": 0.9546526670455933, "trained_tokens": 3795784, "valid_loss": null, "perplexity": null, "time_since_job_start": 571.0275573730469}
{"epoch": 0, "iteration": 83, "train_loss": 0.9525634050369263, "trained_tokens": 3841904, "valid_loss": null, "perplexity": null, "time_since_job_start": 575.2982404232025}
{"epoch": 0, "iteration": 84, "train_loss": 0.9723360538482666, "trained_tokens": 3890664, "valid_loss": null, "perplexity": null, "time_since_job_start": 579.715452671051}
{"epoch": 0, "iteration": 85, "train_loss": 0.9771395325660706, "trained_tokens": 3938368, "valid_loss": null, "perplexity": null, "time_since_job_start": 584.119889497757}
{"epoch": 0, "iteration": 86, "train_loss": 0.992313027381897, "trained_tokens": 3986464, "valid_loss": null, "perplexity": null, "time_since_job_start": 588.5482938289642}
{"epoch": 0, "iteration": 87, "train_loss": 0.9778928756713867, "trained_tokens": 4033336, "valid_loss": null, "perplexity": null, "time_since_job_start": 592.9796409606934}
{"epoch": 0, "iteration": 88, "train_loss": 0.9830464720726013, "trained_tokens": 4082688, "valid_loss": null, "perplexity": null, "time_since_job_start": 597.4041891098022}
{"epoch": 0, "iteration": 89, "train_loss": 0.973914623260498, "trained_tokens": 4131368, "valid_loss": null, "perplexity": null, "time_since_job_start": 601.7884335517883}
{"epoch": 0, "iteration": 90, "train_loss": 0.9712284803390503, "trained_tokens": 4178336, "valid_loss": null, "perplexity": null, "time_since_job_start": 605.9598026275635}
{"epoch": 0, "iteration": 91, "train_loss": 0.9616132974624634, "trained_tokens": 4227056, "valid_loss": null, "perplexity": null, "time_since_job_start": 610.3800909519196}
{"epoch": 0, "iteration": 92, "train_loss": 0.9657807350158691, "trained_tokens": 4273760, "valid_loss": null, "perplexity": null, "time_since_job_start": 614.7833223342896}
{"epoch": 0, "iteration": 93, "train_loss": 0.9520615339279175, "trained_tokens": 4324512, "valid_loss": null, "perplexity": null, "time_since_job_start": 619.1996204853058}
{"epoch": 0, "iteration": 94, "train_loss": 0.9424417018890381, "trained_tokens": 4372504, "valid_loss": null, "perplexity": null, "time_since_job_start": 623.6034259796143}
{"epoch": 0, "iteration": 95, "train_loss": 0.9642723202705383, "trained_tokens": 4420632, "valid_loss": null, "perplexity": null, "time_since_job_start": 627.8629832267761}
{"epoch": 0, "iteration": 96, "train_loss": 0.972213089466095, "trained_tokens": 4468632, "valid_loss": null, "perplexity": null, "time_since_job_start": 632.2845101356506}
{"epoch": 0, "iteration": 97, "train_loss": 1.0065470933914185, "trained_tokens": 4516008, "valid_loss": null, "perplexity": null, "time_since_job_start": 636.6799478530884}
{"epoch": 0, "iteration": 98, "train_loss": 0.973542332649231, "trained_tokens": 4564248, "valid_loss": null, "perplexity": null, "time_since_job_start": 641.0977976322174}
{"epoch": 0, "iteration": 99, "train_loss": 0.9623873233795166, "trained_tokens": 4611608, "valid_loss": null, "perplexity": null, "time_since_job_start": 645.4890787601471}
{"epoch": 0, "iteration": 100, "train_loss": 0.9478403925895691, "trained_tokens": 4657768, "valid_loss": null, "perplexity": null, "time_since_job_start": 649.5545496940613}
{"epoch": 0, "iteration": 101, "train_loss": 0.9478430151939392, "trained_tokens": 4705480, "valid_loss": null, "perplexity": null, "time_since_job_start": 653.959242105484}
{"epoch": 0, "iteration": 102, "train_loss": 0.9532029628753662, "trained_tokens": 4752824, "valid_loss": null, "perplexity": null, "time_since_job_start": 658.3130729198456}
{"epoch": 0, "iteration": 103, "train_loss": 0.9679001569747925, "trained_tokens": 4801144, "valid_loss": null, "perplexity": null, "time_since_job_start": 662.6949241161346}
{"epoch": 0, "iteration": 104, "train_loss": 0.9583209753036499, "trained_tokens": 4850568, "valid_loss": null, "perplexity": null, "time_since_job_start": 667.0706388950348}
{"epoch": 0, "iteration": 105, "train_loss": 0.9492303729057312, "trained_tokens": 4899144, "valid_loss": null, "perplexity": null, "time_since_job_start": 671.4684834480286}
{"epoch": 0, "iteration": 106, "train_loss": 0.9521612524986267, "trained_tokens": 4946008, "valid_loss": null, "perplexity": null, "time_since_job_start": 675.896372795105}
{"epoch": 0, "iteration": 107, "train_loss": 0.944176435470581, "trained_tokens": 4994744, "valid_loss": null, "perplexity": null, "time_since_job_start": 680.3039455413818}
{"epoch": 0, "iteration": 108, "train_loss": 0.9444041848182678, "trained_tokens": 5041528, "valid_loss": null, "perplexity": null, "time_since_job_start": 684.2965915203094}
{"epoch": 0, "iteration": 109, "train_loss": 0.9311796426773071, "trained_tokens": 5090008, "valid_loss": null, "perplexity": null, "time_since_job_start": 688.6827719211578}
{"epoch": 0, "iteration": 110, "train_loss": 0.940165638923645, "trained_tokens": 5135704, "valid_loss": null, "perplexity": null, "time_since_job_start": 692.4547662734985}
{"epoch": 0, "iteration": 111, "train_loss": 0.9558852314949036, "trained_tokens": 5182224, "valid_loss": null, "perplexity": null, "time_since_job_start": 696.8356776237488}
{"epoch": 0, "iteration": 112, "train_loss": 0.9376206994056702, "trained_tokens": 5227312, "valid_loss": null, "perplexity": null, "time_since_job_start": 700.7042355537415}
{"epoch": 0, "iteration": 113, "train_loss": 0.9399732351303101, "trained_tokens": 5275824, "valid_loss": null, "perplexity": null, "time_since_job_start": 705.1137948036194}
{"epoch": 0, "iteration": 114, "train_loss": 0.970672607421875, "trained_tokens": 5323056, "valid_loss": null, "perplexity": null, "time_since_job_start": 709.491466999054}
{"epoch": 0, "iteration": 115, "train_loss": 0.9683318138122559, "trained_tokens": 5370832, "valid_loss": null, "perplexity": null, "time_since_job_start": 713.8414318561554}
{"epoch": 0, "iteration": 116, "train_loss": 0.9593852758407593, "trained_tokens": 5420144, "valid_loss": null, "perplexity": null, "time_since_job_start": 718.2308356761932}
{"epoch": 0, "iteration": 117, "train_loss": 0.9611226916313171, "trained_tokens": 5467136, "valid_loss": null, "perplexity": null, "time_since_job_start": 722.4449739456177}
{"epoch": 0, "iteration": 118, "train_loss": 0.9549469947814941, "trained_tokens": 5514048, "valid_loss": null, "perplexity": null, "time_since_job_start": 726.6959536075592}
{"epoch": 0, "iteration": 119, "train_loss": 0.9736496210098267, "trained_tokens": 5563072, "valid_loss": null, "perplexity": null, "time_since_job_start": 731.1143012046814}
{"epoch": 0, "iteration": 120, "train_loss": 0.9210104942321777, "trained_tokens": 5608856, "valid_loss": null, "perplexity": null, "time_since_job_start": 735.4776923656464}
{"epoch": 0, "iteration": 121, "train_loss": 0.946574866771698, "trained_tokens": 5655920, "valid_loss": null, "perplexity": null, "time_since_job_start": 739.7261111736298}
{"epoch": 0, "iteration": 122, "train_loss": 0.9368221759796143, "trained_tokens": 5703328, "valid_loss": null, "perplexity": null, "time_since_job_start": 744.082891702652}
{"epoch": 0, "iteration": 123, "train_loss": 0.97652667760849, "trained_tokens": 5753144, "valid_loss": null, "perplexity": null, "time_since_job_start": 748.6088192462921}
{"epoch": 0, "iteration": 124, "train_loss": 0.9567688703536987, "trained_tokens": 5801560, "valid_loss": null, "perplexity": null, "time_since_job_start": 753.0497841835022}
{"epoch": 0, "iteration": 125, "train_loss": 0.933190643787384, "trained_tokens": 5850680, "valid_loss": null, "perplexity": null, "time_since_job_start": 757.4633808135986}
{"epoch": 0, "iteration": 126, "train_loss": 0.9194918274879456, "trained_tokens": 5897192, "valid_loss": null, "perplexity": null, "time_since_job_start": 762.1772487163544}
{"epoch": 0, "iteration": 127, "train_loss": 0.9421749114990234, "trained_tokens": 5945744, "valid_loss": null, "perplexity": null, "time_since_job_start": 766.5587687492371}
{"epoch": 0, "iteration": 128, "train_loss": 0.9418079853057861, "trained_tokens": 5993016, "valid_loss": null, "perplexity": null, "time_since_job_start": 770.9697148799896}
{"epoch": 0, "iteration": 129, "train_loss": 0.941035270690918, "trained_tokens": 6041056, "valid_loss": null, "perplexity": null, "time_since_job_start": 775.3458144664764}
{"epoch": 0, "iteration": 130, "train_loss": 0.9508941173553467, "trained_tokens": 6088336, "valid_loss": null, "perplexity": null, "time_since_job_start": 779.6883842945099}
{"epoch": 0, "iteration": 131, "train_loss": 0.94286048412323, "trained_tokens": 6136112, "valid_loss": null, "perplexity": null, "time_since_job_start": 783.8747942447662}
{"epoch": 0, "iteration": 132, "train_loss": 0.9233163595199585, "trained_tokens": 6184952, "valid_loss": null, "perplexity": null, "time_since_job_start": 788.2794282436371}
{"epoch": 0, "iteration": 133, "train_loss": 0.9337539672851562, "trained_tokens": 6233928, "valid_loss": null, "perplexity": null, "time_since_job_start": 792.6769828796387}
{"epoch": 0, "iteration": 134, "train_loss": 0.9446961879730225, "trained_tokens": 6280280, "valid_loss": null, "perplexity": null, "time_since_job_start": 796.4532895088196}
{"epoch": 0, "iteration": 135, "train_loss": 0.9352823495864868, "trained_tokens": 6325152, "valid_loss": null, "perplexity": null, "time_since_job_start": 800.8294441699982}
{"epoch": 0, "iteration": 136, "train_loss": 0.9524248838424683, "trained_tokens": 6373048, "valid_loss": null, "perplexity": null, "time_since_job_start": 805.2532203197479}
{"epoch": 0, "iteration": 137, "train_loss": 0.9593297243118286, "trained_tokens": 6421032, "valid_loss": null, "perplexity": null, "time_since_job_start": 809.7193386554718}
{"epoch": 0, "iteration": 138, "train_loss": 0.9417784810066223, "trained_tokens": 6469504, "valid_loss": null, "perplexity": null, "time_since_job_start": 814.099770784378}
{"epoch": 0, "iteration": 139, "train_loss": 0.9621372222900391, "trained_tokens": 6518768, "valid_loss": null, "perplexity": null, "time_since_job_start": 818.5041840076447}
{"epoch": 0, "iteration": 140, "train_loss": 0.9567392468452454, "trained_tokens": 6565296, "valid_loss": null, "perplexity": null, "time_since_job_start": 822.6975197792053}
{"epoch": 0, "iteration": 141, "train_loss": 0.9294645190238953, "trained_tokens": 6616344, "valid_loss": null, "perplexity": null, "time_since_job_start": 827.1412856578827}
{"epoch": 0, "iteration": 142, "train_loss": 0.9420742392539978, "trained_tokens": 6664592, "valid_loss": null, "perplexity": null, "time_since_job_start": 831.5578124523163}
{"epoch": 0, "iteration": 143, "train_loss": 0.9297382831573486, "trained_tokens": 6714648, "valid_loss": null, "perplexity": null, "time_since_job_start": 835.9415512084961}
{"epoch": 0, "iteration": 144, "train_loss": 0.9393666982650757, "trained_tokens": 6761888, "valid_loss": null, "perplexity": null, "time_since_job_start": 840.29532289505}
{"epoch": 0, "iteration": 145, "train_loss": 0.9237152934074402, "trained_tokens": 6811760, "valid_loss": null, "perplexity": null, "time_since_job_start": 844.5930774211884}
{"epoch": 0, "iteration": 146, "train_loss": 0.9295288920402527, "trained_tokens": 6859760, "valid_loss": null, "perplexity": null, "time_since_job_start": 848.9560697078705}
{"epoch": 0, "iteration": 147, "train_loss": 0.9359827637672424, "trained_tokens": 6909216, "valid_loss": null, "perplexity": null, "time_since_job_start": 853.3692662715912}
{"epoch": 0, "iteration": 148, "train_loss": 0.9229446649551392, "trained_tokens": 6955208, "valid_loss": null, "perplexity": null, "time_since_job_start": 857.2881331443787}
{"epoch": 0, "iteration": 149, "train_loss": 0.948832631111145, "trained_tokens": 7002536, "valid_loss": null, "perplexity": null, "time_since_job_start": 861.3579761981964}
{"epoch": 0, "iteration": 150, "train_loss": 0.9292882680892944, "trained_tokens": 7050880, "valid_loss": null, "perplexity": null, "time_since_job_start": 865.7787129878998}
{"epoch": 0, "iteration": 151, "train_loss": 0.9259365200996399, "trained_tokens": 7097304, "valid_loss": null, "perplexity": null, "time_since_job_start": 870.1827023029327}
{"epoch": 0, "iteration": 152, "train_loss": 0.9396798610687256, "trained_tokens": 7144832, "valid_loss": null, "perplexity": null, "time_since_job_start": 874.5697023868561}
{"epoch": 0, "iteration": 153, "train_loss": 0.9311560392379761, "trained_tokens": 7193688, "valid_loss": null, "perplexity": null, "time_since_job_start": 878.9503598213196}
{"epoch": 0, "iteration": 154, "train_loss": 0.9393107891082764, "trained_tokens": 7242200, "valid_loss": null, "perplexity": null, "time_since_job_start": 883.346118927002}
{"epoch": 0, "iteration": 155, "train_loss": 0.9252485036849976, "trained_tokens": 7290512, "valid_loss": null, "perplexity": null, "time_since_job_start": 887.767555475235}
{"epoch": 0, "iteration": 156, "train_loss": 0.9452108144760132, "trained_tokens": 7338024, "valid_loss": null, "perplexity": null, "time_since_job_start": 891.9513013362885}
{"epoch": 0, "iteration": 157, "train_loss": 0.9179238080978394, "trained_tokens": 7385136, "valid_loss": null, "perplexity": null, "time_since_job_start": 896.3158292770386}
{"epoch": 0, "iteration": 158, "train_loss": 0.9423495531082153, "trained_tokens": 7435832, "valid_loss": null, "perplexity": null, "time_since_job_start": 900.7360661029816}
{"epoch": 0, "iteration": 159, "train_loss": 0.9127019047737122, "trained_tokens": 7482072, "valid_loss": null, "perplexity": null, "time_since_job_start": 904.954749584198}
{"epoch": 0, "iteration": 160, "train_loss": 0.9199738502502441, "trained_tokens": 7529120, "valid_loss": null, "perplexity": null, "time_since_job_start": 909.3679387569427}
{"epoch": 0, "iteration": 161, "train_loss": 0.9292933344841003, "trained_tokens": 7577264, "valid_loss": null, "perplexity": null, "time_since_job_start": 913.7601976394653}
{"epoch": 0, "iteration": 162, "train_loss": 0.9339044094085693, "trained_tokens": 7625888, "valid_loss": null, "perplexity": null, "time_since_job_start": 918.2049136161804}
{"epoch": 0, "iteration": 163, "train_loss": 0.9433088898658752, "trained_tokens": 7673448, "valid_loss": null, "perplexity": null, "time_since_job_start": 922.5421476364136}
{"epoch": 0, "iteration": 164, "train_loss": 0.9346109628677368, "trained_tokens": 7717208, "valid_loss": null, "perplexity": null, "time_since_job_start": 926.8887038230896}
{"epoch": 0, "iteration": 165, "train_loss": 0.936431348323822, "trained_tokens": 7763480, "valid_loss": null, "perplexity": null, "time_since_job_start": 931.1009829044342}
{"epoch": 0, "iteration": 166, "train_loss": 0.9210602045059204, "trained_tokens": 7809296, "valid_loss": null, "perplexity": null, "time_since_job_start": 935.3015162944794}
{"epoch": 0, "iteration": 167, "train_loss": 0.9387384057044983, "trained_tokens": 7854288, "valid_loss": null, "perplexity": null, "time_since_job_start": 939.4496955871582}
{"epoch": 0, "iteration": 168, "train_loss": 0.933161735534668, "trained_tokens": 7900136, "valid_loss": null, "perplexity": null, "time_since_job_start": 943.8046972751617}
{"epoch": 0, "iteration": 169, "train_loss": 0.9105812311172485, "trained_tokens": 7942880, "valid_loss": null, "perplexity": null, "time_since_job_start": 947.8167190551758}
{"epoch": 0, "iteration": 170, "train_loss": 0.9560025930404663, "trained_tokens": 7988296, "valid_loss": null, "perplexity": null, "time_since_job_start": 952.19611287117}
{"epoch": 0, "iteration": 171, "train_loss": 0.959585428237915, "trained_tokens": 8033144, "valid_loss": null, "perplexity": null, "time_since_job_start": 956.4262588024139}
{"epoch": 0, "iteration": 172, "train_loss": 0.9170700311660767, "trained_tokens": 8079368, "valid_loss": null, "perplexity": null, "time_since_job_start": 960.8317427635193}
{"epoch": 0, "iteration": 173, "train_loss": 0.9130783677101135, "trained_tokens": 8123376, "valid_loss": null, "perplexity": null, "time_since_job_start": 964.7750973701477}
{"epoch": 0, "iteration": 174, "train_loss": 0.8949670195579529, "trained_tokens": 8167600, "valid_loss": null, "perplexity": null, "time_since_job_start": 968.7537775039673}
{"epoch": 0, "iteration": 175, "train_loss": 0.9142083525657654, "trained_tokens": 8213096, "valid_loss": null, "perplexity": null, "time_since_job_start": 973.1364459991455}
{"epoch": 0, "iteration": 176, "train_loss": 0.9371452331542969, "trained_tokens": 8259416, "valid_loss": null, "perplexity": null, "time_since_job_start": 977.5164561271667}
{"epoch": 0, "iteration": 177, "train_loss": 0.9070518016815186, "trained_tokens": 8304168, "valid_loss": null, "perplexity": null, "time_since_job_start": 981.7616064548492}
{"epoch": 0, "iteration": 178, "train_loss": 0.9533377885818481, "trained_tokens": 8348880, "valid_loss": null, "perplexity": null, "time_since_job_start": 985.7914111614227}
{"epoch": 0, "iteration": 179, "train_loss": 0.9164578318595886, "trained_tokens": 8393240, "valid_loss": null, "perplexity": null, "time_since_job_start": 990.1538562774658}
{"epoch": 0, "iteration": 180, "train_loss": 0.9433624744415283, "trained_tokens": 8437984, "valid_loss": null, "perplexity": null, "time_since_job_start": 994.5066728591919}
{"epoch": 0, "iteration": 181, "train_loss": 0.9134135246276855, "trained_tokens": 8483848, "valid_loss": null, "perplexity": null, "time_since_job_start": 998.8018660545349}
{"epoch": 0, "iteration": 182, "train_loss": 0.9402033686637878, "trained_tokens": 8529656, "valid_loss": null, "perplexity": null, "time_since_job_start": 1003.1622116565704}
{"epoch": 0, "iteration": 183, "train_loss": 0.9418172240257263, "trained_tokens": 8575392, "valid_loss": null, "perplexity": null, "time_since_job_start": 1007.4548914432526}
{"epoch": 0, "iteration": 184, "train_loss": 0.9107633829116821, "trained_tokens": 8618312, "valid_loss": null, "perplexity": null, "time_since_job_start": 1011.8165757656097}
{"epoch": 0, "iteration": 185, "train_loss": 0.9390747547149658, "trained_tokens": 8661904, "valid_loss": null, "perplexity": null, "time_since_job_start": 1015.9919593334198}
{"epoch": 0, "iteration": 186, "train_loss": 0.9094264507293701, "trained_tokens": 8706416, "valid_loss": null, "perplexity": null, "time_since_job_start": 1020.2005829811096}
{"epoch": 0, "iteration": 187, "train_loss": 0.9457592964172363, "trained_tokens": 8751536, "valid_loss": null, "perplexity": null, "time_since_job_start": 1024.074091911316}
{"epoch": 0, "iteration": 188, "train_loss": 0.9264199733734131, "trained_tokens": 8793584, "valid_loss": null, "perplexity": null, "time_since_job_start": 1027.6919507980347}
{"epoch": 0, "iteration": 189, "train_loss": 0.9347048401832581, "trained_tokens": 8839768, "valid_loss": null, "perplexity": null, "time_since_job_start": 1032.0745437145233}
{"epoch": 0, "iteration": 190, "train_loss": 0.8965713977813721, "trained_tokens": 8884864, "valid_loss": null, "perplexity": null, "time_since_job_start": 1036.3120102882385}
{"epoch": 0, "iteration": 191, "train_loss": 0.9078487157821655, "trained_tokens": 8928736, "valid_loss": null, "perplexity": null, "time_since_job_start": 1040.6797835826874}
{"epoch": 0, "iteration": 192, "train_loss": 0.9549143314361572, "trained_tokens": 8973456, "valid_loss": null, "perplexity": null, "time_since_job_start": 1044.4093542099}
{"epoch": 0, "iteration": 193, "train_loss": 0.9348047971725464, "trained_tokens": 9020856, "valid_loss": null, "perplexity": null, "time_since_job_start": 1048.772408246994}
{"epoch": 0, "iteration": 194, "train_loss": 0.9361010789871216, "trained_tokens": 9068328, "valid_loss": null, "perplexity": null, "time_since_job_start": 1053.1394658088684}
{"epoch": 0, "iteration": 195, "train_loss": 0.9544965028762817, "trained_tokens": 9116672, "valid_loss": null, "perplexity": null, "time_since_job_start": 1057.521699666977}
{"epoch": 0, "iteration": 196, "train_loss": 0.964043140411377, "trained_tokens": 9164864, "valid_loss": null, "perplexity": null, "time_since_job_start": 1061.910991191864}
{"epoch": 0, "iteration": 197, "train_loss": 0.967327356338501, "trained_tokens": 9209984, "valid_loss": null, "perplexity": null, "time_since_job_start": 1066.1167635917664}
{"epoch": 0, "iteration": 198, "train_loss": 0.9446274042129517, "trained_tokens": 9255536, "valid_loss": null, "perplexity": null, "time_since_job_start": 1070.4665369987488}
{"epoch": 0, "iteration": 199, "train_loss": 0.9532848596572876, "trained_tokens": 9301288, "valid_loss": null, "perplexity": null, "time_since_job_start": 1075.5241839885712}
{"epoch": 0, "iteration": 200, "train_loss": 0.9228770732879639, "trained_tokens": 9347504, "valid_loss": null, "perplexity": null, "time_since_job_start": 1079.6882510185242}
{"epoch": 0, "iteration": 201, "train_loss": 0.9207225441932678, "trained_tokens": 9391448, "valid_loss": null, "perplexity": null, "time_since_job_start": 1083.2349314689636}
{"epoch": 0, "iteration": 202, "train_loss": 0.9306858777999878, "trained_tokens": 9436704, "valid_loss": null, "perplexity": null, "time_since_job_start": 1087.1505522727966}
{"epoch": 0, "iteration": 203, "train_loss": 0.9408777952194214, "trained_tokens": 9482864, "valid_loss": null, "perplexity": null, "time_since_job_start": 1091.4922077655792}
{"epoch": 0, "iteration": 204, "train_loss": 0.93889319896698, "trained_tokens": 9528864, "valid_loss": null, "perplexity": null, "time_since_job_start": 1095.589286327362}
{"epoch": 0, "iteration": 205, "train_loss": 0.9125361442565918, "trained_tokens": 9574216, "valid_loss": null, "perplexity": null, "time_since_job_start": 1099.1538314819336}
{"epoch": 0, "iteration": 206, "train_loss": 0.9520851373672485, "trained_tokens": 9624248, "valid_loss": null, "perplexity": null, "time_since_job_start": 1103.5109856128693}
{"epoch": 0, "iteration": 207, "train_loss": 0.9394710659980774, "trained_tokens": 9670096, "valid_loss": null, "perplexity": null, "time_since_job_start": 1107.5549175739288}
{"epoch": 0, "iteration": 208, "train_loss": 0.9460715055465698, "trained_tokens": 9716752, "valid_loss": null, "perplexity": null, "time_since_job_start": 1111.9243149757385}
{"epoch": 0, "iteration": 209, "train_loss": 0.9452148079872131, "trained_tokens": 9762144, "valid_loss": null, "perplexity": null, "time_since_job_start": 1115.9384150505066}
{"epoch": 0, "iteration": 210, "train_loss": 0.9118196964263916, "trained_tokens": 9808584, "valid_loss": null, "perplexity": null, "time_since_job_start": 1120.27831864357}
{"epoch": 0, "iteration": 211, "train_loss": 0.9494131803512573, "trained_tokens": 9855648, "valid_loss": null, "perplexity": null, "time_since_job_start": 1124.683584690094}
{"epoch": 0, "iteration": 212, "train_loss": 0.9474332332611084, "trained_tokens": 9903096, "valid_loss": null, "perplexity": null, "time_since_job_start": 1129.0907292366028}
{"epoch": 0, "iteration": 213, "train_loss": 0.9278837442398071, "trained_tokens": 9946600, "valid_loss": null, "perplexity": null, "time_since_job_start": 1133.316312789917}
{"epoch": 0, "iteration": 214, "train_loss": 0.9345028400421143, "trained_tokens": 9994992, "valid_loss": null, "perplexity": null, "time_since_job_start": 1137.6904196739197}
{"epoch": 0, "iteration": 215, "train_loss": 0.9338730573654175, "trained_tokens": 10039952, "valid_loss": null, "perplexity": null, "time_since_job_start": 1141.6782069206238}
{"epoch": 0, "iteration": 216, "train_loss": 0.9255131483078003, "trained_tokens": 10084256, "valid_loss": null, "perplexity": null, "time_since_job_start": 1145.8864307403564}
{"epoch": 0, "iteration": 217, "train_loss": 0.9161108732223511, "trained_tokens": 10129704, "valid_loss": null, "perplexity": null, "time_since_job_start": 1150.1236951351166}
{"epoch": 0, "iteration": 218, "train_loss": 0.9235725402832031, "trained_tokens": 10174992, "valid_loss": null, "perplexity": null, "time_since_job_start": 1154.4895331859589}
{"epoch": 0, "iteration": 219, "train_loss": 0.9315471649169922, "trained_tokens": 10218776, "valid_loss": null, "perplexity": null, "time_since_job_start": 1158.1643695831299}
{"epoch": 0, "iteration": 220, "train_loss": 0.9536263346672058, "trained_tokens": 10265744, "valid_loss": null, "perplexity": null, "time_since_job_start": 1162.5138790607452}
{"epoch": 0, "iteration": 221, "train_loss": 0.935306191444397, "trained_tokens": 10313304, "valid_loss": null, "perplexity": null, "time_since_job_start": 1166.8947904109955}
{"epoch": 0, "iteration": 222, "train_loss": 0.933881402015686, "trained_tokens": 10359768, "valid_loss": null, "perplexity": null, "time_since_job_start": 1170.9219851493835}
{"epoch": 0, "iteration": 223, "train_loss": 0.9743697047233582, "trained_tokens": 10405640, "valid_loss": null, "perplexity": null, "time_since_job_start": 1175.1993353366852}
{"epoch": 0, "iteration": 224, "train_loss": 0.9248418807983398, "trained_tokens": 10453840, "valid_loss": null, "perplexity": null, "time_since_job_start": 1179.6173584461212}
{"epoch": 0, "iteration": 225, "train_loss": 0.9568735361099243, "trained_tokens": 10502384, "valid_loss": null, "perplexity": null, "time_since_job_start": 1184.0198481082916}
{"epoch": 0, "iteration": 226, "train_loss": 0.9276665449142456, "trained_tokens": 10547520, "valid_loss": null, "perplexity": null, "time_since_job_start": 1188.032972574234}
{"epoch": 0, "iteration": 227, "train_loss": 0.9715742468833923, "trained_tokens": 10597216, "valid_loss": null, "perplexity": null, "time_since_job_start": 1192.0817914009094}
{"epoch": 0, "iteration": 228, "train_loss": 0.9418374300003052, "trained_tokens": 10646344, "valid_loss": null, "perplexity": null, "time_since_job_start": 1196.442105770111}
{"epoch": 0, "iteration": 229, "train_loss": 0.9600395560264587, "trained_tokens": 10693184, "valid_loss": null, "perplexity": null, "time_since_job_start": 1200.4836342334747}
{"epoch": 0, "iteration": 230, "train_loss": 0.9724797010421753, "trained_tokens": 10743536, "valid_loss": null, "perplexity": null, "time_since_job_start": 1204.891675710678}
{"epoch": 0, "iteration": 231, "train_loss": 0.9340342283248901, "trained_tokens": 10791896, "valid_loss": null, "perplexity": null, "time_since_job_start": 1209.2739901542664}
{"epoch": 0, "iteration": 232, "train_loss": 0.9548768997192383, "trained_tokens": 10838808, "valid_loss": null, "perplexity": null, "time_since_job_start": 1213.6681685447693}
{"epoch": 0, "iteration": 233, "train_loss": 0.9485148191452026, "trained_tokens": 10886240, "valid_loss": null, "perplexity": null, "time_since_job_start": 1218.0185987949371}
{"epoch": 0, "iteration": 234, "train_loss": 0.9815489053726196, "trained_tokens": 10933544, "valid_loss": null, "perplexity": null, "time_since_job_start": 1222.4313983917236}
{"epoch": 0, "iteration": 235, "train_loss": 0.9436153173446655, "trained_tokens": 10981520, "valid_loss": null, "perplexity": null, "time_since_job_start": 1226.7965261936188}
{"epoch": 0, "iteration": 236, "train_loss": 0.9369980692863464, "trained_tokens": 11031408, "valid_loss": null, "perplexity": null, "time_since_job_start": 1231.1888990402222}
{"epoch": 0, "iteration": 237, "train_loss": 0.9391626119613647, "trained_tokens": 11078320, "valid_loss": null, "perplexity": null, "time_since_job_start": 1235.5543422698975}
{"epoch": 0, "iteration": 238, "train_loss": 0.9361999034881592, "trained_tokens": 11126656, "valid_loss": null, "perplexity": null, "time_since_job_start": 1239.9360036849976}
{"epoch": 0, "iteration": 239, "train_loss": 0.9671157598495483, "trained_tokens": 11172480, "valid_loss": null, "perplexity": null, "time_since_job_start": 1243.9578697681427}
{"epoch": 0, "iteration": 240, "train_loss": 0.9527288675308228, "trained_tokens": 11216976, "valid_loss": null, "perplexity": null, "time_since_job_start": 1247.6352007389069}
{"epoch": 0, "iteration": 241, "train_loss": 0.9473632574081421, "trained_tokens": 11263696, "valid_loss": null, "perplexity": null, "time_since_job_start": 1252.0182602405548}
{"epoch": 0, "iteration": 242, "train_loss": 0.9301288723945618, "trained_tokens": 11308336, "valid_loss": null, "perplexity": null, "time_since_job_start": 1255.9339394569397}
{"epoch": 0, "iteration": 243, "train_loss": 1.0037554502487183, "trained_tokens": 11359368, "valid_loss": null, "perplexity": null, "time_since_job_start": 1260.363222360611}
{"epoch": 0, "iteration": 244, "train_loss": 0.9657710790634155, "trained_tokens": 11406864, "valid_loss": null, "perplexity": null, "time_since_job_start": 1264.7074823379517}
{"epoch": 0, "iteration": 245, "train_loss": 0.97240149974823, "trained_tokens": 11454048, "valid_loss": null, "perplexity": null, "time_since_job_start": 1269.002625465393}
{"epoch": 0, "iteration": 246, "train_loss": 0.9628379344940186, "trained_tokens": 11501360, "valid_loss": null, "perplexity": null, "time_since_job_start": 1273.5969121456146}
{"epoch": 0, "iteration": 247, "train_loss": 0.9619136452674866, "trained_tokens": 11548992, "valid_loss": null, "perplexity": null, "time_since_job_start": 1277.6968202590942}
{"epoch": 0, "iteration": 248, "train_loss": 0.9760681390762329, "trained_tokens": 11598056, "valid_loss": null, "perplexity": null, "time_since_job_start": 1282.0912334918976}
{"epoch": 0, "iteration": 249, "train_loss": 0.9212368726730347, "trained_tokens": 11644048, "valid_loss": null, "perplexity": null, "time_since_job_start": 1286.0418934822083}
{"epoch": 0, "iteration": 250, "train_loss": 0.9492572546005249, "trained_tokens": 11695152, "valid_loss": null, "perplexity": null, "time_since_job_start": 1290.4588372707367}
{"epoch": 0, "iteration": 251, "train_loss": 0.9554206132888794, "trained_tokens": 11742864, "valid_loss": null, "perplexity": null, "time_since_job_start": 1294.5377297401428}
{"epoch": 0, "iteration": 252, "train_loss": 0.921414852142334, "trained_tokens": 11789432, "valid_loss": null, "perplexity": null, "time_since_job_start": 1298.555787563324}
{"epoch": 0, "iteration": 253, "train_loss": 0.9507285356521606, "trained_tokens": 11836712, "valid_loss": null, "perplexity": null, "time_since_job_start": 1302.8780376911163}
{"epoch": 0, "iteration": 254, "train_loss": 0.9489372372627258, "trained_tokens": 11883144, "valid_loss": null, "perplexity": null, "time_since_job_start": 1307.2316718101501}
{"epoch": 0, "iteration": 255, "train_loss": 0.950923502445221, "trained_tokens": 11929248, "valid_loss": null, "perplexity": null, "time_since_job_start": 1310.9980356693268}
{"epoch": 0, "iteration": 256, "train_loss": 0.9617209434509277, "trained_tokens": 11978272, "valid_loss": null, "perplexity": null, "time_since_job_start": 1315.3919093608856}
{"epoch": 0, "iteration": 257, "train_loss": 0.946367621421814, "trained_tokens": 12026224, "valid_loss": null, "perplexity": null, "time_since_job_start": 1319.413375377655}
{"epoch": 0, "iteration": 258, "train_loss": 0.9253612160682678, "trained_tokens": 12072720, "valid_loss": null, "perplexity": null, "time_since_job_start": 1323.7779014110565}
{"epoch": 0, "iteration": 259, "train_loss": 0.9457186460494995, "trained_tokens": 12119472, "valid_loss": null, "perplexity": null, "time_since_job_start": 1328.1302845478058}
{"epoch": 0, "iteration": 260, "train_loss": 0.9310272932052612, "trained_tokens": 12165992, "valid_loss": null, "perplexity": null, "time_since_job_start": 1332.5175125598907}
{"epoch": 0, "iteration": 261, "train_loss": 0.9503007531166077, "trained_tokens": 12215624, "valid_loss": null, "perplexity": null, "time_since_job_start": 1336.9321510791779}
{"epoch": 0, "iteration": 262, "train_loss": 0.9465148448944092, "trained_tokens": 12263504, "valid_loss": null, "perplexity": null, "time_since_job_start": 1341.3290300369263}
{"epoch": 0, "iteration": 263, "train_loss": 0.9566612243652344, "trained_tokens": 12311376, "valid_loss": null, "perplexity": null, "time_since_job_start": 1345.3945126533508}
{"epoch": 0, "iteration": 264, "train_loss": 0.9732366800308228, "trained_tokens": 12357768, "valid_loss": null, "perplexity": null, "time_since_job_start": 1349.4520716667175}
{"epoch": 0, "iteration": 265, "train_loss": 0.9472500085830688, "trained_tokens": 12405816, "valid_loss": null, "perplexity": null, "time_since_job_start": 1353.8033311367035}
{"epoch": 0, "iteration": 266, "train_loss": 0.9468222260475159, "trained_tokens": 12452072, "valid_loss": null, "perplexity": null, "time_since_job_start": 1358.043868780136}
{"epoch": 0, "iteration": 267, "train_loss": 0.9425835013389587, "trained_tokens": 12497960, "valid_loss": null, "perplexity": null, "time_since_job_start": 1361.8130543231964}
{"epoch": 0, "iteration": 268, "train_loss": 0.9412144422531128, "trained_tokens": 12544432, "valid_loss": null, "perplexity": null, "time_since_job_start": 1366.1624970436096}
{"epoch": 0, "iteration": 269, "train_loss": 0.9401108026504517, "trained_tokens": 12594472, "valid_loss": null, "perplexity": null, "time_since_job_start": 1370.567850112915}
{"epoch": 0, "iteration": 270, "train_loss": 0.9273020029067993, "trained_tokens": 12639408, "valid_loss": null, "perplexity": null, "time_since_job_start": 1374.3317799568176}
{"epoch": 0, "iteration": 271, "train_loss": 0.9387463331222534, "trained_tokens": 12685288, "valid_loss": null, "perplexity": null, "time_since_job_start": 1378.3564574718475}
{"epoch": 0, "iteration": 272, "train_loss": 0.936293363571167, "trained_tokens": 12732752, "valid_loss": null, "perplexity": null, "time_since_job_start": 1382.6489205360413}
{"epoch": 0, "iteration": 273, "train_loss": 0.9316617846488953, "trained_tokens": 12779728, "valid_loss": null, "perplexity": null, "time_since_job_start": 1387.0424828529358}
{"epoch": 0, "iteration": 274, "train_loss": 0.9392356872558594, "trained_tokens": 12826880, "valid_loss": null, "perplexity": null, "time_since_job_start": 1391.203041791916}
{"epoch": 0, "iteration": 275, "train_loss": 0.9652104377746582, "trained_tokens": 12874856, "valid_loss": null, "perplexity": null, "time_since_job_start": 1395.492948293686}
{"epoch": 0, "iteration": 276, "train_loss": 0.9522521495819092, "trained_tokens": 12921936, "valid_loss": null, "perplexity": null, "time_since_job_start": 1399.5923132896423}
{"epoch": 0, "iteration": 277, "train_loss": 0.9510942697525024, "trained_tokens": 12969352, "valid_loss": null, "perplexity": null, "time_since_job_start": 1403.9716396331787}
{"epoch": 0, "iteration": 278, "train_loss": 0.9618430137634277, "trained_tokens": 13016104, "valid_loss": null, "perplexity": null, "time_since_job_start": 1408.0626401901245}
{"epoch": 0, "iteration": 279, "train_loss": 0.9424797296524048, "trained_tokens": 13061464, "valid_loss": null, "perplexity": null, "time_since_job_start": 1411.9573788642883}
{"epoch": 0, "iteration": 280, "train_loss": 0.964227020740509, "trained_tokens": 13109392, "valid_loss": null, "perplexity": null, "time_since_job_start": 1416.3130142688751}
{"epoch": 0, "iteration": 281, "train_loss": 0.9447520971298218, "trained_tokens": 13157080, "valid_loss": null, "perplexity": null, "time_since_job_start": 1420.6725392341614}
{"epoch": 0, "iteration": 282, "train_loss": 0.9427496790885925, "trained_tokens": 13206880, "valid_loss": null, "perplexity": null, "time_since_job_start": 1425.0862152576447}
{"epoch": 0, "iteration": 283, "train_loss": 0.9489904642105103, "trained_tokens": 13253640, "valid_loss": null, "perplexity": null, "time_since_job_start": 1429.542875289917}
{"epoch": 0, "iteration": 284, "train_loss": 0.9766574501991272, "trained_tokens": 13301872, "valid_loss": null, "perplexity": null, "time_since_job_start": 1433.9161546230316}
{"epoch": 0, "iteration": 285, "train_loss": 0.9632338881492615, "trained_tokens": 13350360, "valid_loss": null, "perplexity": null, "time_since_job_start": 1438.3154203891754}
{"epoch": 0, "iteration": 286, "train_loss": 0.9389750957489014, "trained_tokens": 13396560, "valid_loss": null, "perplexity": null, "time_since_job_start": 1442.7062833309174}
{"epoch": 0, "iteration": 287, "train_loss": 0.9518765211105347, "trained_tokens": 13443456, "valid_loss": null, "perplexity": null, "time_since_job_start": 1447.097041130066}
{"epoch": 0, "iteration": 288, "train_loss": 0.9568557739257812, "trained_tokens": 13490912, "valid_loss": null, "perplexity": null, "time_since_job_start": 1451.2660493850708}
{"epoch": 0, "iteration": 289, "train_loss": 0.9615598320960999, "trained_tokens": 13537856, "valid_loss": null, "perplexity": null, "time_since_job_start": 1455.9729163646698}
{"epoch": 0, "iteration": 290, "train_loss": 0.9279730319976807, "trained_tokens": 13584512, "valid_loss": null, "perplexity": null, "time_since_job_start": 1460.1833233833313}
{"epoch": 0, "iteration": 291, "train_loss": 0.9258472919464111, "trained_tokens": 13632368, "valid_loss": null, "perplexity": null, "time_since_job_start": 1464.5652341842651}
{"epoch": 0, "iteration": 292, "train_loss": 0.9342167973518372, "trained_tokens": 13678640, "valid_loss": null, "perplexity": null, "time_since_job_start": 1468.605447769165}
{"epoch": 0, "iteration": 293, "train_loss": 0.933361291885376, "trained_tokens": 13725176, "valid_loss": null, "perplexity": null, "time_since_job_start": 1472.6662867069244}
{"epoch": 0, "iteration": 294, "train_loss": 0.9625687003135681, "trained_tokens": 13772600, "valid_loss": null, "perplexity": null, "time_since_job_start": 1477.0401742458344}
{"epoch": 0, "iteration": 295, "train_loss": 0.9821858406066895, "trained_tokens": 13821264, "valid_loss": null, "perplexity": null, "time_since_job_start": 1481.1866681575775}
{"epoch": 0, "iteration": 296, "train_loss": 0.9470242261886597, "trained_tokens": 13865656, "valid_loss": null, "perplexity": null, "time_since_job_start": 1484.9350600242615}
{"epoch": 0, "iteration": 297, "train_loss": 0.9332404136657715, "trained_tokens": 13911336, "valid_loss": null, "perplexity": null, "time_since_job_start": 1489.2728848457336}
{"epoch": 0, "iteration": 298, "train_loss": 0.9538326263427734, "trained_tokens": 13957072, "valid_loss": null, "perplexity": null, "time_since_job_start": 1493.2315146923065}
{"epoch": 0, "iteration": 299, "train_loss": 1.0281158685684204, "trained_tokens": 14008968, "valid_loss": null, "perplexity": null, "time_since_job_start": 1497.650646686554}
{"epoch": 0, "iteration": 300, "train_loss": 0.993752121925354, "trained_tokens": 14054936, "valid_loss": null, "perplexity": null, "time_since_job_start": 1501.4118225574493}
{"epoch": 0, "iteration": 301, "train_loss": 0.9847540855407715, "trained_tokens": 14102088, "valid_loss": null, "perplexity": null, "time_since_job_start": 1505.8099479675293}
{"epoch": 0, "iteration": 302, "train_loss": 0.9909994602203369, "trained_tokens": 14148776, "valid_loss": null, "perplexity": null, "time_since_job_start": 1510.017059803009}
{"epoch": 0, "iteration": 303, "train_loss": 0.984935462474823, "trained_tokens": 14198656, "valid_loss": null, "perplexity": null, "time_since_job_start": 1514.4031100273132}
{"epoch": 0, "iteration": 304, "train_loss": 0.9878885746002197, "trained_tokens": 14248384, "valid_loss": null, "perplexity": null, "time_since_job_start": 1518.7732198238373}
{"epoch": 0, "iteration": 305, "train_loss": 0.9670425653457642, "trained_tokens": 14296552, "valid_loss": null, "perplexity": null, "time_since_job_start": 1523.1312408447266}
{"epoch": 0, "iteration": 306, "train_loss": 1.0130884647369385, "trained_tokens": 14346240, "valid_loss": null, "perplexity": null, "time_since_job_start": 1527.5116212368011}
{"epoch": 0, "iteration": 307, "train_loss": 1.0057965517044067, "trained_tokens": 14396608, "valid_loss": null, "perplexity": null, "time_since_job_start": 1531.699566602707}
{"epoch": 0, "iteration": 308, "train_loss": 0.985687255859375, "trained_tokens": 14443672, "valid_loss": null, "perplexity": null, "time_since_job_start": 1536.0838589668274}
{"epoch": 0, "iteration": 309, "train_loss": 1.0139925479888916, "trained_tokens": 14492912, "valid_loss": null, "perplexity": null, "time_since_job_start": 1540.444931268692}
{"epoch": 0, "iteration": 310, "train_loss": 1.011357307434082, "trained_tokens": 14541192, "valid_loss": null, "perplexity": null, "time_since_job_start": 1544.820770740509}
{"epoch": 0, "iteration": 311, "train_loss": 1.00661039352417, "trained_tokens": 14589032, "valid_loss": null, "perplexity": null, "time_since_job_start": 1549.0867567062378}
{"epoch": 0, "iteration": 312, "train_loss": 0.9975116848945618, "trained_tokens": 14638848, "valid_loss": null, "perplexity": null, "time_since_job_start": 1553.4071345329285}
{"epoch": 0, "iteration": 313, "train_loss": 0.9971510767936707, "trained_tokens": 14687984, "valid_loss": null, "perplexity": null, "time_since_job_start": 1557.794359922409}
{"epoch": 0, "iteration": 314, "train_loss": 0.9913002848625183, "trained_tokens": 14735624, "valid_loss": null, "perplexity": null, "time_since_job_start": 1561.8365530967712}
{"epoch": 0, "iteration": 315, "train_loss": 0.9983183741569519, "trained_tokens": 14783480, "valid_loss": null, "perplexity": null, "time_since_job_start": 1566.0419437885284}
{"epoch": 0, "iteration": 316, "train_loss": 0.9985114336013794, "trained_tokens": 14831520, "valid_loss": null, "perplexity": null, "time_since_job_start": 1570.3883090019226}
{"epoch": 0, "iteration": 317, "train_loss": 0.9963041543960571, "trained_tokens": 14879552, "valid_loss": null, "perplexity": null, "time_since_job_start": 1574.81787109375}
{"epoch": 0, "iteration": 318, "train_loss": 1.0060116052627563, "trained_tokens": 14927648, "valid_loss": null, "perplexity": null, "time_since_job_start": 1578.9049654006958}
{"epoch": 0, "iteration": 319, "train_loss": 1.0118510723114014, "trained_tokens": 14977256, "valid_loss": null, "perplexity": null, "time_since_job_start": 1583.3077039718628}
{"epoch": 0, "iteration": 320, "train_loss": 1.0074167251586914, "trained_tokens": 15028504, "valid_loss": null, "perplexity": null, "time_since_job_start": 1587.663869380951}
{"epoch": 0, "iteration": 321, "train_loss": 0.9902176856994629, "trained_tokens": 15076280, "valid_loss": null, "perplexity": null, "time_since_job_start": 1592.046819448471}
{"epoch": 0, "iteration": 322, "train_loss": 0.9830139875411987, "trained_tokens": 15093299, "valid_loss": 0.8249631524085999, "perplexity": 2.281796685068326, "time_since_job_start": 1696.9842269420624}