1
1
package fluent
2
2
3
3
import (
4
+ "context"
4
5
"encoding/json"
5
6
"errors"
6
7
"fmt"
@@ -15,8 +16,9 @@ import (
15
16
"bytes"
16
17
"encoding/base64"
17
18
"encoding/binary"
18
- "github.com/tinylib/msgp/msgp"
19
19
"math/rand"
20
+
21
+ "github.com/tinylib/msgp/msgp"
20
22
)
21
23
22
24
const (
@@ -84,6 +86,7 @@ type msgToSend struct {
84
86
type Fluent struct {
85
87
Config
86
88
89
+ ready chan bool
87
90
stopRunning chan bool
88
91
pending chan * msgToSend
89
92
wg sync.WaitGroup
@@ -130,14 +133,16 @@ func New(config Config) (f *Fluent, err error) {
130
133
}
131
134
if config .Async {
132
135
f = & Fluent {
133
- Config : config ,
134
- pending : make (chan * msgToSend , config .BufferLimit ),
136
+ Config : config ,
137
+ ready : make (chan bool ),
138
+ stopRunning : make (chan bool ),
139
+ pending : make (chan * msgToSend , config .BufferLimit ),
135
140
}
136
141
f .wg .Add (1 )
137
142
go f .run ()
138
143
} else {
139
144
f = & Fluent {Config : config }
140
- err = f .connect ()
145
+ err = f .connect (context . Background () )
141
146
}
142
147
return
143
148
}
@@ -339,38 +344,111 @@ func (f *Fluent) close(c net.Conn) {
339
344
}
340
345
341
346
// connect establishes a new connection using the specified transport.
342
- func (f * Fluent ) connect () (err error ) {
347
+ func (f * Fluent ) connect (ctx context.Context ) (err error ) {
348
+ f .muconn .Lock ()
349
+ defer f .muconn .Unlock ()
350
+
351
+ dialer := net.Dialer {Timeout : f .Config .Timeout }
343
352
344
353
switch f .Config .FluentNetwork {
345
354
case "tcp" :
346
- f .conn , err = net .DialTimeout (f .Config .FluentNetwork , f .Config .FluentHost + ":" + strconv .Itoa (f .Config .FluentPort ), f .Config .Timeout )
355
+ f .conn , err = dialer .DialContext (ctx ,
356
+ f .Config .FluentNetwork ,
357
+ f .Config .FluentHost + ":" + strconv .Itoa (f .Config .FluentPort ))
347
358
case "unix" :
348
- f .conn , err = net .DialTimeout (f .Config .FluentNetwork , f .Config .FluentSocketPath , f .Config .Timeout )
359
+ f .conn , err = dialer .DialContext (ctx ,
360
+ f .Config .FluentNetwork ,
361
+ f .Config .FluentSocketPath )
349
362
default :
350
363
err = NewErrUnknownNetwork (f .Config .FluentNetwork )
351
364
}
365
+
352
366
return err
353
367
}
354
368
355
- func (f * Fluent ) run () {
356
- drainEvents := false
357
- var emitEventDrainMsg sync.Once
358
- for {
369
+ func (f * Fluent ) connectAsync (ctx context.Context , stopAsyncConnect <- chan bool ) {
370
+ f .wg .Add (1 )
371
+ defer f .wg .Done ()
372
+
373
+ waiter := time .After (time .Duration (0 ))
374
+ for i := 0 ; i < f .Config .MaxRetry ; i ++ {
359
375
select {
360
- case entry , ok := <- f .pending :
361
- if ! ok {
362
- f .wg .Done ()
363
- return
376
+ case <- waiter :
377
+ if f .conn != nil {
378
+ f .ready <- true
364
379
}
365
- if drainEvents {
366
- emitEventDrainMsg .Do (func () { fmt .Fprintf (os .Stderr , "[%s] Discarding queued events...\n " , time .Now ().Format (time .RFC3339 )) })
367
- continue
380
+
381
+ err := f .connect (ctx )
382
+ if err == nil {
383
+ f .ready <- true
384
+ break
385
+ }
386
+
387
+ if _ , ok := err .(* ErrUnknownNetwork ); ok {
388
+ // No need to retry on an unknown network error, so false is sent
389
+ // on the ready channel to let the other end drain the message queue.
390
+ f .ready <- false
391
+ break
368
392
}
369
- err := f .write (entry )
370
- if err != nil {
371
- fmt .Fprintf (os .Stderr , "[%s] Unable to send logs to fluentd, reconnecting...\n " , time .Now ().Format (time .RFC3339 ))
393
+
394
+ waitTime := f .Config .RetryWait * e (defaultReconnectWaitIncreRate , float64 (i - 1 ))
395
+ if waitTime > f .Config .MaxRetryWait {
396
+ waitTime = f .Config .MaxRetryWait
372
397
}
398
+
399
+ waiter = time .After (time .Duration (waitTime ) * time .Millisecond )
400
+ case <- stopAsyncConnect :
401
+ break
373
402
}
403
+ }
404
+ }
405
+
406
+ func (f * Fluent ) run () {
407
+ drainEvents := false
408
+ var emitEventDrainMsg sync.Once
409
+
410
+ // First we need to wait for the connection to become ready. We cannot
411
+ // initialize the connection lazily (eg. when the first message is
412
+ // received) because it'd be done in the first for-select iteration on
413
+ // f.pending and this would block the select without giving the
414
+ // select on f.stopRunning a chance to signal its end to this goroutine.
415
+ ctx , cancelDialing := context .WithCancel (context .Background ())
416
+ stopAsyncConnect := make (chan bool )
417
+ go f .connectAsync (ctx , stopAsyncConnect )
418
+ select {
419
+ case <- f .stopRunning :
420
+ drainEvents = true
421
+ // Stop any connection dialing and then tell connectAsync to stop
422
+ // trying to dial the connection. This has to be done in this
423
+ // specific order to make sure connectAsync() is not blocking on the
424
+ // connection dialing.
425
+ cancelDialing ()
426
+ close (stopAsyncConnect )
427
+ break asyncConnect
428
+ case ready , ok := <- f .ready :
429
+ if ! ready || ! ok {
430
+ drainEvents = true
431
+ }
432
+ break asyncConnect
433
+ }
434
+
435
+ // At this point we can go ahead: the connection is either ready to use or
436
+ // drainEvents is true and thus all logs should be discarded.
437
+ for {
438
+ entry , ok := <- f.pending :
439
+ if ! ok {
440
+ f .wg .Done ()
441
+ return
442
+ }
443
+ if drainEvents {
444
+ emitEventDrainMsg .Do (func () { fmt .Fprintf (os .Stderr , "[%s] Discarding queued events...\n " , time .Now ().Format (time .RFC3339 )) })
445
+ continue
446
+ }
447
+ err := f .write (entry )
448
+ if err != nil {
449
+ fmt .Fprintf (os .Stderr , "[%s] Unable to send logs to fluentd, reconnecting...\n " , time .Now ().Format (time .RFC3339 ))
450
+ }
451
+
374
452
select {
375
453
case stopRunning , ok := <- f .stopRunning :
376
454
if stopRunning || ! ok {
@@ -389,31 +467,6 @@ func (f *Fluent) write(msg *msgToSend) error {
389
467
var c net.Conn
390
468
for i := 0 ; i < f .Config .MaxRetry ; i ++ {
391
469
c = f .conn
392
- // Connect if needed
393
- if c == nil {
394
- f .muconn .Lock ()
395
- if f .conn == nil {
396
- err := f .connect ()
397
- if err != nil {
398
- f .muconn .Unlock ()
399
-
400
- if _ , ok := err .(* ErrUnknownNetwork ); ok {
401
- // do not retry on unknown network error
402
- break
403
- }
404
- waitTime := f .Config .RetryWait * e (defaultReconnectWaitIncreRate , float64 (i - 1 ))
405
- if waitTime > f .Config .MaxRetryWait {
406
- waitTime = f .Config .MaxRetryWait
407
- }
408
- time .Sleep (time .Duration (waitTime ) * time .Millisecond )
409
- continue
410
- }
411
- }
412
- c = f .conn
413
- f .muconn .Unlock ()
414
- }
415
-
416
- // We're connected, write msg
417
470
t := f .Config .WriteTimeout
418
471
if time .Duration (0 ) < t {
419
472
c .SetWriteDeadline (time .Now ().Add (t ))
0 commit comments