mirror of
https://github.com/1f349/dendrite.git
synced 2024-11-09 22:42:58 +00:00
Track reasons why the process is in a degraded state
This commit is contained in:
parent
a767102f8a
commit
3da182212e
@ -18,6 +18,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
@ -467,8 +468,13 @@ func (b *BaseDendrite) SetupAndServeHTTP(
|
|||||||
w.WriteHeader(200)
|
w.WriteHeader(200)
|
||||||
})
|
})
|
||||||
b.DendriteAdminMux.HandleFunc("/monitor/health", func(w http.ResponseWriter, r *http.Request) {
|
b.DendriteAdminMux.HandleFunc("/monitor/health", func(w http.ResponseWriter, r *http.Request) {
|
||||||
if b.ProcessContext.IsDegraded() {
|
if isDegraded, reasons := b.ProcessContext.IsDegraded(); isDegraded {
|
||||||
w.WriteHeader(503)
|
w.WriteHeader(503)
|
||||||
|
_ = json.NewEncoder(w).Encode(struct {
|
||||||
|
Warnings []string `json:"warnings"`
|
||||||
|
}{
|
||||||
|
Warnings: reasons,
|
||||||
|
})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
w.WriteHeader(200)
|
w.WriteHeader(200)
|
||||||
|
@ -169,9 +169,9 @@ func setupNATS(process *process.ProcessContext, cfg *config.JetStream, nc *natsc
|
|||||||
// We've managed to add the stream in memory. What's on the
|
// We've managed to add the stream in memory. What's on the
|
||||||
// disk will be left alone, but our ability to recover from a
|
// disk will be left alone, but our ability to recover from a
|
||||||
// future crash will be limited. Yell about it.
|
// future crash will be limited. Yell about it.
|
||||||
sentry.CaptureException(fmt.Errorf("Stream %q is running in-memory; this may be due to data corruption in the JetStream storage directory, investigate as soon as possible", namespaced.Name))
|
err := fmt.Errorf("Stream %q is running in-memory; this may be due to data corruption in the JetStream storage directory", namespaced.Name)
|
||||||
logrus.Warn("Stream is running in-memory; this may be due to data corruption in the JetStream storage directory, investigate as soon as possible")
|
sentry.CaptureException(err)
|
||||||
process.Degraded()
|
process.Degraded(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,19 +2,18 @@ package process
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/getsentry/sentry-go"
|
"github.com/getsentry/sentry-go"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"go.uber.org/atomic"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type ProcessContext struct {
|
type ProcessContext struct {
|
||||||
|
mu sync.RWMutex
|
||||||
wg *sync.WaitGroup // used to wait for components to shutdown
|
wg *sync.WaitGroup // used to wait for components to shutdown
|
||||||
ctx context.Context // cancelled when Stop is called
|
ctx context.Context // cancelled when Stop is called
|
||||||
shutdown context.CancelFunc // shut down Dendrite
|
shutdown context.CancelFunc // shut down Dendrite
|
||||||
degraded atomic.Bool
|
degraded map[string]struct{} // reasons why the process is degraded
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewProcessContext() *ProcessContext {
|
func NewProcessContext() *ProcessContext {
|
||||||
@ -50,13 +49,25 @@ func (b *ProcessContext) WaitForComponentsToFinish() {
|
|||||||
b.wg.Wait()
|
b.wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *ProcessContext) Degraded() {
|
func (b *ProcessContext) Degraded(err error) {
|
||||||
if b.degraded.CompareAndSwap(false, true) {
|
b.mu.Lock()
|
||||||
logrus.Warn("Dendrite is running in a degraded state")
|
defer b.mu.Unlock()
|
||||||
sentry.CaptureException(fmt.Errorf("Process is running in a degraded state"))
|
if _, ok := b.degraded[err.Error()]; !ok {
|
||||||
|
logrus.WithError(err).Warn("Dendrite has entered a degraded state")
|
||||||
|
sentry.CaptureException(err)
|
||||||
|
b.degraded[err.Error()] = struct{}{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *ProcessContext) IsDegraded() bool {
|
func (b *ProcessContext) IsDegraded() (bool, []string) {
|
||||||
return b.degraded.Load()
|
b.mu.RLock()
|
||||||
|
defer b.mu.RUnlock()
|
||||||
|
if len(b.degraded) == 0 {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
reasons := make([]string, 0, len(b.degraded))
|
||||||
|
for reason := range b.degraded {
|
||||||
|
reasons = append(reasons, reason)
|
||||||
|
}
|
||||||
|
return true, reasons
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user