Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 20 additions & 27 deletions sei-cosmos/server/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ package server

import (
"context"
"errors"
"fmt"
"net/http"
"sync"
//nolint:gosec,G108
_ "net/http/pprof"
"os"
Expand Down Expand Up @@ -175,9 +177,6 @@ is performed. Note, when enabled, gRPC will also be automatically enabled.
tracerProviderOptions = []trace.TracerProviderOption{}
}

// amino is needed here for backwards compatibility of REST routes
exitCode := RestartErrorCode

serverCtx.Logger.Info("Creating node metrics provider")
nodeMetricsProvider := node.DefaultMetricsProvider(serverCtx.Config.Instrumentation)(clientCtx.ChainID)

Expand All @@ -193,33 +192,21 @@ is performed. Note, when enabled, gRPC will also be automatically enabled.
}
}

restartCoolDownDuration := time.Second * time.Duration(serverCtx.Config.SelfRemediation.RestartCooldownSeconds)
// Set the first restart time to be now - restartCoolDownDuration so that the first restart can trigger whenever
canRestartAfter := time.Now().Add(-restartCoolDownDuration)

serverCtx.Logger.Info("Starting Process")
for {
err = startInProcess(
err := startInProcess(
serverCtx,
clientCtx,
appCreator,
tracerProviderOptions,
nodeMetricsProvider,
apiMetrics,
canRestartAfter,
)
errCode, ok := err.(ErrorCode)
exitCode = errCode.Code
if !ok {
if !errors.Is(err, ErrShouldRestart) {
return err
}
if exitCode != RestartErrorCode {
break
}
serverCtx.Logger.Info("restarting node...")
canRestartAfter = time.Now().Add(restartCoolDownDuration)
}
return nil
},
}

Expand Down Expand Up @@ -283,7 +270,6 @@ func startInProcess(
tracerProviderOptions []trace.TracerProviderOption,
nodeMetricsProvider *node.NodeMetrics,
apiMetrics *telemetry.Metrics,
canRestartAfter time.Time,
) error {
cfg := ctx.Config
home := cfg.RootDir
Expand Down Expand Up @@ -330,13 +316,20 @@ func startInProcess(
}
app := appCreator(ctx.Logger, db, traceWriter, ctx.Config, ctx.Viper)

var (
tmNode service.Service
restartCh chan struct{}
gRPCOnly = ctx.Viper.GetBool(flagGRPCOnly)
)
gRPCOnly := ctx.Viper.GetBool(flagGRPCOnly)
var tmNode service.Service

restartCh = make(chan struct{})
var restartMtx sync.Mutex
restartCh := make(chan struct{})
restartEvent := func() {
restartMtx.Lock()
defer restartMtx.Unlock()
select {
case <-restartCh:
default:
close(restartCh)
}
}

if gRPCOnly {
ctx.Logger.Info("starting node in gRPC only mode; Tendermint is disabled")
Expand All @@ -361,7 +354,7 @@ func startInProcess(
goCtx,
ctx.Config,
ctx.Logger,
restartCh,
restartEvent,
abciclient.NewLocalClient(ctx.Logger, app),
gen,
tracerProviderOptions,
Expand Down Expand Up @@ -434,7 +427,7 @@ func startInProcess(
// we do not need to start Rosetta or handle any Tendermint related processes.
if gRPCOnly {
// wait for signal capture and gracefully return
return WaitForQuitSignals(ctx, restartCh, canRestartAfter)
return WaitForQuitSignals(goCtx, restartCh)
}

var rosettaSrv crgserver.Server
Expand Down Expand Up @@ -507,5 +500,5 @@ func startInProcess(
}()

// wait for signal capture and gracefully return
return WaitForQuitSignals(ctx, restartCh, canRestartAfter)
return WaitForQuitSignals(goCtx, restartCh)
}
36 changes: 12 additions & 24 deletions sei-cosmos/server/util.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package server

import (
"context"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -39,8 +40,7 @@ import (
// a command's Context.
const ServerContextKey = sdk.ContextKey("server.context")

// Error code reserved for signalled
const RestartErrorCode = 100
var ErrShouldRestart = errors.New("node should be restarted")

// server context
type Context struct {
Expand Down Expand Up @@ -135,7 +135,7 @@ func InterceptConfigs(cmd *cobra.Command) (*tmcfg.Config, error) {
// is used to read and parse the application configuration. Command handlers can
// fetch the server Context to get the Tendermint configuration or to get access
// to Viper.
func InterceptConfigsPreRunHandler(cmd *cobra.Command, customAppConfigTemplate string, customAppConfig interface{}) error {
func InterceptConfigsPreRunHandler(cmd *cobra.Command, customAppConfigTemplate string, customAppConfig any) error {
serverCtx := NewDefaultContext()

// Get the executable name and configure the viper instance so that environmental
Expand Down Expand Up @@ -221,7 +221,7 @@ func SetCmdServerContext(cmd *cobra.Command, serverCtx *Context) error {
// configuration file. The Tendermint configuration file is parsed given a root
// Viper object, whereas the application is parsed with the private package-aware
// viperCfg object.
func interceptConfigs(rootViper *viper.Viper, customAppTemplate string, customConfig interface{}) (*tmcfg.Config, error) {
func interceptConfigs(rootViper *viper.Viper, customAppTemplate string, customConfig any) (*tmcfg.Config, error) {
rootDir := rootViper.GetString(flags.FlagHome)
configPath := filepath.Join(rootDir, "config")
tmCfgFile := filepath.Join(configPath, "config.toml")
Expand Down Expand Up @@ -411,26 +411,14 @@ func TrapSignal(cleanupFunc func()) {
}

// WaitForQuitSignals waits for SIGINT and SIGTERM and returns.
func WaitForQuitSignals(ctx *Context, restartCh chan struct{}, canRestartAfter time.Time) ErrorCode {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
if restartCh != nil {
for {
select {
case sig := <-sigs:
return ErrorCode{Code: int(sig.(syscall.Signal)) + 128}
case <-restartCh:
// If it's in the restart cooldown period
if time.Now().Before(canRestartAfter) {
ctx.Logger.Info("Restarting too frequently, can only restart after %s", canRestartAfter)
continue
}
return ErrorCode{Code: RestartErrorCode}
}
}
} else {
sig := <-sigs
return ErrorCode{Code: int(sig.(syscall.Signal)) + 128}
func WaitForQuitSignals(ctx context.Context, restartCh chan struct{}) error {
ctx, cancel := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
defer cancel()
select {
case <-ctx.Done():
return nil
case <-restartCh: // blocks forever on a nil channel
return ErrShouldRestart
}
}

Expand Down
70 changes: 13 additions & 57 deletions sei-cosmos/server/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"errors"
"fmt"
"os"
"os/signal"
"path"
"path/filepath"
"strings"
Expand Down Expand Up @@ -103,8 +102,7 @@ func TestInterceptConfigsPreRunHandlerReadsConfigToml(t *testing.T) {
t.Fatalf("creating config.toml file failed: %v", err)
}

_, err = writer.WriteString(fmt.Sprintf("db-backend = '%s'\n", testDbBackend))
if err != nil {
if _, err := fmt.Fprintf(writer, "db-backend = '%s'\n", testDbBackend); err != nil {
t.Fatalf("Failed writing string to config.toml: %v", err)
}

Expand Down Expand Up @@ -144,8 +142,7 @@ func TestInterceptConfigsPreRunHandlerReadsAppToml(t *testing.T) {
t.Fatalf("creating app.toml file failed: %v", err)
}

_, err = writer.WriteString(fmt.Sprintf("halt-time = %d\n", testHaltTime))
if err != nil {
if _, err := fmt.Fprintf(writer, "halt-time = %d\n", testHaltTime); err != nil {
t.Fatalf("Failed writing string to app.toml: %v", err)
}

Expand Down Expand Up @@ -308,8 +305,7 @@ func (v precedenceCommon) setAll(t *testing.T, setFlag *string, setEnvVar *strin
t.Fatalf("creating config.toml file failed: %v", err)
}

_, err = writer.WriteString(fmt.Sprintf("[rpc]\nladdr = \"%s\"\n", *setConfigFile))
if err != nil {
if _, err := fmt.Fprintf(writer, "[rpc]\nladdr = \"%s\"\n", *setConfigFile); err != nil {
t.Fatalf("Failed writing string to config.toml: %v", err)
}

Expand Down Expand Up @@ -407,63 +403,28 @@ func TestInterceptConfigsWithBadPermissions(t *testing.T) {
}

func TestWaitForQuitSignals(t *testing.T) {
t.Run("WithRestartChannelAndCanRestartAfterNotReached", func(t *testing.T) {
t.Run("WithRestartChannelAndCanRestart", func(t *testing.T) {
restartCh := make(chan struct{})
go func() {
time.Sleep(100 * time.Millisecond)
restartCh <- struct{}{}
}()

go func() {
time.Sleep(200 * time.Millisecond)
syscall.Kill(syscall.Getpid(), syscall.SIGTERM)
}()

errCode := server.WaitForQuitSignals(
&server.Context{Logger: log.NewNopLogger()},
restartCh,
time.Now().Add(500*time.Millisecond),
)
expectedCode := int(syscall.SIGTERM) + 128
if errCode.Code != expectedCode {
t.Errorf("Expected error code %d, got %d", expectedCode, errCode.Code)
}
})

t.Run("WithRestartChannelAndCanRestartAfterReached", func(t *testing.T) {
restartCh := make(chan struct{})
go func() {
time.Sleep(100 * time.Millisecond)
restartCh <- struct{}{}
}()

errCode := server.WaitForQuitSignals(
&server.Context{Logger: log.NewNopLogger()},
restartCh,
time.Now().Add(-100*time.Millisecond),
)
if errCode.Code != server.RestartErrorCode {
t.Errorf("Expected error code %d, got %d", server.RestartErrorCode, errCode.Code)
err := server.WaitForQuitSignals(t.Context(), restartCh)
if !errors.Is(err, server.ErrShouldRestart) {
t.Errorf("Expected ErrShouldRestart, got %v", err)
}
})

t.Run("WithSIGINT", func(t *testing.T) {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT)

go func() {
time.Sleep(100 * time.Millisecond)
syscall.Kill(syscall.Getpid(), syscall.SIGINT)
}()

errCode := server.WaitForQuitSignals(
&server.Context{Logger: log.NewNopLogger()},
make(chan struct{}),
time.Now(),
)
expectedCode := int(syscall.SIGINT) + 128
if errCode.Code != expectedCode {
t.Errorf("Expected error code %d, got %d", expectedCode, errCode.Code)
err := server.WaitForQuitSignals(t.Context(), make(chan struct{}))
if err != nil {
t.Fatal(err)
}
})

Expand All @@ -473,14 +434,9 @@ func TestWaitForQuitSignals(t *testing.T) {
syscall.Kill(syscall.Getpid(), syscall.SIGTERM)
}()

errCode := server.WaitForQuitSignals(
&server.Context{Logger: log.NewNopLogger()},
make(chan struct{}),
time.Now(),
)
expectedCode := int(syscall.SIGTERM) + 128
if errCode.Code != expectedCode {
t.Errorf("Expected error code %d, got %d", expectedCode, errCode.Code)
err := server.WaitForQuitSignals(t.Context(), make(chan struct{}))
if err != nil {
t.Fatal(err)
}
})
}
14 changes: 4 additions & 10 deletions sei-cosmos/testutil/network/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package network

import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"time"

Expand Down Expand Up @@ -56,7 +56,7 @@ func startInProcess(cfg Config, val *Validator) error {
val.GoCtx,
tmCfg,
logger,
make(chan struct{}),
func() {},
abciclient.NewLocalClient(logger, app),
defaultGensis,
[]trace.TracerProviderOption{},
Expand Down Expand Up @@ -211,15 +211,9 @@ func writeFile(name string, dir string, contents []byte) error {
writePath := filepath.Join(dir)
file := filepath.Join(writePath, name)

err := tmos.EnsureDir(writePath, 0755)
if err != nil {
return err
}

err = ioutil.WriteFile(file, contents, 0644)
if err != nil {
if err := tmos.EnsureDir(writePath, 0755); err != nil {
return err
}

return nil
return os.WriteFile(file, contents, 0644)
}
Loading
Loading