Run a separate in-memory snapshot to reduce the number of entries stored in raft memory storage #18825
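In brief: this PR splits snapshotting into two tiers. The expensive disk snapshot keeps its existing `SnapshotCount`-driven cadence (and is still forced after a cluster-version downgrade), while a new, cheap in-memory snapshot plus raft log compaction runs every `memorySnapshotCount` (100) applied entries, so `raft.MemoryStorage` holds far fewer entries between disk snapshots.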
@@ -109,6 +109,7 @@ const (
 	readyPercentThreshold = 0.9

 	DowngradeEnabledPath = "/downgrade/enabled"
+	memorySnapshotCount  = 100
 )

 var (
@@ -291,9 +292,10 @@ type EtcdServer struct {
 	clusterVersionChanged *notify.Notifier

 	*AccessController
-	// forceSnapshot can force snapshot be triggered after apply, independent of the snapshotCount.
+	// forceDiskSnapshot can force snapshot be triggered after apply, independent of the snapshotCount.
 	// Should only be set within apply code path. Used to force snapshot after cluster version downgrade.
-	forceSnapshot bool
+	// TODO: Replace with flush db in v3.7 assuming v3.6 bootstraps from db file.
+	forceDiskSnapshot bool
 	corruptionChecker CorruptionChecker
 }
@@ -741,10 +743,11 @@ func (s *EtcdServer) ReportSnapshot(id uint64, status raft.SnapshotStatus) {
 }

 type etcdProgress struct {
-	confState raftpb.ConfState
-	snapi     uint64
-	appliedt  uint64
-	appliedi  uint64
+	confState           raftpb.ConfState
+	diskSnapshotIndex   uint64
+	memorySnapshotIndex uint64
+	appliedt            uint64
+	appliedi            uint64
 }

 // raftReadyHandler contains a set of EtcdServer operations to be called by raftNode,
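`etcdProgress` now tracks each cadence separately: `diskSnapshotIndex` (previously `snapi`) is the index of the last snapshot persisted to disk, and `memorySnapshotIndex` is the index of the last in-memory snapshot; both start at the boot snapshot's index, as the next hunk shows.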
@@ -809,10 +812,11 @@ func (s *EtcdServer) run() {
 	s.r.start(rh)

 	ep := etcdProgress{
-		confState: sn.Metadata.ConfState,
-		snapi:     sn.Metadata.Index,
-		appliedt:  sn.Metadata.Term,
-		appliedi:  sn.Metadata.Index,
+		confState:           sn.Metadata.ConfState,
+		diskSnapshotIndex:   sn.Metadata.Index,
+		memorySnapshotIndex: sn.Metadata.Index,
+		appliedt:            sn.Metadata.Term,
+		appliedi:            sn.Metadata.Index,
 	}

 	defer func() {
@@ -998,15 +1002,15 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
 	lg := s.Logger()
 	lg.Info(
 		"applying snapshot",
-		zap.Uint64("current-snapshot-index", ep.snapi),
+		zap.Uint64("current-snapshot-index", ep.diskSnapshotIndex),
 		zap.Uint64("current-applied-index", ep.appliedi),
 		zap.Uint64("incoming-leader-snapshot-index", toApply.snapshot.Metadata.Index),
 		zap.Uint64("incoming-leader-snapshot-term", toApply.snapshot.Metadata.Term),
 	)
 	defer func() {
 		lg.Info(
 			"applied snapshot",
-			zap.Uint64("current-snapshot-index", ep.snapi),
+			zap.Uint64("current-snapshot-index", ep.diskSnapshotIndex),
 			zap.Uint64("current-applied-index", ep.appliedi),
 			zap.Uint64("incoming-leader-snapshot-index", toApply.snapshot.Metadata.Index),
 			zap.Uint64("incoming-leader-snapshot-term", toApply.snapshot.Metadata.Term),
@@ -1017,7 +1021,7 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
 	if toApply.snapshot.Metadata.Index <= ep.appliedi {
 		lg.Panic(
 			"unexpected leader snapshot from outdated index",
-			zap.Uint64("current-snapshot-index", ep.snapi),
+			zap.Uint64("current-snapshot-index", ep.diskSnapshotIndex),
 			zap.Uint64("current-applied-index", ep.appliedi),
 			zap.Uint64("incoming-leader-snapshot-index", toApply.snapshot.Metadata.Index),
 			zap.Uint64("incoming-leader-snapshot-term", toApply.snapshot.Metadata.Term),
@@ -1132,7 +1136,8 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {

 	ep.appliedt = toApply.snapshot.Metadata.Term
 	ep.appliedi = toApply.snapshot.Metadata.Index
-	ep.snapi = ep.appliedi
+	ep.diskSnapshotIndex = ep.appliedi
+	ep.memorySnapshotIndex = ep.appliedi
 	ep.confState = toApply.snapshot.Metadata.ConfState

 	// As backends and implementations like alarmsStore changed, we need
@@ -1188,31 +1193,37 @@ func (s *EtcdServer) applyEntries(ep *etcdProgress, apply *toApply) {
 }

 func (s *EtcdServer) ForceSnapshot() {
-	s.forceSnapshot = true
+	s.forceDiskSnapshot = true
 }

 func (s *EtcdServer) triggerSnapshot(ep *etcdProgress) {
-	if !s.shouldSnapshot(ep) {
+	if !s.shouldSnapshotToDisk(ep) {
+		if ep.appliedi > ep.memorySnapshotIndex+memorySnapshotCount {
+			s.snapshotToMemory(ep.appliedi, ep.confState)
+			s.compactRaftLog(ep.appliedi)
+			ep.memorySnapshotIndex = ep.appliedi
+		}
 		return
 	}
+	//TODO: Remove disk snapshot in v3.7
 	lg := s.Logger()
 	lg.Info(
 		"triggering snapshot",
 		zap.String("local-member-id", s.MemberID().String()),
 		zap.Uint64("local-member-applied-index", ep.appliedi),
-		zap.Uint64("local-member-snapshot-index", ep.snapi),
+		zap.Uint64("local-member-snapshot-index", ep.diskSnapshotIndex),
 		zap.Uint64("local-member-snapshot-count", s.Cfg.SnapshotCount),
-		zap.Bool("snapshot-forced", s.forceSnapshot),
+		zap.Bool("snapshot-forced", s.forceDiskSnapshot),
 	)
-	s.forceSnapshot = false
+	s.forceDiskSnapshot = false

-	s.snapshot(ep.appliedi, ep.confState)
+	s.snapshotToDisk(ep.appliedi, ep.confState)
 	s.compactRaftLog(ep.appliedi)
-	ep.snapi = ep.appliedi
+	ep.diskSnapshotIndex = ep.appliedi
 }

-func (s *EtcdServer) shouldSnapshot(ep *etcdProgress) bool {
-	return (s.forceSnapshot && ep.appliedi != ep.snapi) || (ep.appliedi-ep.snapi > s.Cfg.SnapshotCount)
+func (s *EtcdServer) shouldSnapshotToDisk(ep *etcdProgress) bool {
+	return (s.forceDiskSnapshot && ep.appliedi != ep.diskSnapshotIndex) || (ep.appliedi-ep.diskSnapshotIndex > s.Cfg.SnapshotCount)
 }

 func (s *EtcdServer) hasMultipleVotingMembers() bool {
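To make the cadence concrete, here is a small, self-contained sketch that replays the decision logic of `triggerSnapshot` above. The threshold values are assumptions for illustration (10,000 stands in for a typical `--snapshot-count` setting), the force-snapshot path is ignored, and none of this is etcd code:

	package main

	import "fmt"

	func main() {
		const (
			snapshotCount       = 10000 // stand-in for s.Cfg.SnapshotCount (assumed value)
			memorySnapshotCount = 100   // the constant introduced by this PR
		)
		var diskIndex, memIndex, diskSnaps, memSnaps uint64
		// Replay triggerSnapshot's decision for 100,000 applied entries.
		for appliedi := uint64(1); appliedi <= 100000; appliedi++ {
			switch {
			case appliedi-diskIndex > snapshotCount:
				// Disk path: snapshotToDisk + compactRaftLog.
				diskIndex = appliedi
				diskSnaps++
			case appliedi > memIndex+memorySnapshotCount:
				// Memory path: snapshotToMemory + compactRaftLog.
				memIndex = appliedi
				memSnaps++
			}
		}
		// Roughly 9 disk snapshots vs ~990 memory snapshots: compaction now
		// runs about 100x more often, keeping raft.MemoryStorage small.
		fmt.Printf("disk: %d, memory: %d\n", diskSnaps, memSnaps)
	}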
@@ -2132,7 +2143,7 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
 }

 // TODO: non-blocking snapshot
-func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) {
+func (s *EtcdServer) snapshotToDisk(snapi uint64, confState raftpb.ConfState) {
 	d := GetMembershipInfoInV2Format(s.Logger(), s.cluster)
 	// commit kv to write metadata (for example: consistent index) to disk.
 	//
@@ -2169,11 +2180,30 @@ func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) {
 	}

 	lg.Info(
-		"saved snapshot",
+		"saved snapshot to disk",
 		zap.Uint64("snapshot-index", snap.Metadata.Index),
 	)
 }

+func (s *EtcdServer) snapshotToMemory(snapi uint64, confState raftpb.ConfState) {
+	d := GetMembershipInfoInV2Format(s.Logger(), s.cluster)
+
+	lg := s.Logger()
+
+	// For backward compatibility, generate v2 snapshot from v3 state.
+	snap, err := s.r.raftStorage.CreateSnapshot(snapi, &confState, d)
+	if err != nil {
+		// the snapshot was done asynchronously with the progress of raft.
+		// raft might have already got a newer snapshot.
+		if errorspkg.Is(err, raft.ErrSnapOutOfDate) {
+			return
+		}
+		lg.Panic("failed to create snapshot", zap.Error(err))
+	}
+
+	verifyConsistentIndexIsLatest(lg, snap, s.consistIndex.ConsistentIndex())
+}
Comment on lines +2188 to +2205:

Reviewer: You do not persist the snapshot to disk. Is this intentional? You will still have the same issue as #18588?

Author: Yes, please read the description of the PR for context.
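For context on the `CreateSnapshot` call above: `raft.MemoryStorage` (from `go.etcd.io/raft/v3`) snapshots only its own in-memory state, and compaction is a separate `Compact` call, which is why `snapshotToMemory` persists nothing to disk. A minimal standalone sketch of that API, with arbitrary indexes:

	package main

	import (
		"fmt"

		"go.etcd.io/raft/v3"
		pb "go.etcd.io/raft/v3/raftpb"
	)

	func main() {
		ms := raft.NewMemoryStorage()

		// Append entries 1..10 to the in-memory raft log.
		ents := make([]pb.Entry, 0, 10)
		for i := uint64(1); i <= 10; i++ {
			ents = append(ents, pb.Entry{Index: i, Term: 1})
		}
		if err := ms.Append(ents); err != nil {
			panic(err)
		}

		// Take an in-memory snapshot at index 8; this records snapshot
		// metadata but does not drop any log entries by itself.
		if _, err := ms.CreateSnapshot(8, &pb.ConfState{Voters: []uint64{1}}, nil); err != nil {
			panic(err)
		}

		// Compaction is separate: drop entries up to index 5, keeping
		// 6..10 in memory for slow followers.
		if err := ms.Compact(5); err != nil {
			panic(err)
		}

		first, _ := ms.FirstIndex()
		last, _ := ms.LastIndex()
		fmt.Println(first, last) // 6 10
	}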
 func (s *EtcdServer) compactRaftLog(snapi uint64) {
 	lg := s.Logger()
@@ -2188,11 +2218,10 @@ func (s *EtcdServer) compactRaftLog(snapi uint64) {
 	}

 	// keep some in memory log entries for slow followers.
-	compacti := uint64(1)
-	if snapi > s.Cfg.SnapshotCatchUpEntries {
-		compacti = snapi - s.Cfg.SnapshotCatchUpEntries
+	if snapi <= s.Cfg.SnapshotCatchUpEntries {
+		return
 	}
Comment on lines -2191 to -2195:

Reviewer: This seems to be unrelated to this PR, can we do it in a separate small PR?

Author: Hmm, compactLog was causing some issues when called too frequently. Need to double check.
+	compacti := snapi - s.Cfg.SnapshotCatchUpEntries
 	err := s.r.raftStorage.Compact(compacti)
 	if err != nil {
 		// the compaction was done asynchronously with the progress of raft.
@@ -2202,10 +2231,6 @@ func (s *EtcdServer) compactRaftLog(snapi uint64) {
 		}
 		lg.Panic("failed to compact", zap.Error(err))
 	}
-	lg.Info(
-		"compacted Raft logs",
-		zap.Uint64("compact-index", compacti),
-	)
Comment on lines -2205 to -2208:

Reviewer: Why remove this log?

Author: It now happens too frequently and spams the logs.
 }

 // CutPeer drops messages to the specified peer.
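As a worked example of the new guard in `compactRaftLog`: with `snapi` = 12,000 and the default `SnapshotCatchUpEntries` of 5,000, the log is compacted up to index 7,000, leaving the newest 5,000 entries in memory for slow followers; with `snapi` ≤ 5,000, the function now returns early instead of calling `Compact(1)`, which is what the old placeholder value of 1 effectively did.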