Metadata garbage collection

Marks and sweeps unreferenced objects.
Add snapshot cleanup to metadata.
Add content garbage collection

Add dirty flags for snapshotters and content store which
are set on deletion and used during the next garbage collection.
Cleanup content store backend when content metadata is removed.

Signed-off-by: Derek McGowan <derek@mcgstyle.net>
This commit is contained in:
Derek McGowan
2017-10-02 17:44:35 -07:00
parent 7884707c2f
commit 17471d5592
8 changed files with 1781 additions and 30 deletions

128
gc/gc.go
View File

@@ -5,8 +5,25 @@
// under certain use cases.
package gc
import (
"context"
"sync"
)
// ResourceType represents the type of resource at a node.
type ResourceType uint8
// Node represents a resource which has a type and key,
// this node can be used to lookup other nodes.
//
// Node contains only comparable fields, which lets the mark functions in
// this package use it directly as a map key.
type Node struct {
// Type is the kind of resource this node refers to.
Type ResourceType
// Namespace is the namespace component of the node's identity.
Namespace string
// Key is the key component of the node's identity.
Key string
}
// Tricolor implements basic, single-thread tri-color GC. Given the roots, the
// complete set and a refs function, this returns the unreachable objects.
// complete set and a refs function, this function returns a map of all
// reachable objects.
//
// Correct usage requires that the caller not allow the arguments to change
// until the result is used to delete objects in the system.
@@ -15,11 +32,11 @@ package gc
//
// We can probably use this to inform a design for incremental GC by injecting
// callbacks to the set modification algorithms.
func Tricolor(roots []string, all []string, refs func(ref string) []string) []string {
func Tricolor(roots []Node, refs func(ref Node) ([]Node, error)) (map[Node]struct{}, error) {
var (
grays []string // maintain a gray "stack"
seen = map[string]struct{}{} // or not "white", basically "seen"
reachable = map[string]struct{}{} // or "block", in tri-color parlance
grays []Node // maintain a gray "stack"
seen = map[Node]struct{}{} // or not "white", basically "seen"
reachable = map[Node]struct{}{} // or "block", in tri-color parlance
)
grays = append(grays, roots...)
@@ -29,9 +46,13 @@ func Tricolor(roots []string, all []string, refs func(ref string) []string) []st
id := grays[len(grays)-1] // effectively "depth first" because first element
grays = grays[:len(grays)-1]
seen[id] = struct{}{} // post-mark this as not-white
rs, err := refs(id)
if err != nil {
return nil, err
}
// mark all the referenced objects as gray
for _, target := range refs(id) {
for _, target := range rs {
if _, ok := seen[target]; !ok {
grays = append(grays, target)
}
@@ -41,14 +62,99 @@ func Tricolor(roots []string, all []string, refs func(ref string) []string) []st
reachable[id] = struct{}{}
}
return reachable, nil
}
// ConcurrentMark implements simple, concurrent GC. All the roots are scanned
// and the complete set of references is formed by calling the refs function
// for each seen object. This function returns a map of all objects reachable
// from a root.
//
// The root channel is read until it is closed. For every node that has not
// been seen before, refs is invoked in its own goroutine; refs reports each
// reference of the node by calling the callback it is given. If a refs call
// returns an error, the first error recorded cancels the derived context and
// is returned together with a nil map. If the context is done once marking
// has finished, the context error is returned instead.
//
// Correct usage requires that the caller not allow the arguments to change
// until the result is used to delete objects in the system.
//
// It will allocate memory proportional to the size of the reachable set.
func ConcurrentMark(ctx context.Context, root <-chan Node, refs func(context.Context, Node, func(Node)) error) (map[Node]struct{}, error) {
// Derive a cancellable context so that a failing refs call can unblock
// every pending send on grays.
ctx, cancel := context.WithCancel(ctx)
defer cancel()
var (
// grays is unbuffered: each send blocks until the consumer goroutine
// below receives it or the context is done.
grays = make(chan Node)
seen = map[Node]struct{}{} // or not "white", basically "seen"
// wg counts nodes that have been offered to grays and are not yet
// fully processed.
wg sync.WaitGroup
// errOnce guards refErr so only one refs error is recorded.
errOnce sync.Once
refErr error
)
// Consumer goroutine: the only place where seen is read or written while
// marking is in progress.
go func() {
for gray := range grays {
if _, ok := seen[gray]; ok {
// Already marked; release the count taken by the sender.
wg.Done()
continue
}
seen[gray] = struct{}{} // post-mark this as non-white
// Resolve the references of this node in a separate goroutine.
go func(gray Node) {
defer wg.Done()
send := func(n Node) {
// Count the node before offering it so wg.Wait cannot return
// while the node is still in flight.
wg.Add(1)
select {
case grays <- n:
case <-ctx.Done():
// The node was never accepted; undo the Add above.
wg.Done()
}
}
if err := refs(ctx, gray, send); err != nil {
errOnce.Do(func() {
refErr = err
cancel()
})
}
}(gray)
}
}()
// Feed the roots into grays using the same accounting as send above.
for r := range root {
wg.Add(1)
select {
case grays <- r:
case <-ctx.Done():
wg.Done()
}
}
// Wait for outstanding grays to be processed
wg.Wait()
// Closing grays ends the consumer goroutine's range loop.
close(grays)
if refErr != nil {
return nil, refErr
}
if cErr := ctx.Err(); cErr != nil {
return nil, cErr
}
return seen, nil
}
// Sweep removes all nodes returned through the channel which are not in
// the reachable set by calling the provided remove function.
//
// Nodes are read from all until the channel is closed. The first error
// returned by remove stops the sweep and is returned to the caller;
// otherwise Sweep returns nil.
//
// NOTE(review): the body below is shown as rendered by the diff view, so
// the removed slice-based lines (whites) are interleaved with the new
// channel-based lines.
func Sweep(reachable map[Node]struct{}, all <-chan Node, remove func(Node) error) error {
// All black objects are now reachable, and all white objects are
// unreachable. Free those that are white!
var whites []string
for _, obj := range all {
if _, ok := reachable[obj]; !ok {
whites = append(whites, obj)
for node := range all {
if _, ok := reachable[node]; !ok {
if err := remove(node); err != nil {
return err
}
}
}
return whites
return nil
}

View File

@@ -1,30 +1,154 @@
package gc
import (
"context"
"reflect"
"testing"
)
// TestTricolorBasic checks the set marked by Tricolor for the roots A and C
// against the expected nodes A, B, C, D, F and H.
//
// NOTE(review): the body below is shown as rendered by the diff view, so
// removed and added lines are interleaved.
func TestTricolorBasic(t *testing.T) {
roots := []string{"A", "C"}
all := []string{"A", "B", "C", "D", "E", "F", "G"}
all := []string{"A", "B", "C", "D", "E", "F", "G", "H"}
refs := map[string][]string{
"A": {"B"},
"B": {"A"},
"C": {"D", "F", "B"},
"E": {"F", "G"},
"F": {"H"},
}
unreachable := Tricolor(roots, all, lookup(refs))
expected := []string{"E", "G"}
expected := toNodes([]string{"A", "B", "C", "D", "F", "H"})
if !reflect.DeepEqual(unreachable, expected) {
t.Fatalf("incorrect unreachable set: %v != %v", unreachable, expected)
reachable, err := Tricolor(toNodes(roots), lookup(refs))
if err != nil {
t.Fatal(err)
}
// NOTE(review): despite its name, sweeped collects the nodes that ARE in
// the reachable map, and the failure message below still says
// "unreachable".
var sweeped []Node
for _, a := range toNodes(all) {
if _, ok := reachable[a]; ok {
sweeped = append(sweeped, a)
}
}
if !reflect.DeepEqual(sweeped, expected) {
t.Fatalf("incorrect unreachable set: %v != %v", sweeped, expected)
}
}
func lookup(refs map[string][]string) func(id string) []string {
return func(ref string) []string {
return refs[ref]
// TestConcurrentBasic verifies that ConcurrentMark marks exactly the nodes
// reachable from the roots. Roots A and C reach A, B, C, D, F and H; E, G
// and I are only reachable through the non-root E and must stay unmarked.
func TestConcurrentBasic(t *testing.T) {
	roots := []string{"A", "C"}
	all := []string{"A", "B", "C", "D", "E", "F", "G", "H", "I"}
	refs := map[string][]string{
		"A": {"B"},
		"B": {"A"},
		"C": {"D", "F", "B"},
		"E": {"F", "G"},
		"F": {"H"},
		"G": {"I"},
	}
	expected := toNodes([]string{"A", "B", "C", "D", "F", "H"})

	ctx := context.Background()
	rootC := make(chan Node)
	go func() {
		// ConcurrentMark ranges over the root channel, so it must be closed
		// once every root has been written.
		defer close(rootC)
		writeNodes(ctx, rootC, toNodes(roots))
	}()

	reachable, err := ConcurrentMark(ctx, rootC, lookupc(refs))
	if err != nil {
		t.Fatal(err)
	}

	// Walk all in its listed order so the collected slice has a
	// deterministic order that can be compared with expected.
	var marked []Node
	for _, a := range toNodes(all) {
		if _, ok := reachable[a]; ok {
			marked = append(marked, a)
		}
	}
	if !reflect.DeepEqual(marked, expected) {
		t.Fatalf("incorrect reachable set: %v != %v", marked, expected)
	}
}
// writeNodes sends every element of nodes, in order, on nc. It returns early
// if ctx is done before a send can complete. The channel is not closed here;
// that is left to the caller.
func writeNodes(ctx context.Context, nc chan<- Node, nodes []Node) {
	done := ctx.Done()
	for i := 0; i < len(nodes); i++ {
		select {
		case <-done:
			return
		case nc <- nodes[i]:
		}
	}
}
// lookup adapts a key-to-keys reference table into the refs function shape
// accepted by Tricolor. Only the Key field of the queried node is consulted,
// and the returned error is always nil.
func lookup(refs map[string][]string) func(id Node) ([]Node, error) {
	resolve := func(n Node) ([]Node, error) {
		targets := toNodes(refs[n.Key])
		return targets, nil
	}
	return resolve
}
// lookupc adapts a key-to-keys reference table into the callback-based refs
// function shape accepted by ConcurrentMark. Each reference of the queried
// node's Key is passed to fn in table order; the context is ignored and the
// returned error is always nil.
func lookupc(refs map[string][]string) func(context.Context, Node, func(Node)) error {
	return func(_ context.Context, ref Node, fn func(Node)) error {
		targets := toNodes(refs[ref.Key])
		for i := range targets {
			fn(targets[i])
		}
		return nil
	}
}
// toNodes converts a list of keys into nodes that carry only a Key, keeping
// the input order. The result is always a non-nil slice with one node per
// input key.
func toNodes(s []string) []Node {
	nodes := make([]Node, 0, len(s))
	for _, key := range s {
		nodes = append(nodes, Node{Key: key})
	}
	return nodes
}
// newScanner returns a stringScanner over refs, positioned before the first
// element so that the first call to Next moves onto index 0. The scanner
// keeps a reference to refs rather than copying it.
func newScanner(refs []string) *stringScanner {
	ss := new(stringScanner)
	ss.s = refs
	ss.i = -1
	return ss
}
// stringScanner is a cursor over a slice of keys used by the tests.
type stringScanner struct {
	i int      // index of the current element; newScanner starts it at -1
	s []string // keys being scanned; Cleanup blanks an entry to ""
}

// Next advances the cursor by one and reports whether it now points at an
// element of the slice.
func (ss *stringScanner) Next() bool {
	ss.i++
	return len(ss.s) > ss.i
}

// Node returns the element under the cursor as a Node with only Key set.
func (ss *stringScanner) Node() Node {
	key := ss.s[ss.i]
	return Node{Key: key}
}

// Cleanup blanks the element under the cursor so that All no longer reports
// it. It always returns nil.
func (ss *stringScanner) Cleanup() error {
	ss.s[ss.i] = ""
	return nil
}

// Err always returns nil.
func (ss *stringScanner) Err() error {
	return nil
}

// All returns, in order, a Node for every element that has not been blanked.
func (ss *stringScanner) All() []Node {
	remaining := make([]Node, 0, len(ss.s))
	for idx := range ss.s {
		key := ss.s[idx]
		if key == "" {
			continue
		}
		remaining = append(remaining, Node{Key: key})
	}
	return remaining
}