diff options
author | hwajeong.son <hwajeong.son@samsung.com> | 2018-08-20 13:30:55 +0900 |
---|---|---|
committer | hwajeong.son <hwajeong.son@samsung.com> | 2018-08-20 13:30:55 +0900 |
commit | 0b51891e5977b87f986f4db2cbbe09295cfdbedc (patch) | |
tree | c35ac732cb1dffccee5a32131431f753481077c2 /vendor/github.com/coreos | |
parent | eea0e89806b2cf59af3dccabc67014bd19b91b82 (diff) | |
download | docker-engine-master.tar.gz docker-engine-master.tar.bz2 docker-engine-master.zip |
Tizen_4.0 baseHEADsubmit/trunk/20180820.043221master
Signed-off-by: hwajeong.son <hwajeong.son@samsung.com>
Diffstat (limited to 'vendor/github.com/coreos')
95 files changed, 14247 insertions, 0 deletions
diff --git a/vendor/github.com/coreos/etcd/LICENSE b/vendor/github.com/coreos/etcd/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/vendor/github.com/coreos/etcd/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/coreos/etcd/NOTICE b/vendor/github.com/coreos/etcd/NOTICE new file mode 100644 index 0000000..b39ddfa --- /dev/null +++ b/vendor/github.com/coreos/etcd/NOTICE @@ -0,0 +1,5 @@ +CoreOS Project +Copyright 2014 CoreOS, Inc + +This product includes software developed at CoreOS, Inc. +(http://www.coreos.com/). diff --git a/vendor/github.com/coreos/etcd/README.md b/vendor/github.com/coreos/etcd/README.md new file mode 100644 index 0000000..e7d4e23 --- /dev/null +++ b/vendor/github.com/coreos/etcd/README.md @@ -0,0 +1,139 @@ +# etcd + +[![Go Report Card](https://goreportcard.com/badge/github.com/coreos/etcd)](https://goreportcard.com/report/github.com/coreos/etcd) +[![Build Status](https://travis-ci.org/coreos/etcd.svg?branch=master)](https://travis-ci.org/coreos/etcd) +[![Build Status](https://semaphoreci.com/api/v1/coreos/etcd/branches/master/shields_badge.svg)](https://semaphoreci.com/coreos/etcd) +[![Docker Repository on Quay.io](https://quay.io/repository/coreos/etcd-git/status "Docker Repository on Quay.io")](https://quay.io/repository/coreos/etcd-git) + +**Note**: The `master` branch may be in an *unstable or even broken state* during development. Please use [releases][github-release] instead of the `master` branch in order to get stable binaries. + +*the etcd v2 [documentation](Documentation/v2/README.md) has moved* + +![etcd Logo](logos/etcd-horizontal-color.png) + +etcd is a distributed, consistent key-value store for shared configuration and service discovery, with a focus on being: + +* *Simple*: well-defined, user-facing API (gRPC) +* *Secure*: automatic TLS with optional client cert authentication +* *Fast*: benchmarked 10,000 writes/sec +* *Reliable*: properly distributed using Raft + +etcd is written in Go and uses the [Raft][raft] consensus algorithm to manage a highly-available replicated log. + +etcd is used [in production by many companies](./Documentation/production-users.md), and the development team stands behind it in critical deployment scenarios, where etcd is frequently teamed with applications such as [Kubernetes][k8s], [fleet][fleet], [locksmith][locksmith], [vulcand][vulcand], [Doorman][doorman], and many others. Reliability is further ensured by rigorous [testing][etcd-tests]. + +See [etcdctl][etcdctl] for a simple command line client. + +[raft]: https://raft.github.io/ +[k8s]: http://kubernetes.io/ +[doorman]: https://github.com/youtube/doorman +[fleet]: https://github.com/coreos/fleet +[locksmith]: https://github.com/coreos/locksmith +[vulcand]: https://github.com/vulcand/vulcand +[etcdctl]: https://github.com/coreos/etcd/tree/master/etcdctl +[etcd-tests]: http://dash.etcd.io + +## Getting started + +### Getting etcd + +The easiest way to get etcd is to use one of the pre-built release binaries which are available for OSX, Linux, Windows, AppC (ACI), and Docker. Instructions for using these binaries are on the [GitHub releases page][github-release]. + +For those wanting to try the very latest version, you can [build the latest version of etcd][dl-build] from the `master` branch. +You will first need [*Go*](https://golang.org/) installed on your machine (version 1.6+ is required). +All development occurs on `master`, including new features and bug fixes. +Bug fixes are first targeted at `master` and subsequently ported to release branches, as described in the [branch management][branch-management] guide. + +[github-release]: https://github.com/coreos/etcd/releases/ +[branch-management]: ./Documentation/branch_management.md +[dl-build]: ./Documentation/dl_build.md#build-the-latest-version + +### Running etcd + +First start a single-member cluster of etcd: + +```sh +./bin/etcd +``` + +This will bring up etcd listening on port 2379 for client communication and on port 2380 for server-to-server communication. + +Next, let's set a single key, and then retrieve it: + +``` +ETCDCTL_API=3 etcdctl put mykey "this is awesome" +ETCDCTL_API=3 etcdctl get mykey +``` + +That's it! etcd is now running and serving client requests. For more + +- [Animated quick demo][demo-gif] +- [Interactive etcd playground][etcd-play] + +[demo-gif]: ./Documentation/demo.md +[etcd-play]: http://play.etcd.io/ + +### etcd TCP ports + +The [official etcd ports][iana-ports] are 2379 for client requests, and 2380 for peer communication. + +[iana-ports]: https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml?search=etcd + +### Running a local etcd cluster + +First install [goreman](https://github.com/mattn/goreman), which manages Procfile-based applications. + +Our [Procfile script](./Procfile) will set up a local example cluster. Start it with: + +```sh +goreman start +``` + +This will bring up 3 etcd members `infra1`, `infra2` and `infra3` and etcd proxy `proxy`, which runs locally and composes a cluster. + +Every cluster member and proxy accepts key value reads and key value writes. + +### Running etcd on Kubernetes + +If you want to run etcd cluster on Kubernetes, try [etcd operator](https://github.com/coreos/etcd-operator). + +### Next steps + +Now it's time to dig into the full etcd API and other guides. + +- Read the full [documentation][fulldoc]. +- Explore the full gRPC [API][api]. +- Set up a [multi-machine cluster][clustering]. +- Learn the [config format, env variables and flags][configuration]. +- Find [language bindings and tools][libraries-and-tools]. +- Use TLS to [secure an etcd cluster][security]. +- [Tune etcd][tuning]. + +[fulldoc]: ./Documentation/docs.md +[api]: ./Documentation/dev-guide/api_reference_v3.md +[clustering]: ./Documentation/op-guide/clustering.md +[configuration]: ./Documentation/op-guide/configuration.md +[libraries-and-tools]: ./Documentation/libraries-and-tools.md +[security]: ./Documentation/op-guide/security.md +[tuning]: ./Documentation/tuning.md + +## Contact + +- Mailing list: [etcd-dev](https://groups.google.com/forum/?hl=en#!forum/etcd-dev) +- IRC: #[etcd](irc://irc.freenode.org:6667/#etcd) on freenode.org +- Planning/Roadmap: [milestones](https://github.com/coreos/etcd/milestones), [roadmap](./ROADMAP.md) +- Bugs: [issues](https://github.com/coreos/etcd/issues) + +## Contributing + +See [CONTRIBUTING](CONTRIBUTING.md) for details on submitting patches and the contribution workflow. + +## Reporting bugs + +See [reporting bugs](Documentation/reporting_bugs.md) for details about reporting any issue you may encounter. + +### License + +etcd is under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details. + + diff --git a/vendor/github.com/coreos/etcd/pkg/README.md b/vendor/github.com/coreos/etcd/pkg/README.md new file mode 100644 index 0000000..d7de4d3 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/README.md @@ -0,0 +1,2 @@ +pkg/ is a collection of utility packages used by etcd without being specific to etcd itself. A package belongs here +only if it could possibly be moved out into its own repository in the future. diff --git a/vendor/github.com/coreos/etcd/pkg/crc/crc.go b/vendor/github.com/coreos/etcd/pkg/crc/crc.go new file mode 100644 index 0000000..4b998a4 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/crc/crc.go @@ -0,0 +1,43 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package crc provides utility function for cyclic redundancy check +// algorithms. +package crc + +import ( + "hash" + "hash/crc32" +) + +// The size of a CRC-32 checksum in bytes. +const Size = 4 + +type digest struct { + crc uint32 + tab *crc32.Table +} + +// New creates a new hash.Hash32 computing the CRC-32 checksum +// using the polynomial represented by the Table. +// Modified by xiangli to take a prevcrc. +func New(prev uint32, tab *crc32.Table) hash.Hash32 { return &digest{prev, tab} } + +func (d *digest) Size() int { return Size } + +func (d *digest) BlockSize() int { return 1 } + +func (d *digest) Reset() { d.crc = 0 } + +func (d *digest) Write(p []byte) (n int, err error) { + d.crc = crc32.Update(d.crc, d.tab, p) + return len(p), nil +} + +func (d *digest) Sum32() uint32 { return d.crc } + +func (d *digest) Sum(in []byte) []byte { + s := d.Sum32() + return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/dir_unix.go b/vendor/github.com/coreos/etcd/pkg/fileutil/dir_unix.go new file mode 100644 index 0000000..58a77df --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/dir_unix.go @@ -0,0 +1,22 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !windows + +package fileutil + +import "os" + +// OpenDir opens a directory for syncing. +func OpenDir(path string) (*os.File, error) { return os.Open(path) } diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/dir_windows.go b/vendor/github.com/coreos/etcd/pkg/fileutil/dir_windows.go new file mode 100644 index 0000000..c123395 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/dir_windows.go @@ -0,0 +1,46 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build windows + +package fileutil + +import ( + "os" + "syscall" +) + +// OpenDir opens a directory in windows with write access for syncing. +func OpenDir(path string) (*os.File, error) { + fd, err := openDir(path) + if err != nil { + return nil, err + } + return os.NewFile(uintptr(fd), path), nil +} + +func openDir(path string) (fd syscall.Handle, err error) { + if len(path) == 0 { + return syscall.InvalidHandle, syscall.ERROR_FILE_NOT_FOUND + } + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return syscall.InvalidHandle, err + } + access := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE) + sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE) + createmode := uint32(syscall.OPEN_EXISTING) + fl := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS) + return syscall.CreateFile(pathp, access, sharemode, nil, createmode, fl, 0) +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/fileutil.go b/vendor/github.com/coreos/etcd/pkg/fileutil/fileutil.go new file mode 100644 index 0000000..9585ed5 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/fileutil.go @@ -0,0 +1,121 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fileutil implements utility functions related to files and paths. +package fileutil + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "sort" + + "github.com/coreos/pkg/capnslog" +) + +const ( + // PrivateFileMode grants owner to read/write a file. + PrivateFileMode = 0600 + // PrivateDirMode grants owner to make/remove files inside the directory. + PrivateDirMode = 0700 +) + +var ( + plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "pkg/fileutil") +) + +// IsDirWriteable checks if dir is writable by writing and removing a file +// to dir. It returns nil if dir is writable. +func IsDirWriteable(dir string) error { + f := path.Join(dir, ".touch") + if err := ioutil.WriteFile(f, []byte(""), PrivateFileMode); err != nil { + return err + } + return os.Remove(f) +} + +// ReadDir returns the filenames in the given directory in sorted order. +func ReadDir(dirpath string) ([]string, error) { + dir, err := os.Open(dirpath) + if err != nil { + return nil, err + } + defer dir.Close() + names, err := dir.Readdirnames(-1) + if err != nil { + return nil, err + } + sort.Strings(names) + return names, nil +} + +// TouchDirAll is similar to os.MkdirAll. It creates directories with 0700 permission if any directory +// does not exists. TouchDirAll also ensures the given directory is writable. +func TouchDirAll(dir string) error { + // If path is already a directory, MkdirAll does nothing + // and returns nil. + err := os.MkdirAll(dir, PrivateDirMode) + if err != nil { + // if mkdirAll("a/text") and "text" is not + // a directory, this will return syscall.ENOTDIR + return err + } + return IsDirWriteable(dir) +} + +// CreateDirAll is similar to TouchDirAll but returns error +// if the deepest directory was not empty. +func CreateDirAll(dir string) error { + err := TouchDirAll(dir) + if err == nil { + var ns []string + ns, err = ReadDir(dir) + if err != nil { + return err + } + if len(ns) != 0 { + err = fmt.Errorf("expected %q to be empty, got %q", dir, ns) + } + } + return err +} + +func Exist(name string) bool { + _, err := os.Stat(name) + return err == nil +} + +// ZeroToEnd zeros a file starting from SEEK_CUR to its SEEK_END. May temporarily +// shorten the length of the file. +func ZeroToEnd(f *os.File) error { + // TODO: support FALLOC_FL_ZERO_RANGE + off, err := f.Seek(0, os.SEEK_CUR) + if err != nil { + return err + } + lenf, lerr := f.Seek(0, os.SEEK_END) + if lerr != nil { + return lerr + } + if err = f.Truncate(off); err != nil { + return err + } + // make sure blocks remain allocated + if err = Preallocate(f, lenf, true); err != nil { + return err + } + _, err = f.Seek(off, os.SEEK_SET) + return err +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/lock.go b/vendor/github.com/coreos/etcd/pkg/fileutil/lock.go new file mode 100644 index 0000000..338627f --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/lock.go @@ -0,0 +1,26 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fileutil + +import ( + "errors" + "os" +) + +var ( + ErrLocked = errors.New("fileutil: file already locked") +) + +type LockedFile struct{ *os.File } diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/lock_flock.go b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_flock.go new file mode 100644 index 0000000..542550b --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_flock.go @@ -0,0 +1,49 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !windows,!plan9,!solaris + +package fileutil + +import ( + "os" + "syscall" +) + +func flockTryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + f, err := os.OpenFile(path, flag, perm) + if err != nil { + return nil, err + } + if err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { + f.Close() + if err == syscall.EWOULDBLOCK { + err = ErrLocked + } + return nil, err + } + return &LockedFile{f}, nil +} + +func flockLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + f, err := os.OpenFile(path, flag, perm) + if err != nil { + return nil, err + } + if err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil { + f.Close() + return nil, err + } + return &LockedFile{f}, err +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/lock_linux.go b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_linux.go new file mode 100644 index 0000000..dec25a1 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_linux.go @@ -0,0 +1,96 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package fileutil + +import ( + "os" + "syscall" +) + +// This used to call syscall.Flock() but that call fails with EBADF on NFS. +// An alternative is lockf() which works on NFS but that call lets a process lock +// the same file twice. Instead, use Linux's non-standard open file descriptor +// locks which will block if the process already holds the file lock. +// +// constants from /usr/include/bits/fcntl-linux.h +const ( + F_OFD_GETLK = 37 + F_OFD_SETLK = 37 + F_OFD_SETLKW = 38 +) + +var ( + wrlck = syscall.Flock_t{ + Type: syscall.F_WRLCK, + Whence: int16(os.SEEK_SET), + Start: 0, + Len: 0, + } + + linuxTryLockFile = flockTryLockFile + linuxLockFile = flockLockFile +) + +func init() { + // use open file descriptor locks if the system supports it + getlk := syscall.Flock_t{Type: syscall.F_RDLCK} + if err := syscall.FcntlFlock(0, F_OFD_GETLK, &getlk); err == nil { + linuxTryLockFile = ofdTryLockFile + linuxLockFile = ofdLockFile + } +} + +func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + return linuxTryLockFile(path, flag, perm) +} + +func ofdTryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + f, err := os.OpenFile(path, flag, perm) + if err != nil { + return nil, err + } + + flock := wrlck + if err = syscall.FcntlFlock(f.Fd(), F_OFD_SETLK, &flock); err != nil { + f.Close() + if err == syscall.EWOULDBLOCK { + err = ErrLocked + } + return nil, err + } + return &LockedFile{f}, nil +} + +func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + return linuxLockFile(path, flag, perm) +} + +func ofdLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + f, err := os.OpenFile(path, flag, perm) + if err != nil { + return nil, err + } + + flock := wrlck + err = syscall.FcntlFlock(f.Fd(), F_OFD_SETLKW, &flock) + + if err != nil { + f.Close() + return nil, err + } + return &LockedFile{f}, err +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/lock_plan9.go b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_plan9.go new file mode 100644 index 0000000..fee6a7c --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_plan9.go @@ -0,0 +1,45 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fileutil + +import ( + "os" + "syscall" + "time" +) + +func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + if err := os.Chmod(path, syscall.DMEXCL|PrivateFileMode); err != nil { + return nil, err + } + f, err := os.Open(path, flag, perm) + if err != nil { + return nil, ErrLocked + } + return &LockedFile{f}, nil +} + +func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + if err := os.Chmod(path, syscall.DMEXCL|PrivateFileMode); err != nil { + return nil, err + } + for { + f, err := os.OpenFile(path, flag, perm) + if err == nil { + return &LockedFile{f}, nil + } + time.Sleep(10 * time.Millisecond) + } +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/lock_solaris.go b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_solaris.go new file mode 100644 index 0000000..352ca55 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_solaris.go @@ -0,0 +1,62 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build solaris + +package fileutil + +import ( + "os" + "syscall" +) + +func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + var lock syscall.Flock_t + lock.Start = 0 + lock.Len = 0 + lock.Pid = 0 + lock.Type = syscall.F_WRLCK + lock.Whence = 0 + lock.Pid = 0 + f, err := os.OpenFile(path, flag, perm) + if err != nil { + return nil, err + } + if err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock); err != nil { + f.Close() + if err == syscall.EAGAIN { + err = ErrLocked + } + return nil, err + } + return &LockedFile{f}, nil +} + +func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + var lock syscall.Flock_t + lock.Start = 0 + lock.Len = 0 + lock.Pid = 0 + lock.Type = syscall.F_WRLCK + lock.Whence = 0 + f, err := os.OpenFile(path, flag, perm) + if err != nil { + return nil, err + } + if err = syscall.FcntlFlock(f.Fd(), syscall.F_SETLKW, &lock); err != nil { + f.Close() + return nil, err + } + return &LockedFile{f}, nil +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/lock_unix.go b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_unix.go new file mode 100644 index 0000000..ed01164 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_unix.go @@ -0,0 +1,29 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !windows,!plan9,!solaris,!linux + +package fileutil + +import ( + "os" +) + +func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + return flockTryLockFile(path, flag, perm) +} + +func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + return flockLockFile(path, flag, perm) +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/lock_windows.go b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_windows.go new file mode 100644 index 0000000..8698f4a --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/lock_windows.go @@ -0,0 +1,125 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build windows + +package fileutil + +import ( + "errors" + "fmt" + "os" + "syscall" + "unsafe" +) + +var ( + modkernel32 = syscall.NewLazyDLL("kernel32.dll") + procLockFileEx = modkernel32.NewProc("LockFileEx") + + errLocked = errors.New("The process cannot access the file because another process has locked a portion of the file.") +) + +const ( + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx + LOCKFILE_EXCLUSIVE_LOCK = 2 + LOCKFILE_FAIL_IMMEDIATELY = 1 + + // see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx + errLockViolation syscall.Errno = 0x21 +) + +func TryLockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + f, err := open(path, flag, perm) + if err != nil { + return nil, err + } + if err := lockFile(syscall.Handle(f.Fd()), LOCKFILE_FAIL_IMMEDIATELY); err != nil { + f.Close() + return nil, err + } + return &LockedFile{f}, nil +} + +func LockFile(path string, flag int, perm os.FileMode) (*LockedFile, error) { + f, err := open(path, flag, perm) + if err != nil { + return nil, err + } + if err := lockFile(syscall.Handle(f.Fd()), 0); err != nil { + f.Close() + return nil, err + } + return &LockedFile{f}, nil +} + +func open(path string, flag int, perm os.FileMode) (*os.File, error) { + if path == "" { + return nil, fmt.Errorf("cannot open empty filename") + } + var access uint32 + switch flag { + case syscall.O_RDONLY: + access = syscall.GENERIC_READ + case syscall.O_WRONLY: + access = syscall.GENERIC_WRITE + case syscall.O_RDWR: + access = syscall.GENERIC_READ | syscall.GENERIC_WRITE + case syscall.O_WRONLY | syscall.O_CREAT: + access = syscall.GENERIC_ALL + default: + panic(fmt.Errorf("flag %v is not supported", flag)) + } + fd, err := syscall.CreateFile(&(syscall.StringToUTF16(path)[0]), + access, + syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE, + nil, + syscall.OPEN_ALWAYS, + syscall.FILE_ATTRIBUTE_NORMAL, + 0) + if err != nil { + return nil, err + } + return os.NewFile(uintptr(fd), path), nil +} + +func lockFile(fd syscall.Handle, flags uint32) error { + var flag uint32 = LOCKFILE_EXCLUSIVE_LOCK + flag |= flags + if fd == syscall.InvalidHandle { + return nil + } + err := lockFileEx(fd, flag, 1, 0, &syscall.Overlapped{}) + if err == nil { + return nil + } else if err.Error() == errLocked.Error() { + return ErrLocked + } else if err != errLockViolation { + return err + } + return nil +} + +func lockFileEx(h syscall.Handle, flags, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { + var reserved uint32 = 0 + r1, _, e1 := syscall.Syscall6(procLockFileEx.Addr(), 6, uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol))) + if r1 == 0 { + if e1 != 0 { + err = error(e1) + } else { + err = syscall.EINVAL + } + } + return +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate.go b/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate.go new file mode 100644 index 0000000..bb7f028 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate.go @@ -0,0 +1,47 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fileutil + +import "os" + +// Preallocate tries to allocate the space for given +// file. This operation is only supported on linux by a +// few filesystems (btrfs, ext4, etc.). +// If the operation is unsupported, no error will be returned. +// Otherwise, the error encountered will be returned. +func Preallocate(f *os.File, sizeInBytes int64, extendFile bool) error { + if extendFile { + return preallocExtend(f, sizeInBytes) + } + return preallocFixed(f, sizeInBytes) +} + +func preallocExtendTrunc(f *os.File, sizeInBytes int64) error { + curOff, err := f.Seek(0, os.SEEK_CUR) + if err != nil { + return err + } + size, err := f.Seek(sizeInBytes, os.SEEK_END) + if err != nil { + return err + } + if _, err = f.Seek(curOff, os.SEEK_SET); err != nil { + return err + } + if sizeInBytes > size { + return nil + } + return f.Truncate(sizeInBytes) +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_darwin.go b/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_darwin.go new file mode 100644 index 0000000..1ed09c5 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_darwin.go @@ -0,0 +1,43 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build darwin + +package fileutil + +import ( + "os" + "syscall" + "unsafe" +) + +func preallocExtend(f *os.File, sizeInBytes int64) error { + if err := preallocFixed(f, sizeInBytes); err != nil { + return err + } + return preallocExtendTrunc(f, sizeInBytes) +} + +func preallocFixed(f *os.File, sizeInBytes int64) error { + fstore := &syscall.Fstore_t{ + Flags: syscall.F_ALLOCATEALL, + Posmode: syscall.F_PEOFPOSMODE, + Length: sizeInBytes} + p := unsafe.Pointer(fstore) + _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, f.Fd(), uintptr(syscall.F_PREALLOCATE), uintptr(p)) + if errno == 0 || errno == syscall.ENOTSUP { + return nil + } + return errno +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_unix.go b/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_unix.go new file mode 100644 index 0000000..50bd84f --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_unix.go @@ -0,0 +1,49 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package fileutil + +import ( + "os" + "syscall" +) + +func preallocExtend(f *os.File, sizeInBytes int64) error { + // use mode = 0 to change size + err := syscall.Fallocate(int(f.Fd()), 0, 0, sizeInBytes) + if err != nil { + errno, ok := err.(syscall.Errno) + // not supported; fallback + // fallocate EINTRs frequently in some environments; fallback + if ok && (errno == syscall.ENOTSUP || errno == syscall.EINTR) { + return preallocExtendTrunc(f, sizeInBytes) + } + } + return err +} + +func preallocFixed(f *os.File, sizeInBytes int64) error { + // use mode = 1 to keep size; see FALLOC_FL_KEEP_SIZE + err := syscall.Fallocate(int(f.Fd()), 1, 0, sizeInBytes) + if err != nil { + errno, ok := err.(syscall.Errno) + // treat not supported as nil error + if ok && errno == syscall.ENOTSUP { + return nil + } + } + return err +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_unsupported.go b/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_unsupported.go new file mode 100644 index 0000000..162fbc5 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/preallocate_unsupported.go @@ -0,0 +1,25 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !linux,!darwin + +package fileutil + +import "os" + +func preallocExtend(f *os.File, sizeInBytes int64) error { + return preallocExtendTrunc(f, sizeInBytes) +} + +func preallocFixed(f *os.File, sizeInBytes int64) error { return nil } diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/purge.go b/vendor/github.com/coreos/etcd/pkg/fileutil/purge.go new file mode 100644 index 0000000..53bda0c --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/purge.go @@ -0,0 +1,78 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fileutil + +import ( + "os" + "path" + "sort" + "strings" + "time" +) + +func PurgeFile(dirname string, suffix string, max uint, interval time.Duration, stop <-chan struct{}) <-chan error { + return purgeFile(dirname, suffix, max, interval, stop, nil) +} + +// purgeFile is the internal implementation for PurgeFile which can post purged files to purgec if non-nil. +func purgeFile(dirname string, suffix string, max uint, interval time.Duration, stop <-chan struct{}, purgec chan<- string) <-chan error { + errC := make(chan error, 1) + go func() { + for { + fnames, err := ReadDir(dirname) + if err != nil { + errC <- err + return + } + newfnames := make([]string, 0) + for _, fname := range fnames { + if strings.HasSuffix(fname, suffix) { + newfnames = append(newfnames, fname) + } + } + sort.Strings(newfnames) + fnames = newfnames + for len(newfnames) > int(max) { + f := path.Join(dirname, newfnames[0]) + l, err := TryLockFile(f, os.O_WRONLY, PrivateFileMode) + if err != nil { + break + } + if err = os.Remove(f); err != nil { + errC <- err + return + } + if err = l.Close(); err != nil { + plog.Errorf("error unlocking %s when purging file (%v)", l.Name(), err) + errC <- err + return + } + plog.Infof("purged file %s successfully", f) + newfnames = newfnames[1:] + } + if purgec != nil { + for i := 0; i < len(fnames)-len(newfnames); i++ { + purgec <- fnames[i] + } + } + select { + case <-time.After(interval): + case <-stop: + return + } + } + }() + return errC +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/sync.go b/vendor/github.com/coreos/etcd/pkg/fileutil/sync.go new file mode 100644 index 0000000..54dd41f --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/sync.go @@ -0,0 +1,29 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !linux,!darwin + +package fileutil + +import "os" + +// Fsync is a wrapper around file.Sync(). Special handling is needed on darwin platform. +func Fsync(f *os.File) error { + return f.Sync() +} + +// Fdatasync is a wrapper around file.Sync(). Special handling is needed on linux platform. +func Fdatasync(f *os.File) error { + return f.Sync() +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/sync_darwin.go b/vendor/github.com/coreos/etcd/pkg/fileutil/sync_darwin.go new file mode 100644 index 0000000..c2f39bf --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/sync_darwin.go @@ -0,0 +1,40 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build darwin + +package fileutil + +import ( + "os" + "syscall" +) + +// Fsync on HFS/OSX flushes the data on to the physical drive but the drive +// may not write it to the persistent media for quite sometime and it may be +// written in out-of-order sequence. Using F_FULLFSYNC ensures that the +// physical drive's buffer will also get flushed to the media. +func Fsync(f *os.File) error { + _, _, errno := syscall.Syscall(syscall.SYS_FCNTL, f.Fd(), uintptr(syscall.F_FULLFSYNC), uintptr(0)) + if errno == 0 { + return nil + } + return errno +} + +// Fdatasync on darwin platform invokes fcntl(F_FULLFSYNC) for actual persistence +// on physical drive media. +func Fdatasync(f *os.File) error { + return Fsync(f) +} diff --git a/vendor/github.com/coreos/etcd/pkg/fileutil/sync_linux.go b/vendor/github.com/coreos/etcd/pkg/fileutil/sync_linux.go new file mode 100644 index 0000000..1bbced9 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/fileutil/sync_linux.go @@ -0,0 +1,34 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package fileutil + +import ( + "os" + "syscall" +) + +// Fsync is a wrapper around file.Sync(). Special handling is needed on darwin platform. +func Fsync(f *os.File) error { + return f.Sync() +} + +// Fdatasync is similar to fsync(), but does not flush modified metadata +// unless that metadata is needed in order to allow a subsequent data retrieval +// to be correctly handled. +func Fdatasync(f *os.File) error { + return syscall.Fdatasync(int(f.Fd())) +} diff --git a/vendor/github.com/coreos/etcd/pkg/idutil/id.go b/vendor/github.com/coreos/etcd/pkg/idutil/id.go new file mode 100644 index 0000000..931beb2 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/idutil/id.go @@ -0,0 +1,78 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package idutil implements utility functions for generating unique, +// randomized ids. +package idutil + +import ( + "math" + "sync" + "time" +) + +const ( + tsLen = 5 * 8 + cntLen = 8 + suffixLen = tsLen + cntLen +) + +// Generator generates unique identifiers based on counters, timestamps, and +// a node member ID. +// +// The initial id is in this format: +// High order byte is memberID, next 5 bytes are from timestamp, +// and low order 2 bytes are 0s. +// | prefix | suffix | +// | 2 bytes | 5 bytes | 1 byte | +// | memberID | timestamp | cnt | +// +// The timestamp 5 bytes is different when the machine is restart +// after 1 ms and before 35 years. +// +// It increases suffix to generate the next id. +// The count field may overflow to timestamp field, which is intentional. +// It helps to extend the event window to 2^56. This doesn't break that +// id generated after restart is unique because etcd throughput is << +// 256req/ms(250k reqs/second). +type Generator struct { + mu sync.Mutex + // high order 2 bytes + prefix uint64 + // low order 6 bytes + suffix uint64 +} + +func NewGenerator(memberID uint16, now time.Time) *Generator { + prefix := uint64(memberID) << suffixLen + unixMilli := uint64(now.UnixNano()) / uint64(time.Millisecond/time.Nanosecond) + suffix := lowbit(unixMilli, tsLen) << cntLen + return &Generator{ + prefix: prefix, + suffix: suffix, + } +} + +// Next generates a id that is unique. +func (g *Generator) Next() uint64 { + g.mu.Lock() + defer g.mu.Unlock() + g.suffix++ + id := g.prefix | lowbit(g.suffix, suffixLen) + return id +} + +func lowbit(x uint64, n uint) uint64 { + return x & (math.MaxUint64 >> (64 - n)) +} diff --git a/vendor/github.com/coreos/etcd/pkg/ioutil/pagewriter.go b/vendor/github.com/coreos/etcd/pkg/ioutil/pagewriter.go new file mode 100644 index 0000000..72de159 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/ioutil/pagewriter.go @@ -0,0 +1,106 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ioutil + +import ( + "io" +) + +var defaultBufferBytes = 128 * 1024 + +// PageWriter implements the io.Writer interface so that writes will +// either be in page chunks or from flushing. +type PageWriter struct { + w io.Writer + // pageOffset tracks the page offset of the base of the buffer + pageOffset int + // pageBytes is the number of bytes per page + pageBytes int + // bufferedBytes counts the number of bytes pending for write in the buffer + bufferedBytes int + // buf holds the write buffer + buf []byte + // bufWatermarkBytes is the number of bytes the buffer can hold before it needs + // to be flushed. It is less than len(buf) so there is space for slack writes + // to bring the writer to page alignment. + bufWatermarkBytes int +} + +// NewPageWriter creates a new PageWriter. pageBytes is the number of bytes +// to write per page. pageOffset is the starting offset of io.Writer. +func NewPageWriter(w io.Writer, pageBytes, pageOffset int) *PageWriter { + return &PageWriter{ + w: w, + pageOffset: pageOffset, + pageBytes: pageBytes, + buf: make([]byte, defaultBufferBytes+pageBytes), + bufWatermarkBytes: defaultBufferBytes, + } +} + +func (pw *PageWriter) Write(p []byte) (n int, err error) { + if len(p)+pw.bufferedBytes <= pw.bufWatermarkBytes { + // no overflow + copy(pw.buf[pw.bufferedBytes:], p) + pw.bufferedBytes += len(p) + return len(p), nil + } + // complete the slack page in the buffer if unaligned + slack := pw.pageBytes - ((pw.pageOffset + pw.bufferedBytes) % pw.pageBytes) + if slack != pw.pageBytes { + partial := slack > len(p) + if partial { + // not enough data to complete the slack page + slack = len(p) + } + // special case: writing to slack page in buffer + copy(pw.buf[pw.bufferedBytes:], p[:slack]) + pw.bufferedBytes += slack + n = slack + p = p[slack:] + if partial { + // avoid forcing an unaligned flush + return n, nil + } + } + // buffer contents are now page-aligned; clear out + if err = pw.Flush(); err != nil { + return n, err + } + // directly write all complete pages without copying + if len(p) > pw.pageBytes { + pages := len(p) / pw.pageBytes + c, werr := pw.w.Write(p[:pages*pw.pageBytes]) + n += c + if werr != nil { + return n, werr + } + p = p[pages*pw.pageBytes:] + } + // write remaining tail to buffer + c, werr := pw.Write(p) + n += c + return n, werr +} + +func (pw *PageWriter) Flush() error { + if pw.bufferedBytes == 0 { + return nil + } + _, err := pw.w.Write(pw.buf[:pw.bufferedBytes]) + pw.pageOffset = (pw.pageOffset + pw.bufferedBytes) % pw.pageBytes + pw.bufferedBytes = 0 + return err +} diff --git a/vendor/github.com/coreos/etcd/pkg/ioutil/readcloser.go b/vendor/github.com/coreos/etcd/pkg/ioutil/readcloser.go new file mode 100644 index 0000000..d3efcfe --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/ioutil/readcloser.go @@ -0,0 +1,66 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ioutil + +import ( + "fmt" + "io" +) + +// ReaderAndCloser implements io.ReadCloser interface by combining +// reader and closer together. +type ReaderAndCloser struct { + io.Reader + io.Closer +} + +var ( + ErrShortRead = fmt.Errorf("ioutil: short read") + ErrExpectEOF = fmt.Errorf("ioutil: expect EOF") +) + +// NewExactReadCloser returns a ReadCloser that returns errors if the underlying +// reader does not read back exactly the requested number of bytes. +func NewExactReadCloser(rc io.ReadCloser, totalBytes int64) io.ReadCloser { + return &exactReadCloser{rc: rc, totalBytes: totalBytes} +} + +type exactReadCloser struct { + rc io.ReadCloser + br int64 + totalBytes int64 +} + +func (e *exactReadCloser) Read(p []byte) (int, error) { + n, err := e.rc.Read(p) + e.br += int64(n) + if e.br > e.totalBytes { + return 0, ErrExpectEOF + } + if e.br < e.totalBytes && n == 0 { + return 0, ErrShortRead + } + return n, err +} + +func (e *exactReadCloser) Close() error { + if err := e.rc.Close(); err != nil { + return err + } + if e.br < e.totalBytes { + return ErrShortRead + } + return nil +} diff --git a/vendor/github.com/coreos/etcd/pkg/ioutil/reader.go b/vendor/github.com/coreos/etcd/pkg/ioutil/reader.go new file mode 100644 index 0000000..0703ed4 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/ioutil/reader.go @@ -0,0 +1,40 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ioutil implements I/O utility functions. +package ioutil + +import "io" + +// NewLimitedBufferReader returns a reader that reads from the given reader +// but limits the amount of data returned to at most n bytes. +func NewLimitedBufferReader(r io.Reader, n int) io.Reader { + return &limitedBufferReader{ + r: r, + n: n, + } +} + +type limitedBufferReader struct { + r io.Reader + n int +} + +func (r *limitedBufferReader) Read(p []byte) (n int, err error) { + np := p + if len(np) > r.n { + np = np[:r.n] + } + return r.r.Read(np) +} diff --git a/vendor/github.com/coreos/etcd/pkg/ioutil/util.go b/vendor/github.com/coreos/etcd/pkg/ioutil/util.go new file mode 100644 index 0000000..192ad88 --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/ioutil/util.go @@ -0,0 +1,43 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ioutil + +import ( + "io" + "os" + + "github.com/coreos/etcd/pkg/fileutil" +) + +// WriteAndSyncFile behaves just like ioutil.WriteFile in the standard library, +// but calls Sync before closing the file. WriteAndSyncFile guarantees the data +// is synced if there is no error returned. +func WriteAndSyncFile(filename string, data []byte, perm os.FileMode) error { + f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm) + if err != nil { + return err + } + n, err := f.Write(data) + if err == nil && n < len(data) { + err = io.ErrShortWrite + } + if err == nil { + err = fileutil.Fsync(f) + } + if err1 := f.Close(); err == nil { + err = err1 + } + return err +} diff --git a/vendor/github.com/coreos/etcd/pkg/pbutil/pbutil.go b/vendor/github.com/coreos/etcd/pkg/pbutil/pbutil.go new file mode 100644 index 0000000..d70f98d --- /dev/null +++ b/vendor/github.com/coreos/etcd/pkg/pbutil/pbutil.go @@ -0,0 +1,60 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package pbutil defines interfaces for handling Protocol Buffer objects. +package pbutil + +import "github.com/coreos/pkg/capnslog" + +var ( + plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "pkg/pbutil") +) + +type Marshaler interface { + Marshal() (data []byte, err error) +} + +type Unmarshaler interface { + Unmarshal(data []byte) error +} + +func MustMarshal(m Marshaler) []byte { + d, err := m.Marshal() + if err != nil { + plog.Panicf("marshal should never fail (%v)", err) + } + return d +} + +func MustUnmarshal(um Unmarshaler, data []byte) { + if err := um.Unmarshal(data); err != nil { + plog.Panicf("unmarshal should never fail (%v)", err) + } +} + +func MaybeUnmarshal(um Unmarshaler, data []byte) bool { + if err := um.Unmarshal(data); err != nil { + return false + } + return true +} + +func GetBool(v *bool) (vv bool, set bool) { + if v == nil { + return false, false + } + return *v, true +} + +func Boolp(b bool) *bool { return &b } diff --git a/vendor/github.com/coreos/etcd/raft/README.md b/vendor/github.com/coreos/etcd/raft/README.md new file mode 100644 index 0000000..a724b95 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/README.md @@ -0,0 +1,247 @@ +# Raft library + +Raft is a protocol with which a cluster of nodes can maintain a replicated state machine. +The state machine is kept in sync through the use of a replicated log. +For more details on Raft, see "In Search of an Understandable Consensus Algorithm" +(https://ramcloud.stanford.edu/raft.pdf) by Diego Ongaro and John Ousterhout. + +This Raft library is stable and feature complete. As of 2016, it is **the most widely used** Raft library in production, serving tens of thousands clusters each day. It powers distributed systems such as etcd, Kubernetes, Docker Swarm, Cloud Foundry Diego, CockroachDB, TiDB, Project Calico, Flannel, and more. + +Most Raft implementations have a monolithic design, including storage handling, messaging serialization, and network transport. This library instead follows a minimalistic design philosophy by only implementing the core raft algorithm. This minimalism buys flexibility, determinism, and performance. + +To keep the codebase small as well as provide flexibility, the library only implements the Raft algorithm; both network and disk IO are left to the user. Library users must implement their own transportation layer for message passing between Raft peers over the wire. Similarly, users must implement their own storage layer to persist the Raft log and state. + +In order to easily test the Raft library, its behavior should be deterministic. To achieve this determinism, the library models Raft as a state machine. The state machine takes a `Message` as input. A message can either be a local timer update or a network message sent from a remote peer. The state machine's output is a 3-tuple `{[]Messages, []LogEntries, NextState}` consisting of an array of `Messages`, `log entries`, and `Raft state changes`. For state machines with the same state, the same state machine input should always generate the same state machine output. + +A simple example application, _raftexample_, is also available to help illustrate +how to use this package in practice: +https://github.com/coreos/etcd/tree/master/contrib/raftexample + +# Features + +This raft implementation is a full feature implementation of Raft protocol. Features includes: + +- Leader election +- Log replication +- Log compaction +- Membership changes +- Leadership transfer extension +- Efficient linearizable read-only queries served by both the leader and followers + - leader checks with quorum and bypasses Raft log before processing read-only queries + - followers asks leader to get a safe read index before processing read-only queries +- More efficient lease-based linearizable read-only queries served by both the leader and followers + - leader bypasses Raft log and processing read-only queries locally + - followers asks leader to get a safe read index before processing read-only queries + - this approach relies on the clock of the all the machines in raft group + +This raft implementation also includes a few optional enhancements: + +- Optimistic pipelining to reduce log replication latency +- Flow control for log replication +- Batching Raft messages to reduce synchronized network I/O calls +- Batching log entries to reduce disk synchronized I/O +- Writing to leader's disk in parallel +- Internal proposal redirection from followers to leader +- Automatic stepping down when the leader loses quorum + +## Notable Users + +- [cockroachdb](https://github.com/cockroachdb/cockroach) A Scalable, Survivable, Strongly-Consistent SQL Database +- [dgraph](https://github.com/dgraph-io/dgraph) A Scalable, Distributed, Low Latency, High Throughput Graph Database +- [etcd](https://github.com/coreos/etcd) A distributed reliable key-value store +- [tikv](https://github.com/pingcap/tikv) A Distributed transactional key value database powered by Rust and Raft +- [swarmkit](https://github.com/docker/swarmkit) A toolkit for orchestrating distributed systems at any scale. + +## Usage + +The primary object in raft is a Node. You either start a Node from scratch +using raft.StartNode or start a Node from some initial state using raft.RestartNode. + +To start a three-node cluster +```go + storage := raft.NewMemoryStorage() + c := &Config{ + ID: 0x01, + ElectionTick: 10, + HeartbeatTick: 1, + Storage: storage, + MaxSizePerMsg: 4096, + MaxInflightMsgs: 256, + } + // Set peer list to the other nodes in the cluster. + // Note that they need to be started separately as well. + n := raft.StartNode(c, []raft.Peer{{ID: 0x02}, {ID: 0x03}}) +``` + +You can start a single node cluster, like so: +```go + // Create storage and config as shown above. + // Set peer list to itself, so this node can become the leader of this single-node cluster. + peers := []raft.Peer{{ID: 0x01}} + n := raft.StartNode(c, peers) +``` + +To allow a new node to join this cluster, do not pass in any peers. First, you need add the node to the existing cluster by calling `ProposeConfChange` on any existing node inside the cluster. Then, you can start the node with empty peer list, like so: +```go + // Create storage and config as shown above. + n := raft.StartNode(c, nil) +``` + +To restart a node from previous state: +```go + storage := raft.NewMemoryStorage() + + // Recover the in-memory storage from persistent snapshot, state and entries. + storage.ApplySnapshot(snapshot) + storage.SetHardState(state) + storage.Append(entries) + + c := &Config{ + ID: 0x01, + ElectionTick: 10, + HeartbeatTick: 1, + Storage: storage, + MaxSizePerMsg: 4096, + MaxInflightMsgs: 256, + } + + // Restart raft without peer information. + // Peer information is already included in the storage. + n := raft.RestartNode(c) +``` + +Now that you are holding onto a Node you have a few responsibilities: + +First, you must read from the Node.Ready() channel and process the updates +it contains. These steps may be performed in parallel, except as noted in step +2. + +1. Write HardState, Entries, and Snapshot to persistent storage if they are +not empty. Note that when writing an Entry with Index i, any +previously-persisted entries with Index >= i must be discarded. + +2. Send all Messages to the nodes named in the To field. It is important that +no messages be sent until the latest HardState has been persisted to disk, +and all Entries written by any previous Ready batch (Messages may be sent while +entries from the same batch are being persisted). To reduce the I/O latency, an +optimization can be applied to make leader write to disk in parallel with its +followers (as explained at section 10.2.1 in Raft thesis). If any Message has type +MsgSnap, call Node.ReportSnapshot() after it has been sent (these messages may be +large). Note: Marshalling messages is not thread-safe; it is important that you +make sure that no new entries are persisted while marshalling. +The easiest way to achieve this is to serialise the messages directly inside +your main raft loop. + +3. Apply Snapshot (if any) and CommittedEntries to the state machine. +If any committed Entry has Type EntryConfChange, call Node.ApplyConfChange() +to apply it to the node. The configuration change may be cancelled at this point +by setting the NodeID field to zero before calling ApplyConfChange +(but ApplyConfChange must be called one way or the other, and the decision to cancel +must be based solely on the state machine and not external information such as +the observed health of the node). + +4. Call Node.Advance() to signal readiness for the next batch of updates. +This may be done at any time after step 1, although all updates must be processed +in the order they were returned by Ready. + +Second, all persisted log entries must be made available via an +implementation of the Storage interface. The provided MemoryStorage +type can be used for this (if you repopulate its state upon a +restart), or you can supply your own disk-backed implementation. + +Third, when you receive a message from another node, pass it to Node.Step: + +```go + func recvRaftRPC(ctx context.Context, m raftpb.Message) { + n.Step(ctx, m) + } +``` + +Finally, you need to call `Node.Tick()` at regular intervals (probably +via a `time.Ticker`). Raft has two important timeouts: heartbeat and the +election timeout. However, internally to the raft package time is +represented by an abstract "tick". + +The total state machine handling loop will look something like this: + +```go + for { + select { + case <-s.Ticker: + n.Tick() + case rd := <-s.Node.Ready(): + saveToStorage(rd.State, rd.Entries, rd.Snapshot) + send(rd.Messages) + if !raft.IsEmptySnap(rd.Snapshot) { + processSnapshot(rd.Snapshot) + } + for _, entry := range rd.CommittedEntries { + process(entry) + if entry.Type == raftpb.EntryConfChange { + var cc raftpb.ConfChange + cc.Unmarshal(entry.Data) + s.Node.ApplyConfChange(cc) + } + } + s.Node.Advance() + case <-s.done: + return + } + } +``` + +To propose changes to the state machine from your node take your application +data, serialize it into a byte slice and call: + +```go + n.Propose(ctx, data) +``` + +If the proposal is committed, data will appear in committed entries with type +raftpb.EntryNormal. There is no guarantee that a proposed command will be +committed; you may have to re-propose after a timeout. + +To add or remove node in a cluster, build ConfChange struct 'cc' and call: + +```go + n.ProposeConfChange(ctx, cc) +``` + +After config change is committed, some committed entry with type +raftpb.EntryConfChange will be returned. You must apply it to node through: + +```go + var cc raftpb.ConfChange + cc.Unmarshal(data) + n.ApplyConfChange(cc) +``` + +Note: An ID represents a unique node in a cluster for all time. A +given ID MUST be used only once even if the old node has been removed. +This means that for example IP addresses make poor node IDs since they +may be reused. Node IDs must be non-zero. + +## Implementation notes + +This implementation is up to date with the final Raft thesis +(https://ramcloud.stanford.edu/~ongaro/thesis.pdf), although our +implementation of the membership change protocol differs somewhat from +that described in chapter 4. The key invariant that membership changes +happen one node at a time is preserved, but in our implementation the +membership change takes effect when its entry is applied, not when it +is added to the log (so the entry is committed under the old +membership instead of the new). This is equivalent in terms of safety, +since the old and new configurations are guaranteed to overlap. + +To ensure that we do not attempt to commit two membership changes at +once by matching log positions (which would be unsafe since they +should have different quorum requirements), we simply disallow any +proposed membership change while any uncommitted change appears in +the leader's log. + +This approach introduces a problem when you try to remove a member +from a two-member cluster: If one of the members dies before the +other one receives the commit of the confchange entry, then the member +cannot be removed any more since the cluster cannot make progress. +For this reason it is highly recommended to use three or more nodes in +every cluster. diff --git a/vendor/github.com/coreos/etcd/raft/doc.go b/vendor/github.com/coreos/etcd/raft/doc.go new file mode 100644 index 0000000..b55c591 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/doc.go @@ -0,0 +1,300 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +Package raft sends and receives messages in the Protocol Buffer format +defined in the raftpb package. + +Raft is a protocol with which a cluster of nodes can maintain a replicated state machine. +The state machine is kept in sync through the use of a replicated log. +For more details on Raft, see "In Search of an Understandable Consensus Algorithm" +(https://ramcloud.stanford.edu/raft.pdf) by Diego Ongaro and John Ousterhout. + +A simple example application, _raftexample_, is also available to help illustrate +how to use this package in practice: +https://github.com/coreos/etcd/tree/master/contrib/raftexample + +Usage + +The primary object in raft is a Node. You either start a Node from scratch +using raft.StartNode or start a Node from some initial state using raft.RestartNode. + +To start a node from scratch: + + storage := raft.NewMemoryStorage() + c := &Config{ + ID: 0x01, + ElectionTick: 10, + HeartbeatTick: 1, + Storage: storage, + MaxSizePerMsg: 4096, + MaxInflightMsgs: 256, + } + n := raft.StartNode(c, []raft.Peer{{ID: 0x02}, {ID: 0x03}}) + +To restart a node from previous state: + + storage := raft.NewMemoryStorage() + + // recover the in-memory storage from persistent + // snapshot, state and entries. + storage.ApplySnapshot(snapshot) + storage.SetHardState(state) + storage.Append(entries) + + c := &Config{ + ID: 0x01, + ElectionTick: 10, + HeartbeatTick: 1, + Storage: storage, + MaxSizePerMsg: 4096, + MaxInflightMsgs: 256, + } + + // restart raft without peer information. + // peer information is already included in the storage. + n := raft.RestartNode(c) + +Now that you are holding onto a Node you have a few responsibilities: + +First, you must read from the Node.Ready() channel and process the updates +it contains. These steps may be performed in parallel, except as noted in step +2. + +1. Write HardState, Entries, and Snapshot to persistent storage if they are +not empty. Note that when writing an Entry with Index i, any +previously-persisted entries with Index >= i must be discarded. + +2. Send all Messages to the nodes named in the To field. It is important that +no messages be sent until the latest HardState has been persisted to disk, +and all Entries written by any previous Ready batch (Messages may be sent while +entries from the same batch are being persisted). To reduce the I/O latency, an +optimization can be applied to make leader write to disk in parallel with its +followers (as explained at section 10.2.1 in Raft thesis). If any Message has type +MsgSnap, call Node.ReportSnapshot() after it has been sent (these messages may be +large). + +Note: Marshalling messages is not thread-safe; it is important that you +make sure that no new entries are persisted while marshalling. +The easiest way to achieve this is to serialise the messages directly inside +your main raft loop. + +3. Apply Snapshot (if any) and CommittedEntries to the state machine. +If any committed Entry has Type EntryConfChange, call Node.ApplyConfChange() +to apply it to the node. The configuration change may be cancelled at this point +by setting the NodeID field to zero before calling ApplyConfChange +(but ApplyConfChange must be called one way or the other, and the decision to cancel +must be based solely on the state machine and not external information such as +the observed health of the node). + +4. Call Node.Advance() to signal readiness for the next batch of updates. +This may be done at any time after step 1, although all updates must be processed +in the order they were returned by Ready. + +Second, all persisted log entries must be made available via an +implementation of the Storage interface. The provided MemoryStorage +type can be used for this (if you repopulate its state upon a +restart), or you can supply your own disk-backed implementation. + +Third, when you receive a message from another node, pass it to Node.Step: + + func recvRaftRPC(ctx context.Context, m raftpb.Message) { + n.Step(ctx, m) + } + +Finally, you need to call Node.Tick() at regular intervals (probably +via a time.Ticker). Raft has two important timeouts: heartbeat and the +election timeout. However, internally to the raft package time is +represented by an abstract "tick". + +The total state machine handling loop will look something like this: + + for { + select { + case <-s.Ticker: + n.Tick() + case rd := <-s.Node.Ready(): + saveToStorage(rd.State, rd.Entries, rd.Snapshot) + send(rd.Messages) + if !raft.IsEmptySnap(rd.Snapshot) { + processSnapshot(rd.Snapshot) + } + for _, entry := range rd.CommittedEntries { + process(entry) + if entry.Type == raftpb.EntryConfChange { + var cc raftpb.ConfChange + cc.Unmarshal(entry.Data) + s.Node.ApplyConfChange(cc) + } + } + s.Node.Advance() + case <-s.done: + return + } + } + +To propose changes to the state machine from your node take your application +data, serialize it into a byte slice and call: + + n.Propose(ctx, data) + +If the proposal is committed, data will appear in committed entries with type +raftpb.EntryNormal. There is no guarantee that a proposed command will be +committed; you may have to re-propose after a timeout. + +To add or remove node in a cluster, build ConfChange struct 'cc' and call: + + n.ProposeConfChange(ctx, cc) + +After config change is committed, some committed entry with type +raftpb.EntryConfChange will be returned. You must apply it to node through: + + var cc raftpb.ConfChange + cc.Unmarshal(data) + n.ApplyConfChange(cc) + +Note: An ID represents a unique node in a cluster for all time. A +given ID MUST be used only once even if the old node has been removed. +This means that for example IP addresses make poor node IDs since they +may be reused. Node IDs must be non-zero. + +Implementation notes + +This implementation is up to date with the final Raft thesis +(https://ramcloud.stanford.edu/~ongaro/thesis.pdf), although our +implementation of the membership change protocol differs somewhat from +that described in chapter 4. The key invariant that membership changes +happen one node at a time is preserved, but in our implementation the +membership change takes effect when its entry is applied, not when it +is added to the log (so the entry is committed under the old +membership instead of the new). This is equivalent in terms of safety, +since the old and new configurations are guaranteed to overlap. + +To ensure that we do not attempt to commit two membership changes at +once by matching log positions (which would be unsafe since they +should have different quorum requirements), we simply disallow any +proposed membership change while any uncommitted change appears in +the leader's log. + +This approach introduces a problem when you try to remove a member +from a two-member cluster: If one of the members dies before the +other one receives the commit of the confchange entry, then the member +cannot be removed any more since the cluster cannot make progress. +For this reason it is highly recommended to use three or more nodes in +every cluster. + +MessageType + +Package raft sends and receives message in Protocol Buffer format (defined +in raftpb package). Each state (follower, candidate, leader) implements its +own 'step' method ('stepFollower', 'stepCandidate', 'stepLeader') when +advancing with the given raftpb.Message. Each step is determined by its +raftpb.MessageType. Note that every step is checked by one common method +'Step' that safety-checks the terms of node and incoming message to prevent +stale log entries: + + 'MsgHup' is used for election. If a node is a follower or candidate, the + 'tick' function in 'raft' struct is set as 'tickElection'. If a follower or + candidate has not received any heartbeat before the election timeout, it + passes 'MsgHup' to its Step method and becomes (or remains) a candidate to + start a new election. + + 'MsgBeat' is an internal type that signals the leader to send a heartbeat of + the 'MsgHeartbeat' type. If a node is a leader, the 'tick' function in + the 'raft' struct is set as 'tickHeartbeat', and triggers the leader to + send periodic 'MsgHeartbeat' messages to its followers. + + 'MsgProp' proposes to append data to its log entries. This is a special + type to redirect proposals to leader. Therefore, send method overwrites + raftpb.Message's term with its HardState's term to avoid attaching its + local term to 'MsgProp'. When 'MsgProp' is passed to the leader's 'Step' + method, the leader first calls the 'appendEntry' method to append entries + to its log, and then calls 'bcastAppend' method to send those entries to + its peers. When passed to candidate, 'MsgProp' is dropped. When passed to + follower, 'MsgProp' is stored in follower's mailbox(msgs) by the send + method. It is stored with sender's ID and later forwarded to leader by + rafthttp package. + + 'MsgApp' contains log entries to replicate. A leader calls bcastAppend, + which calls sendAppend, which sends soon-to-be-replicated logs in 'MsgApp' + type. When 'MsgApp' is passed to candidate's Step method, candidate reverts + back to follower, because it indicates that there is a valid leader sending + 'MsgApp' messages. Candidate and follower respond to this message in + 'MsgAppResp' type. + + 'MsgAppResp' is response to log replication request('MsgApp'). When + 'MsgApp' is passed to candidate or follower's Step method, it responds by + calling 'handleAppendEntries' method, which sends 'MsgAppResp' to raft + mailbox. + + 'MsgVote' requests votes for election. When a node is a follower or + candidate and 'MsgHup' is passed to its Step method, then the node calls + 'campaign' method to campaign itself to become a leader. Once 'campaign' + method is called, the node becomes candidate and sends 'MsgVote' to peers + in cluster to request votes. When passed to leader or candidate's Step + method and the message's Term is lower than leader's or candidate's, + 'MsgVote' will be rejected ('MsgVoteResp' is returned with Reject true). + If leader or candidate receives 'MsgVote' with higher term, it will revert + back to follower. When 'MsgVote' is passed to follower, it votes for the + sender only when sender's last term is greater than MsgVote's term or + sender's last term is equal to MsgVote's term but sender's last committed + index is greater than or equal to follower's. + + 'MsgVoteResp' contains responses from voting request. When 'MsgVoteResp' is + passed to candidate, the candidate calculates how many votes it has won. If + it's more than majority (quorum), it becomes leader and calls 'bcastAppend'. + If candidate receives majority of votes of denials, it reverts back to + follower. + + 'MsgPreVote' and 'MsgPreVoteResp' are used in an optional two-phase election + protocol. When Config.PreVote is true, a pre-election is carried out first + (using the same rules as a regular election), and no node increases its term + number unless the pre-election indicates that the campaigining node would win. + This minimizes disruption when a partitioned node rejoins the cluster. + + 'MsgSnap' requests to install a snapshot message. When a node has just + become a leader or the leader receives 'MsgProp' message, it calls + 'bcastAppend' method, which then calls 'sendAppend' method to each + follower. In 'sendAppend', if a leader fails to get term or entries, + the leader requests snapshot by sending 'MsgSnap' type message. + + 'MsgSnapStatus' tells the result of snapshot install message. When a + follower rejected 'MsgSnap', it indicates the snapshot request with + 'MsgSnap' had failed from network issues which causes the network layer + to fail to send out snapshots to its followers. Then leader considers + follower's progress as probe. When 'MsgSnap' were not rejected, it + indicates that the snapshot succeeded and the leader sets follower's + progress to probe and resumes its log replication. + + 'MsgHeartbeat' sends heartbeat from leader. When 'MsgHeartbeat' is passed + to candidate and message's term is higher than candidate's, the candidate + reverts back to follower and updates its committed index from the one in + this heartbeat. And it sends the message to its mailbox. When + 'MsgHeartbeat' is passed to follower's Step method and message's term is + higher than follower's, the follower updates its leaderID with the ID + from the message. + + 'MsgHeartbeatResp' is a response to 'MsgHeartbeat'. When 'MsgHeartbeatResp' + is passed to leader's Step method, the leader knows which follower + responded. And only when the leader's last committed index is greater than + follower's Match index, the leader runs 'sendAppend` method. + + 'MsgUnreachable' tells that request(message) wasn't delivered. When + 'MsgUnreachable' is passed to leader's Step method, the leader discovers + that the follower that sent this 'MsgUnreachable' is not reachable, often + indicating 'MsgApp' is lost. When follower's progress state is replicate, + the leader sets it back to probe. + +*/ +package raft diff --git a/vendor/github.com/coreos/etcd/raft/log.go b/vendor/github.com/coreos/etcd/raft/log.go new file mode 100644 index 0000000..c3036d3 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/log.go @@ -0,0 +1,358 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "fmt" + "log" + + pb "github.com/coreos/etcd/raft/raftpb" +) + +type raftLog struct { + // storage contains all stable entries since the last snapshot. + storage Storage + + // unstable contains all unstable entries and snapshot. + // they will be saved into storage. + unstable unstable + + // committed is the highest log position that is known to be in + // stable storage on a quorum of nodes. + committed uint64 + // applied is the highest log position that the application has + // been instructed to apply to its state machine. + // Invariant: applied <= committed + applied uint64 + + logger Logger +} + +// newLog returns log using the given storage. It recovers the log to the state +// that it just commits and applies the latest snapshot. +func newLog(storage Storage, logger Logger) *raftLog { + if storage == nil { + log.Panic("storage must not be nil") + } + log := &raftLog{ + storage: storage, + logger: logger, + } + firstIndex, err := storage.FirstIndex() + if err != nil { + panic(err) // TODO(bdarnell) + } + lastIndex, err := storage.LastIndex() + if err != nil { + panic(err) // TODO(bdarnell) + } + log.unstable.offset = lastIndex + 1 + log.unstable.logger = logger + // Initialize our committed and applied pointers to the time of the last compaction. + log.committed = firstIndex - 1 + log.applied = firstIndex - 1 + + return log +} + +func (l *raftLog) String() string { + return fmt.Sprintf("committed=%d, applied=%d, unstable.offset=%d, len(unstable.Entries)=%d", l.committed, l.applied, l.unstable.offset, len(l.unstable.entries)) +} + +// maybeAppend returns (0, false) if the entries cannot be appended. Otherwise, +// it returns (last index of new entries, true). +func (l *raftLog) maybeAppend(index, logTerm, committed uint64, ents ...pb.Entry) (lastnewi uint64, ok bool) { + if l.matchTerm(index, logTerm) { + lastnewi = index + uint64(len(ents)) + ci := l.findConflict(ents) + switch { + case ci == 0: + case ci <= l.committed: + l.logger.Panicf("entry %d conflict with committed entry [committed(%d)]", ci, l.committed) + default: + offset := index + 1 + l.append(ents[ci-offset:]...) + } + l.commitTo(min(committed, lastnewi)) + return lastnewi, true + } + return 0, false +} + +func (l *raftLog) append(ents ...pb.Entry) uint64 { + if len(ents) == 0 { + return l.lastIndex() + } + if after := ents[0].Index - 1; after < l.committed { + l.logger.Panicf("after(%d) is out of range [committed(%d)]", after, l.committed) + } + l.unstable.truncateAndAppend(ents) + return l.lastIndex() +} + +// findConflict finds the index of the conflict. +// It returns the first pair of conflicting entries between the existing +// entries and the given entries, if there are any. +// If there is no conflicting entries, and the existing entries contains +// all the given entries, zero will be returned. +// If there is no conflicting entries, but the given entries contains new +// entries, the index of the first new entry will be returned. +// An entry is considered to be conflicting if it has the same index but +// a different term. +// The first entry MUST have an index equal to the argument 'from'. +// The index of the given entries MUST be continuously increasing. +func (l *raftLog) findConflict(ents []pb.Entry) uint64 { + for _, ne := range ents { + if !l.matchTerm(ne.Index, ne.Term) { + if ne.Index <= l.lastIndex() { + l.logger.Infof("found conflict at index %d [existing term: %d, conflicting term: %d]", + ne.Index, l.zeroTermOnErrCompacted(l.term(ne.Index)), ne.Term) + } + return ne.Index + } + } + return 0 +} + +func (l *raftLog) unstableEntries() []pb.Entry { + if len(l.unstable.entries) == 0 { + return nil + } + return l.unstable.entries +} + +// nextEnts returns all the available entries for execution. +// If applied is smaller than the index of snapshot, it returns all committed +// entries after the index of snapshot. +func (l *raftLog) nextEnts() (ents []pb.Entry) { + off := max(l.applied+1, l.firstIndex()) + if l.committed+1 > off { + ents, err := l.slice(off, l.committed+1, noLimit) + if err != nil { + l.logger.Panicf("unexpected error when getting unapplied entries (%v)", err) + } + return ents + } + return nil +} + +// hasNextEnts returns if there is any available entries for execution. This +// is a fast check without heavy raftLog.slice() in raftLog.nextEnts(). +func (l *raftLog) hasNextEnts() bool { + off := max(l.applied+1, l.firstIndex()) + return l.committed+1 > off +} + +func (l *raftLog) snapshot() (pb.Snapshot, error) { + if l.unstable.snapshot != nil { + return *l.unstable.snapshot, nil + } + return l.storage.Snapshot() +} + +func (l *raftLog) firstIndex() uint64 { + if i, ok := l.unstable.maybeFirstIndex(); ok { + return i + } + index, err := l.storage.FirstIndex() + if err != nil { + panic(err) // TODO(bdarnell) + } + return index +} + +func (l *raftLog) lastIndex() uint64 { + if i, ok := l.unstable.maybeLastIndex(); ok { + return i + } + i, err := l.storage.LastIndex() + if err != nil { + panic(err) // TODO(bdarnell) + } + return i +} + +func (l *raftLog) commitTo(tocommit uint64) { + // never decrease commit + if l.committed < tocommit { + if l.lastIndex() < tocommit { + l.logger.Panicf("tocommit(%d) is out of range [lastIndex(%d)]. Was the raft log corrupted, truncated, or lost?", tocommit, l.lastIndex()) + } + l.committed = tocommit + } +} + +func (l *raftLog) appliedTo(i uint64) { + if i == 0 { + return + } + if l.committed < i || i < l.applied { + l.logger.Panicf("applied(%d) is out of range [prevApplied(%d), committed(%d)]", i, l.applied, l.committed) + } + l.applied = i +} + +func (l *raftLog) stableTo(i, t uint64) { l.unstable.stableTo(i, t) } + +func (l *raftLog) stableSnapTo(i uint64) { l.unstable.stableSnapTo(i) } + +func (l *raftLog) lastTerm() uint64 { + t, err := l.term(l.lastIndex()) + if err != nil { + l.logger.Panicf("unexpected error when getting the last term (%v)", err) + } + return t +} + +func (l *raftLog) term(i uint64) (uint64, error) { + // the valid term range is [index of dummy entry, last index] + dummyIndex := l.firstIndex() - 1 + if i < dummyIndex || i > l.lastIndex() { + // TODO: return an error instead? + return 0, nil + } + + if t, ok := l.unstable.maybeTerm(i); ok { + return t, nil + } + + t, err := l.storage.Term(i) + if err == nil { + return t, nil + } + if err == ErrCompacted || err == ErrUnavailable { + return 0, err + } + panic(err) // TODO(bdarnell) +} + +func (l *raftLog) entries(i, maxsize uint64) ([]pb.Entry, error) { + if i > l.lastIndex() { + return nil, nil + } + return l.slice(i, l.lastIndex()+1, maxsize) +} + +// allEntries returns all entries in the log. +func (l *raftLog) allEntries() []pb.Entry { + ents, err := l.entries(l.firstIndex(), noLimit) + if err == nil { + return ents + } + if err == ErrCompacted { // try again if there was a racing compaction + return l.allEntries() + } + // TODO (xiangli): handle error? + panic(err) +} + +// isUpToDate determines if the given (lastIndex,term) log is more up-to-date +// by comparing the index and term of the last entries in the existing logs. +// If the logs have last entries with different terms, then the log with the +// later term is more up-to-date. If the logs end with the same term, then +// whichever log has the larger lastIndex is more up-to-date. If the logs are +// the same, the given log is up-to-date. +func (l *raftLog) isUpToDate(lasti, term uint64) bool { + return term > l.lastTerm() || (term == l.lastTerm() && lasti >= l.lastIndex()) +} + +func (l *raftLog) matchTerm(i, term uint64) bool { + t, err := l.term(i) + if err != nil { + return false + } + return t == term +} + +func (l *raftLog) maybeCommit(maxIndex, term uint64) bool { + if maxIndex > l.committed && l.zeroTermOnErrCompacted(l.term(maxIndex)) == term { + l.commitTo(maxIndex) + return true + } + return false +} + +func (l *raftLog) restore(s pb.Snapshot) { + l.logger.Infof("log [%s] starts to restore snapshot [index: %d, term: %d]", l, s.Metadata.Index, s.Metadata.Term) + l.committed = s.Metadata.Index + l.unstable.restore(s) +} + +// slice returns a slice of log entries from lo through hi-1, inclusive. +func (l *raftLog) slice(lo, hi, maxSize uint64) ([]pb.Entry, error) { + err := l.mustCheckOutOfBounds(lo, hi) + if err != nil { + return nil, err + } + if lo == hi { + return nil, nil + } + var ents []pb.Entry + if lo < l.unstable.offset { + storedEnts, err := l.storage.Entries(lo, min(hi, l.unstable.offset), maxSize) + if err == ErrCompacted { + return nil, err + } else if err == ErrUnavailable { + l.logger.Panicf("entries[%d:%d) is unavailable from storage", lo, min(hi, l.unstable.offset)) + } else if err != nil { + panic(err) // TODO(bdarnell) + } + + // check if ents has reached the size limitation + if uint64(len(storedEnts)) < min(hi, l.unstable.offset)-lo { + return storedEnts, nil + } + + ents = storedEnts + } + if hi > l.unstable.offset { + unstable := l.unstable.slice(max(lo, l.unstable.offset), hi) + if len(ents) > 0 { + ents = append([]pb.Entry{}, ents...) + ents = append(ents, unstable...) + } else { + ents = unstable + } + } + return limitSize(ents, maxSize), nil +} + +// l.firstIndex <= lo <= hi <= l.firstIndex + len(l.entries) +func (l *raftLog) mustCheckOutOfBounds(lo, hi uint64) error { + if lo > hi { + l.logger.Panicf("invalid slice %d > %d", lo, hi) + } + fi := l.firstIndex() + if lo < fi { + return ErrCompacted + } + + length := l.lastIndex() + 1 - fi + if lo < fi || hi > fi+length { + l.logger.Panicf("slice[%d,%d) out of bound [%d,%d]", lo, hi, fi, l.lastIndex()) + } + return nil +} + +func (l *raftLog) zeroTermOnErrCompacted(t uint64, err error) uint64 { + if err == nil { + return t + } + if err == ErrCompacted { + return 0 + } + l.logger.Panicf("unexpected error (%v)", err) + return 0 +} diff --git a/vendor/github.com/coreos/etcd/raft/log_unstable.go b/vendor/github.com/coreos/etcd/raft/log_unstable.go new file mode 100644 index 0000000..8ae301c --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/log_unstable.go @@ -0,0 +1,139 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import pb "github.com/coreos/etcd/raft/raftpb" + +// unstable.entries[i] has raft log position i+unstable.offset. +// Note that unstable.offset may be less than the highest log +// position in storage; this means that the next write to storage +// might need to truncate the log before persisting unstable.entries. +type unstable struct { + // the incoming unstable snapshot, if any. + snapshot *pb.Snapshot + // all entries that have not yet been written to storage. + entries []pb.Entry + offset uint64 + + logger Logger +} + +// maybeFirstIndex returns the index of the first possible entry in entries +// if it has a snapshot. +func (u *unstable) maybeFirstIndex() (uint64, bool) { + if u.snapshot != nil { + return u.snapshot.Metadata.Index + 1, true + } + return 0, false +} + +// maybeLastIndex returns the last index if it has at least one +// unstable entry or snapshot. +func (u *unstable) maybeLastIndex() (uint64, bool) { + if l := len(u.entries); l != 0 { + return u.offset + uint64(l) - 1, true + } + if u.snapshot != nil { + return u.snapshot.Metadata.Index, true + } + return 0, false +} + +// maybeTerm returns the term of the entry at index i, if there +// is any. +func (u *unstable) maybeTerm(i uint64) (uint64, bool) { + if i < u.offset { + if u.snapshot == nil { + return 0, false + } + if u.snapshot.Metadata.Index == i { + return u.snapshot.Metadata.Term, true + } + return 0, false + } + + last, ok := u.maybeLastIndex() + if !ok { + return 0, false + } + if i > last { + return 0, false + } + return u.entries[i-u.offset].Term, true +} + +func (u *unstable) stableTo(i, t uint64) { + gt, ok := u.maybeTerm(i) + if !ok { + return + } + // if i < offset, term is matched with the snapshot + // only update the unstable entries if term is matched with + // an unstable entry. + if gt == t && i >= u.offset { + u.entries = u.entries[i+1-u.offset:] + u.offset = i + 1 + } +} + +func (u *unstable) stableSnapTo(i uint64) { + if u.snapshot != nil && u.snapshot.Metadata.Index == i { + u.snapshot = nil + } +} + +func (u *unstable) restore(s pb.Snapshot) { + u.offset = s.Metadata.Index + 1 + u.entries = nil + u.snapshot = &s +} + +func (u *unstable) truncateAndAppend(ents []pb.Entry) { + after := ents[0].Index + switch { + case after == u.offset+uint64(len(u.entries)): + // after is the next index in the u.entries + // directly append + u.entries = append(u.entries, ents...) + case after <= u.offset: + u.logger.Infof("replace the unstable entries from index %d", after) + // The log is being truncated to before our current offset + // portion, so set the offset and replace the entries + u.offset = after + u.entries = ents + default: + // truncate to after and copy to u.entries + // then append + u.logger.Infof("truncate the unstable entries before index %d", after) + u.entries = append([]pb.Entry{}, u.slice(u.offset, after)...) + u.entries = append(u.entries, ents...) + } +} + +func (u *unstable) slice(lo uint64, hi uint64) []pb.Entry { + u.mustCheckOutOfBounds(lo, hi) + return u.entries[lo-u.offset : hi-u.offset] +} + +// u.offset <= lo <= hi <= u.offset+len(u.offset) +func (u *unstable) mustCheckOutOfBounds(lo, hi uint64) { + if lo > hi { + u.logger.Panicf("invalid unstable.slice %d > %d", lo, hi) + } + upper := u.offset + uint64(len(u.entries)) + if lo < u.offset || hi > upper { + u.logger.Panicf("unstable.slice[%d,%d) out of bound [%d,%d]", lo, hi, u.offset, upper) + } +} diff --git a/vendor/github.com/coreos/etcd/raft/logger.go b/vendor/github.com/coreos/etcd/raft/logger.go new file mode 100644 index 0000000..92e55b3 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/logger.go @@ -0,0 +1,126 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "fmt" + "io/ioutil" + "log" + "os" +) + +type Logger interface { + Debug(v ...interface{}) + Debugf(format string, v ...interface{}) + + Error(v ...interface{}) + Errorf(format string, v ...interface{}) + + Info(v ...interface{}) + Infof(format string, v ...interface{}) + + Warning(v ...interface{}) + Warningf(format string, v ...interface{}) + + Fatal(v ...interface{}) + Fatalf(format string, v ...interface{}) + + Panic(v ...interface{}) + Panicf(format string, v ...interface{}) +} + +func SetLogger(l Logger) { raftLogger = l } + +var ( + defaultLogger = &DefaultLogger{Logger: log.New(os.Stderr, "raft", log.LstdFlags)} + discardLogger = &DefaultLogger{Logger: log.New(ioutil.Discard, "", 0)} + raftLogger = Logger(defaultLogger) +) + +const ( + calldepth = 2 +) + +// DefaultLogger is a default implementation of the Logger interface. +type DefaultLogger struct { + *log.Logger + debug bool +} + +func (l *DefaultLogger) EnableTimestamps() { + l.SetFlags(l.Flags() | log.Ldate | log.Ltime) +} + +func (l *DefaultLogger) EnableDebug() { + l.debug = true +} + +func (l *DefaultLogger) Debug(v ...interface{}) { + if l.debug { + l.Output(calldepth, header("DEBUG", fmt.Sprint(v...))) + } +} + +func (l *DefaultLogger) Debugf(format string, v ...interface{}) { + if l.debug { + l.Output(calldepth, header("DEBUG", fmt.Sprintf(format, v...))) + } +} + +func (l *DefaultLogger) Info(v ...interface{}) { + l.Output(calldepth, header("INFO", fmt.Sprint(v...))) +} + +func (l *DefaultLogger) Infof(format string, v ...interface{}) { + l.Output(calldepth, header("INFO", fmt.Sprintf(format, v...))) +} + +func (l *DefaultLogger) Error(v ...interface{}) { + l.Output(calldepth, header("ERROR", fmt.Sprint(v...))) +} + +func (l *DefaultLogger) Errorf(format string, v ...interface{}) { + l.Output(calldepth, header("ERROR", fmt.Sprintf(format, v...))) +} + +func (l *DefaultLogger) Warning(v ...interface{}) { + l.Output(calldepth, header("WARN", fmt.Sprint(v...))) +} + +func (l *DefaultLogger) Warningf(format string, v ...interface{}) { + l.Output(calldepth, header("WARN", fmt.Sprintf(format, v...))) +} + +func (l *DefaultLogger) Fatal(v ...interface{}) { + l.Output(calldepth, header("FATAL", fmt.Sprint(v...))) + os.Exit(1) +} + +func (l *DefaultLogger) Fatalf(format string, v ...interface{}) { + l.Output(calldepth, header("FATAL", fmt.Sprintf(format, v...))) + os.Exit(1) +} + +func (l *DefaultLogger) Panic(v ...interface{}) { + l.Logger.Panic(v) +} + +func (l *DefaultLogger) Panicf(format string, v ...interface{}) { + l.Logger.Panicf(format, v...) +} + +func header(lvl, msg string) string { + return fmt.Sprintf("%s: %s", lvl, msg) +} diff --git a/vendor/github.com/coreos/etcd/raft/node.go b/vendor/github.com/coreos/etcd/raft/node.go new file mode 100644 index 0000000..c8410fd --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/node.go @@ -0,0 +1,521 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "errors" + + pb "github.com/coreos/etcd/raft/raftpb" + "golang.org/x/net/context" +) + +type SnapshotStatus int + +const ( + SnapshotFinish SnapshotStatus = 1 + SnapshotFailure SnapshotStatus = 2 +) + +var ( + emptyState = pb.HardState{} + + // ErrStopped is returned by methods on Nodes that have been stopped. + ErrStopped = errors.New("raft: stopped") +) + +// SoftState provides state that is useful for logging and debugging. +// The state is volatile and does not need to be persisted to the WAL. +type SoftState struct { + Lead uint64 // must use atomic operations to access; keep 64-bit aligned. + RaftState StateType +} + +func (a *SoftState) equal(b *SoftState) bool { + return a.Lead == b.Lead && a.RaftState == b.RaftState +} + +// Ready encapsulates the entries and messages that are ready to read, +// be saved to stable storage, committed or sent to other peers. +// All fields in Ready are read-only. +type Ready struct { + // The current volatile state of a Node. + // SoftState will be nil if there is no update. + // It is not required to consume or store SoftState. + *SoftState + + // The current state of a Node to be saved to stable storage BEFORE + // Messages are sent. + // HardState will be equal to empty state if there is no update. + pb.HardState + + // ReadStates can be used for node to serve linearizable read requests locally + // when its applied index is greater than the index in ReadState. + // Note that the readState will be returned when raft receives msgReadIndex. + // The returned is only valid for the request that requested to read. + ReadStates []ReadState + + // Entries specifies entries to be saved to stable storage BEFORE + // Messages are sent. + Entries []pb.Entry + + // Snapshot specifies the snapshot to be saved to stable storage. + Snapshot pb.Snapshot + + // CommittedEntries specifies entries to be committed to a + // store/state-machine. These have previously been committed to stable + // store. + CommittedEntries []pb.Entry + + // Messages specifies outbound messages to be sent AFTER Entries are + // committed to stable storage. + // If it contains a MsgSnap message, the application MUST report back to raft + // when the snapshot has been received or has failed by calling ReportSnapshot. + Messages []pb.Message +} + +func isHardStateEqual(a, b pb.HardState) bool { + return a.Term == b.Term && a.Vote == b.Vote && a.Commit == b.Commit +} + +// IsEmptyHardState returns true if the given HardState is empty. +func IsEmptyHardState(st pb.HardState) bool { + return isHardStateEqual(st, emptyState) +} + +// IsEmptySnap returns true if the given Snapshot is empty. +func IsEmptySnap(sp pb.Snapshot) bool { + return sp.Metadata.Index == 0 +} + +func (rd Ready) containsUpdates() bool { + return rd.SoftState != nil || !IsEmptyHardState(rd.HardState) || + !IsEmptySnap(rd.Snapshot) || len(rd.Entries) > 0 || + len(rd.CommittedEntries) > 0 || len(rd.Messages) > 0 || len(rd.ReadStates) != 0 +} + +// Node represents a node in a raft cluster. +type Node interface { + // Tick increments the internal logical clock for the Node by a single tick. Election + // timeouts and heartbeat timeouts are in units of ticks. + Tick() + // Campaign causes the Node to transition to candidate state and start campaigning to become leader. + Campaign(ctx context.Context) error + // Propose proposes that data be appended to the log. + Propose(ctx context.Context, data []byte) error + // ProposeConfChange proposes config change. + // At most one ConfChange can be in the process of going through consensus. + // Application needs to call ApplyConfChange when applying EntryConfChange type entry. + ProposeConfChange(ctx context.Context, cc pb.ConfChange) error + // Step advances the state machine using the given message. ctx.Err() will be returned, if any. + Step(ctx context.Context, msg pb.Message) error + + // Ready returns a channel that returns the current point-in-time state. + // Users of the Node must call Advance after retrieving the state returned by Ready. + // + // NOTE: No committed entries from the next Ready may be applied until all committed entries + // and snapshots from the previous one have finished. + Ready() <-chan Ready + + // Advance notifies the Node that the application has saved progress up to the last Ready. + // It prepares the node to return the next available Ready. + // + // The application should generally call Advance after it applies the entries in last Ready. + // + // However, as an optimization, the application may call Advance while it is applying the + // commands. For example. when the last Ready contains a snapshot, the application might take + // a long time to apply the snapshot data. To continue receiving Ready without blocking raft + // progress, it can call Advance before finishing applying the last ready. + Advance() + // ApplyConfChange applies config change to the local node. + // Returns an opaque ConfState protobuf which must be recorded + // in snapshots. Will never return nil; it returns a pointer only + // to match MemoryStorage.Compact. + ApplyConfChange(cc pb.ConfChange) *pb.ConfState + + // TransferLeadership attempts to transfer leadership to the given transferee. + TransferLeadership(ctx context.Context, lead, transferee uint64) + + // ReadIndex request a read state. The read state will be set in the ready. + // Read state has a read index. Once the application advances further than the read + // index, any linearizable read requests issued before the read request can be + // processed safely. The read state will have the same rctx attached. + ReadIndex(ctx context.Context, rctx []byte) error + + // Status returns the current status of the raft state machine. + Status() Status + // ReportUnreachable reports the given node is not reachable for the last send. + ReportUnreachable(id uint64) + // ReportSnapshot reports the status of the sent snapshot. + ReportSnapshot(id uint64, status SnapshotStatus) + // Stop performs any necessary termination of the Node. + Stop() +} + +type Peer struct { + ID uint64 + Context []byte +} + +// StartNode returns a new Node given configuration and a list of raft peers. +// It appends a ConfChangeAddNode entry for each given peer to the initial log. +func StartNode(c *Config, peers []Peer) Node { + r := newRaft(c) + // become the follower at term 1 and apply initial configuration + // entries of term 1 + r.becomeFollower(1, None) + for _, peer := range peers { + cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context} + d, err := cc.Marshal() + if err != nil { + panic("unexpected marshal error") + } + e := pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: r.raftLog.lastIndex() + 1, Data: d} + r.raftLog.append(e) + } + // Mark these initial entries as committed. + // TODO(bdarnell): These entries are still unstable; do we need to preserve + // the invariant that committed < unstable? + r.raftLog.committed = r.raftLog.lastIndex() + // Now apply them, mainly so that the application can call Campaign + // immediately after StartNode in tests. Note that these nodes will + // be added to raft twice: here and when the application's Ready + // loop calls ApplyConfChange. The calls to addNode must come after + // all calls to raftLog.append so progress.next is set after these + // bootstrapping entries (it is an error if we try to append these + // entries since they have already been committed). + // We do not set raftLog.applied so the application will be able + // to observe all conf changes via Ready.CommittedEntries. + for _, peer := range peers { + r.addNode(peer.ID) + } + + n := newNode() + n.logger = c.Logger + go n.run(r) + return &n +} + +// RestartNode is similar to StartNode but does not take a list of peers. +// The current membership of the cluster will be restored from the Storage. +// If the caller has an existing state machine, pass in the last log index that +// has been applied to it; otherwise use zero. +func RestartNode(c *Config) Node { + r := newRaft(c) + + n := newNode() + n.logger = c.Logger + go n.run(r) + return &n +} + +// node is the canonical implementation of the Node interface +type node struct { + propc chan pb.Message + recvc chan pb.Message + confc chan pb.ConfChange + confstatec chan pb.ConfState + readyc chan Ready + advancec chan struct{} + tickc chan struct{} + done chan struct{} + stop chan struct{} + status chan chan Status + + logger Logger +} + +func newNode() node { + return node{ + propc: make(chan pb.Message), + recvc: make(chan pb.Message), + confc: make(chan pb.ConfChange), + confstatec: make(chan pb.ConfState), + readyc: make(chan Ready), + advancec: make(chan struct{}), + // make tickc a buffered chan, so raft node can buffer some ticks when the node + // is busy processing raft messages. Raft node will resume process buffered + // ticks when it becomes idle. + tickc: make(chan struct{}, 128), + done: make(chan struct{}), + stop: make(chan struct{}), + status: make(chan chan Status), + } +} + +func (n *node) Stop() { + select { + case n.stop <- struct{}{}: + // Not already stopped, so trigger it + case <-n.done: + // Node has already been stopped - no need to do anything + return + } + // Block until the stop has been acknowledged by run() + <-n.done +} + +func (n *node) run(r *raft) { + var propc chan pb.Message + var readyc chan Ready + var advancec chan struct{} + var prevLastUnstablei, prevLastUnstablet uint64 + var havePrevLastUnstablei bool + var prevSnapi uint64 + var rd Ready + + lead := None + prevSoftSt := r.softState() + prevHardSt := emptyState + + for { + if advancec != nil { + readyc = nil + } else { + rd = newReady(r, prevSoftSt, prevHardSt) + if rd.containsUpdates() { + readyc = n.readyc + } else { + readyc = nil + } + } + + if lead != r.lead { + if r.hasLeader() { + if lead == None { + r.logger.Infof("raft.node: %x elected leader %x at term %d", r.id, r.lead, r.Term) + } else { + r.logger.Infof("raft.node: %x changed leader from %x to %x at term %d", r.id, lead, r.lead, r.Term) + } + propc = n.propc + } else { + r.logger.Infof("raft.node: %x lost leader %x at term %d", r.id, lead, r.Term) + propc = nil + } + lead = r.lead + } + + select { + // TODO: maybe buffer the config propose if there exists one (the way + // described in raft dissertation) + // Currently it is dropped in Step silently. + case m := <-propc: + m.From = r.id + r.Step(m) + case m := <-n.recvc: + // filter out response message from unknown From. + if _, ok := r.prs[m.From]; ok || !IsResponseMsg(m.Type) { + r.Step(m) // raft never returns an error + } + case cc := <-n.confc: + if cc.NodeID == None { + r.resetPendingConf() + select { + case n.confstatec <- pb.ConfState{Nodes: r.nodes()}: + case <-n.done: + } + break + } + switch cc.Type { + case pb.ConfChangeAddNode: + r.addNode(cc.NodeID) + case pb.ConfChangeRemoveNode: + // block incoming proposal when local node is + // removed + if cc.NodeID == r.id { + propc = nil + } + r.removeNode(cc.NodeID) + case pb.ConfChangeUpdateNode: + r.resetPendingConf() + default: + panic("unexpected conf type") + } + select { + case n.confstatec <- pb.ConfState{Nodes: r.nodes()}: + case <-n.done: + } + case <-n.tickc: + r.tick() + case readyc <- rd: + if rd.SoftState != nil { + prevSoftSt = rd.SoftState + } + if len(rd.Entries) > 0 { + prevLastUnstablei = rd.Entries[len(rd.Entries)-1].Index + prevLastUnstablet = rd.Entries[len(rd.Entries)-1].Term + havePrevLastUnstablei = true + } + if !IsEmptyHardState(rd.HardState) { + prevHardSt = rd.HardState + } + if !IsEmptySnap(rd.Snapshot) { + prevSnapi = rd.Snapshot.Metadata.Index + } + + r.msgs = nil + r.readStates = nil + advancec = n.advancec + case <-advancec: + if prevHardSt.Commit != 0 { + r.raftLog.appliedTo(prevHardSt.Commit) + } + if havePrevLastUnstablei { + r.raftLog.stableTo(prevLastUnstablei, prevLastUnstablet) + havePrevLastUnstablei = false + } + r.raftLog.stableSnapTo(prevSnapi) + advancec = nil + case c := <-n.status: + c <- getStatus(r) + case <-n.stop: + close(n.done) + return + } + } +} + +// Tick increments the internal logical clock for this Node. Election timeouts +// and heartbeat timeouts are in units of ticks. +func (n *node) Tick() { + select { + case n.tickc <- struct{}{}: + case <-n.done: + default: + n.logger.Warningf("A tick missed to fire. Node blocks too long!") + } +} + +func (n *node) Campaign(ctx context.Context) error { return n.step(ctx, pb.Message{Type: pb.MsgHup}) } + +func (n *node) Propose(ctx context.Context, data []byte) error { + return n.step(ctx, pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Data: data}}}) +} + +func (n *node) Step(ctx context.Context, m pb.Message) error { + // ignore unexpected local messages receiving over network + if IsLocalMsg(m.Type) { + // TODO: return an error? + return nil + } + return n.step(ctx, m) +} + +func (n *node) ProposeConfChange(ctx context.Context, cc pb.ConfChange) error { + data, err := cc.Marshal() + if err != nil { + return err + } + return n.Step(ctx, pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Type: pb.EntryConfChange, Data: data}}}) +} + +// Step advances the state machine using msgs. The ctx.Err() will be returned, +// if any. +func (n *node) step(ctx context.Context, m pb.Message) error { + ch := n.recvc + if m.Type == pb.MsgProp { + ch = n.propc + } + + select { + case ch <- m: + return nil + case <-ctx.Done(): + return ctx.Err() + case <-n.done: + return ErrStopped + } +} + +func (n *node) Ready() <-chan Ready { return n.readyc } + +func (n *node) Advance() { + select { + case n.advancec <- struct{}{}: + case <-n.done: + } +} + +func (n *node) ApplyConfChange(cc pb.ConfChange) *pb.ConfState { + var cs pb.ConfState + select { + case n.confc <- cc: + case <-n.done: + } + select { + case cs = <-n.confstatec: + case <-n.done: + } + return &cs +} + +func (n *node) Status() Status { + c := make(chan Status) + select { + case n.status <- c: + return <-c + case <-n.done: + return Status{} + } +} + +func (n *node) ReportUnreachable(id uint64) { + select { + case n.recvc <- pb.Message{Type: pb.MsgUnreachable, From: id}: + case <-n.done: + } +} + +func (n *node) ReportSnapshot(id uint64, status SnapshotStatus) { + rej := status == SnapshotFailure + + select { + case n.recvc <- pb.Message{Type: pb.MsgSnapStatus, From: id, Reject: rej}: + case <-n.done: + } +} + +func (n *node) TransferLeadership(ctx context.Context, lead, transferee uint64) { + select { + // manually set 'from' and 'to', so that leader can voluntarily transfers its leadership + case n.recvc <- pb.Message{Type: pb.MsgTransferLeader, From: transferee, To: lead}: + case <-n.done: + case <-ctx.Done(): + } +} + +func (n *node) ReadIndex(ctx context.Context, rctx []byte) error { + return n.step(ctx, pb.Message{Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: rctx}}}) +} + +func newReady(r *raft, prevSoftSt *SoftState, prevHardSt pb.HardState) Ready { + rd := Ready{ + Entries: r.raftLog.unstableEntries(), + CommittedEntries: r.raftLog.nextEnts(), + Messages: r.msgs, + } + if softSt := r.softState(); !softSt.equal(prevSoftSt) { + rd.SoftState = softSt + } + if hardSt := r.hardState(); !isHardStateEqual(hardSt, prevHardSt) { + rd.HardState = hardSt + } + if r.raftLog.unstable.snapshot != nil { + rd.Snapshot = *r.raftLog.unstable.snapshot + } + if len(r.readStates) != 0 { + rd.ReadStates = r.readStates + } + return rd +} diff --git a/vendor/github.com/coreos/etcd/raft/progress.go b/vendor/github.com/coreos/etcd/raft/progress.go new file mode 100644 index 0000000..77c7b52 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/progress.go @@ -0,0 +1,279 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import "fmt" + +const ( + ProgressStateProbe ProgressStateType = iota + ProgressStateReplicate + ProgressStateSnapshot +) + +type ProgressStateType uint64 + +var prstmap = [...]string{ + "ProgressStateProbe", + "ProgressStateReplicate", + "ProgressStateSnapshot", +} + +func (st ProgressStateType) String() string { return prstmap[uint64(st)] } + +// Progress represents a follower’s progress in the view of the leader. Leader maintains +// progresses of all followers, and sends entries to the follower based on its progress. +type Progress struct { + Match, Next uint64 + // State defines how the leader should interact with the follower. + // + // When in ProgressStateProbe, leader sends at most one replication message + // per heartbeat interval. It also probes actual progress of the follower. + // + // When in ProgressStateReplicate, leader optimistically increases next + // to the latest entry sent after sending replication message. This is + // an optimized state for fast replicating log entries to the follower. + // + // When in ProgressStateSnapshot, leader should have sent out snapshot + // before and stops sending any replication message. + State ProgressStateType + // Paused is used in ProgressStateProbe. + // When Paused is true, raft should pause sending replication message to this peer. + Paused bool + // PendingSnapshot is used in ProgressStateSnapshot. + // If there is a pending snapshot, the pendingSnapshot will be set to the + // index of the snapshot. If pendingSnapshot is set, the replication process of + // this Progress will be paused. raft will not resend snapshot until the pending one + // is reported to be failed. + PendingSnapshot uint64 + + // RecentActive is true if the progress is recently active. Receiving any messages + // from the corresponding follower indicates the progress is active. + // RecentActive can be reset to false after an election timeout. + RecentActive bool + + // inflights is a sliding window for the inflight messages. + // Each inflight message contains one or more log entries. + // The max number of entries per message is defined in raft config as MaxSizePerMsg. + // Thus inflight effectively limits both the number of inflight messages + // and the bandwidth each Progress can use. + // When inflights is full, no more message should be sent. + // When a leader sends out a message, the index of the last + // entry should be added to inflights. The index MUST be added + // into inflights in order. + // When a leader receives a reply, the previous inflights should + // be freed by calling inflights.freeTo with the index of the last + // received entry. + ins *inflights +} + +func (pr *Progress) resetState(state ProgressStateType) { + pr.Paused = false + pr.PendingSnapshot = 0 + pr.State = state + pr.ins.reset() +} + +func (pr *Progress) becomeProbe() { + // If the original state is ProgressStateSnapshot, progress knows that + // the pending snapshot has been sent to this peer successfully, then + // probes from pendingSnapshot + 1. + if pr.State == ProgressStateSnapshot { + pendingSnapshot := pr.PendingSnapshot + pr.resetState(ProgressStateProbe) + pr.Next = max(pr.Match+1, pendingSnapshot+1) + } else { + pr.resetState(ProgressStateProbe) + pr.Next = pr.Match + 1 + } +} + +func (pr *Progress) becomeReplicate() { + pr.resetState(ProgressStateReplicate) + pr.Next = pr.Match + 1 +} + +func (pr *Progress) becomeSnapshot(snapshoti uint64) { + pr.resetState(ProgressStateSnapshot) + pr.PendingSnapshot = snapshoti +} + +// maybeUpdate returns false if the given n index comes from an outdated message. +// Otherwise it updates the progress and returns true. +func (pr *Progress) maybeUpdate(n uint64) bool { + var updated bool + if pr.Match < n { + pr.Match = n + updated = true + pr.resume() + } + if pr.Next < n+1 { + pr.Next = n + 1 + } + return updated +} + +func (pr *Progress) optimisticUpdate(n uint64) { pr.Next = n + 1 } + +// maybeDecrTo returns false if the given to index comes from an out of order message. +// Otherwise it decreases the progress next index to min(rejected, last) and returns true. +func (pr *Progress) maybeDecrTo(rejected, last uint64) bool { + if pr.State == ProgressStateReplicate { + // the rejection must be stale if the progress has matched and "rejected" + // is smaller than "match". + if rejected <= pr.Match { + return false + } + // directly decrease next to match + 1 + pr.Next = pr.Match + 1 + return true + } + + // the rejection must be stale if "rejected" does not match next - 1 + if pr.Next-1 != rejected { + return false + } + + if pr.Next = min(rejected, last+1); pr.Next < 1 { + pr.Next = 1 + } + pr.resume() + return true +} + +func (pr *Progress) pause() { pr.Paused = true } +func (pr *Progress) resume() { pr.Paused = false } + +// IsPaused returns whether sending log entries to this node has been +// paused. A node may be paused because it has rejected recent +// MsgApps, is currently waiting for a snapshot, or has reached the +// MaxInflightMsgs limit. +func (pr *Progress) IsPaused() bool { + switch pr.State { + case ProgressStateProbe: + return pr.Paused + case ProgressStateReplicate: + return pr.ins.full() + case ProgressStateSnapshot: + return true + default: + panic("unexpected state") + } +} + +func (pr *Progress) snapshotFailure() { pr.PendingSnapshot = 0 } + +// needSnapshotAbort returns true if snapshot progress's Match +// is equal or higher than the pendingSnapshot. +func (pr *Progress) needSnapshotAbort() bool { + return pr.State == ProgressStateSnapshot && pr.Match >= pr.PendingSnapshot +} + +func (pr *Progress) String() string { + return fmt.Sprintf("next = %d, match = %d, state = %s, waiting = %v, pendingSnapshot = %d", pr.Next, pr.Match, pr.State, pr.IsPaused(), pr.PendingSnapshot) +} + +type inflights struct { + // the starting index in the buffer + start int + // number of inflights in the buffer + count int + + // the size of the buffer + size int + + // buffer contains the index of the last entry + // inside one message. + buffer []uint64 +} + +func newInflights(size int) *inflights { + return &inflights{ + size: size, + } +} + +// add adds an inflight into inflights +func (in *inflights) add(inflight uint64) { + if in.full() { + panic("cannot add into a full inflights") + } + next := in.start + in.count + size := in.size + if next >= size { + next -= size + } + if next >= len(in.buffer) { + in.growBuf() + } + in.buffer[next] = inflight + in.count++ +} + +// grow the inflight buffer by doubling up to inflights.size. We grow on demand +// instead of preallocating to inflights.size to handle systems which have +// thousands of Raft groups per process. +func (in *inflights) growBuf() { + newSize := len(in.buffer) * 2 + if newSize == 0 { + newSize = 1 + } else if newSize > in.size { + newSize = in.size + } + newBuffer := make([]uint64, newSize) + copy(newBuffer, in.buffer) + in.buffer = newBuffer +} + +// freeTo frees the inflights smaller or equal to the given `to` flight. +func (in *inflights) freeTo(to uint64) { + if in.count == 0 || to < in.buffer[in.start] { + // out of the left side of the window + return + } + + i, idx := 0, in.start + for i = 0; i < in.count; i++ { + if to < in.buffer[idx] { // found the first large inflight + break + } + + // increase index and maybe rotate + size := in.size + if idx++; idx >= size { + idx -= size + } + } + // free i inflights and set new start index + in.count -= i + in.start = idx + if in.count == 0 { + // inflights is empty, reset the start index so that we don't grow the + // buffer unnecessarily. + in.start = 0 + } +} + +func (in *inflights) freeFirstOne() { in.freeTo(in.buffer[in.start]) } + +// full returns true if the inflights is full. +func (in *inflights) full() bool { + return in.count == in.size +} + +// resets frees all inflights. +func (in *inflights) reset() { + in.count = 0 + in.start = 0 +} diff --git a/vendor/github.com/coreos/etcd/raft/raft.go b/vendor/github.com/coreos/etcd/raft/raft.go new file mode 100644 index 0000000..633cc14 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/raft.go @@ -0,0 +1,1252 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "bytes" + "errors" + "fmt" + "math" + "math/rand" + "sort" + "strings" + "sync" + "time" + + pb "github.com/coreos/etcd/raft/raftpb" +) + +// None is a placeholder node ID used when there is no leader. +const None uint64 = 0 +const noLimit = math.MaxUint64 + +// Possible values for StateType. +const ( + StateFollower StateType = iota + StateCandidate + StateLeader + StatePreCandidate + numStates +) + +type ReadOnlyOption int + +const ( + // ReadOnlySafe guarantees the linearizability of the read only request by + // communicating with the quorum. It is the default and suggested option. + ReadOnlySafe ReadOnlyOption = iota + // ReadOnlyLeaseBased ensures linearizability of the read only request by + // relying on the leader lease. It can be affected by clock drift. + // If the clock drift is unbounded, leader might keep the lease longer than it + // should (clock can move backward/pause without any bound). ReadIndex is not safe + // in that case. + ReadOnlyLeaseBased +) + +// Possible values for CampaignType +const ( + // campaignPreElection represents the first phase of a normal election when + // Config.PreVote is true. + campaignPreElection CampaignType = "CampaignPreElection" + // campaignElection represents a normal (time-based) election (the second phase + // of the election when Config.PreVote is true). + campaignElection CampaignType = "CampaignElection" + // campaignTransfer represents the type of leader transfer + campaignTransfer CampaignType = "CampaignTransfer" +) + +// lockedRand is a small wrapper around rand.Rand to provide +// synchronization. Only the methods needed by the code are exposed +// (e.g. Intn). +type lockedRand struct { + mu sync.Mutex + rand *rand.Rand +} + +func (r *lockedRand) Intn(n int) int { + r.mu.Lock() + v := r.rand.Intn(n) + r.mu.Unlock() + return v +} + +var globalRand = &lockedRand{ + rand: rand.New(rand.NewSource(time.Now().UnixNano())), +} + +// CampaignType represents the type of campaigning +// the reason we use the type of string instead of uint64 +// is because it's simpler to compare and fill in raft entries +type CampaignType string + +// StateType represents the role of a node in a cluster. +type StateType uint64 + +var stmap = [...]string{ + "StateFollower", + "StateCandidate", + "StateLeader", + "StatePreCandidate", +} + +func (st StateType) String() string { + return stmap[uint64(st)] +} + +// Config contains the parameters to start a raft. +type Config struct { + // ID is the identity of the local raft. ID cannot be 0. + ID uint64 + + // peers contains the IDs of all nodes (including self) in the raft cluster. It + // should only be set when starting a new raft cluster. Restarting raft from + // previous configuration will panic if peers is set. peer is private and only + // used for testing right now. + peers []uint64 + + // ElectionTick is the number of Node.Tick invocations that must pass between + // elections. That is, if a follower does not receive any message from the + // leader of current term before ElectionTick has elapsed, it will become + // candidate and start an election. ElectionTick must be greater than + // HeartbeatTick. We suggest ElectionTick = 10 * HeartbeatTick to avoid + // unnecessary leader switching. + ElectionTick int + // HeartbeatTick is the number of Node.Tick invocations that must pass between + // heartbeats. That is, a leader sends heartbeat messages to maintain its + // leadership every HeartbeatTick ticks. + HeartbeatTick int + + // Storage is the storage for raft. raft generates entries and states to be + // stored in storage. raft reads the persisted entries and states out of + // Storage when it needs. raft reads out the previous state and configuration + // out of storage when restarting. + Storage Storage + // Applied is the last applied index. It should only be set when restarting + // raft. raft will not return entries to the application smaller or equal to + // Applied. If Applied is unset when restarting, raft might return previous + // applied entries. This is a very application dependent configuration. + Applied uint64 + + // MaxSizePerMsg limits the max size of each append message. Smaller value + // lowers the raft recovery cost(initial probing and message lost during normal + // operation). On the other side, it might affect the throughput during normal + // replication. Note: math.MaxUint64 for unlimited, 0 for at most one entry per + // message. + MaxSizePerMsg uint64 + // MaxInflightMsgs limits the max number of in-flight append messages during + // optimistic replication phase. The application transportation layer usually + // has its own sending buffer over TCP/UDP. Setting MaxInflightMsgs to avoid + // overflowing that sending buffer. TODO (xiangli): feedback to application to + // limit the proposal rate? + MaxInflightMsgs int + + // CheckQuorum specifies if the leader should check quorum activity. Leader + // steps down when quorum is not active for an electionTimeout. + CheckQuorum bool + + // PreVote enables the Pre-Vote algorithm described in raft thesis section + // 9.6. This prevents disruption when a node that has been partitioned away + // rejoins the cluster. + PreVote bool + + // ReadOnlyOption specifies how the read only request is processed. + // + // ReadOnlySafe guarantees the linearizability of the read only request by + // communicating with the quorum. It is the default and suggested option. + // + // ReadOnlyLeaseBased ensures linearizability of the read only request by + // relying on the leader lease. It can be affected by clock drift. + // If the clock drift is unbounded, leader might keep the lease longer than it + // should (clock can move backward/pause without any bound). ReadIndex is not safe + // in that case. + ReadOnlyOption ReadOnlyOption + + // Logger is the logger used for raft log. For multinode which can host + // multiple raft group, each raft group can have its own logger + Logger Logger +} + +func (c *Config) validate() error { + if c.ID == None { + return errors.New("cannot use none as id") + } + + if c.HeartbeatTick <= 0 { + return errors.New("heartbeat tick must be greater than 0") + } + + if c.ElectionTick <= c.HeartbeatTick { + return errors.New("election tick must be greater than heartbeat tick") + } + + if c.Storage == nil { + return errors.New("storage cannot be nil") + } + + if c.MaxInflightMsgs <= 0 { + return errors.New("max inflight messages must be greater than 0") + } + + if c.Logger == nil { + c.Logger = raftLogger + } + + return nil +} + +type raft struct { + id uint64 + + Term uint64 + Vote uint64 + + readStates []ReadState + + // the log + raftLog *raftLog + + maxInflight int + maxMsgSize uint64 + prs map[uint64]*Progress + + state StateType + + votes map[uint64]bool + + msgs []pb.Message + + // the leader id + lead uint64 + // leadTransferee is id of the leader transfer target when its value is not zero. + // Follow the procedure defined in raft thesis 3.10. + leadTransferee uint64 + // New configuration is ignored if there exists unapplied configuration. + pendingConf bool + + readOnly *readOnly + + // number of ticks since it reached last electionTimeout when it is leader + // or candidate. + // number of ticks since it reached last electionTimeout or received a + // valid message from current leader when it is a follower. + electionElapsed int + + // number of ticks since it reached last heartbeatTimeout. + // only leader keeps heartbeatElapsed. + heartbeatElapsed int + + checkQuorum bool + preVote bool + + heartbeatTimeout int + electionTimeout int + // randomizedElectionTimeout is a random number between + // [electiontimeout, 2 * electiontimeout - 1]. It gets reset + // when raft changes its state to follower or candidate. + randomizedElectionTimeout int + + tick func() + step stepFunc + + logger Logger +} + +func newRaft(c *Config) *raft { + if err := c.validate(); err != nil { + panic(err.Error()) + } + raftlog := newLog(c.Storage, c.Logger) + hs, cs, err := c.Storage.InitialState() + if err != nil { + panic(err) // TODO(bdarnell) + } + peers := c.peers + if len(cs.Nodes) > 0 { + if len(peers) > 0 { + // TODO(bdarnell): the peers argument is always nil except in + // tests; the argument should be removed and these tests should be + // updated to specify their nodes through a snapshot. + panic("cannot specify both newRaft(peers) and ConfState.Nodes)") + } + peers = cs.Nodes + } + r := &raft{ + id: c.ID, + lead: None, + raftLog: raftlog, + maxMsgSize: c.MaxSizePerMsg, + maxInflight: c.MaxInflightMsgs, + prs: make(map[uint64]*Progress), + electionTimeout: c.ElectionTick, + heartbeatTimeout: c.HeartbeatTick, + logger: c.Logger, + checkQuorum: c.CheckQuorum, + preVote: c.PreVote, + readOnly: newReadOnly(c.ReadOnlyOption), + } + for _, p := range peers { + r.prs[p] = &Progress{Next: 1, ins: newInflights(r.maxInflight)} + } + if !isHardStateEqual(hs, emptyState) { + r.loadState(hs) + } + if c.Applied > 0 { + raftlog.appliedTo(c.Applied) + } + r.becomeFollower(r.Term, None) + + var nodesStrs []string + for _, n := range r.nodes() { + nodesStrs = append(nodesStrs, fmt.Sprintf("%x", n)) + } + + r.logger.Infof("newRaft %x [peers: [%s], term: %d, commit: %d, applied: %d, lastindex: %d, lastterm: %d]", + r.id, strings.Join(nodesStrs, ","), r.Term, r.raftLog.committed, r.raftLog.applied, r.raftLog.lastIndex(), r.raftLog.lastTerm()) + return r +} + +func (r *raft) hasLeader() bool { return r.lead != None } + +func (r *raft) softState() *SoftState { return &SoftState{Lead: r.lead, RaftState: r.state} } + +func (r *raft) hardState() pb.HardState { + return pb.HardState{ + Term: r.Term, + Vote: r.Vote, + Commit: r.raftLog.committed, + } +} + +func (r *raft) quorum() int { return len(r.prs)/2 + 1 } + +func (r *raft) nodes() []uint64 { + nodes := make([]uint64, 0, len(r.prs)) + for id := range r.prs { + nodes = append(nodes, id) + } + sort.Sort(uint64Slice(nodes)) + return nodes +} + +// send persists state to stable storage and then sends to its mailbox. +func (r *raft) send(m pb.Message) { + m.From = r.id + if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote { + if m.Term == 0 { + // PreVote RPCs are sent at a term other than our actual term, so the code + // that sends these messages is responsible for setting the term. + panic(fmt.Sprintf("term should be set when sending %s", m.Type)) + } + } else { + if m.Term != 0 { + panic(fmt.Sprintf("term should not be set when sending %s (was %d)", m.Type, m.Term)) + } + // do not attach term to MsgProp, MsgReadIndex + // proposals are a way to forward to the leader and + // should be treated as local message. + // MsgReadIndex is also forwarded to leader. + if m.Type != pb.MsgProp && m.Type != pb.MsgReadIndex { + m.Term = r.Term + } + } + r.msgs = append(r.msgs, m) +} + +// sendAppend sends RPC, with entries to the given peer. +func (r *raft) sendAppend(to uint64) { + pr := r.prs[to] + if pr.IsPaused() { + return + } + m := pb.Message{} + m.To = to + + term, errt := r.raftLog.term(pr.Next - 1) + ents, erre := r.raftLog.entries(pr.Next, r.maxMsgSize) + + if errt != nil || erre != nil { // send snapshot if we failed to get term or entries + if !pr.RecentActive { + r.logger.Debugf("ignore sending snapshot to %x since it is not recently active", to) + return + } + + m.Type = pb.MsgSnap + snapshot, err := r.raftLog.snapshot() + if err != nil { + if err == ErrSnapshotTemporarilyUnavailable { + r.logger.Debugf("%x failed to send snapshot to %x because snapshot is temporarily unavailable", r.id, to) + return + } + panic(err) // TODO(bdarnell) + } + if IsEmptySnap(snapshot) { + panic("need non-empty snapshot") + } + m.Snapshot = snapshot + sindex, sterm := snapshot.Metadata.Index, snapshot.Metadata.Term + r.logger.Debugf("%x [firstindex: %d, commit: %d] sent snapshot[index: %d, term: %d] to %x [%s]", + r.id, r.raftLog.firstIndex(), r.raftLog.committed, sindex, sterm, to, pr) + pr.becomeSnapshot(sindex) + r.logger.Debugf("%x paused sending replication messages to %x [%s]", r.id, to, pr) + } else { + m.Type = pb.MsgApp + m.Index = pr.Next - 1 + m.LogTerm = term + m.Entries = ents + m.Commit = r.raftLog.committed + if n := len(m.Entries); n != 0 { + switch pr.State { + // optimistically increase the next when in ProgressStateReplicate + case ProgressStateReplicate: + last := m.Entries[n-1].Index + pr.optimisticUpdate(last) + pr.ins.add(last) + case ProgressStateProbe: + pr.pause() + default: + r.logger.Panicf("%x is sending append in unhandled state %s", r.id, pr.State) + } + } + } + r.send(m) +} + +// sendHeartbeat sends an empty MsgApp +func (r *raft) sendHeartbeat(to uint64, ctx []byte) { + // Attach the commit as min(to.matched, r.committed). + // When the leader sends out heartbeat message, + // the receiver(follower) might not be matched with the leader + // or it might not have all the committed entries. + // The leader MUST NOT forward the follower's commit to + // an unmatched index. + commit := min(r.prs[to].Match, r.raftLog.committed) + m := pb.Message{ + To: to, + Type: pb.MsgHeartbeat, + Commit: commit, + Context: ctx, + } + + r.send(m) +} + +// bcastAppend sends RPC, with entries to all peers that are not up-to-date +// according to the progress recorded in r.prs. +func (r *raft) bcastAppend() { + for id := range r.prs { + if id == r.id { + continue + } + r.sendAppend(id) + } +} + +// bcastHeartbeat sends RPC, without entries to all the peers. +func (r *raft) bcastHeartbeat() { + lastCtx := r.readOnly.lastPendingRequestCtx() + if len(lastCtx) == 0 { + r.bcastHeartbeatWithCtx(nil) + } else { + r.bcastHeartbeatWithCtx([]byte(lastCtx)) + } +} + +func (r *raft) bcastHeartbeatWithCtx(ctx []byte) { + for id := range r.prs { + if id == r.id { + continue + } + r.sendHeartbeat(id, ctx) + } +} + +// maybeCommit attempts to advance the commit index. Returns true if +// the commit index changed (in which case the caller should call +// r.bcastAppend). +func (r *raft) maybeCommit() bool { + // TODO(bmizerany): optimize.. Currently naive + mis := make(uint64Slice, 0, len(r.prs)) + for id := range r.prs { + mis = append(mis, r.prs[id].Match) + } + sort.Sort(sort.Reverse(mis)) + mci := mis[r.quorum()-1] + return r.raftLog.maybeCommit(mci, r.Term) +} + +func (r *raft) reset(term uint64) { + if r.Term != term { + r.Term = term + r.Vote = None + } + r.lead = None + + r.electionElapsed = 0 + r.heartbeatElapsed = 0 + r.resetRandomizedElectionTimeout() + + r.abortLeaderTransfer() + + r.votes = make(map[uint64]bool) + for id := range r.prs { + r.prs[id] = &Progress{Next: r.raftLog.lastIndex() + 1, ins: newInflights(r.maxInflight)} + if id == r.id { + r.prs[id].Match = r.raftLog.lastIndex() + } + } + r.pendingConf = false + r.readOnly = newReadOnly(r.readOnly.option) +} + +func (r *raft) appendEntry(es ...pb.Entry) { + li := r.raftLog.lastIndex() + for i := range es { + es[i].Term = r.Term + es[i].Index = li + 1 + uint64(i) + } + r.raftLog.append(es...) + r.prs[r.id].maybeUpdate(r.raftLog.lastIndex()) + // Regardless of maybeCommit's return, our caller will call bcastAppend. + r.maybeCommit() +} + +// tickElection is run by followers and candidates after r.electionTimeout. +func (r *raft) tickElection() { + r.electionElapsed++ + + if r.promotable() && r.pastElectionTimeout() { + r.electionElapsed = 0 + r.Step(pb.Message{From: r.id, Type: pb.MsgHup}) + } +} + +// tickHeartbeat is run by leaders to send a MsgBeat after r.heartbeatTimeout. +func (r *raft) tickHeartbeat() { + r.heartbeatElapsed++ + r.electionElapsed++ + + if r.electionElapsed >= r.electionTimeout { + r.electionElapsed = 0 + if r.checkQuorum { + r.Step(pb.Message{From: r.id, Type: pb.MsgCheckQuorum}) + } + // If current leader cannot transfer leadership in electionTimeout, it becomes leader again. + if r.state == StateLeader && r.leadTransferee != None { + r.abortLeaderTransfer() + } + } + + if r.state != StateLeader { + return + } + + if r.heartbeatElapsed >= r.heartbeatTimeout { + r.heartbeatElapsed = 0 + r.Step(pb.Message{From: r.id, Type: pb.MsgBeat}) + } +} + +func (r *raft) becomeFollower(term uint64, lead uint64) { + r.step = stepFollower + r.reset(term) + r.tick = r.tickElection + r.lead = lead + r.state = StateFollower + r.logger.Infof("%x became follower at term %d", r.id, r.Term) +} + +func (r *raft) becomeCandidate() { + // TODO(xiangli) remove the panic when the raft implementation is stable + if r.state == StateLeader { + panic("invalid transition [leader -> candidate]") + } + r.step = stepCandidate + r.reset(r.Term + 1) + r.tick = r.tickElection + r.Vote = r.id + r.state = StateCandidate + r.logger.Infof("%x became candidate at term %d", r.id, r.Term) +} + +func (r *raft) becomePreCandidate() { + // TODO(xiangli) remove the panic when the raft implementation is stable + if r.state == StateLeader { + panic("invalid transition [leader -> pre-candidate]") + } + // Becoming a pre-candidate changes our step functions and state, + // but doesn't change anything else. In particular it does not increase + // r.Term or change r.Vote. + r.step = stepCandidate + r.tick = r.tickElection + r.state = StatePreCandidate + r.logger.Infof("%x became pre-candidate at term %d", r.id, r.Term) +} + +func (r *raft) becomeLeader() { + // TODO(xiangli) remove the panic when the raft implementation is stable + if r.state == StateFollower { + panic("invalid transition [follower -> leader]") + } + r.step = stepLeader + r.reset(r.Term) + r.tick = r.tickHeartbeat + r.lead = r.id + r.state = StateLeader + ents, err := r.raftLog.entries(r.raftLog.committed+1, noLimit) + if err != nil { + r.logger.Panicf("unexpected error getting uncommitted entries (%v)", err) + } + + nconf := numOfPendingConf(ents) + if nconf > 1 { + panic("unexpected multiple uncommitted config entry") + } + if nconf == 1 { + r.pendingConf = true + } + + r.appendEntry(pb.Entry{Data: nil}) + r.logger.Infof("%x became leader at term %d", r.id, r.Term) +} + +func (r *raft) campaign(t CampaignType) { + var term uint64 + var voteMsg pb.MessageType + if t == campaignPreElection { + r.becomePreCandidate() + voteMsg = pb.MsgPreVote + // PreVote RPCs are sent for the next term before we've incremented r.Term. + term = r.Term + 1 + } else { + r.becomeCandidate() + voteMsg = pb.MsgVote + term = r.Term + } + if r.quorum() == r.poll(r.id, voteRespMsgType(voteMsg), true) { + // We won the election after voting for ourselves (which must mean that + // this is a single-node cluster). Advance to the next state. + if t == campaignPreElection { + r.campaign(campaignElection) + } else { + r.becomeLeader() + } + return + } + for id := range r.prs { + if id == r.id { + continue + } + r.logger.Infof("%x [logterm: %d, index: %d] sent %s request to %x at term %d", + r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), voteMsg, id, r.Term) + + var ctx []byte + if t == campaignTransfer { + ctx = []byte(t) + } + r.send(pb.Message{Term: term, To: id, Type: voteMsg, Index: r.raftLog.lastIndex(), LogTerm: r.raftLog.lastTerm(), Context: ctx}) + } +} + +func (r *raft) poll(id uint64, t pb.MessageType, v bool) (granted int) { + if v { + r.logger.Infof("%x received %s from %x at term %d", r.id, t, id, r.Term) + } else { + r.logger.Infof("%x received %s rejection from %x at term %d", r.id, t, id, r.Term) + } + if _, ok := r.votes[id]; !ok { + r.votes[id] = v + } + for _, vv := range r.votes { + if vv { + granted++ + } + } + return granted +} + +func (r *raft) Step(m pb.Message) error { + // Handle the message term, which may result in our stepping down to a follower. + switch { + case m.Term == 0: + // local message + case m.Term > r.Term: + lead := m.From + if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote { + force := bytes.Equal(m.Context, []byte(campaignTransfer)) + inLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout + if !force && inLease { + // If a server receives a RequestVote request within the minimum election timeout + // of hearing from a current leader, it does not update its term or grant its vote + r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] ignored %s from %x [logterm: %d, index: %d] at term %d: lease is not expired (remaining ticks: %d)", + r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term, r.electionTimeout-r.electionElapsed) + return nil + } + lead = None + } + switch { + case m.Type == pb.MsgPreVote: + // Never change our term in response to a PreVote + case m.Type == pb.MsgPreVoteResp && !m.Reject: + // We send pre-vote requests with a term in our future. If the + // pre-vote is granted, we will increment our term when we get a + // quorum. If it is not, the term comes from the node that + // rejected our vote so we should become a follower at the new + // term. + default: + r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d]", + r.id, r.Term, m.Type, m.From, m.Term) + r.becomeFollower(m.Term, lead) + } + + case m.Term < r.Term: + if r.checkQuorum && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) { + // We have received messages from a leader at a lower term. It is possible + // that these messages were simply delayed in the network, but this could + // also mean that this node has advanced its term number during a network + // partition, and it is now unable to either win an election or to rejoin + // the majority on the old term. If checkQuorum is false, this will be + // handled by incrementing term numbers in response to MsgVote with a + // higher term, but if checkQuorum is true we may not advance the term on + // MsgVote and must generate other messages to advance the term. The net + // result of these two features is to minimize the disruption caused by + // nodes that have been removed from the cluster's configuration: a + // removed node will send MsgVotes (or MsgPreVotes) which will be ignored, + // but it will not receive MsgApp or MsgHeartbeat, so it will not create + // disruptive term increases + r.send(pb.Message{To: m.From, Type: pb.MsgAppResp}) + } else { + // ignore other cases + r.logger.Infof("%x [term: %d] ignored a %s message with lower term from %x [term: %d]", + r.id, r.Term, m.Type, m.From, m.Term) + } + return nil + } + + switch m.Type { + case pb.MsgHup: + if r.state != StateLeader { + ents, err := r.raftLog.slice(r.raftLog.applied+1, r.raftLog.committed+1, noLimit) + if err != nil { + r.logger.Panicf("unexpected error getting unapplied entries (%v)", err) + } + if n := numOfPendingConf(ents); n != 0 && r.raftLog.committed > r.raftLog.applied { + r.logger.Warningf("%x cannot campaign at term %d since there are still %d pending configuration changes to apply", r.id, r.Term, n) + return nil + } + + r.logger.Infof("%x is starting a new election at term %d", r.id, r.Term) + if r.preVote { + r.campaign(campaignPreElection) + } else { + r.campaign(campaignElection) + } + } else { + r.logger.Debugf("%x ignoring MsgHup because already leader", r.id) + } + + case pb.MsgVote, pb.MsgPreVote: + // The m.Term > r.Term clause is for MsgPreVote. For MsgVote m.Term should + // always equal r.Term. + if (r.Vote == None || m.Term > r.Term || r.Vote == m.From) && r.raftLog.isUpToDate(m.Index, m.LogTerm) { + r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] cast %s for %x [logterm: %d, index: %d] at term %d", + r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term) + r.send(pb.Message{To: m.From, Type: voteRespMsgType(m.Type)}) + if m.Type == pb.MsgVote { + // Only record real votes. + r.electionElapsed = 0 + r.Vote = m.From + } + } else { + r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] rejected %s from %x [logterm: %d, index: %d] at term %d", + r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term) + r.send(pb.Message{To: m.From, Type: voteRespMsgType(m.Type), Reject: true}) + } + + default: + r.step(r, m) + } + return nil +} + +type stepFunc func(r *raft, m pb.Message) + +func stepLeader(r *raft, m pb.Message) { + // These message types do not require any progress for m.From. + switch m.Type { + case pb.MsgBeat: + r.bcastHeartbeat() + return + case pb.MsgCheckQuorum: + if !r.checkQuorumActive() { + r.logger.Warningf("%x stepped down to follower since quorum is not active", r.id) + r.becomeFollower(r.Term, None) + } + return + case pb.MsgProp: + if len(m.Entries) == 0 { + r.logger.Panicf("%x stepped empty MsgProp", r.id) + } + if _, ok := r.prs[r.id]; !ok { + // If we are not currently a member of the range (i.e. this node + // was removed from the configuration while serving as leader), + // drop any new proposals. + return + } + if r.leadTransferee != None { + r.logger.Debugf("%x [term %d] transfer leadership to %x is in progress; dropping proposal", r.id, r.Term, r.leadTransferee) + return + } + + for i, e := range m.Entries { + if e.Type == pb.EntryConfChange { + if r.pendingConf { + r.logger.Infof("propose conf %s ignored since pending unapplied configuration", e.String()) + m.Entries[i] = pb.Entry{Type: pb.EntryNormal} + } + r.pendingConf = true + } + } + r.appendEntry(m.Entries...) + r.bcastAppend() + return + case pb.MsgReadIndex: + if r.quorum() > 1 { + // thinking: use an interally defined context instead of the user given context. + // We can express this in terms of the term and index instead of a user-supplied value. + // This would allow multiple reads to piggyback on the same message. + switch r.readOnly.option { + case ReadOnlySafe: + r.readOnly.addRequest(r.raftLog.committed, m) + r.bcastHeartbeatWithCtx(m.Entries[0].Data) + case ReadOnlyLeaseBased: + var ri uint64 + if r.checkQuorum { + ri = r.raftLog.committed + } + if m.From == None || m.From == r.id { // from local member + r.readStates = append(r.readStates, ReadState{Index: r.raftLog.committed, RequestCtx: m.Entries[0].Data}) + } else { + r.send(pb.Message{To: m.From, Type: pb.MsgReadIndexResp, Index: ri, Entries: m.Entries}) + } + } + } else { + r.readStates = append(r.readStates, ReadState{Index: r.raftLog.committed, RequestCtx: m.Entries[0].Data}) + } + + return + } + + // All other message types require a progress for m.From (pr). + pr, prOk := r.prs[m.From] + if !prOk { + r.logger.Debugf("%x no progress available for %x", r.id, m.From) + return + } + switch m.Type { + case pb.MsgAppResp: + pr.RecentActive = true + + if m.Reject { + r.logger.Debugf("%x received msgApp rejection(lastindex: %d) from %x for index %d", + r.id, m.RejectHint, m.From, m.Index) + if pr.maybeDecrTo(m.Index, m.RejectHint) { + r.logger.Debugf("%x decreased progress of %x to [%s]", r.id, m.From, pr) + if pr.State == ProgressStateReplicate { + pr.becomeProbe() + } + r.sendAppend(m.From) + } + } else { + oldPaused := pr.IsPaused() + if pr.maybeUpdate(m.Index) { + switch { + case pr.State == ProgressStateProbe: + pr.becomeReplicate() + case pr.State == ProgressStateSnapshot && pr.needSnapshotAbort(): + r.logger.Debugf("%x snapshot aborted, resumed sending replication messages to %x [%s]", r.id, m.From, pr) + pr.becomeProbe() + case pr.State == ProgressStateReplicate: + pr.ins.freeTo(m.Index) + } + + if r.maybeCommit() { + r.bcastAppend() + } else if oldPaused { + // update() reset the wait state on this node. If we had delayed sending + // an update before, send it now. + r.sendAppend(m.From) + } + // Transfer leadership is in progress. + if m.From == r.leadTransferee && pr.Match == r.raftLog.lastIndex() { + r.logger.Infof("%x sent MsgTimeoutNow to %x after received MsgAppResp", r.id, m.From) + r.sendTimeoutNow(m.From) + } + } + } + case pb.MsgHeartbeatResp: + pr.RecentActive = true + pr.resume() + + // free one slot for the full inflights window to allow progress. + if pr.State == ProgressStateReplicate && pr.ins.full() { + pr.ins.freeFirstOne() + } + if pr.Match < r.raftLog.lastIndex() { + r.sendAppend(m.From) + } + + if r.readOnly.option != ReadOnlySafe || len(m.Context) == 0 { + return + } + + ackCount := r.readOnly.recvAck(m) + if ackCount < r.quorum() { + return + } + + rss := r.readOnly.advance(m) + for _, rs := range rss { + req := rs.req + if req.From == None || req.From == r.id { // from local member + r.readStates = append(r.readStates, ReadState{Index: rs.index, RequestCtx: req.Entries[0].Data}) + } else { + r.send(pb.Message{To: req.From, Type: pb.MsgReadIndexResp, Index: rs.index, Entries: req.Entries}) + } + } + case pb.MsgSnapStatus: + if pr.State != ProgressStateSnapshot { + return + } + if !m.Reject { + pr.becomeProbe() + r.logger.Debugf("%x snapshot succeeded, resumed sending replication messages to %x [%s]", r.id, m.From, pr) + } else { + pr.snapshotFailure() + pr.becomeProbe() + r.logger.Debugf("%x snapshot failed, resumed sending replication messages to %x [%s]", r.id, m.From, pr) + } + // If snapshot finish, wait for the msgAppResp from the remote node before sending + // out the next msgApp. + // If snapshot failure, wait for a heartbeat interval before next try + pr.pause() + case pb.MsgUnreachable: + // During optimistic replication, if the remote becomes unreachable, + // there is huge probability that a MsgApp is lost. + if pr.State == ProgressStateReplicate { + pr.becomeProbe() + } + r.logger.Debugf("%x failed to send message to %x because it is unreachable [%s]", r.id, m.From, pr) + case pb.MsgTransferLeader: + leadTransferee := m.From + lastLeadTransferee := r.leadTransferee + if lastLeadTransferee != None { + if lastLeadTransferee == leadTransferee { + r.logger.Infof("%x [term %d] transfer leadership to %x is in progress, ignores request to same node %x", + r.id, r.Term, leadTransferee, leadTransferee) + return + } + r.abortLeaderTransfer() + r.logger.Infof("%x [term %d] abort previous transferring leadership to %x", r.id, r.Term, lastLeadTransferee) + } + if leadTransferee == r.id { + r.logger.Debugf("%x is already leader. Ignored transferring leadership to self", r.id) + return + } + // Transfer leadership to third party. + r.logger.Infof("%x [term %d] starts to transfer leadership to %x", r.id, r.Term, leadTransferee) + // Transfer leadership should be finished in one electionTimeout, so reset r.electionElapsed. + r.electionElapsed = 0 + r.leadTransferee = leadTransferee + if pr.Match == r.raftLog.lastIndex() { + r.sendTimeoutNow(leadTransferee) + r.logger.Infof("%x sends MsgTimeoutNow to %x immediately as %x already has up-to-date log", r.id, leadTransferee, leadTransferee) + } else { + r.sendAppend(leadTransferee) + } + } +} + +// stepCandidate is shared by StateCandidate and StatePreCandidate; the difference is +// whether they respond to MsgVoteResp or MsgPreVoteResp. +func stepCandidate(r *raft, m pb.Message) { + // Only handle vote responses corresponding to our candidacy (while in + // StateCandidate, we may get stale MsgPreVoteResp messages in this term from + // our pre-candidate state). + var myVoteRespType pb.MessageType + if r.state == StatePreCandidate { + myVoteRespType = pb.MsgPreVoteResp + } else { + myVoteRespType = pb.MsgVoteResp + } + switch m.Type { + case pb.MsgProp: + r.logger.Infof("%x no leader at term %d; dropping proposal", r.id, r.Term) + return + case pb.MsgApp: + r.becomeFollower(r.Term, m.From) + r.handleAppendEntries(m) + case pb.MsgHeartbeat: + r.becomeFollower(r.Term, m.From) + r.handleHeartbeat(m) + case pb.MsgSnap: + r.becomeFollower(m.Term, m.From) + r.handleSnapshot(m) + case myVoteRespType: + gr := r.poll(m.From, m.Type, !m.Reject) + r.logger.Infof("%x [quorum:%d] has received %d %s votes and %d vote rejections", r.id, r.quorum(), gr, m.Type, len(r.votes)-gr) + switch r.quorum() { + case gr: + if r.state == StatePreCandidate { + r.campaign(campaignElection) + } else { + r.becomeLeader() + r.bcastAppend() + } + case len(r.votes) - gr: + r.becomeFollower(r.Term, None) + } + case pb.MsgTimeoutNow: + r.logger.Debugf("%x [term %d state %v] ignored MsgTimeoutNow from %x", r.id, r.Term, r.state, m.From) + } +} + +func stepFollower(r *raft, m pb.Message) { + switch m.Type { + case pb.MsgProp: + if r.lead == None { + r.logger.Infof("%x no leader at term %d; dropping proposal", r.id, r.Term) + return + } + m.To = r.lead + r.send(m) + case pb.MsgApp: + r.electionElapsed = 0 + r.lead = m.From + r.handleAppendEntries(m) + case pb.MsgHeartbeat: + r.electionElapsed = 0 + r.lead = m.From + r.handleHeartbeat(m) + case pb.MsgSnap: + r.electionElapsed = 0 + r.lead = m.From + r.handleSnapshot(m) + case pb.MsgTransferLeader: + if r.lead == None { + r.logger.Infof("%x no leader at term %d; dropping leader transfer msg", r.id, r.Term) + return + } + m.To = r.lead + r.send(m) + case pb.MsgTimeoutNow: + if r.promotable() { + r.logger.Infof("%x [term %d] received MsgTimeoutNow from %x and starts an election to get leadership.", r.id, r.Term, m.From) + // Leadership transfers never use pre-vote even if r.preVote is true; we + // know we are not recovering from a partition so there is no need for the + // extra round trip. + r.campaign(campaignTransfer) + } else { + r.logger.Infof("%x received MsgTimeoutNow from %x but is not promotable", r.id, m.From) + } + case pb.MsgReadIndex: + if r.lead == None { + r.logger.Infof("%x no leader at term %d; dropping index reading msg", r.id, r.Term) + return + } + m.To = r.lead + r.send(m) + case pb.MsgReadIndexResp: + if len(m.Entries) != 1 { + r.logger.Errorf("%x invalid format of MsgReadIndexResp from %x, entries count: %d", r.id, m.From, len(m.Entries)) + return + } + r.readStates = append(r.readStates, ReadState{Index: m.Index, RequestCtx: m.Entries[0].Data}) + } +} + +func (r *raft) handleAppendEntries(m pb.Message) { + if m.Index < r.raftLog.committed { + r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: r.raftLog.committed}) + return + } + + if mlastIndex, ok := r.raftLog.maybeAppend(m.Index, m.LogTerm, m.Commit, m.Entries...); ok { + r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: mlastIndex}) + } else { + r.logger.Debugf("%x [logterm: %d, index: %d] rejected msgApp [logterm: %d, index: %d] from %x", + r.id, r.raftLog.zeroTermOnErrCompacted(r.raftLog.term(m.Index)), m.Index, m.LogTerm, m.Index, m.From) + r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: m.Index, Reject: true, RejectHint: r.raftLog.lastIndex()}) + } +} + +func (r *raft) handleHeartbeat(m pb.Message) { + r.raftLog.commitTo(m.Commit) + r.send(pb.Message{To: m.From, Type: pb.MsgHeartbeatResp, Context: m.Context}) +} + +func (r *raft) handleSnapshot(m pb.Message) { + sindex, sterm := m.Snapshot.Metadata.Index, m.Snapshot.Metadata.Term + if r.restore(m.Snapshot) { + r.logger.Infof("%x [commit: %d] restored snapshot [index: %d, term: %d]", + r.id, r.raftLog.committed, sindex, sterm) + r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: r.raftLog.lastIndex()}) + } else { + r.logger.Infof("%x [commit: %d] ignored snapshot [index: %d, term: %d]", + r.id, r.raftLog.committed, sindex, sterm) + r.send(pb.Message{To: m.From, Type: pb.MsgAppResp, Index: r.raftLog.committed}) + } +} + +// restore recovers the state machine from a snapshot. It restores the log and the +// configuration of state machine. +func (r *raft) restore(s pb.Snapshot) bool { + if s.Metadata.Index <= r.raftLog.committed { + return false + } + if r.raftLog.matchTerm(s.Metadata.Index, s.Metadata.Term) { + r.logger.Infof("%x [commit: %d, lastindex: %d, lastterm: %d] fast-forwarded commit to snapshot [index: %d, term: %d]", + r.id, r.raftLog.committed, r.raftLog.lastIndex(), r.raftLog.lastTerm(), s.Metadata.Index, s.Metadata.Term) + r.raftLog.commitTo(s.Metadata.Index) + return false + } + + r.logger.Infof("%x [commit: %d, lastindex: %d, lastterm: %d] starts to restore snapshot [index: %d, term: %d]", + r.id, r.raftLog.committed, r.raftLog.lastIndex(), r.raftLog.lastTerm(), s.Metadata.Index, s.Metadata.Term) + + r.raftLog.restore(s) + r.prs = make(map[uint64]*Progress) + for _, n := range s.Metadata.ConfState.Nodes { + match, next := uint64(0), r.raftLog.lastIndex()+1 + if n == r.id { + match = next - 1 + } + r.setProgress(n, match, next) + r.logger.Infof("%x restored progress of %x [%s]", r.id, n, r.prs[n]) + } + return true +} + +// promotable indicates whether state machine can be promoted to leader, +// which is true when its own id is in progress list. +func (r *raft) promotable() bool { + _, ok := r.prs[r.id] + return ok +} + +func (r *raft) addNode(id uint64) { + r.pendingConf = false + if _, ok := r.prs[id]; ok { + // Ignore any redundant addNode calls (which can happen because the + // initial bootstrapping entries are applied twice). + return + } + + r.setProgress(id, 0, r.raftLog.lastIndex()+1) + // When a node is first added, we should mark it as recently active. + // Otherwise, CheckQuorum may cause us to step down if it is invoked + // before the added node has a chance to communicate with us. + r.prs[id].RecentActive = true +} + +func (r *raft) removeNode(id uint64) { + r.delProgress(id) + r.pendingConf = false + + // do not try to commit or abort transferring if there is no nodes in the cluster. + if len(r.prs) == 0 { + return + } + + // The quorum size is now smaller, so see if any pending entries can + // be committed. + if r.maybeCommit() { + r.bcastAppend() + } + // If the removed node is the leadTransferee, then abort the leadership transferring. + if r.state == StateLeader && r.leadTransferee == id { + r.abortLeaderTransfer() + } +} + +func (r *raft) resetPendingConf() { r.pendingConf = false } + +func (r *raft) setProgress(id, match, next uint64) { + r.prs[id] = &Progress{Next: next, Match: match, ins: newInflights(r.maxInflight)} +} + +func (r *raft) delProgress(id uint64) { + delete(r.prs, id) +} + +func (r *raft) loadState(state pb.HardState) { + if state.Commit < r.raftLog.committed || state.Commit > r.raftLog.lastIndex() { + r.logger.Panicf("%x state.commit %d is out of range [%d, %d]", r.id, state.Commit, r.raftLog.committed, r.raftLog.lastIndex()) + } + r.raftLog.committed = state.Commit + r.Term = state.Term + r.Vote = state.Vote +} + +// pastElectionTimeout returns true iff r.electionElapsed is greater +// than or equal to the randomized election timeout in +// [electiontimeout, 2 * electiontimeout - 1]. +func (r *raft) pastElectionTimeout() bool { + return r.electionElapsed >= r.randomizedElectionTimeout +} + +func (r *raft) resetRandomizedElectionTimeout() { + r.randomizedElectionTimeout = r.electionTimeout + globalRand.Intn(r.electionTimeout) +} + +// checkQuorumActive returns true if the quorum is active from +// the view of the local raft state machine. Otherwise, it returns +// false. +// checkQuorumActive also resets all RecentActive to false. +func (r *raft) checkQuorumActive() bool { + var act int + + for id := range r.prs { + if id == r.id { // self is always active + act++ + continue + } + + if r.prs[id].RecentActive { + act++ + } + + r.prs[id].RecentActive = false + } + + return act >= r.quorum() +} + +func (r *raft) sendTimeoutNow(to uint64) { + r.send(pb.Message{To: to, Type: pb.MsgTimeoutNow}) +} + +func (r *raft) abortLeaderTransfer() { + r.leadTransferee = None +} + +func numOfPendingConf(ents []pb.Entry) int { + n := 0 + for i := range ents { + if ents[i].Type == pb.EntryConfChange { + n++ + } + } + return n +} diff --git a/vendor/github.com/coreos/etcd/raft/raftpb/raft.pb.go b/vendor/github.com/coreos/etcd/raft/raftpb/raft.pb.go new file mode 100644 index 0000000..86ad312 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/raftpb/raft.pb.go @@ -0,0 +1,1900 @@ +// Code generated by protoc-gen-gogo. +// source: raft.proto +// DO NOT EDIT! + +/* + Package raftpb is a generated protocol buffer package. + + It is generated from these files: + raft.proto + + It has these top-level messages: + Entry + SnapshotMetadata + Snapshot + Message + HardState + ConfState + ConfChange +*/ +package raftpb + +import ( + "fmt" + + proto "github.com/golang/protobuf/proto" + + math "math" + + io "io" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type EntryType int32 + +const ( + EntryNormal EntryType = 0 + EntryConfChange EntryType = 1 +) + +var EntryType_name = map[int32]string{ + 0: "EntryNormal", + 1: "EntryConfChange", +} +var EntryType_value = map[string]int32{ + "EntryNormal": 0, + "EntryConfChange": 1, +} + +func (x EntryType) Enum() *EntryType { + p := new(EntryType) + *p = x + return p +} +func (x EntryType) String() string { + return proto.EnumName(EntryType_name, int32(x)) +} +func (x *EntryType) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(EntryType_value, data, "EntryType") + if err != nil { + return err + } + *x = EntryType(value) + return nil +} +func (EntryType) EnumDescriptor() ([]byte, []int) { return fileDescriptorRaft, []int{0} } + +type MessageType int32 + +const ( + MsgHup MessageType = 0 + MsgBeat MessageType = 1 + MsgProp MessageType = 2 + MsgApp MessageType = 3 + MsgAppResp MessageType = 4 + MsgVote MessageType = 5 + MsgVoteResp MessageType = 6 + MsgSnap MessageType = 7 + MsgHeartbeat MessageType = 8 + MsgHeartbeatResp MessageType = 9 + MsgUnreachable MessageType = 10 + MsgSnapStatus MessageType = 11 + MsgCheckQuorum MessageType = 12 + MsgTransferLeader MessageType = 13 + MsgTimeoutNow MessageType = 14 + MsgReadIndex MessageType = 15 + MsgReadIndexResp MessageType = 16 + MsgPreVote MessageType = 17 + MsgPreVoteResp MessageType = 18 +) + +var MessageType_name = map[int32]string{ + 0: "MsgHup", + 1: "MsgBeat", + 2: "MsgProp", + 3: "MsgApp", + 4: "MsgAppResp", + 5: "MsgVote", + 6: "MsgVoteResp", + 7: "MsgSnap", + 8: "MsgHeartbeat", + 9: "MsgHeartbeatResp", + 10: "MsgUnreachable", + 11: "MsgSnapStatus", + 12: "MsgCheckQuorum", + 13: "MsgTransferLeader", + 14: "MsgTimeoutNow", + 15: "MsgReadIndex", + 16: "MsgReadIndexResp", + 17: "MsgPreVote", + 18: "MsgPreVoteResp", +} +var MessageType_value = map[string]int32{ + "MsgHup": 0, + "MsgBeat": 1, + "MsgProp": 2, + "MsgApp": 3, + "MsgAppResp": 4, + "MsgVote": 5, + "MsgVoteResp": 6, + "MsgSnap": 7, + "MsgHeartbeat": 8, + "MsgHeartbeatResp": 9, + "MsgUnreachable": 10, + "MsgSnapStatus": 11, + "MsgCheckQuorum": 12, + "MsgTransferLeader": 13, + "MsgTimeoutNow": 14, + "MsgReadIndex": 15, + "MsgReadIndexResp": 16, + "MsgPreVote": 17, + "MsgPreVoteResp": 18, +} + +func (x MessageType) Enum() *MessageType { + p := new(MessageType) + *p = x + return p +} +func (x MessageType) String() string { + return proto.EnumName(MessageType_name, int32(x)) +} +func (x *MessageType) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(MessageType_value, data, "MessageType") + if err != nil { + return err + } + *x = MessageType(value) + return nil +} +func (MessageType) EnumDescriptor() ([]byte, []int) { return fileDescriptorRaft, []int{1} } + +type ConfChangeType int32 + +const ( + ConfChangeAddNode ConfChangeType = 0 + ConfChangeRemoveNode ConfChangeType = 1 + ConfChangeUpdateNode ConfChangeType = 2 +) + +var ConfChangeType_name = map[int32]string{ + 0: "ConfChangeAddNode", + 1: "ConfChangeRemoveNode", + 2: "ConfChangeUpdateNode", +} +var ConfChangeType_value = map[string]int32{ + "ConfChangeAddNode": 0, + "ConfChangeRemoveNode": 1, + "ConfChangeUpdateNode": 2, +} + +func (x ConfChangeType) Enum() *ConfChangeType { + p := new(ConfChangeType) + *p = x + return p +} +func (x ConfChangeType) String() string { + return proto.EnumName(ConfChangeType_name, int32(x)) +} +func (x *ConfChangeType) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(ConfChangeType_value, data, "ConfChangeType") + if err != nil { + return err + } + *x = ConfChangeType(value) + return nil +} +func (ConfChangeType) EnumDescriptor() ([]byte, []int) { return fileDescriptorRaft, []int{2} } + +type Entry struct { + Term uint64 `protobuf:"varint,2,opt,name=Term" json:"Term"` + Index uint64 `protobuf:"varint,3,opt,name=Index" json:"Index"` + Type EntryType `protobuf:"varint,1,opt,name=Type,enum=raftpb.EntryType" json:"Type"` + Data []byte `protobuf:"bytes,4,opt,name=Data" json:"Data,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *Entry) Reset() { *m = Entry{} } +func (m *Entry) String() string { return proto.CompactTextString(m) } +func (*Entry) ProtoMessage() {} +func (*Entry) Descriptor() ([]byte, []int) { return fileDescriptorRaft, []int{0} } + +type SnapshotMetadata struct { + ConfState ConfState `protobuf:"bytes,1,opt,name=conf_state,json=confState" json:"conf_state"` + Index uint64 `protobuf:"varint,2,opt,name=index" json:"index"` + Term uint64 `protobuf:"varint,3,opt,name=term" json:"term"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *SnapshotMetadata) Reset() { *m = SnapshotMetadata{} } +func (m *SnapshotMetadata) String() string { return proto.CompactTextString(m) } +func (*SnapshotMetadata) ProtoMessage() {} +func (*SnapshotMetadata) Descriptor() ([]byte, []int) { return fileDescriptorRaft, []int{1} } + +type Snapshot struct { + Data []byte `protobuf:"bytes,1,opt,name=data" json:"data,omitempty"` + Metadata SnapshotMetadata `protobuf:"bytes,2,opt,name=metadata" json:"metadata"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *Snapshot) Reset() { *m = Snapshot{} } +func (m *Snapshot) String() string { return proto.CompactTextString(m) } +func (*Snapshot) ProtoMessage() {} +func (*Snapshot) Descriptor() ([]byte, []int) { return fileDescriptorRaft, []int{2} } + +type Message struct { + Type MessageType `protobuf:"varint,1,opt,name=type,enum=raftpb.MessageType" json:"type"` + To uint64 `protobuf:"varint,2,opt,name=to" json:"to"` + From uint64 `protobuf:"varint,3,opt,name=from" json:"from"` + Term uint64 `protobuf:"varint,4,opt,name=term" json:"term"` + LogTerm uint64 `protobuf:"varint,5,opt,name=logTerm" json:"logTerm"` + Index uint64 `protobuf:"varint,6,opt,name=index" json:"index"` + Entries []Entry `protobuf:"bytes,7,rep,name=entries" json:"entries"` + Commit uint64 `protobuf:"varint,8,opt,name=commit" json:"commit"` + Snapshot Snapshot `protobuf:"bytes,9,opt,name=snapshot" json:"snapshot"` + Reject bool `protobuf:"varint,10,opt,name=reject" json:"reject"` + RejectHint uint64 `protobuf:"varint,11,opt,name=rejectHint" json:"rejectHint"` + Context []byte `protobuf:"bytes,12,opt,name=context" json:"context,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *Message) Reset() { *m = Message{} } +func (m *Message) String() string { return proto.CompactTextString(m) } +func (*Message) ProtoMessage() {} +func (*Message) Descriptor() ([]byte, []int) { return fileDescriptorRaft, []int{3} } + +type HardState struct { + Term uint64 `protobuf:"varint,1,opt,name=term" json:"term"` + Vote uint64 `protobuf:"varint,2,opt,name=vote" json:"vote"` + Commit uint64 `protobuf:"varint,3,opt,name=commit" json:"commit"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *HardState) Reset() { *m = HardState{} } +func (m *HardState) String() string { return proto.CompactTextString(m) } +func (*HardState) ProtoMessage() {} +func (*HardState) Descriptor() ([]byte, []int) { return fileDescriptorRaft, []int{4} } + +type ConfState struct { + Nodes []uint64 `protobuf:"varint,1,rep,name=nodes" json:"nodes,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ConfState) Reset() { *m = ConfState{} } +func (m *ConfState) String() string { return proto.CompactTextString(m) } +func (*ConfState) ProtoMessage() {} +func (*ConfState) Descriptor() ([]byte, []int) { return fileDescriptorRaft, []int{5} } + +type ConfChange struct { + ID uint64 `protobuf:"varint,1,opt,name=ID" json:"ID"` + Type ConfChangeType `protobuf:"varint,2,opt,name=Type,enum=raftpb.ConfChangeType" json:"Type"` + NodeID uint64 `protobuf:"varint,3,opt,name=NodeID" json:"NodeID"` + Context []byte `protobuf:"bytes,4,opt,name=Context" json:"Context,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ConfChange) Reset() { *m = ConfChange{} } +func (m *ConfChange) String() string { return proto.CompactTextString(m) } +func (*ConfChange) ProtoMessage() {} +func (*ConfChange) Descriptor() ([]byte, []int) { return fileDescriptorRaft, []int{6} } + +func init() { + proto.RegisterType((*Entry)(nil), "raftpb.Entry") + proto.RegisterType((*SnapshotMetadata)(nil), "raftpb.SnapshotMetadata") + proto.RegisterType((*Snapshot)(nil), "raftpb.Snapshot") + proto.RegisterType((*Message)(nil), "raftpb.Message") + proto.RegisterType((*HardState)(nil), "raftpb.HardState") + proto.RegisterType((*ConfState)(nil), "raftpb.ConfState") + proto.RegisterType((*ConfChange)(nil), "raftpb.ConfChange") + proto.RegisterEnum("raftpb.EntryType", EntryType_name, EntryType_value) + proto.RegisterEnum("raftpb.MessageType", MessageType_name, MessageType_value) + proto.RegisterEnum("raftpb.ConfChangeType", ConfChangeType_name, ConfChangeType_value) +} +func (m *Entry) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Entry) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Type)) + dAtA[i] = 0x10 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Term)) + dAtA[i] = 0x18 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Index)) + if m.Data != nil { + dAtA[i] = 0x22 + i++ + i = encodeVarintRaft(dAtA, i, uint64(len(m.Data))) + i += copy(dAtA[i:], m.Data) + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *SnapshotMetadata) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *SnapshotMetadata) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0xa + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.ConfState.Size())) + n1, err := m.ConfState.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n1 + dAtA[i] = 0x10 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Index)) + dAtA[i] = 0x18 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Term)) + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *Snapshot) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Snapshot) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.Data != nil { + dAtA[i] = 0xa + i++ + i = encodeVarintRaft(dAtA, i, uint64(len(m.Data))) + i += copy(dAtA[i:], m.Data) + } + dAtA[i] = 0x12 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Metadata.Size())) + n2, err := m.Metadata.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n2 + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *Message) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Message) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Type)) + dAtA[i] = 0x10 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.To)) + dAtA[i] = 0x18 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.From)) + dAtA[i] = 0x20 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Term)) + dAtA[i] = 0x28 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.LogTerm)) + dAtA[i] = 0x30 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Index)) + if len(m.Entries) > 0 { + for _, msg := range m.Entries { + dAtA[i] = 0x3a + i++ + i = encodeVarintRaft(dAtA, i, uint64(msg.Size())) + n, err := msg.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n + } + } + dAtA[i] = 0x40 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Commit)) + dAtA[i] = 0x4a + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Snapshot.Size())) + n3, err := m.Snapshot.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n3 + dAtA[i] = 0x50 + i++ + if m.Reject { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + dAtA[i] = 0x58 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.RejectHint)) + if m.Context != nil { + dAtA[i] = 0x62 + i++ + i = encodeVarintRaft(dAtA, i, uint64(len(m.Context))) + i += copy(dAtA[i:], m.Context) + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *HardState) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *HardState) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Term)) + dAtA[i] = 0x10 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Vote)) + dAtA[i] = 0x18 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Commit)) + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *ConfState) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ConfState) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.Nodes) > 0 { + for _, num := range m.Nodes { + dAtA[i] = 0x8 + i++ + i = encodeVarintRaft(dAtA, i, uint64(num)) + } + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *ConfChange) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ConfChange) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.ID)) + dAtA[i] = 0x10 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.Type)) + dAtA[i] = 0x18 + i++ + i = encodeVarintRaft(dAtA, i, uint64(m.NodeID)) + if m.Context != nil { + dAtA[i] = 0x22 + i++ + i = encodeVarintRaft(dAtA, i, uint64(len(m.Context))) + i += copy(dAtA[i:], m.Context) + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func encodeFixed64Raft(dAtA []byte, offset int, v uint64) int { + dAtA[offset] = uint8(v) + dAtA[offset+1] = uint8(v >> 8) + dAtA[offset+2] = uint8(v >> 16) + dAtA[offset+3] = uint8(v >> 24) + dAtA[offset+4] = uint8(v >> 32) + dAtA[offset+5] = uint8(v >> 40) + dAtA[offset+6] = uint8(v >> 48) + dAtA[offset+7] = uint8(v >> 56) + return offset + 8 +} +func encodeFixed32Raft(dAtA []byte, offset int, v uint32) int { + dAtA[offset] = uint8(v) + dAtA[offset+1] = uint8(v >> 8) + dAtA[offset+2] = uint8(v >> 16) + dAtA[offset+3] = uint8(v >> 24) + return offset + 4 +} +func encodeVarintRaft(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} +func (m *Entry) Size() (n int) { + var l int + _ = l + n += 1 + sovRaft(uint64(m.Type)) + n += 1 + sovRaft(uint64(m.Term)) + n += 1 + sovRaft(uint64(m.Index)) + if m.Data != nil { + l = len(m.Data) + n += 1 + l + sovRaft(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *SnapshotMetadata) Size() (n int) { + var l int + _ = l + l = m.ConfState.Size() + n += 1 + l + sovRaft(uint64(l)) + n += 1 + sovRaft(uint64(m.Index)) + n += 1 + sovRaft(uint64(m.Term)) + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *Snapshot) Size() (n int) { + var l int + _ = l + if m.Data != nil { + l = len(m.Data) + n += 1 + l + sovRaft(uint64(l)) + } + l = m.Metadata.Size() + n += 1 + l + sovRaft(uint64(l)) + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *Message) Size() (n int) { + var l int + _ = l + n += 1 + sovRaft(uint64(m.Type)) + n += 1 + sovRaft(uint64(m.To)) + n += 1 + sovRaft(uint64(m.From)) + n += 1 + sovRaft(uint64(m.Term)) + n += 1 + sovRaft(uint64(m.LogTerm)) + n += 1 + sovRaft(uint64(m.Index)) + if len(m.Entries) > 0 { + for _, e := range m.Entries { + l = e.Size() + n += 1 + l + sovRaft(uint64(l)) + } + } + n += 1 + sovRaft(uint64(m.Commit)) + l = m.Snapshot.Size() + n += 1 + l + sovRaft(uint64(l)) + n += 2 + n += 1 + sovRaft(uint64(m.RejectHint)) + if m.Context != nil { + l = len(m.Context) + n += 1 + l + sovRaft(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *HardState) Size() (n int) { + var l int + _ = l + n += 1 + sovRaft(uint64(m.Term)) + n += 1 + sovRaft(uint64(m.Vote)) + n += 1 + sovRaft(uint64(m.Commit)) + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *ConfState) Size() (n int) { + var l int + _ = l + if len(m.Nodes) > 0 { + for _, e := range m.Nodes { + n += 1 + sovRaft(uint64(e)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *ConfChange) Size() (n int) { + var l int + _ = l + n += 1 + sovRaft(uint64(m.ID)) + n += 1 + sovRaft(uint64(m.Type)) + n += 1 + sovRaft(uint64(m.NodeID)) + if m.Context != nil { + l = len(m.Context) + n += 1 + l + sovRaft(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func sovRaft(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozRaft(x uint64) (n int) { + return sovRaft(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *Entry) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Entry: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Entry: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) + } + m.Type = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Type |= (EntryType(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) + } + m.Term = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Term |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Index", wireType) + } + m.Index = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Index |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Data", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthRaft + } + postIndex := iNdEx + byteLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Data = append(m.Data[:0], dAtA[iNdEx:postIndex]...) + if m.Data == nil { + m.Data = []byte{} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRaft(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRaft + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *SnapshotMetadata) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: SnapshotMetadata: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: SnapshotMetadata: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ConfState", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRaft + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.ConfState.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Index", wireType) + } + m.Index = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Index |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) + } + m.Term = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Term |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipRaft(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRaft + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Snapshot) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Snapshot: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Snapshot: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Data", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthRaft + } + postIndex := iNdEx + byteLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Data = append(m.Data[:0], dAtA[iNdEx:postIndex]...) + if m.Data == nil { + m.Data = []byte{} + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Metadata", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRaft + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.Metadata.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRaft(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRaft + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Message) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Message: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Message: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) + } + m.Type = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Type |= (MessageType(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field To", wireType) + } + m.To = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.To |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field From", wireType) + } + m.From = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.From |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) + } + m.Term = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Term |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 5: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field LogTerm", wireType) + } + m.LogTerm = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.LogTerm |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Index", wireType) + } + m.Index = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Index |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 7: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Entries", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRaft + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Entries = append(m.Entries, Entry{}) + if err := m.Entries[len(m.Entries)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 8: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Commit", wireType) + } + m.Commit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Commit |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 9: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Snapshot", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthRaft + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.Snapshot.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 10: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Reject", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Reject = bool(v != 0) + case 11: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field RejectHint", wireType) + } + m.RejectHint = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.RejectHint |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 12: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Context", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthRaft + } + postIndex := iNdEx + byteLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Context = append(m.Context[:0], dAtA[iNdEx:postIndex]...) + if m.Context == nil { + m.Context = []byte{} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRaft(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRaft + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *HardState) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: HardState: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: HardState: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) + } + m.Term = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Term |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Vote", wireType) + } + m.Vote = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Vote |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Commit", wireType) + } + m.Commit = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Commit |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipRaft(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRaft + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ConfState) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ConfState: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ConfState: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Nodes", wireType) + } + var v uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.Nodes = append(m.Nodes, v) + default: + iNdEx = preIndex + skippy, err := skipRaft(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRaft + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *ConfChange) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ConfChange: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ConfChange: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ID", wireType) + } + m.ID = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ID |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) + } + m.Type = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Type |= (ConfChangeType(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field NodeID", wireType) + } + m.NodeID = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.NodeID |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Context", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRaft + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthRaft + } + postIndex := iNdEx + byteLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Context = append(m.Context[:0], dAtA[iNdEx:postIndex]...) + if m.Context == nil { + m.Context = []byte{} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRaft(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRaft + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipRaft(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRaft + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRaft + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRaft + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthRaft + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRaft + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipRaft(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthRaft = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowRaft = fmt.Errorf("proto: integer overflow") +) + +func init() { proto.RegisterFile("raft.proto", fileDescriptorRaft) } + +var fileDescriptorRaft = []byte{ + // 790 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x64, 0x54, 0xcd, 0x6e, 0xdb, 0x46, + 0x10, 0x16, 0x29, 0xea, 0x6f, 0x28, 0xcb, 0xab, 0xb5, 0x5a, 0x2c, 0x0c, 0x43, 0x55, 0x85, 0x1e, + 0x04, 0x17, 0x76, 0x5b, 0x1d, 0x7a, 0xe8, 0xcd, 0x96, 0x0a, 0x58, 0x40, 0x65, 0xb8, 0xb2, 0xdc, + 0x43, 0x83, 0x20, 0x58, 0x8b, 0x2b, 0x4a, 0x89, 0xc9, 0x25, 0x96, 0x2b, 0xc7, 0xbe, 0x04, 0x79, + 0x80, 0x3c, 0x40, 0x2e, 0x79, 0x1f, 0x1f, 0x0d, 0xe4, 0x1e, 0xc4, 0xce, 0x8b, 0x04, 0xbb, 0x5c, + 0x4a, 0x94, 0x74, 0xdb, 0xf9, 0xbe, 0xe1, 0xcc, 0x37, 0xdf, 0xce, 0x12, 0x40, 0xd0, 0xa9, 0x3c, + 0x8e, 0x04, 0x97, 0x1c, 0x17, 0xd5, 0x39, 0xba, 0xde, 0x6f, 0xf8, 0xdc, 0xe7, 0x1a, 0xfa, 0x4d, + 0x9d, 0x12, 0xb6, 0xfd, 0x0e, 0x0a, 0x7f, 0x87, 0x52, 0xdc, 0xe3, 0x5f, 0xc1, 0x19, 0xdf, 0x47, + 0x8c, 0x58, 0x2d, 0xab, 0x53, 0xeb, 0xd6, 0x8f, 0x93, 0xaf, 0x8e, 0x35, 0xa9, 0x88, 0x53, 0xe7, + 0xe1, 0xcb, 0x4f, 0xb9, 0x91, 0x4e, 0xc2, 0x04, 0x9c, 0x31, 0x13, 0x01, 0xb1, 0x5b, 0x56, 0xc7, + 0x59, 0x32, 0x4c, 0x04, 0x78, 0x1f, 0x0a, 0x83, 0xd0, 0x63, 0x77, 0x24, 0x9f, 0xa1, 0x12, 0x08, + 0x63, 0x70, 0xfa, 0x54, 0x52, 0xe2, 0xb4, 0xac, 0x4e, 0x75, 0xa4, 0xcf, 0xed, 0xf7, 0x16, 0xa0, + 0xcb, 0x90, 0x46, 0xf1, 0x8c, 0xcb, 0x21, 0x93, 0xd4, 0xa3, 0x92, 0xe2, 0x3f, 0x01, 0x26, 0x3c, + 0x9c, 0xbe, 0x8a, 0x25, 0x95, 0x89, 0x22, 0x77, 0xa5, 0xa8, 0xc7, 0xc3, 0xe9, 0xa5, 0x22, 0x4c, + 0xf1, 0xca, 0x24, 0x05, 0x54, 0xf3, 0xb9, 0x6e, 0x9e, 0xd5, 0x95, 0x40, 0x4a, 0xb2, 0x54, 0x92, + 0xb3, 0xba, 0x34, 0xd2, 0xfe, 0x1f, 0xca, 0xa9, 0x02, 0x25, 0x51, 0x29, 0xd0, 0x3d, 0xab, 0x23, + 0x7d, 0xc6, 0x7f, 0x41, 0x39, 0x30, 0xca, 0x74, 0x61, 0xb7, 0x4b, 0x52, 0x2d, 0x9b, 0xca, 0x4d, + 0xdd, 0x65, 0x7e, 0xfb, 0x53, 0x1e, 0x4a, 0x43, 0x16, 0xc7, 0xd4, 0x67, 0xf8, 0x08, 0x1c, 0xb9, + 0x72, 0x78, 0x2f, 0xad, 0x61, 0xe8, 0xac, 0xc7, 0x2a, 0x0d, 0x37, 0xc0, 0x96, 0x7c, 0x6d, 0x12, + 0x5b, 0x72, 0x35, 0xc6, 0x54, 0xf0, 0x8d, 0x31, 0x14, 0xb2, 0x1c, 0xd0, 0xd9, 0x1c, 0x10, 0x37, + 0xa1, 0x74, 0xc3, 0x7d, 0x7d, 0x61, 0x85, 0x0c, 0x99, 0x82, 0x2b, 0xdb, 0x8a, 0xdb, 0xb6, 0x1d, + 0x41, 0x89, 0x85, 0x52, 0xcc, 0x59, 0x4c, 0x4a, 0xad, 0x7c, 0xc7, 0xed, 0xee, 0xac, 0x6d, 0x46, + 0x5a, 0xca, 0xe4, 0xe0, 0x03, 0x28, 0x4e, 0x78, 0x10, 0xcc, 0x25, 0x29, 0x67, 0x6a, 0x19, 0x0c, + 0x77, 0xa1, 0x1c, 0x1b, 0xc7, 0x48, 0x45, 0x3b, 0x89, 0x36, 0x9d, 0x4c, 0x1d, 0x4c, 0xf3, 0x54, + 0x45, 0xc1, 0x5e, 0xb3, 0x89, 0x24, 0xd0, 0xb2, 0x3a, 0xe5, 0xb4, 0x62, 0x82, 0xe1, 0x5f, 0x00, + 0x92, 0xd3, 0xd9, 0x3c, 0x94, 0xc4, 0xcd, 0xf4, 0xcc, 0xe0, 0x98, 0x40, 0x69, 0xc2, 0x43, 0xc9, + 0xee, 0x24, 0xa9, 0xea, 0x8b, 0x4d, 0xc3, 0xf6, 0x4b, 0xa8, 0x9c, 0x51, 0xe1, 0x25, 0xeb, 0x93, + 0x3a, 0x68, 0x6d, 0x39, 0x48, 0xc0, 0xb9, 0xe5, 0x92, 0xad, 0xef, 0xbb, 0x42, 0x32, 0x03, 0xe7, + 0xb7, 0x07, 0x6e, 0xff, 0x0c, 0x95, 0xe5, 0xba, 0xe2, 0x06, 0x14, 0x42, 0xee, 0xb1, 0x98, 0x58, + 0xad, 0x7c, 0xc7, 0x19, 0x25, 0x41, 0xfb, 0x83, 0x05, 0xa0, 0x72, 0x7a, 0x33, 0x1a, 0xfa, 0xfa, + 0xd6, 0x07, 0xfd, 0x35, 0x05, 0xf6, 0xa0, 0x8f, 0x7f, 0x37, 0x8f, 0xd3, 0xd6, 0xab, 0xf3, 0x63, + 0xf6, 0x29, 0x24, 0xdf, 0x6d, 0xbd, 0xd0, 0x03, 0x28, 0x9e, 0x73, 0x8f, 0x0d, 0xfa, 0xeb, 0xba, + 0x12, 0x4c, 0x19, 0xd2, 0x33, 0x86, 0x24, 0x8f, 0x31, 0x0d, 0x0f, 0xff, 0x80, 0xca, 0xf2, 0xc9, + 0xe3, 0x5d, 0x70, 0x75, 0x70, 0xce, 0x45, 0x40, 0x6f, 0x50, 0x0e, 0xef, 0xc1, 0xae, 0x06, 0x56, + 0x8d, 0x91, 0x75, 0xf8, 0xd9, 0x06, 0x37, 0xb3, 0xc4, 0x18, 0xa0, 0x38, 0x8c, 0xfd, 0xb3, 0x45, + 0x84, 0x72, 0xd8, 0x85, 0xd2, 0x30, 0xf6, 0x4f, 0x19, 0x95, 0xc8, 0x32, 0xc1, 0x85, 0xe0, 0x11, + 0xb2, 0x4d, 0xd6, 0x49, 0x14, 0xa1, 0x3c, 0xae, 0x01, 0x24, 0xe7, 0x11, 0x8b, 0x23, 0xe4, 0x98, + 0xc4, 0xff, 0xb8, 0x64, 0xa8, 0xa0, 0x44, 0x98, 0x40, 0xb3, 0x45, 0xc3, 0xaa, 0x85, 0x41, 0x25, + 0x8c, 0xa0, 0xaa, 0x9a, 0x31, 0x2a, 0xe4, 0xb5, 0xea, 0x52, 0xc6, 0x0d, 0x40, 0x59, 0x44, 0x7f, + 0x54, 0xc1, 0x18, 0x6a, 0xc3, 0xd8, 0xbf, 0x0a, 0x05, 0xa3, 0x93, 0x19, 0xbd, 0xbe, 0x61, 0x08, + 0x70, 0x1d, 0x76, 0x4c, 0x21, 0x75, 0x41, 0x8b, 0x18, 0xb9, 0x26, 0xad, 0x37, 0x63, 0x93, 0x37, + 0xff, 0x2e, 0xb8, 0x58, 0x04, 0xa8, 0x8a, 0x7f, 0x80, 0xfa, 0x30, 0xf6, 0xc7, 0x82, 0x86, 0xf1, + 0x94, 0x89, 0x7f, 0x18, 0xf5, 0x98, 0x40, 0x3b, 0xe6, 0xeb, 0xf1, 0x3c, 0x60, 0x7c, 0x21, 0xcf, + 0xf9, 0x5b, 0x54, 0x33, 0x62, 0x46, 0x8c, 0x7a, 0xfa, 0x87, 0x87, 0x76, 0x8d, 0x98, 0x25, 0xa2, + 0xc5, 0x20, 0x33, 0xef, 0x85, 0x60, 0x7a, 0xc4, 0xba, 0xe9, 0x6a, 0x62, 0x9d, 0x83, 0x0f, 0x5f, + 0x40, 0x6d, 0xfd, 0x7a, 0x95, 0x8e, 0x15, 0x72, 0xe2, 0x79, 0xea, 0x2e, 0x51, 0x0e, 0x13, 0x68, + 0xac, 0xe0, 0x11, 0x0b, 0xf8, 0x2d, 0xd3, 0x8c, 0xb5, 0xce, 0x5c, 0x45, 0x1e, 0x95, 0x09, 0x63, + 0x9f, 0x92, 0x87, 0xa7, 0x66, 0xee, 0xf1, 0xa9, 0x99, 0x7b, 0x78, 0x6e, 0x5a, 0x8f, 0xcf, 0x4d, + 0xeb, 0xeb, 0x73, 0xd3, 0xfa, 0xf8, 0xad, 0x99, 0xfb, 0x1e, 0x00, 0x00, 0xff, 0xff, 0xcf, 0x30, + 0x01, 0x41, 0x3a, 0x06, 0x00, 0x00, +} diff --git a/vendor/github.com/coreos/etcd/raft/raftpb/raft.proto b/vendor/github.com/coreos/etcd/raft/raftpb/raft.proto new file mode 100644 index 0000000..806a436 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/raftpb/raft.proto @@ -0,0 +1,93 @@ +syntax = "proto2"; +package raftpb; + +import "gogoproto/gogo.proto"; + +option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; +option (gogoproto.unmarshaler_all) = true; +option (gogoproto.goproto_getters_all) = false; +option (gogoproto.goproto_enum_prefix_all) = false; + +enum EntryType { + EntryNormal = 0; + EntryConfChange = 1; +} + +message Entry { + optional uint64 Term = 2 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations + optional uint64 Index = 3 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations + optional EntryType Type = 1 [(gogoproto.nullable) = false]; + optional bytes Data = 4; +} + +message SnapshotMetadata { + optional ConfState conf_state = 1 [(gogoproto.nullable) = false]; + optional uint64 index = 2 [(gogoproto.nullable) = false]; + optional uint64 term = 3 [(gogoproto.nullable) = false]; +} + +message Snapshot { + optional bytes data = 1; + optional SnapshotMetadata metadata = 2 [(gogoproto.nullable) = false]; +} + +enum MessageType { + MsgHup = 0; + MsgBeat = 1; + MsgProp = 2; + MsgApp = 3; + MsgAppResp = 4; + MsgVote = 5; + MsgVoteResp = 6; + MsgSnap = 7; + MsgHeartbeat = 8; + MsgHeartbeatResp = 9; + MsgUnreachable = 10; + MsgSnapStatus = 11; + MsgCheckQuorum = 12; + MsgTransferLeader = 13; + MsgTimeoutNow = 14; + MsgReadIndex = 15; + MsgReadIndexResp = 16; + MsgPreVote = 17; + MsgPreVoteResp = 18; +} + +message Message { + optional MessageType type = 1 [(gogoproto.nullable) = false]; + optional uint64 to = 2 [(gogoproto.nullable) = false]; + optional uint64 from = 3 [(gogoproto.nullable) = false]; + optional uint64 term = 4 [(gogoproto.nullable) = false]; + optional uint64 logTerm = 5 [(gogoproto.nullable) = false]; + optional uint64 index = 6 [(gogoproto.nullable) = false]; + repeated Entry entries = 7 [(gogoproto.nullable) = false]; + optional uint64 commit = 8 [(gogoproto.nullable) = false]; + optional Snapshot snapshot = 9 [(gogoproto.nullable) = false]; + optional bool reject = 10 [(gogoproto.nullable) = false]; + optional uint64 rejectHint = 11 [(gogoproto.nullable) = false]; + optional bytes context = 12; +} + +message HardState { + optional uint64 term = 1 [(gogoproto.nullable) = false]; + optional uint64 vote = 2 [(gogoproto.nullable) = false]; + optional uint64 commit = 3 [(gogoproto.nullable) = false]; +} + +message ConfState { + repeated uint64 nodes = 1; +} + +enum ConfChangeType { + ConfChangeAddNode = 0; + ConfChangeRemoveNode = 1; + ConfChangeUpdateNode = 2; +} + +message ConfChange { + optional uint64 ID = 1 [(gogoproto.nullable) = false]; + optional ConfChangeType Type = 2 [(gogoproto.nullable) = false]; + optional uint64 NodeID = 3 [(gogoproto.nullable) = false]; + optional bytes Context = 4; +} diff --git a/vendor/github.com/coreos/etcd/raft/rawnode.go b/vendor/github.com/coreos/etcd/raft/rawnode.go new file mode 100644 index 0000000..b950d51 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/rawnode.go @@ -0,0 +1,264 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "errors" + + pb "github.com/coreos/etcd/raft/raftpb" +) + +// ErrStepLocalMsg is returned when try to step a local raft message +var ErrStepLocalMsg = errors.New("raft: cannot step raft local message") + +// ErrStepPeerNotFound is returned when try to step a response message +// but there is no peer found in raft.prs for that node. +var ErrStepPeerNotFound = errors.New("raft: cannot step as peer not found") + +// RawNode is a thread-unsafe Node. +// The methods of this struct correspond to the methods of Node and are described +// more fully there. +type RawNode struct { + raft *raft + prevSoftSt *SoftState + prevHardSt pb.HardState +} + +func (rn *RawNode) newReady() Ready { + return newReady(rn.raft, rn.prevSoftSt, rn.prevHardSt) +} + +func (rn *RawNode) commitReady(rd Ready) { + if rd.SoftState != nil { + rn.prevSoftSt = rd.SoftState + } + if !IsEmptyHardState(rd.HardState) { + rn.prevHardSt = rd.HardState + } + if rn.prevHardSt.Commit != 0 { + // In most cases, prevHardSt and rd.HardState will be the same + // because when there are new entries to apply we just sent a + // HardState with an updated Commit value. However, on initial + // startup the two are different because we don't send a HardState + // until something changes, but we do send any un-applied but + // committed entries (and previously-committed entries may be + // incorporated into the snapshot, even if rd.CommittedEntries is + // empty). Therefore we mark all committed entries as applied + // whether they were included in rd.HardState or not. + rn.raft.raftLog.appliedTo(rn.prevHardSt.Commit) + } + if len(rd.Entries) > 0 { + e := rd.Entries[len(rd.Entries)-1] + rn.raft.raftLog.stableTo(e.Index, e.Term) + } + if !IsEmptySnap(rd.Snapshot) { + rn.raft.raftLog.stableSnapTo(rd.Snapshot.Metadata.Index) + } + if len(rd.ReadStates) != 0 { + rn.raft.readStates = nil + } +} + +// NewRawNode returns a new RawNode given configuration and a list of raft peers. +func NewRawNode(config *Config, peers []Peer) (*RawNode, error) { + if config.ID == 0 { + panic("config.ID must not be zero") + } + r := newRaft(config) + rn := &RawNode{ + raft: r, + } + lastIndex, err := config.Storage.LastIndex() + if err != nil { + panic(err) // TODO(bdarnell) + } + // If the log is empty, this is a new RawNode (like StartNode); otherwise it's + // restoring an existing RawNode (like RestartNode). + // TODO(bdarnell): rethink RawNode initialization and whether the application needs + // to be able to tell us when it expects the RawNode to exist. + if lastIndex == 0 { + r.becomeFollower(1, None) + ents := make([]pb.Entry, len(peers)) + for i, peer := range peers { + cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context} + data, err := cc.Marshal() + if err != nil { + panic("unexpected marshal error") + } + + ents[i] = pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: uint64(i + 1), Data: data} + } + r.raftLog.append(ents...) + r.raftLog.committed = uint64(len(ents)) + for _, peer := range peers { + r.addNode(peer.ID) + } + } + + // Set the initial hard and soft states after performing all initialization. + rn.prevSoftSt = r.softState() + if lastIndex == 0 { + rn.prevHardSt = emptyState + } else { + rn.prevHardSt = r.hardState() + } + + return rn, nil +} + +// Tick advances the internal logical clock by a single tick. +func (rn *RawNode) Tick() { + rn.raft.tick() +} + +// TickQuiesced advances the internal logical clock by a single tick without +// performing any other state machine processing. It allows the caller to avoid +// periodic heartbeats and elections when all of the peers in a Raft group are +// known to be at the same state. Expected usage is to periodically invoke Tick +// or TickQuiesced depending on whether the group is "active" or "quiesced". +// +// WARNING: Be very careful about using this method as it subverts the Raft +// state machine. You should probably be using Tick instead. +func (rn *RawNode) TickQuiesced() { + rn.raft.electionElapsed++ +} + +// Campaign causes this RawNode to transition to candidate state. +func (rn *RawNode) Campaign() error { + return rn.raft.Step(pb.Message{ + Type: pb.MsgHup, + }) +} + +// Propose proposes data be appended to the raft log. +func (rn *RawNode) Propose(data []byte) error { + return rn.raft.Step(pb.Message{ + Type: pb.MsgProp, + From: rn.raft.id, + Entries: []pb.Entry{ + {Data: data}, + }}) +} + +// ProposeConfChange proposes a config change. +func (rn *RawNode) ProposeConfChange(cc pb.ConfChange) error { + data, err := cc.Marshal() + if err != nil { + return err + } + return rn.raft.Step(pb.Message{ + Type: pb.MsgProp, + Entries: []pb.Entry{ + {Type: pb.EntryConfChange, Data: data}, + }, + }) +} + +// ApplyConfChange applies a config change to the local node. +func (rn *RawNode) ApplyConfChange(cc pb.ConfChange) *pb.ConfState { + if cc.NodeID == None { + rn.raft.resetPendingConf() + return &pb.ConfState{Nodes: rn.raft.nodes()} + } + switch cc.Type { + case pb.ConfChangeAddNode: + rn.raft.addNode(cc.NodeID) + case pb.ConfChangeRemoveNode: + rn.raft.removeNode(cc.NodeID) + case pb.ConfChangeUpdateNode: + rn.raft.resetPendingConf() + default: + panic("unexpected conf type") + } + return &pb.ConfState{Nodes: rn.raft.nodes()} +} + +// Step advances the state machine using the given message. +func (rn *RawNode) Step(m pb.Message) error { + // ignore unexpected local messages receiving over network + if IsLocalMsg(m.Type) { + return ErrStepLocalMsg + } + if _, ok := rn.raft.prs[m.From]; ok || !IsResponseMsg(m.Type) { + return rn.raft.Step(m) + } + return ErrStepPeerNotFound +} + +// Ready returns the current point-in-time state of this RawNode. +func (rn *RawNode) Ready() Ready { + rd := rn.newReady() + rn.raft.msgs = nil + return rd +} + +// HasReady called when RawNode user need to check if any Ready pending. +// Checking logic in this method should be consistent with Ready.containsUpdates(). +func (rn *RawNode) HasReady() bool { + r := rn.raft + if !r.softState().equal(rn.prevSoftSt) { + return true + } + if hardSt := r.hardState(); !IsEmptyHardState(hardSt) && !isHardStateEqual(hardSt, rn.prevHardSt) { + return true + } + if r.raftLog.unstable.snapshot != nil && !IsEmptySnap(*r.raftLog.unstable.snapshot) { + return true + } + if len(r.msgs) > 0 || len(r.raftLog.unstableEntries()) > 0 || r.raftLog.hasNextEnts() { + return true + } + if len(r.readStates) != 0 { + return true + } + return false +} + +// Advance notifies the RawNode that the application has applied and saved progress in the +// last Ready results. +func (rn *RawNode) Advance(rd Ready) { + rn.commitReady(rd) +} + +// Status returns the current status of the given group. +func (rn *RawNode) Status() *Status { + status := getStatus(rn.raft) + return &status +} + +// ReportUnreachable reports the given node is not reachable for the last send. +func (rn *RawNode) ReportUnreachable(id uint64) { + _ = rn.raft.Step(pb.Message{Type: pb.MsgUnreachable, From: id}) +} + +// ReportSnapshot reports the status of the sent snapshot. +func (rn *RawNode) ReportSnapshot(id uint64, status SnapshotStatus) { + rej := status == SnapshotFailure + + _ = rn.raft.Step(pb.Message{Type: pb.MsgSnapStatus, From: id, Reject: rej}) +} + +// TransferLeader tries to transfer leadership to the given transferee. +func (rn *RawNode) TransferLeader(transferee uint64) { + _ = rn.raft.Step(pb.Message{Type: pb.MsgTransferLeader, From: transferee}) +} + +// ReadIndex requests a read state. The read state will be set in ready. +// Read State has a read index. Once the application advances further than the read +// index, any linearizable read requests issued before the read request can be +// processed safely. The read state will have the same rctx attached. +func (rn *RawNode) ReadIndex(rctx []byte) { + _ = rn.raft.Step(pb.Message{Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: rctx}}}) +} diff --git a/vendor/github.com/coreos/etcd/raft/read_only.go b/vendor/github.com/coreos/etcd/raft/read_only.go new file mode 100644 index 0000000..05a21da --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/read_only.go @@ -0,0 +1,118 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import pb "github.com/coreos/etcd/raft/raftpb" + +// ReadState provides state for read only query. +// It's caller's responsibility to call ReadIndex first before getting +// this state from ready, It's also caller's duty to differentiate if this +// state is what it requests through RequestCtx, eg. given a unique id as +// RequestCtx +type ReadState struct { + Index uint64 + RequestCtx []byte +} + +type readIndexStatus struct { + req pb.Message + index uint64 + acks map[uint64]struct{} +} + +type readOnly struct { + option ReadOnlyOption + pendingReadIndex map[string]*readIndexStatus + readIndexQueue []string +} + +func newReadOnly(option ReadOnlyOption) *readOnly { + return &readOnly{ + option: option, + pendingReadIndex: make(map[string]*readIndexStatus), + } +} + +// addRequest adds a read only reuqest into readonly struct. +// `index` is the commit index of the raft state machine when it received +// the read only request. +// `m` is the original read only request message from the local or remote node. +func (ro *readOnly) addRequest(index uint64, m pb.Message) { + ctx := string(m.Entries[0].Data) + if _, ok := ro.pendingReadIndex[ctx]; ok { + return + } + ro.pendingReadIndex[ctx] = &readIndexStatus{index: index, req: m, acks: make(map[uint64]struct{})} + ro.readIndexQueue = append(ro.readIndexQueue, ctx) +} + +// recvAck notifies the readonly struct that the raft state machine received +// an acknowledgment of the heartbeat that attached with the read only request +// context. +func (ro *readOnly) recvAck(m pb.Message) int { + rs, ok := ro.pendingReadIndex[string(m.Context)] + if !ok { + return 0 + } + + rs.acks[m.From] = struct{}{} + // add one to include an ack from local node + return len(rs.acks) + 1 +} + +// advance advances the read only request queue kept by the readonly struct. +// It dequeues the requests until it finds the read only request that has +// the same context as the given `m`. +func (ro *readOnly) advance(m pb.Message) []*readIndexStatus { + var ( + i int + found bool + ) + + ctx := string(m.Context) + rss := []*readIndexStatus{} + + for _, okctx := range ro.readIndexQueue { + i++ + rs, ok := ro.pendingReadIndex[okctx] + if !ok { + panic("cannot find corresponding read state from pending map") + } + rss = append(rss, rs) + if okctx == ctx { + found = true + break + } + } + + if found { + ro.readIndexQueue = ro.readIndexQueue[i:] + for _, rs := range rss { + delete(ro.pendingReadIndex, string(rs.req.Context)) + } + return rss + } + + return nil +} + +// lastPendingRequestCtx returns the context of the last pending read only +// request in readonly struct. +func (ro *readOnly) lastPendingRequestCtx() string { + if len(ro.readIndexQueue) == 0 { + return "" + } + return ro.readIndexQueue[len(ro.readIndexQueue)-1] +} diff --git a/vendor/github.com/coreos/etcd/raft/status.go b/vendor/github.com/coreos/etcd/raft/status.go new file mode 100644 index 0000000..b690fa5 --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/status.go @@ -0,0 +1,76 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "fmt" + + pb "github.com/coreos/etcd/raft/raftpb" +) + +type Status struct { + ID uint64 + + pb.HardState + SoftState + + Applied uint64 + Progress map[uint64]Progress +} + +// getStatus gets a copy of the current raft status. +func getStatus(r *raft) Status { + s := Status{ID: r.id} + s.HardState = r.hardState() + s.SoftState = *r.softState() + + s.Applied = r.raftLog.applied + + if s.RaftState == StateLeader { + s.Progress = make(map[uint64]Progress) + for id, p := range r.prs { + s.Progress[id] = *p + } + } + + return s +} + +// MarshalJSON translates the raft status into JSON. +// TODO: try to simplify this by introducing ID type into raft +func (s Status) MarshalJSON() ([]byte, error) { + j := fmt.Sprintf(`{"id":"%x","term":%d,"vote":"%x","commit":%d,"lead":"%x","raftState":%q,"progress":{`, + s.ID, s.Term, s.Vote, s.Commit, s.Lead, s.RaftState) + + if len(s.Progress) == 0 { + j += "}}" + } else { + for k, v := range s.Progress { + subj := fmt.Sprintf(`"%x":{"match":%d,"next":%d,"state":%q},`, k, v.Match, v.Next, v.State) + j += subj + } + // remove the trailing "," + j = j[:len(j)-1] + "}}" + } + return []byte(j), nil +} + +func (s Status) String() string { + b, err := s.MarshalJSON() + if err != nil { + raftLogger.Panicf("unexpected error: %v", err) + } + return string(b) +} diff --git a/vendor/github.com/coreos/etcd/raft/storage.go b/vendor/github.com/coreos/etcd/raft/storage.go new file mode 100644 index 0000000..69c3a7d --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/storage.go @@ -0,0 +1,271 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "errors" + "sync" + + pb "github.com/coreos/etcd/raft/raftpb" +) + +// ErrCompacted is returned by Storage.Entries/Compact when a requested +// index is unavailable because it predates the last snapshot. +var ErrCompacted = errors.New("requested index is unavailable due to compaction") + +// ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested +// index is older than the existing snapshot. +var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot") + +// ErrUnavailable is returned by Storage interface when the requested log entries +// are unavailable. +var ErrUnavailable = errors.New("requested entry at index is unavailable") + +// ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required +// snapshot is temporarily unavailable. +var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable") + +// Storage is an interface that may be implemented by the application +// to retrieve log entries from storage. +// +// If any Storage method returns an error, the raft instance will +// become inoperable and refuse to participate in elections; the +// application is responsible for cleanup and recovery in this case. +type Storage interface { + // InitialState returns the saved HardState and ConfState information. + InitialState() (pb.HardState, pb.ConfState, error) + // Entries returns a slice of log entries in the range [lo,hi). + // MaxSize limits the total size of the log entries returned, but + // Entries returns at least one entry if any. + Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) + // Term returns the term of entry i, which must be in the range + // [FirstIndex()-1, LastIndex()]. The term of the entry before + // FirstIndex is retained for matching purposes even though the + // rest of that entry may not be available. + Term(i uint64) (uint64, error) + // LastIndex returns the index of the last entry in the log. + LastIndex() (uint64, error) + // FirstIndex returns the index of the first log entry that is + // possibly available via Entries (older entries have been incorporated + // into the latest Snapshot; if storage only contains the dummy entry the + // first log entry is not available). + FirstIndex() (uint64, error) + // Snapshot returns the most recent snapshot. + // If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable, + // so raft state machine could know that Storage needs some time to prepare + // snapshot and call Snapshot later. + Snapshot() (pb.Snapshot, error) +} + +// MemoryStorage implements the Storage interface backed by an +// in-memory array. +type MemoryStorage struct { + // Protects access to all fields. Most methods of MemoryStorage are + // run on the raft goroutine, but Append() is run on an application + // goroutine. + sync.Mutex + + hardState pb.HardState + snapshot pb.Snapshot + // ents[i] has raft log position i+snapshot.Metadata.Index + ents []pb.Entry +} + +// NewMemoryStorage creates an empty MemoryStorage. +func NewMemoryStorage() *MemoryStorage { + return &MemoryStorage{ + // When starting from scratch populate the list with a dummy entry at term zero. + ents: make([]pb.Entry, 1), + } +} + +// InitialState implements the Storage interface. +func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) { + return ms.hardState, ms.snapshot.Metadata.ConfState, nil +} + +// SetHardState saves the current HardState. +func (ms *MemoryStorage) SetHardState(st pb.HardState) error { + ms.Lock() + defer ms.Unlock() + ms.hardState = st + return nil +} + +// Entries implements the Storage interface. +func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) { + ms.Lock() + defer ms.Unlock() + offset := ms.ents[0].Index + if lo <= offset { + return nil, ErrCompacted + } + if hi > ms.lastIndex()+1 { + raftLogger.Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex()) + } + // only contains dummy entries. + if len(ms.ents) == 1 { + return nil, ErrUnavailable + } + + ents := ms.ents[lo-offset : hi-offset] + return limitSize(ents, maxSize), nil +} + +// Term implements the Storage interface. +func (ms *MemoryStorage) Term(i uint64) (uint64, error) { + ms.Lock() + defer ms.Unlock() + offset := ms.ents[0].Index + if i < offset { + return 0, ErrCompacted + } + if int(i-offset) >= len(ms.ents) { + return 0, ErrUnavailable + } + return ms.ents[i-offset].Term, nil +} + +// LastIndex implements the Storage interface. +func (ms *MemoryStorage) LastIndex() (uint64, error) { + ms.Lock() + defer ms.Unlock() + return ms.lastIndex(), nil +} + +func (ms *MemoryStorage) lastIndex() uint64 { + return ms.ents[0].Index + uint64(len(ms.ents)) - 1 +} + +// FirstIndex implements the Storage interface. +func (ms *MemoryStorage) FirstIndex() (uint64, error) { + ms.Lock() + defer ms.Unlock() + return ms.firstIndex(), nil +} + +func (ms *MemoryStorage) firstIndex() uint64 { + return ms.ents[0].Index + 1 +} + +// Snapshot implements the Storage interface. +func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) { + ms.Lock() + defer ms.Unlock() + return ms.snapshot, nil +} + +// ApplySnapshot overwrites the contents of this Storage object with +// those of the given snapshot. +func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error { + ms.Lock() + defer ms.Unlock() + + //handle check for old snapshot being applied + msIndex := ms.snapshot.Metadata.Index + snapIndex := snap.Metadata.Index + if msIndex >= snapIndex { + return ErrSnapOutOfDate + } + + ms.snapshot = snap + ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}} + return nil +} + +// CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and +// can be used to reconstruct the state at that point. +// If any configuration changes have been made since the last compaction, +// the result of the last ApplyConfChange must be passed in. +func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) { + ms.Lock() + defer ms.Unlock() + if i <= ms.snapshot.Metadata.Index { + return pb.Snapshot{}, ErrSnapOutOfDate + } + + offset := ms.ents[0].Index + if i > ms.lastIndex() { + raftLogger.Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex()) + } + + ms.snapshot.Metadata.Index = i + ms.snapshot.Metadata.Term = ms.ents[i-offset].Term + if cs != nil { + ms.snapshot.Metadata.ConfState = *cs + } + ms.snapshot.Data = data + return ms.snapshot, nil +} + +// Compact discards all log entries prior to compactIndex. +// It is the application's responsibility to not attempt to compact an index +// greater than raftLog.applied. +func (ms *MemoryStorage) Compact(compactIndex uint64) error { + ms.Lock() + defer ms.Unlock() + offset := ms.ents[0].Index + if compactIndex <= offset { + return ErrCompacted + } + if compactIndex > ms.lastIndex() { + raftLogger.Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex()) + } + + i := compactIndex - offset + ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i) + ents[0].Index = ms.ents[i].Index + ents[0].Term = ms.ents[i].Term + ents = append(ents, ms.ents[i+1:]...) + ms.ents = ents + return nil +} + +// Append the new entries to storage. +// TODO (xiangli): ensure the entries are continuous and +// entries[0].Index > ms.entries[0].Index +func (ms *MemoryStorage) Append(entries []pb.Entry) error { + if len(entries) == 0 { + return nil + } + + ms.Lock() + defer ms.Unlock() + + first := ms.firstIndex() + last := entries[0].Index + uint64(len(entries)) - 1 + + // shortcut if there is no new entry. + if last < first { + return nil + } + // truncate compacted entries + if first > entries[0].Index { + entries = entries[first-entries[0].Index:] + } + + offset := entries[0].Index - ms.ents[0].Index + switch { + case uint64(len(ms.ents)) > offset: + ms.ents = append([]pb.Entry{}, ms.ents[:offset]...) + ms.ents = append(ms.ents, entries...) + case uint64(len(ms.ents)) == offset: + ms.ents = append(ms.ents, entries...) + default: + raftLogger.Panicf("missing log entry [last: %d, append at: %d]", + ms.lastIndex(), entries[0].Index) + } + return nil +} diff --git a/vendor/github.com/coreos/etcd/raft/util.go b/vendor/github.com/coreos/etcd/raft/util.go new file mode 100644 index 0000000..f4141fe --- /dev/null +++ b/vendor/github.com/coreos/etcd/raft/util.go @@ -0,0 +1,129 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "bytes" + "fmt" + + pb "github.com/coreos/etcd/raft/raftpb" +) + +func (st StateType) MarshalJSON() ([]byte, error) { + return []byte(fmt.Sprintf("%q", st.String())), nil +} + +// uint64Slice implements sort interface +type uint64Slice []uint64 + +func (p uint64Slice) Len() int { return len(p) } +func (p uint64Slice) Less(i, j int) bool { return p[i] < p[j] } +func (p uint64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func min(a, b uint64) uint64 { + if a > b { + return b + } + return a +} + +func max(a, b uint64) uint64 { + if a > b { + return a + } + return b +} + +func IsLocalMsg(msgt pb.MessageType) bool { + return msgt == pb.MsgHup || msgt == pb.MsgBeat || msgt == pb.MsgUnreachable || + msgt == pb.MsgSnapStatus || msgt == pb.MsgCheckQuorum +} + +func IsResponseMsg(msgt pb.MessageType) bool { + return msgt == pb.MsgAppResp || msgt == pb.MsgVoteResp || msgt == pb.MsgHeartbeatResp || msgt == pb.MsgUnreachable || msgt == pb.MsgPreVoteResp +} + +// voteResponseType maps vote and prevote message types to their corresponding responses. +func voteRespMsgType(msgt pb.MessageType) pb.MessageType { + switch msgt { + case pb.MsgVote: + return pb.MsgVoteResp + case pb.MsgPreVote: + return pb.MsgPreVoteResp + default: + panic(fmt.Sprintf("not a vote message: %s", msgt)) + } +} + +// EntryFormatter can be implemented by the application to provide human-readable formatting +// of entry data. Nil is a valid EntryFormatter and will use a default format. +type EntryFormatter func([]byte) string + +// DescribeMessage returns a concise human-readable description of a +// Message for debugging. +func DescribeMessage(m pb.Message, f EntryFormatter) string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "%x->%x %v Term:%d Log:%d/%d", m.From, m.To, m.Type, m.Term, m.LogTerm, m.Index) + if m.Reject { + fmt.Fprintf(&buf, " Rejected") + if m.RejectHint != 0 { + fmt.Fprintf(&buf, "(Hint:%d)", m.RejectHint) + } + } + if m.Commit != 0 { + fmt.Fprintf(&buf, " Commit:%d", m.Commit) + } + if len(m.Entries) > 0 { + fmt.Fprintf(&buf, " Entries:[") + for i, e := range m.Entries { + if i != 0 { + buf.WriteString(", ") + } + buf.WriteString(DescribeEntry(e, f)) + } + fmt.Fprintf(&buf, "]") + } + if !IsEmptySnap(m.Snapshot) { + fmt.Fprintf(&buf, " Snapshot:%v", m.Snapshot) + } + return buf.String() +} + +// DescribeEntry returns a concise human-readable description of an +// Entry for debugging. +func DescribeEntry(e pb.Entry, f EntryFormatter) string { + var formatted string + if e.Type == pb.EntryNormal && f != nil { + formatted = f(e.Data) + } else { + formatted = fmt.Sprintf("%q", e.Data) + } + return fmt.Sprintf("%d/%d %s %s", e.Term, e.Index, e.Type, formatted) +} + +func limitSize(ents []pb.Entry, maxSize uint64) []pb.Entry { + if len(ents) == 0 { + return ents + } + size := ents[0].Size() + var limit int + for limit = 1; limit < len(ents); limit++ { + size += ents[limit].Size() + if uint64(size) > maxSize { + break + } + } + return ents[:limit] +} diff --git a/vendor/github.com/coreos/etcd/snap/db.go b/vendor/github.com/coreos/etcd/snap/db.go new file mode 100644 index 0000000..743deac --- /dev/null +++ b/vendor/github.com/coreos/etcd/snap/db.go @@ -0,0 +1,74 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package snap + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "path" + + "github.com/coreos/etcd/pkg/fileutil" +) + +// SaveDBFrom saves snapshot of the database from the given reader. It +// guarantees the save operation is atomic. +func (s *Snapshotter) SaveDBFrom(r io.Reader, id uint64) (int64, error) { + f, err := ioutil.TempFile(s.dir, "tmp") + if err != nil { + return 0, err + } + var n int64 + n, err = io.Copy(f, r) + if err == nil { + err = fileutil.Fsync(f) + } + f.Close() + if err != nil { + os.Remove(f.Name()) + return n, err + } + fn := path.Join(s.dir, fmt.Sprintf("%016x.snap.db", id)) + if fileutil.Exist(fn) { + os.Remove(f.Name()) + return n, nil + } + err = os.Rename(f.Name(), fn) + if err != nil { + os.Remove(f.Name()) + return n, err + } + + plog.Infof("saved database snapshot to disk [total bytes: %d]", n) + + return n, nil +} + +// DBFilePath returns the file path for the snapshot of the database with +// given id. If the snapshot does not exist, it returns error. +func (s *Snapshotter) DBFilePath(id uint64) (string, error) { + fns, err := fileutil.ReadDir(s.dir) + if err != nil { + return "", err + } + wfn := fmt.Sprintf("%016x.snap.db", id) + for _, fn := range fns { + if fn == wfn { + return path.Join(s.dir, fn), nil + } + } + return "", fmt.Errorf("snap: snapshot file doesn't exist") +} diff --git a/vendor/github.com/coreos/etcd/snap/message.go b/vendor/github.com/coreos/etcd/snap/message.go new file mode 100644 index 0000000..d73713f --- /dev/null +++ b/vendor/github.com/coreos/etcd/snap/message.go @@ -0,0 +1,64 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package snap + +import ( + "io" + + "github.com/coreos/etcd/pkg/ioutil" + "github.com/coreos/etcd/raft/raftpb" +) + +// Message is a struct that contains a raft Message and a ReadCloser. The type +// of raft message MUST be MsgSnap, which contains the raft meta-data and an +// additional data []byte field that contains the snapshot of the actual state +// machine. +// Message contains the ReadCloser field for handling large snapshot. This avoid +// copying the entire snapshot into a byte array, which consumes a lot of memory. +// +// User of Message should close the Message after sending it. +type Message struct { + raftpb.Message + ReadCloser io.ReadCloser + TotalSize int64 + closeC chan bool +} + +func NewMessage(rs raftpb.Message, rc io.ReadCloser, rcSize int64) *Message { + return &Message{ + Message: rs, + ReadCloser: ioutil.NewExactReadCloser(rc, rcSize), + TotalSize: int64(rs.Size()) + rcSize, + closeC: make(chan bool, 1), + } +} + +// CloseNotify returns a channel that receives a single value +// when the message sent is finished. true indicates the sent +// is successful. +func (m Message) CloseNotify() <-chan bool { + return m.closeC +} + +func (m Message) CloseWithError(err error) { + if cerr := m.ReadCloser.Close(); cerr != nil { + err = cerr + } + if err == nil { + m.closeC <- true + } else { + m.closeC <- false + } +} diff --git a/vendor/github.com/coreos/etcd/snap/metrics.go b/vendor/github.com/coreos/etcd/snap/metrics.go new file mode 100644 index 0000000..433ef09 --- /dev/null +++ b/vendor/github.com/coreos/etcd/snap/metrics.go @@ -0,0 +1,41 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package snap + +import "github.com/prometheus/client_golang/prometheus" + +var ( + // TODO: save_fsync latency? + saveDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "etcd_debugging", + Subsystem: "snap", + Name: "save_total_duration_seconds", + Help: "The total latency distributions of save called by snapshot.", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 14), + }) + + marshallingDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "etcd_debugging", + Subsystem: "snap", + Name: "save_marshalling_duration_seconds", + Help: "The marshalling cost distributions of save called by snapshot.", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 14), + }) +) + +func init() { + prometheus.MustRegister(saveDurations) + prometheus.MustRegister(marshallingDurations) +} diff --git a/vendor/github.com/coreos/etcd/snap/snappb/snap.pb.go b/vendor/github.com/coreos/etcd/snap/snappb/snap.pb.go new file mode 100644 index 0000000..130e227 --- /dev/null +++ b/vendor/github.com/coreos/etcd/snap/snappb/snap.pb.go @@ -0,0 +1,353 @@ +// Code generated by protoc-gen-gogo. +// source: snap.proto +// DO NOT EDIT! + +/* + Package snappb is a generated protocol buffer package. + + It is generated from these files: + snap.proto + + It has these top-level messages: + Snapshot +*/ +package snappb + +import ( + "fmt" + + proto "github.com/golang/protobuf/proto" + + math "math" + + io "io" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type Snapshot struct { + Crc uint32 `protobuf:"varint,1,opt,name=crc" json:"crc"` + Data []byte `protobuf:"bytes,2,opt,name=data" json:"data,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *Snapshot) Reset() { *m = Snapshot{} } +func (m *Snapshot) String() string { return proto.CompactTextString(m) } +func (*Snapshot) ProtoMessage() {} +func (*Snapshot) Descriptor() ([]byte, []int) { return fileDescriptorSnap, []int{0} } + +func init() { + proto.RegisterType((*Snapshot)(nil), "snappb.snapshot") +} +func (m *Snapshot) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Snapshot) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8 + i++ + i = encodeVarintSnap(dAtA, i, uint64(m.Crc)) + if m.Data != nil { + dAtA[i] = 0x12 + i++ + i = encodeVarintSnap(dAtA, i, uint64(len(m.Data))) + i += copy(dAtA[i:], m.Data) + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func encodeFixed64Snap(dAtA []byte, offset int, v uint64) int { + dAtA[offset] = uint8(v) + dAtA[offset+1] = uint8(v >> 8) + dAtA[offset+2] = uint8(v >> 16) + dAtA[offset+3] = uint8(v >> 24) + dAtA[offset+4] = uint8(v >> 32) + dAtA[offset+5] = uint8(v >> 40) + dAtA[offset+6] = uint8(v >> 48) + dAtA[offset+7] = uint8(v >> 56) + return offset + 8 +} +func encodeFixed32Snap(dAtA []byte, offset int, v uint32) int { + dAtA[offset] = uint8(v) + dAtA[offset+1] = uint8(v >> 8) + dAtA[offset+2] = uint8(v >> 16) + dAtA[offset+3] = uint8(v >> 24) + return offset + 4 +} +func encodeVarintSnap(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} +func (m *Snapshot) Size() (n int) { + var l int + _ = l + n += 1 + sovSnap(uint64(m.Crc)) + if m.Data != nil { + l = len(m.Data) + n += 1 + l + sovSnap(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func sovSnap(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozSnap(x uint64) (n int) { + return sovSnap(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *Snapshot) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowSnap + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: snapshot: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: snapshot: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Crc", wireType) + } + m.Crc = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowSnap + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Crc |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Data", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowSnap + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthSnap + } + postIndex := iNdEx + byteLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Data = append(m.Data[:0], dAtA[iNdEx:postIndex]...) + if m.Data == nil { + m.Data = []byte{} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipSnap(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthSnap + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipSnap(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowSnap + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowSnap + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowSnap + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthSnap + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowSnap + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipSnap(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthSnap = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowSnap = fmt.Errorf("proto: integer overflow") +) + +func init() { proto.RegisterFile("snap.proto", fileDescriptorSnap) } + +var fileDescriptorSnap = []byte{ + // 126 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0x2a, 0xce, 0x4b, 0x2c, + 0xd0, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0x03, 0xb1, 0x0b, 0x92, 0xa4, 0x44, 0xd2, 0xf3, + 0xd3, 0xf3, 0xc1, 0x42, 0xfa, 0x20, 0x16, 0x44, 0x56, 0xc9, 0x8c, 0x8b, 0x03, 0x24, 0x5f, 0x9c, + 0x91, 0x5f, 0x22, 0x24, 0xc6, 0xc5, 0x9c, 0x5c, 0x94, 0x2c, 0xc1, 0xa8, 0xc0, 0xa8, 0xc1, 0xeb, + 0xc4, 0x72, 0xe2, 0x9e, 0x3c, 0x43, 0x10, 0x48, 0x40, 0x48, 0x88, 0x8b, 0x25, 0x25, 0xb1, 0x24, + 0x51, 0x82, 0x49, 0x81, 0x51, 0x83, 0x27, 0x08, 0xcc, 0x76, 0x12, 0x39, 0xf1, 0x50, 0x8e, 0xe1, + 0xc4, 0x23, 0x39, 0xc6, 0x0b, 0x8f, 0xe4, 0x18, 0x1f, 0x3c, 0x92, 0x63, 0x9c, 0xf1, 0x58, 0x8e, + 0x01, 0x10, 0x00, 0x00, 0xff, 0xff, 0xd8, 0x0f, 0x32, 0xb2, 0x78, 0x00, 0x00, 0x00, +} diff --git a/vendor/github.com/coreos/etcd/snap/snappb/snap.proto b/vendor/github.com/coreos/etcd/snap/snappb/snap.proto new file mode 100644 index 0000000..cd3d21d --- /dev/null +++ b/vendor/github.com/coreos/etcd/snap/snappb/snap.proto @@ -0,0 +1,14 @@ +syntax = "proto2"; +package snappb; + +import "gogoproto/gogo.proto"; + +option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; +option (gogoproto.unmarshaler_all) = true; +option (gogoproto.goproto_getters_all) = false; + +message snapshot { + optional uint32 crc = 1 [(gogoproto.nullable) = false]; + optional bytes data = 2; +} diff --git a/vendor/github.com/coreos/etcd/snap/snapshotter.go b/vendor/github.com/coreos/etcd/snap/snapshotter.go new file mode 100644 index 0000000..50d09dd --- /dev/null +++ b/vendor/github.com/coreos/etcd/snap/snapshotter.go @@ -0,0 +1,204 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package snap stores raft nodes' states with snapshots. +package snap + +import ( + "errors" + "fmt" + "hash/crc32" + "io/ioutil" + "os" + "path" + "sort" + "strings" + "time" + + pioutil "github.com/coreos/etcd/pkg/ioutil" + "github.com/coreos/etcd/pkg/pbutil" + "github.com/coreos/etcd/raft" + "github.com/coreos/etcd/raft/raftpb" + "github.com/coreos/etcd/snap/snappb" + + "github.com/coreos/pkg/capnslog" +) + +const ( + snapSuffix = ".snap" +) + +var ( + plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "snap") + + ErrNoSnapshot = errors.New("snap: no available snapshot") + ErrEmptySnapshot = errors.New("snap: empty snapshot") + ErrCRCMismatch = errors.New("snap: crc mismatch") + crcTable = crc32.MakeTable(crc32.Castagnoli) + + // A map of valid files that can be present in the snap folder. + validFiles = map[string]bool{ + "db": true, + } +) + +type Snapshotter struct { + dir string +} + +func New(dir string) *Snapshotter { + return &Snapshotter{ + dir: dir, + } +} + +func (s *Snapshotter) SaveSnap(snapshot raftpb.Snapshot) error { + if raft.IsEmptySnap(snapshot) { + return nil + } + return s.save(&snapshot) +} + +func (s *Snapshotter) save(snapshot *raftpb.Snapshot) error { + start := time.Now() + + fname := fmt.Sprintf("%016x-%016x%s", snapshot.Metadata.Term, snapshot.Metadata.Index, snapSuffix) + b := pbutil.MustMarshal(snapshot) + crc := crc32.Update(0, crcTable, b) + snap := snappb.Snapshot{Crc: crc, Data: b} + d, err := snap.Marshal() + if err != nil { + return err + } else { + marshallingDurations.Observe(float64(time.Since(start)) / float64(time.Second)) + } + + err = pioutil.WriteAndSyncFile(path.Join(s.dir, fname), d, 0666) + if err == nil { + saveDurations.Observe(float64(time.Since(start)) / float64(time.Second)) + } else { + err1 := os.Remove(path.Join(s.dir, fname)) + if err1 != nil { + plog.Errorf("failed to remove broken snapshot file %s", path.Join(s.dir, fname)) + } + } + return err +} + +func (s *Snapshotter) Load() (*raftpb.Snapshot, error) { + names, err := s.snapNames() + if err != nil { + return nil, err + } + var snap *raftpb.Snapshot + for _, name := range names { + if snap, err = loadSnap(s.dir, name); err == nil { + break + } + } + if err != nil { + return nil, ErrNoSnapshot + } + return snap, nil +} + +func loadSnap(dir, name string) (*raftpb.Snapshot, error) { + fpath := path.Join(dir, name) + snap, err := Read(fpath) + if err != nil { + renameBroken(fpath) + } + return snap, err +} + +// Read reads the snapshot named by snapname and returns the snapshot. +func Read(snapname string) (*raftpb.Snapshot, error) { + b, err := ioutil.ReadFile(snapname) + if err != nil { + plog.Errorf("cannot read file %v: %v", snapname, err) + return nil, err + } + + if len(b) == 0 { + plog.Errorf("unexpected empty snapshot") + return nil, ErrEmptySnapshot + } + + var serializedSnap snappb.Snapshot + if err = serializedSnap.Unmarshal(b); err != nil { + plog.Errorf("corrupted snapshot file %v: %v", snapname, err) + return nil, err + } + + if len(serializedSnap.Data) == 0 || serializedSnap.Crc == 0 { + plog.Errorf("unexpected empty snapshot") + return nil, ErrEmptySnapshot + } + + crc := crc32.Update(0, crcTable, serializedSnap.Data) + if crc != serializedSnap.Crc { + plog.Errorf("corrupted snapshot file %v: crc mismatch", snapname) + return nil, ErrCRCMismatch + } + + var snap raftpb.Snapshot + if err = snap.Unmarshal(serializedSnap.Data); err != nil { + plog.Errorf("corrupted snapshot file %v: %v", snapname, err) + return nil, err + } + return &snap, nil +} + +// snapNames returns the filename of the snapshots in logical time order (from newest to oldest). +// If there is no available snapshots, an ErrNoSnapshot will be returned. +func (s *Snapshotter) snapNames() ([]string, error) { + dir, err := os.Open(s.dir) + if err != nil { + return nil, err + } + defer dir.Close() + names, err := dir.Readdirnames(-1) + if err != nil { + return nil, err + } + snaps := checkSuffix(names) + if len(snaps) == 0 { + return nil, ErrNoSnapshot + } + sort.Sort(sort.Reverse(sort.StringSlice(snaps))) + return snaps, nil +} + +func checkSuffix(names []string) []string { + snaps := []string{} + for i := range names { + if strings.HasSuffix(names[i], snapSuffix) { + snaps = append(snaps, names[i]) + } else { + // If we find a file which is not a snapshot then check if it's + // a vaild file. If not throw out a warning. + if _, ok := validFiles[names[i]]; !ok { + plog.Warningf("skipped unexpected non snapshot file %v", names[i]) + } + } + } + return snaps +} + +func renameBroken(path string) { + brokenPath := path + ".broken" + if err := os.Rename(path, brokenPath); err != nil { + plog.Warningf("cannot rename broken snapshot file %v to %v: %v", path, brokenPath, err) + } +} diff --git a/vendor/github.com/coreos/etcd/wal/decoder.go b/vendor/github.com/coreos/etcd/wal/decoder.go new file mode 100644 index 0000000..0d9b442 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/decoder.go @@ -0,0 +1,185 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "bufio" + "encoding/binary" + "hash" + "io" + "sync" + + "github.com/coreos/etcd/pkg/crc" + "github.com/coreos/etcd/pkg/pbutil" + "github.com/coreos/etcd/raft/raftpb" + "github.com/coreos/etcd/wal/walpb" +) + +const minSectorSize = 512 + +type decoder struct { + mu sync.Mutex + brs []*bufio.Reader + + // lastValidOff file offset following the last valid decoded record + lastValidOff int64 + crc hash.Hash32 +} + +func newDecoder(r ...io.Reader) *decoder { + readers := make([]*bufio.Reader, len(r)) + for i := range r { + readers[i] = bufio.NewReader(r[i]) + } + return &decoder{ + brs: readers, + crc: crc.New(0, crcTable), + } +} + +func (d *decoder) decode(rec *walpb.Record) error { + rec.Reset() + d.mu.Lock() + defer d.mu.Unlock() + return d.decodeRecord(rec) +} + +func (d *decoder) decodeRecord(rec *walpb.Record) error { + if len(d.brs) == 0 { + return io.EOF + } + + l, err := readInt64(d.brs[0]) + if err == io.EOF || (err == nil && l == 0) { + // hit end of file or preallocated space + d.brs = d.brs[1:] + if len(d.brs) == 0 { + return io.EOF + } + d.lastValidOff = 0 + return d.decodeRecord(rec) + } + if err != nil { + return err + } + + recBytes, padBytes := decodeFrameSize(l) + + data := make([]byte, recBytes+padBytes) + if _, err = io.ReadFull(d.brs[0], data); err != nil { + // ReadFull returns io.EOF only if no bytes were read + // the decoder should treat this as an ErrUnexpectedEOF instead. + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + if err := rec.Unmarshal(data[:recBytes]); err != nil { + if d.isTornEntry(data) { + return io.ErrUnexpectedEOF + } + return err + } + + // skip crc checking if the record type is crcType + if rec.Type != crcType { + d.crc.Write(rec.Data) + if err := rec.Validate(d.crc.Sum32()); err != nil { + if d.isTornEntry(data) { + return io.ErrUnexpectedEOF + } + return err + } + } + // record decoded as valid; point last valid offset to end of record + d.lastValidOff += recBytes + padBytes + 8 + return nil +} + +func decodeFrameSize(lenField int64) (recBytes int64, padBytes int64) { + // the record size is stored in the lower 56 bits of the 64-bit length + recBytes = int64(uint64(lenField) & ^(uint64(0xff) << 56)) + // non-zero padding is indicated by set MSb / a negative length + if lenField < 0 { + // padding is stored in lower 3 bits of length MSB + padBytes = int64((uint64(lenField) >> 56) & 0x7) + } + return +} + +// isTornEntry determines whether the last entry of the WAL was partially written +// and corrupted because of a torn write. +func (d *decoder) isTornEntry(data []byte) bool { + if len(d.brs) != 1 { + return false + } + + fileOff := d.lastValidOff + 8 + curOff := 0 + chunks := [][]byte{} + // split data on sector boundaries + for curOff < len(data) { + chunkLen := int(minSectorSize - (fileOff % minSectorSize)) + if chunkLen > len(data)-curOff { + chunkLen = len(data) - curOff + } + chunks = append(chunks, data[curOff:curOff+chunkLen]) + fileOff += int64(chunkLen) + curOff += chunkLen + } + + // if any data for a sector chunk is all 0, it's a torn write + for _, sect := range chunks { + isZero := true + for _, v := range sect { + if v != 0 { + isZero = false + break + } + } + if isZero { + return true + } + } + return false +} + +func (d *decoder) updateCRC(prevCrc uint32) { + d.crc = crc.New(prevCrc, crcTable) +} + +func (d *decoder) lastCRC() uint32 { + return d.crc.Sum32() +} + +func (d *decoder) lastOffset() int64 { return d.lastValidOff } + +func mustUnmarshalEntry(d []byte) raftpb.Entry { + var e raftpb.Entry + pbutil.MustUnmarshal(&e, d) + return e +} + +func mustUnmarshalState(d []byte) raftpb.HardState { + var s raftpb.HardState + pbutil.MustUnmarshal(&s, d) + return s +} + +func readInt64(r io.Reader) (int64, error) { + var n int64 + err := binary.Read(r, binary.LittleEndian, &n) + return n, err +} diff --git a/vendor/github.com/coreos/etcd/wal/doc.go b/vendor/github.com/coreos/etcd/wal/doc.go new file mode 100644 index 0000000..a3abd69 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/doc.go @@ -0,0 +1,75 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +Package wal provides an implementation of a write ahead log that is used by +etcd. + +A WAL is created at a particular directory and is made up of a number of +segmented WAL files. Inside of each file the raft state and entries are appended +to it with the Save method: + + metadata := []byte{} + w, err := wal.Create("/var/lib/etcd", metadata) + ... + err := w.Save(s, ents) + +After saving a raft snapshot to disk, SaveSnapshot method should be called to +record it. So WAL can match with the saved snapshot when restarting. + + err := w.SaveSnapshot(walpb.Snapshot{Index: 10, Term: 2}) + +When a user has finished using a WAL it must be closed: + + w.Close() + +Each WAL file is a stream of WAL records. A WAL record is a length field and a wal record +protobuf. The record protobuf contains a CRC, a type, and a data payload. The length field is a +64-bit packed structure holding the length of the remaining logical record data in its lower +56 bits and its physical padding in the first three bits of the most significant byte. Each +record is 8-byte aligned so that the length field is never torn. The CRC contains the CRC32 +value of all record protobufs preceding the current record. + +WAL files are placed inside of the directory in the following format: +$seq-$index.wal + +The first WAL file to be created will be 0000000000000000-0000000000000000.wal +indicating an initial sequence of 0 and an initial raft index of 0. The first +entry written to WAL MUST have raft index 0. + +WAL will cut its current tail wal file if its size exceeds 64MB. This will increment an internal +sequence number and cause a new file to be created. If the last raft index saved +was 0x20 and this is the first time cut has been called on this WAL then the sequence will +increment from 0x0 to 0x1. The new file will be: 0000000000000001-0000000000000021.wal. +If a second cut issues 0x10 entries with incremental index later then the file will be called: +0000000000000002-0000000000000031.wal. + +At a later time a WAL can be opened at a particular snapshot. If there is no +snapshot, an empty snapshot should be passed in. + + w, err := wal.Open("/var/lib/etcd", walpb.Snapshot{Index: 10, Term: 2}) + ... + +The snapshot must have been written to the WAL. + +Additional items cannot be Saved to this WAL until all of the items from the given +snapshot to the end of the WAL are read first: + + metadata, state, ents, err := w.ReadAll() + +This will give you the metadata, the last raft.State and the slice of +raft.Entry items in the log. + +*/ +package wal diff --git a/vendor/github.com/coreos/etcd/wal/encoder.go b/vendor/github.com/coreos/etcd/wal/encoder.go new file mode 100644 index 0000000..efe5892 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/encoder.go @@ -0,0 +1,120 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "encoding/binary" + "hash" + "io" + "os" + "sync" + + "github.com/coreos/etcd/pkg/crc" + "github.com/coreos/etcd/pkg/ioutil" + "github.com/coreos/etcd/wal/walpb" +) + +// walPageBytes is the alignment for flushing records to the backing Writer. +// It should be a multiple of the minimum sector size so that WAL can safely +// distinguish between torn writes and ordinary data corruption. +const walPageBytes = 8 * minSectorSize + +type encoder struct { + mu sync.Mutex + bw *ioutil.PageWriter + + crc hash.Hash32 + buf []byte + uint64buf []byte +} + +func newEncoder(w io.Writer, prevCrc uint32, pageOffset int) *encoder { + return &encoder{ + bw: ioutil.NewPageWriter(w, walPageBytes, pageOffset), + crc: crc.New(prevCrc, crcTable), + // 1MB buffer + buf: make([]byte, 1024*1024), + uint64buf: make([]byte, 8), + } +} + +// newFileEncoder creates a new encoder with current file offset for the page writer. +func newFileEncoder(f *os.File, prevCrc uint32) (*encoder, error) { + offset, err := f.Seek(0, os.SEEK_CUR) + if err != nil { + return nil, err + } + return newEncoder(f, prevCrc, int(offset)), nil +} + +func (e *encoder) encode(rec *walpb.Record) error { + e.mu.Lock() + defer e.mu.Unlock() + + e.crc.Write(rec.Data) + rec.Crc = e.crc.Sum32() + var ( + data []byte + err error + n int + ) + + if rec.Size() > len(e.buf) { + data, err = rec.Marshal() + if err != nil { + return err + } + } else { + n, err = rec.MarshalTo(e.buf) + if err != nil { + return err + } + data = e.buf[:n] + } + + lenField, padBytes := encodeFrameSize(len(data)) + if err = writeUint64(e.bw, lenField, e.uint64buf); err != nil { + return err + } + + if padBytes != 0 { + data = append(data, make([]byte, padBytes)...) + } + _, err = e.bw.Write(data) + return err +} + +func encodeFrameSize(dataBytes int) (lenField uint64, padBytes int) { + lenField = uint64(dataBytes) + // force 8 byte alignment so length never gets a torn write + padBytes = (8 - (dataBytes % 8)) % 8 + if padBytes != 0 { + lenField |= uint64(0x80|padBytes) << 56 + } + return +} + +func (e *encoder) flush() error { + e.mu.Lock() + defer e.mu.Unlock() + return e.bw.Flush() +} + +func writeUint64(w io.Writer, n uint64, buf []byte) error { + // http://golang.org/src/encoding/binary/binary.go + binary.LittleEndian.PutUint64(buf, n) + _, err := w.Write(buf) + return err +} diff --git a/vendor/github.com/coreos/etcd/wal/file_pipeline.go b/vendor/github.com/coreos/etcd/wal/file_pipeline.go new file mode 100644 index 0000000..3412210 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/file_pipeline.go @@ -0,0 +1,97 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "fmt" + "os" + "path" + + "github.com/coreos/etcd/pkg/fileutil" +) + +// filePipeline pipelines allocating disk space +type filePipeline struct { + // dir to put files + dir string + // size of files to make, in bytes + size int64 + // count number of files generated + count int + + filec chan *fileutil.LockedFile + errc chan error + donec chan struct{} +} + +func newFilePipeline(dir string, fileSize int64) *filePipeline { + fp := &filePipeline{ + dir: dir, + size: fileSize, + filec: make(chan *fileutil.LockedFile), + errc: make(chan error, 1), + donec: make(chan struct{}), + } + go fp.run() + return fp +} + +// Open returns a fresh file for writing. Rename the file before calling +// Open again or there will be file collisions. +func (fp *filePipeline) Open() (f *fileutil.LockedFile, err error) { + select { + case f = <-fp.filec: + case err = <-fp.errc: + } + return +} + +func (fp *filePipeline) Close() error { + close(fp.donec) + return <-fp.errc +} + +func (fp *filePipeline) alloc() (f *fileutil.LockedFile, err error) { + // count % 2 so this file isn't the same as the one last published + fpath := path.Join(fp.dir, fmt.Sprintf("%d.tmp", fp.count%2)) + if f, err = fileutil.LockFile(fpath, os.O_CREATE|os.O_WRONLY, fileutil.PrivateFileMode); err != nil { + return nil, err + } + if err = fileutil.Preallocate(f.File, fp.size, true); err != nil { + plog.Errorf("failed to allocate space when creating new wal file (%v)", err) + f.Close() + return nil, err + } + fp.count++ + return f, nil +} + +func (fp *filePipeline) run() { + defer close(fp.errc) + for { + f, err := fp.alloc() + if err != nil { + fp.errc <- err + return + } + select { + case fp.filec <- f: + case <-fp.donec: + os.Remove(f.Name()) + f.Close() + return + } + } +} diff --git a/vendor/github.com/coreos/etcd/wal/metrics.go b/vendor/github.com/coreos/etcd/wal/metrics.go new file mode 100644 index 0000000..9e089d3 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/metrics.go @@ -0,0 +1,31 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import "github.com/prometheus/client_golang/prometheus" + +var ( + syncDurations = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "etcd", + Subsystem: "disk", + Name: "wal_fsync_duration_seconds", + Help: "The latency distributions of fsync called by wal.", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 14), + }) +) + +func init() { + prometheus.MustRegister(syncDurations) +} diff --git a/vendor/github.com/coreos/etcd/wal/repair.go b/vendor/github.com/coreos/etcd/wal/repair.go new file mode 100644 index 0000000..0a920e2 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/repair.go @@ -0,0 +1,99 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "io" + "os" + "path" + + "github.com/coreos/etcd/pkg/fileutil" + "github.com/coreos/etcd/wal/walpb" +) + +// Repair tries to repair ErrUnexpectedEOF in the +// last wal file by truncating. +func Repair(dirpath string) bool { + f, err := openLast(dirpath) + if err != nil { + return false + } + defer f.Close() + + rec := &walpb.Record{} + decoder := newDecoder(f) + for { + lastOffset := decoder.lastOffset() + err := decoder.decode(rec) + switch err { + case nil: + // update crc of the decoder when necessary + switch rec.Type { + case crcType: + crc := decoder.crc.Sum32() + // current crc of decoder must match the crc of the record. + // do no need to match 0 crc, since the decoder is a new one at this case. + if crc != 0 && rec.Validate(crc) != nil { + return false + } + decoder.updateCRC(rec.Crc) + } + continue + case io.EOF: + return true + case io.ErrUnexpectedEOF: + plog.Noticef("repairing %v", f.Name()) + bf, bferr := os.Create(f.Name() + ".broken") + if bferr != nil { + plog.Errorf("could not repair %v, failed to create backup file", f.Name()) + return false + } + defer bf.Close() + + if _, err = f.Seek(0, os.SEEK_SET); err != nil { + plog.Errorf("could not repair %v, failed to read file", f.Name()) + return false + } + + if _, err = io.Copy(bf, f); err != nil { + plog.Errorf("could not repair %v, failed to copy file", f.Name()) + return false + } + + if err = f.Truncate(int64(lastOffset)); err != nil { + plog.Errorf("could not repair %v, failed to truncate file", f.Name()) + return false + } + if err = fileutil.Fsync(f.File); err != nil { + plog.Errorf("could not repair %v, failed to sync file", f.Name()) + return false + } + return true + default: + plog.Errorf("could not repair error (%v)", err) + return false + } + } +} + +// openLast opens the last wal file for read and write. +func openLast(dirpath string) (*fileutil.LockedFile, error) { + names, err := readWalNames(dirpath) + if err != nil { + return nil, err + } + last := path.Join(dirpath, names[len(names)-1]) + return fileutil.LockFile(last, os.O_RDWR, fileutil.PrivateFileMode) +} diff --git a/vendor/github.com/coreos/etcd/wal/util.go b/vendor/github.com/coreos/etcd/wal/util.go new file mode 100644 index 0000000..5c56e22 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/util.go @@ -0,0 +1,107 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "errors" + "fmt" + "strings" + + "github.com/coreos/etcd/pkg/fileutil" +) + +var ( + badWalName = errors.New("bad wal name") +) + +func Exist(dirpath string) bool { + names, err := fileutil.ReadDir(dirpath) + if err != nil { + return false + } + return len(names) != 0 +} + +// searchIndex returns the last array index of names whose raft index section is +// equal to or smaller than the given index. +// The given names MUST be sorted. +func searchIndex(names []string, index uint64) (int, bool) { + for i := len(names) - 1; i >= 0; i-- { + name := names[i] + _, curIndex, err := parseWalName(name) + if err != nil { + plog.Panicf("parse correct name should never fail: %v", err) + } + if index >= curIndex { + return i, true + } + } + return -1, false +} + +// names should have been sorted based on sequence number. +// isValidSeq checks whether seq increases continuously. +func isValidSeq(names []string) bool { + var lastSeq uint64 + for _, name := range names { + curSeq, _, err := parseWalName(name) + if err != nil { + plog.Panicf("parse correct name should never fail: %v", err) + } + if lastSeq != 0 && lastSeq != curSeq-1 { + return false + } + lastSeq = curSeq + } + return true +} +func readWalNames(dirpath string) ([]string, error) { + names, err := fileutil.ReadDir(dirpath) + if err != nil { + return nil, err + } + wnames := checkWalNames(names) + if len(wnames) == 0 { + return nil, ErrFileNotFound + } + return wnames, nil +} + +func checkWalNames(names []string) []string { + wnames := make([]string, 0) + for _, name := range names { + if _, _, err := parseWalName(name); err != nil { + // don't complain about left over tmp files + if !strings.HasSuffix(name, ".tmp") { + plog.Warningf("ignored file %v in wal", name) + } + continue + } + wnames = append(wnames, name) + } + return wnames +} + +func parseWalName(str string) (seq, index uint64, err error) { + if !strings.HasSuffix(str, ".wal") { + return 0, 0, badWalName + } + _, err = fmt.Sscanf(str, "%016x-%016x.wal", &seq, &index) + return seq, index, err +} + +func walName(seq, index uint64) string { + return fmt.Sprintf("%016x-%016x.wal", seq, index) +} diff --git a/vendor/github.com/coreos/etcd/wal/wal.go b/vendor/github.com/coreos/etcd/wal/wal.go new file mode 100644 index 0000000..69ed6b2 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/wal.go @@ -0,0 +1,637 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "bytes" + "errors" + "fmt" + "hash/crc32" + "io" + "os" + "path" + "sync" + "time" + + "github.com/coreos/etcd/pkg/fileutil" + "github.com/coreos/etcd/pkg/pbutil" + "github.com/coreos/etcd/raft" + "github.com/coreos/etcd/raft/raftpb" + "github.com/coreos/etcd/wal/walpb" + + "github.com/coreos/pkg/capnslog" +) + +const ( + metadataType int64 = iota + 1 + entryType + stateType + crcType + snapshotType + + // warnSyncDuration is the amount of time allotted to an fsync before + // logging a warning + warnSyncDuration = time.Second +) + +var ( + // SegmentSizeBytes is the preallocated size of each wal segment file. + // The actual size might be larger than this. In general, the default + // value should be used, but this is defined as an exported variable + // so that tests can set a different segment size. + SegmentSizeBytes int64 = 64 * 1000 * 1000 // 64MB + + plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "wal") + + ErrMetadataConflict = errors.New("wal: conflicting metadata found") + ErrFileNotFound = errors.New("wal: file not found") + ErrCRCMismatch = errors.New("wal: crc mismatch") + ErrSnapshotMismatch = errors.New("wal: snapshot mismatch") + ErrSnapshotNotFound = errors.New("wal: snapshot not found") + crcTable = crc32.MakeTable(crc32.Castagnoli) +) + +// WAL is a logical representation of the stable storage. +// WAL is either in read mode or append mode but not both. +// A newly created WAL is in append mode, and ready for appending records. +// A just opened WAL is in read mode, and ready for reading records. +// The WAL will be ready for appending after reading out all the previous records. +type WAL struct { + dir string // the living directory of the underlay files + + // dirFile is a fd for the wal directory for syncing on Rename + dirFile *os.File + + metadata []byte // metadata recorded at the head of each WAL + state raftpb.HardState // hardstate recorded at the head of WAL + + start walpb.Snapshot // snapshot to start reading + decoder *decoder // decoder to decode records + readClose func() error // closer for decode reader + + mu sync.Mutex + enti uint64 // index of the last entry saved to the wal + encoder *encoder // encoder to encode records + + locks []*fileutil.LockedFile // the locked files the WAL holds (the name is increasing) + fp *filePipeline +} + +// Create creates a WAL ready for appending records. The given metadata is +// recorded at the head of each WAL file, and can be retrieved with ReadAll. +func Create(dirpath string, metadata []byte) (*WAL, error) { + if Exist(dirpath) { + return nil, os.ErrExist + } + + // keep temporary wal directory so WAL initialization appears atomic + tmpdirpath := path.Clean(dirpath) + ".tmp" + if fileutil.Exist(tmpdirpath) { + if err := os.RemoveAll(tmpdirpath); err != nil { + return nil, err + } + } + if err := fileutil.CreateDirAll(tmpdirpath); err != nil { + return nil, err + } + + p := path.Join(tmpdirpath, walName(0, 0)) + f, err := fileutil.LockFile(p, os.O_WRONLY|os.O_CREATE, fileutil.PrivateFileMode) + if err != nil { + return nil, err + } + if _, err = f.Seek(0, os.SEEK_END); err != nil { + return nil, err + } + if err = fileutil.Preallocate(f.File, SegmentSizeBytes, true); err != nil { + return nil, err + } + + w := &WAL{ + dir: dirpath, + metadata: metadata, + } + w.encoder, err = newFileEncoder(f.File, 0) + if err != nil { + return nil, err + } + w.locks = append(w.locks, f) + if err = w.saveCrc(0); err != nil { + return nil, err + } + if err = w.encoder.encode(&walpb.Record{Type: metadataType, Data: metadata}); err != nil { + return nil, err + } + if err = w.SaveSnapshot(walpb.Snapshot{}); err != nil { + return nil, err + } + + if w, err = w.renameWal(tmpdirpath); err != nil { + return nil, err + } + + // directory was renamed; sync parent dir to persist rename + pdir, perr := fileutil.OpenDir(path.Dir(w.dir)) + if perr != nil { + return nil, perr + } + if perr = fileutil.Fsync(pdir); perr != nil { + return nil, perr + } + if perr = pdir.Close(); err != nil { + return nil, perr + } + + return w, nil +} + +// Open opens the WAL at the given snap. +// The snap SHOULD have been previously saved to the WAL, or the following +// ReadAll will fail. +// The returned WAL is ready to read and the first record will be the one after +// the given snap. The WAL cannot be appended to before reading out all of its +// previous records. +func Open(dirpath string, snap walpb.Snapshot) (*WAL, error) { + w, err := openAtIndex(dirpath, snap, true) + if err != nil { + return nil, err + } + if w.dirFile, err = fileutil.OpenDir(w.dir); err != nil { + return nil, err + } + return w, nil +} + +// OpenForRead only opens the wal files for read. +// Write on a read only wal panics. +func OpenForRead(dirpath string, snap walpb.Snapshot) (*WAL, error) { + return openAtIndex(dirpath, snap, false) +} + +func openAtIndex(dirpath string, snap walpb.Snapshot, write bool) (*WAL, error) { + names, err := readWalNames(dirpath) + if err != nil { + return nil, err + } + + nameIndex, ok := searchIndex(names, snap.Index) + if !ok || !isValidSeq(names[nameIndex:]) { + return nil, ErrFileNotFound + } + + // open the wal files + rcs := make([]io.ReadCloser, 0) + rs := make([]io.Reader, 0) + ls := make([]*fileutil.LockedFile, 0) + for _, name := range names[nameIndex:] { + p := path.Join(dirpath, name) + if write { + l, err := fileutil.TryLockFile(p, os.O_RDWR, fileutil.PrivateFileMode) + if err != nil { + closeAll(rcs...) + return nil, err + } + ls = append(ls, l) + rcs = append(rcs, l) + } else { + rf, err := os.OpenFile(p, os.O_RDONLY, fileutil.PrivateFileMode) + if err != nil { + closeAll(rcs...) + return nil, err + } + ls = append(ls, nil) + rcs = append(rcs, rf) + } + rs = append(rs, rcs[len(rcs)-1]) + } + + closer := func() error { return closeAll(rcs...) } + + // create a WAL ready for reading + w := &WAL{ + dir: dirpath, + start: snap, + decoder: newDecoder(rs...), + readClose: closer, + locks: ls, + } + + if write { + // write reuses the file descriptors from read; don't close so + // WAL can append without dropping the file lock + w.readClose = nil + if _, _, err := parseWalName(path.Base(w.tail().Name())); err != nil { + closer() + return nil, err + } + w.fp = newFilePipeline(w.dir, SegmentSizeBytes) + } + + return w, nil +} + +// ReadAll reads out records of the current WAL. +// If opened in write mode, it must read out all records until EOF. Or an error +// will be returned. +// If opened in read mode, it will try to read all records if possible. +// If it cannot read out the expected snap, it will return ErrSnapshotNotFound. +// If loaded snap doesn't match with the expected one, it will return +// all the records and error ErrSnapshotMismatch. +// TODO: detect not-last-snap error. +// TODO: maybe loose the checking of match. +// After ReadAll, the WAL will be ready for appending new records. +func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.Entry, err error) { + w.mu.Lock() + defer w.mu.Unlock() + + rec := &walpb.Record{} + decoder := w.decoder + + var match bool + for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) { + switch rec.Type { + case entryType: + e := mustUnmarshalEntry(rec.Data) + if e.Index > w.start.Index { + ents = append(ents[:e.Index-w.start.Index-1], e) + } + w.enti = e.Index + case stateType: + state = mustUnmarshalState(rec.Data) + case metadataType: + if metadata != nil && !bytes.Equal(metadata, rec.Data) { + state.Reset() + return nil, state, nil, ErrMetadataConflict + } + metadata = rec.Data + case crcType: + crc := decoder.crc.Sum32() + // current crc of decoder must match the crc of the record. + // do no need to match 0 crc, since the decoder is a new one at this case. + if crc != 0 && rec.Validate(crc) != nil { + state.Reset() + return nil, state, nil, ErrCRCMismatch + } + decoder.updateCRC(rec.Crc) + case snapshotType: + var snap walpb.Snapshot + pbutil.MustUnmarshal(&snap, rec.Data) + if snap.Index == w.start.Index { + if snap.Term != w.start.Term { + state.Reset() + return nil, state, nil, ErrSnapshotMismatch + } + match = true + } + default: + state.Reset() + return nil, state, nil, fmt.Errorf("unexpected block type %d", rec.Type) + } + } + + switch w.tail() { + case nil: + // We do not have to read out all entries in read mode. + // The last record maybe a partial written one, so + // ErrunexpectedEOF might be returned. + if err != io.EOF && err != io.ErrUnexpectedEOF { + state.Reset() + return nil, state, nil, err + } + default: + // We must read all of the entries if WAL is opened in write mode. + if err != io.EOF { + state.Reset() + return nil, state, nil, err + } + // decodeRecord() will return io.EOF if it detects a zero record, + // but this zero record may be followed by non-zero records from + // a torn write. Overwriting some of these non-zero records, but + // not all, will cause CRC errors on WAL open. Since the records + // were never fully synced to disk in the first place, it's safe + // to zero them out to avoid any CRC errors from new writes. + if _, err = w.tail().Seek(w.decoder.lastOffset(), os.SEEK_SET); err != nil { + return nil, state, nil, err + } + if err = fileutil.ZeroToEnd(w.tail().File); err != nil { + return nil, state, nil, err + } + } + + err = nil + if !match { + err = ErrSnapshotNotFound + } + + // close decoder, disable reading + if w.readClose != nil { + w.readClose() + w.readClose = nil + } + w.start = walpb.Snapshot{} + + w.metadata = metadata + + if w.tail() != nil { + // create encoder (chain crc with the decoder), enable appending + w.encoder, err = newFileEncoder(w.tail().File, w.decoder.lastCRC()) + if err != nil { + return + } + } + w.decoder = nil + + return metadata, state, ents, err +} + +// cut closes current file written and creates a new one ready to append. +// cut first creates a temp wal file and writes necessary headers into it. +// Then cut atomically rename temp wal file to a wal file. +func (w *WAL) cut() error { + // close old wal file; truncate to avoid wasting space if an early cut + off, serr := w.tail().Seek(0, os.SEEK_CUR) + if serr != nil { + return serr + } + if err := w.tail().Truncate(off); err != nil { + return err + } + if err := w.sync(); err != nil { + return err + } + + fpath := path.Join(w.dir, walName(w.seq()+1, w.enti+1)) + + // create a temp wal file with name sequence + 1, or truncate the existing one + newTail, err := w.fp.Open() + if err != nil { + return err + } + + // update writer and save the previous crc + w.locks = append(w.locks, newTail) + prevCrc := w.encoder.crc.Sum32() + w.encoder, err = newFileEncoder(w.tail().File, prevCrc) + if err != nil { + return err + } + if err = w.saveCrc(prevCrc); err != nil { + return err + } + if err = w.encoder.encode(&walpb.Record{Type: metadataType, Data: w.metadata}); err != nil { + return err + } + if err = w.saveState(&w.state); err != nil { + return err + } + // atomically move temp wal file to wal file + if err = w.sync(); err != nil { + return err + } + + off, err = w.tail().Seek(0, os.SEEK_CUR) + if err != nil { + return err + } + + if err = os.Rename(newTail.Name(), fpath); err != nil { + return err + } + if err = fileutil.Fsync(w.dirFile); err != nil { + return err + } + + newTail.Close() + + if newTail, err = fileutil.LockFile(fpath, os.O_WRONLY, fileutil.PrivateFileMode); err != nil { + return err + } + if _, err = newTail.Seek(off, os.SEEK_SET); err != nil { + return err + } + + w.locks[len(w.locks)-1] = newTail + + prevCrc = w.encoder.crc.Sum32() + w.encoder, err = newFileEncoder(w.tail().File, prevCrc) + if err != nil { + return err + } + + plog.Infof("segmented wal file %v is created", fpath) + return nil +} + +func (w *WAL) sync() error { + if w.encoder != nil { + if err := w.encoder.flush(); err != nil { + return err + } + } + start := time.Now() + err := fileutil.Fdatasync(w.tail().File) + + duration := time.Since(start) + if duration > warnSyncDuration { + plog.Warningf("sync duration of %v, expected less than %v", duration, warnSyncDuration) + } + syncDurations.Observe(duration.Seconds()) + + return err +} + +// ReleaseLockTo releases the locks, which has smaller index than the given index +// except the largest one among them. +// For example, if WAL is holding lock 1,2,3,4,5,6, ReleaseLockTo(4) will release +// lock 1,2 but keep 3. ReleaseLockTo(5) will release 1,2,3 but keep 4. +func (w *WAL) ReleaseLockTo(index uint64) error { + w.mu.Lock() + defer w.mu.Unlock() + + var smaller int + found := false + + for i, l := range w.locks { + _, lockIndex, err := parseWalName(path.Base(l.Name())) + if err != nil { + return err + } + if lockIndex >= index { + smaller = i - 1 + found = true + break + } + } + + // if no lock index is greater than the release index, we can + // release lock up to the last one(excluding). + if !found && len(w.locks) != 0 { + smaller = len(w.locks) - 1 + } + + if smaller <= 0 { + return nil + } + + for i := 0; i < smaller; i++ { + if w.locks[i] == nil { + continue + } + w.locks[i].Close() + } + w.locks = w.locks[smaller:] + + return nil +} + +func (w *WAL) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + + if w.fp != nil { + w.fp.Close() + w.fp = nil + } + + if w.tail() != nil { + if err := w.sync(); err != nil { + return err + } + } + for _, l := range w.locks { + if l == nil { + continue + } + if err := l.Close(); err != nil { + plog.Errorf("failed to unlock during closing wal: %s", err) + } + } + + return w.dirFile.Close() +} + +func (w *WAL) saveEntry(e *raftpb.Entry) error { + // TODO: add MustMarshalTo to reduce one allocation. + b := pbutil.MustMarshal(e) + rec := &walpb.Record{Type: entryType, Data: b} + if err := w.encoder.encode(rec); err != nil { + return err + } + w.enti = e.Index + return nil +} + +func (w *WAL) saveState(s *raftpb.HardState) error { + if raft.IsEmptyHardState(*s) { + return nil + } + w.state = *s + b := pbutil.MustMarshal(s) + rec := &walpb.Record{Type: stateType, Data: b} + return w.encoder.encode(rec) +} + +func (w *WAL) Save(st raftpb.HardState, ents []raftpb.Entry) error { + w.mu.Lock() + defer w.mu.Unlock() + + // short cut, do not call sync + if raft.IsEmptyHardState(st) && len(ents) == 0 { + return nil + } + + mustSync := mustSync(st, w.state, len(ents)) + + // TODO(xiangli): no more reference operator + for i := range ents { + if err := w.saveEntry(&ents[i]); err != nil { + return err + } + } + if err := w.saveState(&st); err != nil { + return err + } + + curOff, err := w.tail().Seek(0, os.SEEK_CUR) + if err != nil { + return err + } + if curOff < SegmentSizeBytes { + if mustSync { + return w.sync() + } + return nil + } + + return w.cut() +} + +func (w *WAL) SaveSnapshot(e walpb.Snapshot) error { + b := pbutil.MustMarshal(&e) + + w.mu.Lock() + defer w.mu.Unlock() + + rec := &walpb.Record{Type: snapshotType, Data: b} + if err := w.encoder.encode(rec); err != nil { + return err + } + // update enti only when snapshot is ahead of last index + if w.enti < e.Index { + w.enti = e.Index + } + return w.sync() +} + +func (w *WAL) saveCrc(prevCrc uint32) error { + return w.encoder.encode(&walpb.Record{Type: crcType, Crc: prevCrc}) +} + +func (w *WAL) tail() *fileutil.LockedFile { + if len(w.locks) > 0 { + return w.locks[len(w.locks)-1] + } + return nil +} + +func (w *WAL) seq() uint64 { + t := w.tail() + if t == nil { + return 0 + } + seq, _, err := parseWalName(path.Base(t.Name())) + if err != nil { + plog.Fatalf("bad wal name %s (%v)", t.Name(), err) + } + return seq +} + +func mustSync(st, prevst raftpb.HardState, entsnum int) bool { + // Persistent state on all servers: + // (Updated on stable storage before responding to RPCs) + // currentTerm + // votedFor + // log entries[] + return entsnum != 0 || st.Vote != prevst.Vote || st.Term != prevst.Term +} + +func closeAll(rcs ...io.ReadCloser) error { + for _, f := range rcs { + if err := f.Close(); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/coreos/etcd/wal/wal_unix.go b/vendor/github.com/coreos/etcd/wal/wal_unix.go new file mode 100644 index 0000000..82fd6a1 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/wal_unix.go @@ -0,0 +1,44 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !windows + +package wal + +import ( + "os" + + "github.com/coreos/etcd/pkg/fileutil" +) + +func (w *WAL) renameWal(tmpdirpath string) (*WAL, error) { + // On non-Windows platforms, hold the lock while renaming. Releasing + // the lock and trying to reacquire it quickly can be flaky because + // it's possible the process will fork to spawn a process while this is + // happening. The fds are set up as close-on-exec by the Go runtime, + // but there is a window between the fork and the exec where another + // process holds the lock. + + if err := os.RemoveAll(w.dir); err != nil { + return nil, err + } + if err := os.Rename(tmpdirpath, w.dir); err != nil { + return nil, err + } + + w.fp = newFilePipeline(w.dir, SegmentSizeBytes) + df, err := fileutil.OpenDir(w.dir) + w.dirFile = df + return w, err +} diff --git a/vendor/github.com/coreos/etcd/wal/wal_windows.go b/vendor/github.com/coreos/etcd/wal/wal_windows.go new file mode 100644 index 0000000..0b9e434 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/wal_windows.go @@ -0,0 +1,41 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "os" + + "github.com/coreos/etcd/wal/walpb" +) + +func (w *WAL) renameWal(tmpdirpath string) (*WAL, error) { + // rename of directory with locked files doesn't work on + // windows; close the WAL to release the locks so the directory + // can be renamed + w.Close() + if err := os.Rename(tmpdirpath, w.dir); err != nil { + return nil, err + } + // reopen and relock + newWAL, oerr := Open(w.dir, walpb.Snapshot{}) + if oerr != nil { + return nil, oerr + } + if _, _, _, err := newWAL.ReadAll(); err != nil { + newWAL.Close() + return nil, err + } + return newWAL, nil +} diff --git a/vendor/github.com/coreos/etcd/wal/walpb/record.go b/vendor/github.com/coreos/etcd/wal/walpb/record.go new file mode 100644 index 0000000..30a05e0 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/walpb/record.go @@ -0,0 +1,29 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package walpb + +import "errors" + +var ( + ErrCRCMismatch = errors.New("walpb: crc mismatch") +) + +func (rec *Record) Validate(crc uint32) error { + if rec.Crc == crc { + return nil + } + rec.Reset() + return ErrCRCMismatch +} diff --git a/vendor/github.com/coreos/etcd/wal/walpb/record.pb.go b/vendor/github.com/coreos/etcd/wal/walpb/record.pb.go new file mode 100644 index 0000000..e1a77d5 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/walpb/record.pb.go @@ -0,0 +1,521 @@ +// Code generated by protoc-gen-gogo. +// source: record.proto +// DO NOT EDIT! + +/* + Package walpb is a generated protocol buffer package. + + It is generated from these files: + record.proto + + It has these top-level messages: + Record + Snapshot +*/ +package walpb + +import ( + "fmt" + + proto "github.com/golang/protobuf/proto" + + math "math" + + io "io" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type Record struct { + Type int64 `protobuf:"varint,1,opt,name=type" json:"type"` + Crc uint32 `protobuf:"varint,2,opt,name=crc" json:"crc"` + Data []byte `protobuf:"bytes,3,opt,name=data" json:"data,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *Record) Reset() { *m = Record{} } +func (m *Record) String() string { return proto.CompactTextString(m) } +func (*Record) ProtoMessage() {} +func (*Record) Descriptor() ([]byte, []int) { return fileDescriptorRecord, []int{0} } + +type Snapshot struct { + Index uint64 `protobuf:"varint,1,opt,name=index" json:"index"` + Term uint64 `protobuf:"varint,2,opt,name=term" json:"term"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *Snapshot) Reset() { *m = Snapshot{} } +func (m *Snapshot) String() string { return proto.CompactTextString(m) } +func (*Snapshot) ProtoMessage() {} +func (*Snapshot) Descriptor() ([]byte, []int) { return fileDescriptorRecord, []int{1} } + +func init() { + proto.RegisterType((*Record)(nil), "walpb.Record") + proto.RegisterType((*Snapshot)(nil), "walpb.Snapshot") +} +func (m *Record) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Record) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8 + i++ + i = encodeVarintRecord(dAtA, i, uint64(m.Type)) + dAtA[i] = 0x10 + i++ + i = encodeVarintRecord(dAtA, i, uint64(m.Crc)) + if m.Data != nil { + dAtA[i] = 0x1a + i++ + i = encodeVarintRecord(dAtA, i, uint64(len(m.Data))) + i += copy(dAtA[i:], m.Data) + } + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *Snapshot) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Snapshot) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8 + i++ + i = encodeVarintRecord(dAtA, i, uint64(m.Index)) + dAtA[i] = 0x10 + i++ + i = encodeVarintRecord(dAtA, i, uint64(m.Term)) + if m.XXX_unrecognized != nil { + i += copy(dAtA[i:], m.XXX_unrecognized) + } + return i, nil +} + +func encodeFixed64Record(dAtA []byte, offset int, v uint64) int { + dAtA[offset] = uint8(v) + dAtA[offset+1] = uint8(v >> 8) + dAtA[offset+2] = uint8(v >> 16) + dAtA[offset+3] = uint8(v >> 24) + dAtA[offset+4] = uint8(v >> 32) + dAtA[offset+5] = uint8(v >> 40) + dAtA[offset+6] = uint8(v >> 48) + dAtA[offset+7] = uint8(v >> 56) + return offset + 8 +} +func encodeFixed32Record(dAtA []byte, offset int, v uint32) int { + dAtA[offset] = uint8(v) + dAtA[offset+1] = uint8(v >> 8) + dAtA[offset+2] = uint8(v >> 16) + dAtA[offset+3] = uint8(v >> 24) + return offset + 4 +} +func encodeVarintRecord(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} +func (m *Record) Size() (n int) { + var l int + _ = l + n += 1 + sovRecord(uint64(m.Type)) + n += 1 + sovRecord(uint64(m.Crc)) + if m.Data != nil { + l = len(m.Data) + n += 1 + l + sovRecord(uint64(l)) + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *Snapshot) Size() (n int) { + var l int + _ = l + n += 1 + sovRecord(uint64(m.Index)) + n += 1 + sovRecord(uint64(m.Term)) + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func sovRecord(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozRecord(x uint64) (n int) { + return sovRecord(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *Record) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRecord + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Record: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Record: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Type", wireType) + } + m.Type = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRecord + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Type |= (int64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Crc", wireType) + } + m.Crc = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRecord + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Crc |= (uint32(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Data", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRecord + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthRecord + } + postIndex := iNdEx + byteLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Data = append(m.Data[:0], dAtA[iNdEx:postIndex]...) + if m.Data == nil { + m.Data = []byte{} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipRecord(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRecord + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func (m *Snapshot) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRecord + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Snapshot: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Snapshot: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Index", wireType) + } + m.Index = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRecord + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Index |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) + } + m.Term = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowRecord + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.Term |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipRecord(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthRecord + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, dAtA[iNdEx:iNdEx+skippy]...) + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipRecord(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRecord + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRecord + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRecord + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthRecord + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowRecord + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipRecord(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthRecord = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowRecord = fmt.Errorf("proto: integer overflow") +) + +func init() { proto.RegisterFile("record.proto", fileDescriptorRecord) } + +var fileDescriptorRecord = []byte{ + // 186 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0x29, 0x4a, 0x4d, 0xce, + 0x2f, 0x4a, 0xd1, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0x2d, 0x4f, 0xcc, 0x29, 0x48, 0x92, + 0x12, 0x49, 0xcf, 0x4f, 0xcf, 0x07, 0x8b, 0xe8, 0x83, 0x58, 0x10, 0x49, 0x25, 0x3f, 0x2e, 0xb6, + 0x20, 0xb0, 0x62, 0x21, 0x09, 0x2e, 0x96, 0x92, 0xca, 0x82, 0x54, 0x09, 0x46, 0x05, 0x46, 0x0d, + 0x66, 0x27, 0x96, 0x13, 0xf7, 0xe4, 0x19, 0x82, 0xc0, 0x22, 0x42, 0x62, 0x5c, 0xcc, 0xc9, 0x45, + 0xc9, 0x12, 0x4c, 0x0a, 0x8c, 0x1a, 0xbc, 0x50, 0x09, 0x90, 0x80, 0x90, 0x10, 0x17, 0x4b, 0x4a, + 0x62, 0x49, 0xa2, 0x04, 0xb3, 0x02, 0xa3, 0x06, 0x4f, 0x10, 0x98, 0xad, 0xe4, 0xc0, 0xc5, 0x11, + 0x9c, 0x97, 0x58, 0x50, 0x9c, 0x91, 0x5f, 0x22, 0x24, 0xc5, 0xc5, 0x9a, 0x99, 0x97, 0x92, 0x5a, + 0x01, 0x36, 0x92, 0x05, 0xaa, 0x13, 0x22, 0x04, 0xb6, 0x2d, 0xb5, 0x28, 0x17, 0x6c, 0x28, 0x0b, + 0xdc, 0xb6, 0xd4, 0xa2, 0x5c, 0x27, 0x91, 0x13, 0x0f, 0xe5, 0x18, 0x4e, 0x3c, 0x92, 0x63, 0xbc, + 0xf0, 0x48, 0x8e, 0xf1, 0xc1, 0x23, 0x39, 0xc6, 0x19, 0x8f, 0xe5, 0x18, 0x00, 0x01, 0x00, 0x00, + 0xff, 0xff, 0x7f, 0x5e, 0x5c, 0x46, 0xd3, 0x00, 0x00, 0x00, +} diff --git a/vendor/github.com/coreos/etcd/wal/walpb/record.proto b/vendor/github.com/coreos/etcd/wal/walpb/record.proto new file mode 100644 index 0000000..b694cb2 --- /dev/null +++ b/vendor/github.com/coreos/etcd/wal/walpb/record.proto @@ -0,0 +1,20 @@ +syntax = "proto2"; +package walpb; + +import "gogoproto/gogo.proto"; + +option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; +option (gogoproto.unmarshaler_all) = true; +option (gogoproto.goproto_getters_all) = false; + +message Record { + optional int64 type = 1 [(gogoproto.nullable) = false]; + optional uint32 crc = 2 [(gogoproto.nullable) = false]; + optional bytes data = 3; +} + +message Snapshot { + optional uint64 index = 1 [(gogoproto.nullable) = false]; + optional uint64 term = 2 [(gogoproto.nullable) = false]; +} diff --git a/vendor/github.com/coreos/go-systemd/LICENSE b/vendor/github.com/coreos/go-systemd/LICENSE new file mode 100644 index 0000000..37ec93a --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/coreos/go-systemd/README.md b/vendor/github.com/coreos/go-systemd/README.md new file mode 100644 index 0000000..cb87a11 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/README.md @@ -0,0 +1,54 @@ +# go-systemd + +[![Build Status](https://travis-ci.org/coreos/go-systemd.png?branch=master)](https://travis-ci.org/coreos/go-systemd) +[![godoc](https://godoc.org/github.com/coreos/go-systemd?status.svg)](http://godoc.org/github.com/coreos/go-systemd) + +Go bindings to systemd. The project has several packages: + +- `activation` - for writing and using socket activation from Go +- `dbus` - for starting/stopping/inspecting running services and units +- `journal` - for writing to systemd's logging service, journald +- `sdjournal` - for reading from journald by wrapping its C API +- `machine1` - for registering machines/containers with systemd +- `unit` - for (de)serialization and comparison of unit files + +## Socket Activation + +An example HTTP server using socket activation can be quickly set up by following this README on a Linux machine running systemd: + +https://github.com/coreos/go-systemd/tree/master/examples/activation/httpserver + +## Journal + +Using the pure-Go `journal` package you can submit journal entries directly to systemd's journal, taking advantage of features like indexed key/value pairs for each log entry. +The `sdjournal` package provides read access to the journal by wrapping around journald's native C API; consequently it requires cgo and the journal headers to be available. + +## D-Bus + +The `dbus` package connects to the [systemd D-Bus API](http://www.freedesktop.org/wiki/Software/systemd/dbus/) and lets you start, stop and introspect systemd units. The API docs are here: + +http://godoc.org/github.com/coreos/go-systemd/dbus + +### Debugging + +Create `/etc/dbus-1/system-local.conf` that looks like this: + +``` +<!DOCTYPE busconfig PUBLIC +"-//freedesktop//DTD D-Bus Bus Configuration 1.0//EN" +"http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd"> +<busconfig> + <policy user="root"> + <allow eavesdrop="true"/> + <allow eavesdrop="true" send_destination="*"/> + </policy> +</busconfig> +``` + +## machined + +The `machine1` package allows interaction with the [systemd machined D-Bus API](http://www.freedesktop.org/wiki/Software/systemd/machined/). + +## Units + +The `unit` package provides various functions for working with [systemd unit files](http://www.freedesktop.org/software/systemd/man/systemd.unit.html). diff --git a/vendor/github.com/coreos/go-systemd/activation/files.go b/vendor/github.com/coreos/go-systemd/activation/files.go new file mode 100644 index 0000000..c8e85fc --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/activation/files.go @@ -0,0 +1,52 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package activation implements primitives for systemd socket activation. +package activation + +import ( + "os" + "strconv" + "syscall" +) + +// based on: https://gist.github.com/alberts/4640792 +const ( + listenFdsStart = 3 +) + +func Files(unsetEnv bool) []*os.File { + if unsetEnv { + defer os.Unsetenv("LISTEN_PID") + defer os.Unsetenv("LISTEN_FDS") + } + + pid, err := strconv.Atoi(os.Getenv("LISTEN_PID")) + if err != nil || pid != os.Getpid() { + return nil + } + + nfds, err := strconv.Atoi(os.Getenv("LISTEN_FDS")) + if err != nil || nfds == 0 { + return nil + } + + files := make([]*os.File, 0, nfds) + for fd := listenFdsStart; fd < listenFdsStart+nfds; fd++ { + syscall.CloseOnExec(fd) + files = append(files, os.NewFile(uintptr(fd), "LISTEN_FD_"+strconv.Itoa(fd))) + } + + return files +} diff --git a/vendor/github.com/coreos/go-systemd/activation/listeners.go b/vendor/github.com/coreos/go-systemd/activation/listeners.go new file mode 100644 index 0000000..fd5dfc7 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/activation/listeners.go @@ -0,0 +1,60 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package activation + +import ( + "crypto/tls" + "net" +) + +// Listeners returns a slice containing a net.Listener for each matching socket type +// passed to this process. +// +// The order of the file descriptors is preserved in the returned slice. +// Nil values are used to fill any gaps. For example if systemd were to return file descriptors +// corresponding with "udp, tcp, tcp", then the slice would contain {nil, net.Listener, net.Listener} +func Listeners(unsetEnv bool) ([]net.Listener, error) { + files := Files(unsetEnv) + listeners := make([]net.Listener, len(files)) + + for i, f := range files { + if pc, err := net.FileListener(f); err == nil { + listeners[i] = pc + } + } + return listeners, nil +} + +// TLSListeners returns a slice containing a net.listener for each matching TCP socket type +// passed to this process. +// It uses default Listeners func and forces TCP sockets handlers to use TLS based on tlsConfig. +func TLSListeners(unsetEnv bool, tlsConfig *tls.Config) ([]net.Listener, error) { + listeners, err := Listeners(unsetEnv) + + if listeners == nil || err != nil { + return nil, err + } + + if tlsConfig != nil && err == nil { + for i, l := range listeners { + // Activate TLS only for TCP sockets + if l.Addr().Network() == "tcp" { + listeners[i] = tls.NewListener(l, tlsConfig) + } + } + } + + return listeners, err +} diff --git a/vendor/github.com/coreos/go-systemd/activation/packetconns.go b/vendor/github.com/coreos/go-systemd/activation/packetconns.go new file mode 100644 index 0000000..48b2ca0 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/activation/packetconns.go @@ -0,0 +1,37 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package activation + +import ( + "net" +) + +// PacketConns returns a slice containing a net.PacketConn for each matching socket type +// passed to this process. +// +// The order of the file descriptors is preserved in the returned slice. +// Nil values are used to fill any gaps. For example if systemd were to return file descriptors +// corresponding with "udp, tcp, udp", then the slice would contain {net.PacketConn, nil, net.PacketConn} +func PacketConns(unsetEnv bool) ([]net.PacketConn, error) { + files := Files(unsetEnv) + conns := make([]net.PacketConn, len(files)) + + for i, f := range files { + if pc, err := net.FilePacketConn(f); err == nil { + conns[i] = pc + } + } + return conns, nil +} diff --git a/vendor/github.com/coreos/go-systemd/daemon/sdnotify.go b/vendor/github.com/coreos/go-systemd/daemon/sdnotify.go new file mode 100644 index 0000000..ba6d41d --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/daemon/sdnotify.go @@ -0,0 +1,63 @@ +// Copyright 2014 Docker, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Code forked from Docker project +package daemon + +import ( + "net" + "os" +) + +// SdNotify sends a message to the init daemon. It is common to ignore the error. +// If `unsetEnvironment` is true, the environment variable `NOTIFY_SOCKET` +// will be unconditionally unset. +// +// It returns one of the following: +// (false, nil) - notification not supported (i.e. NOTIFY_SOCKET is unset) +// (false, err) - notification supported, but failure happened (e.g. error connecting to NOTIFY_SOCKET or while sending data) +// (true, nil) - notification supported, data has been sent +func SdNotify(unsetEnvironment bool, state string) (sent bool, err error) { + socketAddr := &net.UnixAddr{ + Name: os.Getenv("NOTIFY_SOCKET"), + Net: "unixgram", + } + + // NOTIFY_SOCKET not set + if socketAddr.Name == "" { + return false, nil + } + + if unsetEnvironment { + err = os.Unsetenv("NOTIFY_SOCKET") + } + if err != nil { + return false, err + } + + conn, err := net.DialUnix(socketAddr.Net, nil, socketAddr) + // Error connecting to NOTIFY_SOCKET + if err != nil { + return false, err + } + defer conn.Close() + + _, err = conn.Write([]byte(state)) + // Error sending the message + if err != nil { + return false, err + } + return true, nil +} diff --git a/vendor/github.com/coreos/go-systemd/daemon/watchdog.go b/vendor/github.com/coreos/go-systemd/daemon/watchdog.go new file mode 100644 index 0000000..35a92e6 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/daemon/watchdog.go @@ -0,0 +1,72 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package daemon + +import ( + "fmt" + "os" + "strconv" + "time" +) + +// SdWatchdogEnabled return watchdog information for a service. +// Process should send daemon.SdNotify("WATCHDOG=1") every time / 2. +// If `unsetEnvironment` is true, the environment variables `WATCHDOG_USEC` +// and `WATCHDOG_PID` will be unconditionally unset. +// +// It returns one of the following: +// (0, nil) - watchdog isn't enabled or we aren't the watched PID. +// (0, err) - an error happened (e.g. error converting time). +// (time, nil) - watchdog is enabled and we can send ping. +// time is delay before inactive service will be killed. +func SdWatchdogEnabled(unsetEnvironment bool) (time.Duration, error) { + wusec := os.Getenv("WATCHDOG_USEC") + wpid := os.Getenv("WATCHDOG_PID") + if unsetEnvironment { + wusecErr := os.Unsetenv("WATCHDOG_USEC") + wpidErr := os.Unsetenv("WATCHDOG_PID") + if wusecErr != nil { + return 0, wusecErr + } + if wpidErr != nil { + return 0, wpidErr + } + } + + if wusec == "" { + return 0, nil + } + s, err := strconv.Atoi(wusec) + if err != nil { + return 0, fmt.Errorf("error converting WATCHDOG_USEC: %s", err) + } + if s <= 0 { + return 0, fmt.Errorf("error WATCHDOG_USEC must be a positive number") + } + interval := time.Duration(s) * time.Microsecond + + if wpid == "" { + return interval, nil + } + p, err := strconv.Atoi(wpid) + if err != nil { + return 0, fmt.Errorf("error converting WATCHDOG_PID: %s", err) + } + if os.Getpid() != p { + return 0, nil + } + + return interval, nil +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/dbus.go b/vendor/github.com/coreos/go-systemd/dbus/dbus.go new file mode 100644 index 0000000..c1694fb --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/dbus.go @@ -0,0 +1,213 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Integration with the systemd D-Bus API. See http://www.freedesktop.org/wiki/Software/systemd/dbus/ +package dbus + +import ( + "fmt" + "os" + "strconv" + "strings" + "sync" + + "github.com/godbus/dbus" +) + +const ( + alpha = `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ` + num = `0123456789` + alphanum = alpha + num + signalBuffer = 100 +) + +// needsEscape checks whether a byte in a potential dbus ObjectPath needs to be escaped +func needsEscape(i int, b byte) bool { + // Escape everything that is not a-z-A-Z-0-9 + // Also escape 0-9 if it's the first character + return strings.IndexByte(alphanum, b) == -1 || + (i == 0 && strings.IndexByte(num, b) != -1) +} + +// PathBusEscape sanitizes a constituent string of a dbus ObjectPath using the +// rules that systemd uses for serializing special characters. +func PathBusEscape(path string) string { + // Special case the empty string + if len(path) == 0 { + return "_" + } + n := []byte{} + for i := 0; i < len(path); i++ { + c := path[i] + if needsEscape(i, c) { + e := fmt.Sprintf("_%x", c) + n = append(n, []byte(e)...) + } else { + n = append(n, c) + } + } + return string(n) +} + +// Conn is a connection to systemd's dbus endpoint. +type Conn struct { + // sysconn/sysobj are only used to call dbus methods + sysconn *dbus.Conn + sysobj dbus.BusObject + + // sigconn/sigobj are only used to receive dbus signals + sigconn *dbus.Conn + sigobj dbus.BusObject + + jobListener struct { + jobs map[dbus.ObjectPath]chan<- string + sync.Mutex + } + subscriber struct { + updateCh chan<- *SubStateUpdate + errCh chan<- error + sync.Mutex + ignore map[dbus.ObjectPath]int64 + cleanIgnore int64 + } +} + +// New establishes a connection to any available bus and authenticates. +// Callers should call Close() when done with the connection. +func New() (*Conn, error) { + conn, err := NewSystemConnection() + if err != nil && os.Geteuid() == 0 { + return NewSystemdConnection() + } + return conn, err +} + +// NewSystemConnection establishes a connection to the system bus and authenticates. +// Callers should call Close() when done with the connection +func NewSystemConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + return dbusAuthHelloConnection(dbus.SystemBusPrivate) + }) +} + +// NewUserConnection establishes a connection to the session bus and +// authenticates. This can be used to connect to systemd user instances. +// Callers should call Close() when done with the connection. +func NewUserConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + return dbusAuthHelloConnection(dbus.SessionBusPrivate) + }) +} + +// NewSystemdConnection establishes a private, direct connection to systemd. +// This can be used for communicating with systemd without a dbus daemon. +// Callers should call Close() when done with the connection. +func NewSystemdConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + // We skip Hello when talking directly to systemd. + return dbusAuthConnection(func() (*dbus.Conn, error) { + return dbus.Dial("unix:path=/run/systemd/private") + }) + }) +} + +// Close closes an established connection +func (c *Conn) Close() { + c.sysconn.Close() + c.sigconn.Close() +} + +// NewConnection establishes a connection to a bus using a caller-supplied function. +// This allows connecting to remote buses through a user-supplied mechanism. +// The supplied function may be called multiple times, and should return independent connections. +// The returned connection must be fully initialised: the org.freedesktop.DBus.Hello call must have succeeded, +// and any authentication should be handled by the function. +func NewConnection(dialBus func() (*dbus.Conn, error)) (*Conn, error) { + sysconn, err := dialBus() + if err != nil { + return nil, err + } + + sigconn, err := dialBus() + if err != nil { + sysconn.Close() + return nil, err + } + + c := &Conn{ + sysconn: sysconn, + sysobj: systemdObject(sysconn), + sigconn: sigconn, + sigobj: systemdObject(sigconn), + } + + c.subscriber.ignore = make(map[dbus.ObjectPath]int64) + c.jobListener.jobs = make(map[dbus.ObjectPath]chan<- string) + + // Setup the listeners on jobs so that we can get completions + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal', interface='org.freedesktop.systemd1.Manager', member='JobRemoved'") + + c.dispatch() + return c, nil +} + +// GetManagerProperty returns the value of a property on the org.freedesktop.systemd1.Manager +// interface. The value is returned in its string representation, as defined at +// https://developer.gnome.org/glib/unstable/gvariant-text.html +func (c *Conn) GetManagerProperty(prop string) (string, error) { + variant, err := c.sysobj.GetProperty("org.freedesktop.systemd1.Manager." + prop) + if err != nil { + return "", err + } + return variant.String(), nil +} + +func dbusAuthConnection(createBus func() (*dbus.Conn, error)) (*dbus.Conn, error) { + conn, err := createBus() + if err != nil { + return nil, err + } + + // Only use EXTERNAL method, and hardcode the uid (not username) + // to avoid a username lookup (which requires a dynamically linked + // libc) + methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(os.Getuid()))} + + err = conn.Auth(methods) + if err != nil { + conn.Close() + return nil, err + } + + return conn, nil +} + +func dbusAuthHelloConnection(createBus func() (*dbus.Conn, error)) (*dbus.Conn, error) { + conn, err := dbusAuthConnection(createBus) + if err != nil { + return nil, err + } + + if err = conn.Hello(); err != nil { + conn.Close() + return nil, err + } + + return conn, nil +} + +func systemdObject(conn *dbus.Conn) dbus.BusObject { + return conn.Object("org.freedesktop.systemd1", dbus.ObjectPath("/org/freedesktop/systemd1")) +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/methods.go b/vendor/github.com/coreos/go-systemd/dbus/methods.go new file mode 100644 index 0000000..ab17f7c --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/methods.go @@ -0,0 +1,565 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "errors" + "path" + "strconv" + + "github.com/godbus/dbus" +) + +func (c *Conn) jobComplete(signal *dbus.Signal) { + var id uint32 + var job dbus.ObjectPath + var unit string + var result string + dbus.Store(signal.Body, &id, &job, &unit, &result) + c.jobListener.Lock() + out, ok := c.jobListener.jobs[job] + if ok { + out <- result + delete(c.jobListener.jobs, job) + } + c.jobListener.Unlock() +} + +func (c *Conn) startJob(ch chan<- string, job string, args ...interface{}) (int, error) { + if ch != nil { + c.jobListener.Lock() + defer c.jobListener.Unlock() + } + + var p dbus.ObjectPath + err := c.sysobj.Call(job, 0, args...).Store(&p) + if err != nil { + return 0, err + } + + if ch != nil { + c.jobListener.jobs[p] = ch + } + + // ignore error since 0 is fine if conversion fails + jobID, _ := strconv.Atoi(path.Base(string(p))) + + return jobID, nil +} + +// StartUnit enqueues a start job and depending jobs, if any (unless otherwise +// specified by the mode string). +// +// Takes the unit to activate, plus a mode string. The mode needs to be one of +// replace, fail, isolate, ignore-dependencies, ignore-requirements. If +// "replace" the call will start the unit and its dependencies, possibly +// replacing already queued jobs that conflict with this. If "fail" the call +// will start the unit and its dependencies, but will fail if this would change +// an already queued job. If "isolate" the call will start the unit in question +// and terminate all units that aren't dependencies of it. If +// "ignore-dependencies" it will start a unit but ignore all its dependencies. +// If "ignore-requirements" it will start a unit but only ignore the +// requirement dependencies. It is not recommended to make use of the latter +// two options. +// +// If the provided channel is non-nil, a result string will be sent to it upon +// job completion: one of done, canceled, timeout, failed, dependency, skipped. +// done indicates successful execution of a job. canceled indicates that a job +// has been canceled before it finished execution. timeout indicates that the +// job timeout was reached. failed indicates that the job failed. dependency +// indicates that a job this job has been depending on failed and the job hence +// has been removed too. skipped indicates that a job was skipped because it +// didn't apply to the units current state. +// +// If no error occurs, the ID of the underlying systemd job will be returned. There +// does exist the possibility for no error to be returned, but for the returned job +// ID to be 0. In this case, the actual underlying ID is not 0 and this datapoint +// should not be considered authoritative. +// +// If an error does occur, it will be returned to the user alongside a job ID of 0. +func (c *Conn) StartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.StartUnit", name, mode) +} + +// StopUnit is similar to StartUnit but stops the specified unit rather +// than starting it. +func (c *Conn) StopUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.StopUnit", name, mode) +} + +// ReloadUnit reloads a unit. Reloading is done only if the unit is already running and fails otherwise. +func (c *Conn) ReloadUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadUnit", name, mode) +} + +// RestartUnit restarts a service. If a service is restarted that isn't +// running it will be started. +func (c *Conn) RestartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.RestartUnit", name, mode) +} + +// TryRestartUnit is like RestartUnit, except that a service that isn't running +// is not affected by the restart. +func (c *Conn) TryRestartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.TryRestartUnit", name, mode) +} + +// ReloadOrRestart attempts a reload if the unit supports it and use a restart +// otherwise. +func (c *Conn) ReloadOrRestartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadOrRestartUnit", name, mode) +} + +// ReloadOrTryRestart attempts a reload if the unit supports it and use a "Try" +// flavored restart otherwise. +func (c *Conn) ReloadOrTryRestartUnit(name string, mode string, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadOrTryRestartUnit", name, mode) +} + +// StartTransientUnit() may be used to create and start a transient unit, which +// will be released as soon as it is not running or referenced anymore or the +// system is rebooted. name is the unit name including suffix, and must be +// unique. mode is the same as in StartUnit(), properties contains properties +// of the unit. +func (c *Conn) StartTransientUnit(name string, mode string, properties []Property, ch chan<- string) (int, error) { + return c.startJob(ch, "org.freedesktop.systemd1.Manager.StartTransientUnit", name, mode, properties, make([]PropertyCollection, 0)) +} + +// KillUnit takes the unit name and a UNIX signal number to send. All of the unit's +// processes are killed. +func (c *Conn) KillUnit(name string, signal int32) { + c.sysobj.Call("org.freedesktop.systemd1.Manager.KillUnit", 0, name, "all", signal).Store() +} + +// ResetFailedUnit resets the "failed" state of a specific unit. +func (c *Conn) ResetFailedUnit(name string) error { + return c.sysobj.Call("org.freedesktop.systemd1.Manager.ResetFailedUnit", 0, name).Store() +} + +// getProperties takes the unit name and returns all of its dbus object properties, for the given dbus interface +func (c *Conn) getProperties(unit string, dbusInterface string) (map[string]interface{}, error) { + var err error + var props map[string]dbus.Variant + + path := unitPath(unit) + if !path.IsValid() { + return nil, errors.New("invalid unit name: " + unit) + } + + obj := c.sysconn.Object("org.freedesktop.systemd1", path) + err = obj.Call("org.freedesktop.DBus.Properties.GetAll", 0, dbusInterface).Store(&props) + if err != nil { + return nil, err + } + + out := make(map[string]interface{}, len(props)) + for k, v := range props { + out[k] = v.Value() + } + + return out, nil +} + +// GetUnitProperties takes the unit name and returns all of its dbus object properties. +func (c *Conn) GetUnitProperties(unit string) (map[string]interface{}, error) { + return c.getProperties(unit, "org.freedesktop.systemd1.Unit") +} + +func (c *Conn) getProperty(unit string, dbusInterface string, propertyName string) (*Property, error) { + var err error + var prop dbus.Variant + + path := unitPath(unit) + if !path.IsValid() { + return nil, errors.New("invalid unit name: " + unit) + } + + obj := c.sysconn.Object("org.freedesktop.systemd1", path) + err = obj.Call("org.freedesktop.DBus.Properties.Get", 0, dbusInterface, propertyName).Store(&prop) + if err != nil { + return nil, err + } + + return &Property{Name: propertyName, Value: prop}, nil +} + +func (c *Conn) GetUnitProperty(unit string, propertyName string) (*Property, error) { + return c.getProperty(unit, "org.freedesktop.systemd1.Unit", propertyName) +} + +// GetServiceProperty returns property for given service name and property name +func (c *Conn) GetServiceProperty(service string, propertyName string) (*Property, error) { + return c.getProperty(service, "org.freedesktop.systemd1.Service", propertyName) +} + +// GetUnitTypeProperties returns the extra properties for a unit, specific to the unit type. +// Valid values for unitType: Service, Socket, Target, Device, Mount, Automount, Snapshot, Timer, Swap, Path, Slice, Scope +// return "dbus.Error: Unknown interface" if the unitType is not the correct type of the unit +func (c *Conn) GetUnitTypeProperties(unit string, unitType string) (map[string]interface{}, error) { + return c.getProperties(unit, "org.freedesktop.systemd1."+unitType) +} + +// SetUnitProperties() may be used to modify certain unit properties at runtime. +// Not all properties may be changed at runtime, but many resource management +// settings (primarily those in systemd.cgroup(5)) may. The changes are applied +// instantly, and stored on disk for future boots, unless runtime is true, in which +// case the settings only apply until the next reboot. name is the name of the unit +// to modify. properties are the settings to set, encoded as an array of property +// name and value pairs. +func (c *Conn) SetUnitProperties(name string, runtime bool, properties ...Property) error { + return c.sysobj.Call("org.freedesktop.systemd1.Manager.SetUnitProperties", 0, name, runtime, properties).Store() +} + +func (c *Conn) GetUnitTypeProperty(unit string, unitType string, propertyName string) (*Property, error) { + return c.getProperty(unit, "org.freedesktop.systemd1."+unitType, propertyName) +} + +type UnitStatus struct { + Name string // The primary unit name as string + Description string // The human readable description string + LoadState string // The load state (i.e. whether the unit file has been loaded successfully) + ActiveState string // The active state (i.e. whether the unit is currently started or not) + SubState string // The sub state (a more fine-grained version of the active state that is specific to the unit type, which the active state is not) + Followed string // A unit that is being followed in its state by this unit, if there is any, otherwise the empty string. + Path dbus.ObjectPath // The unit object path + JobId uint32 // If there is a job queued for the job unit the numeric job id, 0 otherwise + JobType string // The job type as string + JobPath dbus.ObjectPath // The job object path +} + +type storeFunc func(retvalues ...interface{}) error + +func (c *Conn) listUnitsInternal(f storeFunc) ([]UnitStatus, error) { + result := make([][]interface{}, 0) + err := f(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + status := make([]UnitStatus, len(result)) + statusInterface := make([]interface{}, len(status)) + for i := range status { + statusInterface[i] = &status[i] + } + + err = dbus.Store(resultInterface, statusInterface...) + if err != nil { + return nil, err + } + + return status, nil +} + +// ListUnits returns an array with all currently loaded units. Note that +// units may be known by multiple names at the same time, and hence there might +// be more unit names loaded than actual units behind them. +func (c *Conn) ListUnits() ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnits", 0).Store) +} + +// ListUnitsFiltered returns an array with units filtered by state. +// It takes a list of units' statuses to filter. +func (c *Conn) ListUnitsFiltered(states []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsFiltered", 0, states).Store) +} + +// ListUnitsByPatterns returns an array with units. +// It takes a list of units' statuses and names to filter. +// Note that units may be known by multiple names at the same time, +// and hence there might be more unit names loaded than actual units behind them. +func (c *Conn) ListUnitsByPatterns(states []string, patterns []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsByPatterns", 0, states, patterns).Store) +} + +// ListUnitsByNames returns an array with units. It takes a list of units' +// names and returns an UnitStatus array. Comparing to ListUnitsByPatterns +// method, this method returns statuses even for inactive or non-existing +// units. Input array should contain exact unit names, but not patterns. +func (c *Conn) ListUnitsByNames(units []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsByNames", 0, units).Store) +} + +type UnitFile struct { + Path string + Type string +} + +func (c *Conn) listUnitFilesInternal(f storeFunc) ([]UnitFile, error) { + result := make([][]interface{}, 0) + err := f(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + files := make([]UnitFile, len(result)) + fileInterface := make([]interface{}, len(files)) + for i := range files { + fileInterface[i] = &files[i] + } + + err = dbus.Store(resultInterface, fileInterface...) + if err != nil { + return nil, err + } + + return files, nil +} + +// ListUnitFiles returns an array of all available units on disk. +func (c *Conn) ListUnitFiles() ([]UnitFile, error) { + return c.listUnitFilesInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitFiles", 0).Store) +} + +// ListUnitFilesByPatterns returns an array of all available units on disk matched the patterns. +func (c *Conn) ListUnitFilesByPatterns(states []string, patterns []string) ([]UnitFile, error) { + return c.listUnitFilesInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitFilesByPatterns", 0, states, patterns).Store) +} + +type LinkUnitFileChange EnableUnitFileChange + +// LinkUnitFiles() links unit files (that are located outside of the +// usual unit search paths) into the unit search path. +// +// It takes a list of absolute paths to unit files to link and two +// booleans. The first boolean controls whether the unit shall be +// enabled for runtime only (true, /run), or persistently (false, +// /etc). +// The second controls whether symlinks pointing to other units shall +// be replaced if necessary. +// +// This call returns a list of the changes made. The list consists of +// structures with three strings: the type of the change (one of symlink +// or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) LinkUnitFiles(files []string, runtime bool, force bool) ([]LinkUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.LinkUnitFiles", 0, files, runtime, force).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]LinkUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +// EnableUnitFiles() may be used to enable one or more units in the system (by +// creating symlinks to them in /etc or /run). +// +// It takes a list of unit files to enable (either just file names or full +// absolute paths if the unit files are residing outside the usual unit +// search paths), and two booleans: the first controls whether the unit shall +// be enabled for runtime only (true, /run), or persistently (false, /etc). +// The second one controls whether symlinks pointing to other units shall +// be replaced if necessary. +// +// This call returns one boolean and an array with the changes made. The +// boolean signals whether the unit files contained any enablement +// information (i.e. an [Install]) section. The changes list consists of +// structures with three strings: the type of the change (one of symlink +// or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) EnableUnitFiles(files []string, runtime bool, force bool) (bool, []EnableUnitFileChange, error) { + var carries_install_info bool + + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.EnableUnitFiles", 0, files, runtime, force).Store(&carries_install_info, &result) + if err != nil { + return false, nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]EnableUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return false, nil, err + } + + return carries_install_info, changes, nil +} + +type EnableUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// DisableUnitFiles() may be used to disable one or more units in the system (by +// removing symlinks to them from /etc or /run). +// +// It takes a list of unit files to disable (either just file names or full +// absolute paths if the unit files are residing outside the usual unit +// search paths), and one boolean: whether the unit was enabled for runtime +// only (true, /run), or persistently (false, /etc). +// +// This call returns an array with the changes made. The changes list +// consists of structures with three strings: the type of the change (one of +// symlink or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) DisableUnitFiles(files []string, runtime bool) ([]DisableUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.DisableUnitFiles", 0, files, runtime).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]DisableUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +type DisableUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// MaskUnitFiles masks one or more units in the system +// +// It takes three arguments: +// * list of units to mask (either just file names or full +// absolute paths if the unit files are residing outside +// the usual unit search paths) +// * runtime to specify whether the unit was enabled for runtime +// only (true, /run/systemd/..), or persistently (false, /etc/systemd/..) +// * force flag +func (c *Conn) MaskUnitFiles(files []string, runtime bool, force bool) ([]MaskUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.MaskUnitFiles", 0, files, runtime, force).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]MaskUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +type MaskUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// UnmaskUnitFiles unmasks one or more units in the system +// +// It takes two arguments: +// * list of unit files to mask (either just file names or full +// absolute paths if the unit files are residing outside +// the usual unit search paths) +// * runtime to specify whether the unit was enabled for runtime +// only (true, /run/systemd/..), or persistently (false, /etc/systemd/..) +func (c *Conn) UnmaskUnitFiles(files []string, runtime bool) ([]UnmaskUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.UnmaskUnitFiles", 0, files, runtime).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]UnmaskUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +type UnmaskUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// Reload instructs systemd to scan for and reload unit files. This is +// equivalent to a 'systemctl daemon-reload'. +func (c *Conn) Reload() error { + return c.sysobj.Call("org.freedesktop.systemd1.Manager.Reload", 0).Store() +} + +func unitPath(name string) dbus.ObjectPath { + return dbus.ObjectPath("/org/freedesktop/systemd1/unit/" + PathBusEscape(name)) +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/properties.go b/vendor/github.com/coreos/go-systemd/dbus/properties.go new file mode 100644 index 0000000..6c81895 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/properties.go @@ -0,0 +1,237 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "github.com/godbus/dbus" +) + +// From the systemd docs: +// +// The properties array of StartTransientUnit() may take many of the settings +// that may also be configured in unit files. Not all parameters are currently +// accepted though, but we plan to cover more properties with future release. +// Currently you may set the Description, Slice and all dependency types of +// units, as well as RemainAfterExit, ExecStart for service units, +// TimeoutStopUSec and PIDs for scope units, and CPUAccounting, CPUShares, +// BlockIOAccounting, BlockIOWeight, BlockIOReadBandwidth, +// BlockIOWriteBandwidth, BlockIODeviceWeight, MemoryAccounting, MemoryLimit, +// DevicePolicy, DeviceAllow for services/scopes/slices. These fields map +// directly to their counterparts in unit files and as normal D-Bus object +// properties. The exception here is the PIDs field of scope units which is +// used for construction of the scope only and specifies the initial PIDs to +// add to the scope object. + +type Property struct { + Name string + Value dbus.Variant +} + +type PropertyCollection struct { + Name string + Properties []Property +} + +type execStart struct { + Path string // the binary path to execute + Args []string // an array with all arguments to pass to the executed command, starting with argument 0 + UncleanIsFailure bool // a boolean whether it should be considered a failure if the process exits uncleanly +} + +// PropExecStart sets the ExecStart service property. The first argument is a +// slice with the binary path to execute followed by the arguments to pass to +// the executed command. See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecStart= +func PropExecStart(command []string, uncleanIsFailure bool) Property { + execStarts := []execStart{ + execStart{ + Path: command[0], + Args: command, + UncleanIsFailure: uncleanIsFailure, + }, + } + + return Property{ + Name: "ExecStart", + Value: dbus.MakeVariant(execStarts), + } +} + +// PropRemainAfterExit sets the RemainAfterExit service property. See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#RemainAfterExit= +func PropRemainAfterExit(b bool) Property { + return Property{ + Name: "RemainAfterExit", + Value: dbus.MakeVariant(b), + } +} + +// PropType sets the Type service property. See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#Type= +func PropType(t string) Property { + return Property{ + Name: "Type", + Value: dbus.MakeVariant(t), + } +} + +// PropDescription sets the Description unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit#Description= +func PropDescription(desc string) Property { + return Property{ + Name: "Description", + Value: dbus.MakeVariant(desc), + } +} + +func propDependency(name string, units []string) Property { + return Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +// PropRequires sets the Requires unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requires= +func PropRequires(units ...string) Property { + return propDependency("Requires", units) +} + +// PropRequiresOverridable sets the RequiresOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiresOverridable= +func PropRequiresOverridable(units ...string) Property { + return propDependency("RequiresOverridable", units) +} + +// PropRequisite sets the Requisite unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requisite= +func PropRequisite(units ...string) Property { + return propDependency("Requisite", units) +} + +// PropRequisiteOverridable sets the RequisiteOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequisiteOverridable= +func PropRequisiteOverridable(units ...string) Property { + return propDependency("RequisiteOverridable", units) +} + +// PropWants sets the Wants unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Wants= +func PropWants(units ...string) Property { + return propDependency("Wants", units) +} + +// PropBindsTo sets the BindsTo unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#BindsTo= +func PropBindsTo(units ...string) Property { + return propDependency("BindsTo", units) +} + +// PropRequiredBy sets the RequiredBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiredBy= +func PropRequiredBy(units ...string) Property { + return propDependency("RequiredBy", units) +} + +// PropRequiredByOverridable sets the RequiredByOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiredByOverridable= +func PropRequiredByOverridable(units ...string) Property { + return propDependency("RequiredByOverridable", units) +} + +// PropWantedBy sets the WantedBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#WantedBy= +func PropWantedBy(units ...string) Property { + return propDependency("WantedBy", units) +} + +// PropBoundBy sets the BoundBy unit property. See +// http://www.freedesktop.org/software/systemd/main/systemd.unit.html#BoundBy= +func PropBoundBy(units ...string) Property { + return propDependency("BoundBy", units) +} + +// PropConflicts sets the Conflicts unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Conflicts= +func PropConflicts(units ...string) Property { + return propDependency("Conflicts", units) +} + +// PropConflictedBy sets the ConflictedBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#ConflictedBy= +func PropConflictedBy(units ...string) Property { + return propDependency("ConflictedBy", units) +} + +// PropBefore sets the Before unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Before= +func PropBefore(units ...string) Property { + return propDependency("Before", units) +} + +// PropAfter sets the After unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#After= +func PropAfter(units ...string) Property { + return propDependency("After", units) +} + +// PropOnFailure sets the OnFailure unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#OnFailure= +func PropOnFailure(units ...string) Property { + return propDependency("OnFailure", units) +} + +// PropTriggers sets the Triggers unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Triggers= +func PropTriggers(units ...string) Property { + return propDependency("Triggers", units) +} + +// PropTriggeredBy sets the TriggeredBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#TriggeredBy= +func PropTriggeredBy(units ...string) Property { + return propDependency("TriggeredBy", units) +} + +// PropPropagatesReloadTo sets the PropagatesReloadTo unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#PropagatesReloadTo= +func PropPropagatesReloadTo(units ...string) Property { + return propDependency("PropagatesReloadTo", units) +} + +// PropRequiresMountsFor sets the RequiresMountsFor unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiresMountsFor= +func PropRequiresMountsFor(units ...string) Property { + return propDependency("RequiresMountsFor", units) +} + +// PropSlice sets the Slice unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#Slice= +func PropSlice(slice string) Property { + return Property{ + Name: "Slice", + Value: dbus.MakeVariant(slice), + } +} + +// PropPids sets the PIDs field of scope units used in the initial construction +// of the scope only and specifies the initial PIDs to add to the scope object. +// See https://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/#properties +func PropPids(pids ...uint32) Property { + return Property{ + Name: "PIDs", + Value: dbus.MakeVariant(pids), + } +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/set.go b/vendor/github.com/coreos/go-systemd/dbus/set.go new file mode 100644 index 0000000..f92e6fb --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/set.go @@ -0,0 +1,47 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +type set struct { + data map[string]bool +} + +func (s *set) Add(value string) { + s.data[value] = true +} + +func (s *set) Remove(value string) { + delete(s.data, value) +} + +func (s *set) Contains(value string) (exists bool) { + _, exists = s.data[value] + return +} + +func (s *set) Length() int { + return len(s.data) +} + +func (s *set) Values() (values []string) { + for val, _ := range s.data { + values = append(values, val) + } + return +} + +func newSet() *set { + return &set{make(map[string]bool)} +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/subscription.go b/vendor/github.com/coreos/go-systemd/dbus/subscription.go new file mode 100644 index 0000000..9964514 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/subscription.go @@ -0,0 +1,250 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "errors" + "time" + + "github.com/godbus/dbus" +) + +const ( + cleanIgnoreInterval = int64(10 * time.Second) + ignoreInterval = int64(30 * time.Millisecond) +) + +// Subscribe sets up this connection to subscribe to all systemd dbus events. +// This is required before calling SubscribeUnits. When the connection closes +// systemd will automatically stop sending signals so there is no need to +// explicitly call Unsubscribe(). +func (c *Conn) Subscribe() error { + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal',interface='org.freedesktop.systemd1.Manager',member='UnitNew'") + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal',interface='org.freedesktop.DBus.Properties',member='PropertiesChanged'") + + err := c.sigobj.Call("org.freedesktop.systemd1.Manager.Subscribe", 0).Store() + if err != nil { + return err + } + + return nil +} + +// Unsubscribe this connection from systemd dbus events. +func (c *Conn) Unsubscribe() error { + err := c.sigobj.Call("org.freedesktop.systemd1.Manager.Unsubscribe", 0).Store() + if err != nil { + return err + } + + return nil +} + +func (c *Conn) dispatch() { + ch := make(chan *dbus.Signal, signalBuffer) + + c.sigconn.Signal(ch) + + go func() { + for { + signal, ok := <-ch + if !ok { + return + } + + if signal.Name == "org.freedesktop.systemd1.Manager.JobRemoved" { + c.jobComplete(signal) + } + + if c.subscriber.updateCh == nil { + continue + } + + var unitPath dbus.ObjectPath + switch signal.Name { + case "org.freedesktop.systemd1.Manager.JobRemoved": + unitName := signal.Body[2].(string) + c.sysobj.Call("org.freedesktop.systemd1.Manager.GetUnit", 0, unitName).Store(&unitPath) + case "org.freedesktop.systemd1.Manager.UnitNew": + unitPath = signal.Body[1].(dbus.ObjectPath) + case "org.freedesktop.DBus.Properties.PropertiesChanged": + if signal.Body[0].(string) == "org.freedesktop.systemd1.Unit" { + unitPath = signal.Path + } + } + + if unitPath == dbus.ObjectPath("") { + continue + } + + c.sendSubStateUpdate(unitPath) + } + }() +} + +// Returns two unbuffered channels which will receive all changed units every +// interval. Deleted units are sent as nil. +func (c *Conn) SubscribeUnits(interval time.Duration) (<-chan map[string]*UnitStatus, <-chan error) { + return c.SubscribeUnitsCustom(interval, 0, func(u1, u2 *UnitStatus) bool { return *u1 != *u2 }, nil) +} + +// SubscribeUnitsCustom is like SubscribeUnits but lets you specify the buffer +// size of the channels, the comparison function for detecting changes and a filter +// function for cutting down on the noise that your channel receives. +func (c *Conn) SubscribeUnitsCustom(interval time.Duration, buffer int, isChanged func(*UnitStatus, *UnitStatus) bool, filterUnit func(string) bool) (<-chan map[string]*UnitStatus, <-chan error) { + old := make(map[string]*UnitStatus) + statusChan := make(chan map[string]*UnitStatus, buffer) + errChan := make(chan error, buffer) + + go func() { + for { + timerChan := time.After(interval) + + units, err := c.ListUnits() + if err == nil { + cur := make(map[string]*UnitStatus) + for i := range units { + if filterUnit != nil && filterUnit(units[i].Name) { + continue + } + cur[units[i].Name] = &units[i] + } + + // add all new or changed units + changed := make(map[string]*UnitStatus) + for n, u := range cur { + if oldU, ok := old[n]; !ok || isChanged(oldU, u) { + changed[n] = u + } + delete(old, n) + } + + // add all deleted units + for oldN := range old { + changed[oldN] = nil + } + + old = cur + + if len(changed) != 0 { + statusChan <- changed + } + } else { + errChan <- err + } + + <-timerChan + } + }() + + return statusChan, errChan +} + +type SubStateUpdate struct { + UnitName string + SubState string +} + +// SetSubStateSubscriber writes to updateCh when any unit's substate changes. +// Although this writes to updateCh on every state change, the reported state +// may be more recent than the change that generated it (due to an unavoidable +// race in the systemd dbus interface). That is, this method provides a good +// way to keep a current view of all units' states, but is not guaranteed to +// show every state transition they go through. Furthermore, state changes +// will only be written to the channel with non-blocking writes. If updateCh +// is full, it attempts to write an error to errCh; if errCh is full, the error +// passes silently. +func (c *Conn) SetSubStateSubscriber(updateCh chan<- *SubStateUpdate, errCh chan<- error) { + c.subscriber.Lock() + defer c.subscriber.Unlock() + c.subscriber.updateCh = updateCh + c.subscriber.errCh = errCh +} + +func (c *Conn) sendSubStateUpdate(path dbus.ObjectPath) { + c.subscriber.Lock() + defer c.subscriber.Unlock() + + if c.shouldIgnore(path) { + return + } + + info, err := c.GetUnitProperties(string(path)) + if err != nil { + select { + case c.subscriber.errCh <- err: + default: + } + } + + name := info["Id"].(string) + substate := info["SubState"].(string) + + update := &SubStateUpdate{name, substate} + select { + case c.subscriber.updateCh <- update: + default: + select { + case c.subscriber.errCh <- errors.New("update channel full!"): + default: + } + } + + c.updateIgnore(path, info) +} + +// The ignore functions work around a wart in the systemd dbus interface. +// Requesting the properties of an unloaded unit will cause systemd to send a +// pair of UnitNew/UnitRemoved signals. Because we need to get a unit's +// properties on UnitNew (as that's the only indication of a new unit coming up +// for the first time), we would enter an infinite loop if we did not attempt +// to detect and ignore these spurious signals. The signal themselves are +// indistinguishable from relevant ones, so we (somewhat hackishly) ignore an +// unloaded unit's signals for a short time after requesting its properties. +// This means that we will miss e.g. a transient unit being restarted +// *immediately* upon failure and also a transient unit being started +// immediately after requesting its status (with systemctl status, for example, +// because this causes a UnitNew signal to be sent which then causes us to fetch +// the properties). + +func (c *Conn) shouldIgnore(path dbus.ObjectPath) bool { + t, ok := c.subscriber.ignore[path] + return ok && t >= time.Now().UnixNano() +} + +func (c *Conn) updateIgnore(path dbus.ObjectPath, info map[string]interface{}) { + c.cleanIgnore() + + // unit is unloaded - it will trigger bad systemd dbus behavior + if info["LoadState"].(string) == "not-found" { + c.subscriber.ignore[path] = time.Now().UnixNano() + ignoreInterval + } +} + +// without this, ignore would grow unboundedly over time +func (c *Conn) cleanIgnore() { + now := time.Now().UnixNano() + if c.subscriber.cleanIgnore < now { + c.subscriber.cleanIgnore = now + cleanIgnoreInterval + + for p, t := range c.subscriber.ignore { + if t < now { + delete(c.subscriber.ignore, p) + } + } + } +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go b/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go new file mode 100644 index 0000000..5b408d5 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go @@ -0,0 +1,57 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "time" +) + +// SubscriptionSet returns a subscription set which is like conn.Subscribe but +// can filter to only return events for a set of units. +type SubscriptionSet struct { + *set + conn *Conn +} + +func (s *SubscriptionSet) filter(unit string) bool { + return !s.Contains(unit) +} + +// Subscribe starts listening for dbus events for all of the units in the set. +// Returns channels identical to conn.SubscribeUnits. +func (s *SubscriptionSet) Subscribe() (<-chan map[string]*UnitStatus, <-chan error) { + // TODO: Make fully evented by using systemd 209 with properties changed values + return s.conn.SubscribeUnitsCustom(time.Second, 0, + mismatchUnitStatus, + func(unit string) bool { return s.filter(unit) }, + ) +} + +// NewSubscriptionSet returns a new subscription set. +func (conn *Conn) NewSubscriptionSet() *SubscriptionSet { + return &SubscriptionSet{newSet(), conn} +} + +// mismatchUnitStatus returns true if the provided UnitStatus objects +// are not equivalent. false is returned if the objects are equivalent. +// Only the Name, Description and state-related fields are used in +// the comparison. +func mismatchUnitStatus(u1, u2 *UnitStatus) bool { + return u1.Name != u2.Name || + u1.Description != u2.Description || + u1.LoadState != u2.LoadState || + u1.ActiveState != u2.ActiveState || + u1.SubState != u2.SubState +} diff --git a/vendor/github.com/coreos/go-systemd/journal/journal.go b/vendor/github.com/coreos/go-systemd/journal/journal.go new file mode 100644 index 0000000..7f43499 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/journal/journal.go @@ -0,0 +1,179 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package journal provides write bindings to the local systemd journal. +// It is implemented in pure Go and connects to the journal directly over its +// unix socket. +// +// To read from the journal, see the "sdjournal" package, which wraps the +// sd-journal a C API. +// +// http://www.freedesktop.org/software/systemd/man/systemd-journald.service.html +package journal + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "io/ioutil" + "net" + "os" + "strconv" + "strings" + "syscall" +) + +// Priority of a journal message +type Priority int + +const ( + PriEmerg Priority = iota + PriAlert + PriCrit + PriErr + PriWarning + PriNotice + PriInfo + PriDebug +) + +var conn net.Conn + +func init() { + var err error + conn, err = net.Dial("unixgram", "/run/systemd/journal/socket") + if err != nil { + conn = nil + } +} + +// Enabled returns true if the local systemd journal is available for logging +func Enabled() bool { + return conn != nil +} + +// Send a message to the local systemd journal. vars is a map of journald +// fields to values. Fields must be composed of uppercase letters, numbers, +// and underscores, but must not start with an underscore. Within these +// restrictions, any arbitrary field name may be used. Some names have special +// significance: see the journalctl documentation +// (http://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html) +// for more details. vars may be nil. +func Send(message string, priority Priority, vars map[string]string) error { + if conn == nil { + return journalError("could not connect to journald socket") + } + + data := new(bytes.Buffer) + appendVariable(data, "PRIORITY", strconv.Itoa(int(priority))) + appendVariable(data, "MESSAGE", message) + for k, v := range vars { + appendVariable(data, k, v) + } + + _, err := io.Copy(conn, data) + if err != nil && isSocketSpaceError(err) { + file, err := tempFd() + if err != nil { + return journalError(err.Error()) + } + defer file.Close() + _, err = io.Copy(file, data) + if err != nil { + return journalError(err.Error()) + } + + rights := syscall.UnixRights(int(file.Fd())) + + /* this connection should always be a UnixConn, but better safe than sorry */ + unixConn, ok := conn.(*net.UnixConn) + if !ok { + return journalError("can't send file through non-Unix connection") + } + unixConn.WriteMsgUnix([]byte{}, rights, nil) + } else if err != nil { + return journalError(err.Error()) + } + return nil +} + +// Print prints a message to the local systemd journal using Send(). +func Print(priority Priority, format string, a ...interface{}) error { + return Send(fmt.Sprintf(format, a...), priority, nil) +} + +func appendVariable(w io.Writer, name, value string) { + if !validVarName(name) { + journalError("variable name contains invalid character, ignoring") + } + if strings.ContainsRune(value, '\n') { + /* When the value contains a newline, we write: + * - the variable name, followed by a newline + * - the size (in 64bit little endian format) + * - the data, followed by a newline + */ + fmt.Fprintln(w, name) + binary.Write(w, binary.LittleEndian, uint64(len(value))) + fmt.Fprintln(w, value) + } else { + /* just write the variable and value all on one line */ + fmt.Fprintf(w, "%s=%s\n", name, value) + } +} + +func validVarName(name string) bool { + /* The variable name must be in uppercase and consist only of characters, + * numbers and underscores, and may not begin with an underscore. (from the docs) + */ + + valid := name[0] != '_' + for _, c := range name { + valid = valid && ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_' + } + return valid +} + +func isSocketSpaceError(err error) bool { + opErr, ok := err.(*net.OpError) + if !ok { + return false + } + + sysErr, ok := opErr.Err.(syscall.Errno) + if !ok { + return false + } + + return sysErr == syscall.EMSGSIZE || sysErr == syscall.ENOBUFS +} + +func tempFd() (*os.File, error) { + file, err := ioutil.TempFile("/dev/shm/", "journal.XXXXX") + if err != nil { + return nil, err + } + syscall.Unlink(file.Name()) + if err != nil { + return nil, err + } + return file, nil +} + +func journalError(s string) error { + s = "journal error: " + s + fmt.Fprintln(os.Stderr, s) + return errors.New(s) +} diff --git a/vendor/github.com/coreos/go-systemd/util/util.go b/vendor/github.com/coreos/go-systemd/util/util.go new file mode 100644 index 0000000..7828ce6 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/util/util.go @@ -0,0 +1,90 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package util contains utility functions related to systemd that applications +// can use to check things like whether systemd is running. Note that some of +// these functions attempt to manually load systemd libraries at runtime rather +// than linking against them. +package util + +import ( + "fmt" + "io/ioutil" + "os" + "strings" +) + +var ( + ErrNoCGO = fmt.Errorf("go-systemd built with CGO disabled") +) + +// GetRunningSlice attempts to retrieve the name of the systemd slice in which +// the current process is running. +// This function is a wrapper around the libsystemd C library; if it cannot be +// opened, an error is returned. +func GetRunningSlice() (string, error) { + return getRunningSlice() +} + +// RunningFromSystemService tries to detect whether the current process has +// been invoked from a system service. The condition for this is whether the +// process is _not_ a user process. User processes are those running in session +// scopes or under per-user `systemd --user` instances. +// +// To avoid false positives on systems without `pam_systemd` (which is +// responsible for creating user sessions), this function also uses a heuristic +// to detect whether it's being invoked from a session leader process. This is +// the case if the current process is executed directly from a service file +// (e.g. with `ExecStart=/this/cmd`). Note that this heuristic will fail if the +// command is instead launched in a subshell or similar so that it is not +// session leader (e.g. `ExecStart=/bin/bash -c "/this/cmd"`) +// +// This function is a wrapper around the libsystemd C library; if this is +// unable to successfully open a handle to the library for any reason (e.g. it +// cannot be found), an error will be returned. +func RunningFromSystemService() (bool, error) { + return runningFromSystemService() +} + +// CurrentUnitName attempts to retrieve the name of the systemd system unit +// from which the calling process has been invoked. It wraps the systemd +// `sd_pid_get_unit` call, with the same caveat: for processes not part of a +// systemd system unit, this function will return an error. +func CurrentUnitName() (string, error) { + return currentUnitName() +} + +// IsRunningSystemd checks whether the host was booted with systemd as its init +// system. This functions similarly to systemd's `sd_booted(3)`: internally, it +// checks whether /run/systemd/system/ exists and is a directory. +// http://www.freedesktop.org/software/systemd/man/sd_booted.html +func IsRunningSystemd() bool { + fi, err := os.Lstat("/run/systemd/system") + if err != nil { + return false + } + return fi.IsDir() +} + +// GetMachineID returns a host's 128-bit machine ID as a string. This functions +// similarly to systemd's `sd_id128_get_machine`: internally, it simply reads +// the contents of /etc/machine-id +// http://www.freedesktop.org/software/systemd/man/sd_id128_get_machine.html +func GetMachineID() (string, error) { + machineID, err := ioutil.ReadFile("/etc/machine-id") + if err != nil { + return "", fmt.Errorf("failed to read /etc/machine-id: %v", err) + } + return strings.TrimSpace(string(machineID)), nil +} diff --git a/vendor/github.com/coreos/go-systemd/util/util_cgo.go b/vendor/github.com/coreos/go-systemd/util/util_cgo.go new file mode 100644 index 0000000..22c0d60 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/util/util_cgo.go @@ -0,0 +1,174 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + +package util + +// #include <stdlib.h> +// #include <sys/types.h> +// #include <unistd.h> +// +// int +// my_sd_pid_get_owner_uid(void *f, pid_t pid, uid_t *uid) +// { +// int (*sd_pid_get_owner_uid)(pid_t, uid_t *); +// +// sd_pid_get_owner_uid = (int (*)(pid_t, uid_t *))f; +// return sd_pid_get_owner_uid(pid, uid); +// } +// +// int +// my_sd_pid_get_unit(void *f, pid_t pid, char **unit) +// { +// int (*sd_pid_get_unit)(pid_t, char **); +// +// sd_pid_get_unit = (int (*)(pid_t, char **))f; +// return sd_pid_get_unit(pid, unit); +// } +// +// int +// my_sd_pid_get_slice(void *f, pid_t pid, char **slice) +// { +// int (*sd_pid_get_slice)(pid_t, char **); +// +// sd_pid_get_slice = (int (*)(pid_t, char **))f; +// return sd_pid_get_slice(pid, slice); +// } +// +// int +// am_session_leader() +// { +// return (getsid(0) == getpid()); +// } +import "C" +import ( + "fmt" + "syscall" + "unsafe" + + "github.com/coreos/pkg/dlopen" +) + +var libsystemdNames = []string{ + // systemd < 209 + "libsystemd-login.so.0", + "libsystemd-login.so", + + // systemd >= 209 merged libsystemd-login into libsystemd proper + "libsystemd.so.0", + "libsystemd.so", +} + +func getRunningSlice() (slice string, err error) { + var h *dlopen.LibHandle + h, err = dlopen.GetHandle(libsystemdNames) + if err != nil { + return + } + defer func() { + if err1 := h.Close(); err1 != nil { + err = err1 + } + }() + + sd_pid_get_slice, err := h.GetSymbolPointer("sd_pid_get_slice") + if err != nil { + return + } + + var s string + sl := C.CString(s) + defer C.free(unsafe.Pointer(sl)) + + ret := C.my_sd_pid_get_slice(sd_pid_get_slice, 0, &sl) + if ret < 0 { + err = fmt.Errorf("error calling sd_pid_get_slice: %v", syscall.Errno(-ret)) + return + } + + return C.GoString(sl), nil +} + +func runningFromSystemService() (ret bool, err error) { + var h *dlopen.LibHandle + h, err = dlopen.GetHandle(libsystemdNames) + if err != nil { + return + } + defer func() { + if err1 := h.Close(); err1 != nil { + err = err1 + } + }() + + sd_pid_get_owner_uid, err := h.GetSymbolPointer("sd_pid_get_owner_uid") + if err != nil { + return + } + + var uid C.uid_t + errno := C.my_sd_pid_get_owner_uid(sd_pid_get_owner_uid, 0, &uid) + serrno := syscall.Errno(-errno) + // when we're running from a unit file, sd_pid_get_owner_uid returns + // ENOENT (systemd <220) or ENXIO (systemd >=220) + switch { + case errno >= 0: + ret = false + case serrno == syscall.ENOENT, serrno == syscall.ENXIO: + // Since the implementation of sessions in systemd relies on + // the `pam_systemd` module, using the sd_pid_get_owner_uid + // heuristic alone can result in false positives if that module + // (or PAM itself) is not present or properly configured on the + // system. As such, we also check if we're the session leader, + // which should be the case if we're invoked from a unit file, + // but not if e.g. we're invoked from the command line from a + // user's login session + ret = C.am_session_leader() == 1 + default: + err = fmt.Errorf("error calling sd_pid_get_owner_uid: %v", syscall.Errno(-errno)) + } + return +} + +func currentUnitName() (unit string, err error) { + var h *dlopen.LibHandle + h, err = dlopen.GetHandle(libsystemdNames) + if err != nil { + return + } + defer func() { + if err1 := h.Close(); err1 != nil { + err = err1 + } + }() + + sd_pid_get_unit, err := h.GetSymbolPointer("sd_pid_get_unit") + if err != nil { + return + } + + var s string + u := C.CString(s) + defer C.free(unsafe.Pointer(u)) + + ret := C.my_sd_pid_get_unit(sd_pid_get_unit, 0, &u) + if ret < 0 { + err = fmt.Errorf("error calling sd_pid_get_unit: %v", syscall.Errno(-ret)) + return + } + + unit = C.GoString(u) + return +} diff --git a/vendor/github.com/coreos/go-systemd/util/util_stub.go b/vendor/github.com/coreos/go-systemd/util/util_stub.go new file mode 100644 index 0000000..477589e --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/util/util_stub.go @@ -0,0 +1,23 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !cgo + +package util + +func getRunningSlice() (string, error) { return "", ErrNoCGO } + +func runningFromSystemService() (bool, error) { return false, ErrNoCGO } + +func currentUnitName() (string, error) { return "", ErrNoCGO } diff --git a/vendor/github.com/coreos/pkg/LICENSE b/vendor/github.com/coreos/pkg/LICENSE new file mode 100644 index 0000000..e06d208 --- /dev/null +++ b/vendor/github.com/coreos/pkg/LICENSE @@ -0,0 +1,202 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/vendor/github.com/coreos/pkg/NOTICE b/vendor/github.com/coreos/pkg/NOTICE new file mode 100644 index 0000000..b39ddfa --- /dev/null +++ b/vendor/github.com/coreos/pkg/NOTICE @@ -0,0 +1,5 @@ +CoreOS Project +Copyright 2014 CoreOS, Inc + +This product includes software developed at CoreOS, Inc. +(http://www.coreos.com/). diff --git a/vendor/github.com/coreos/pkg/README.md b/vendor/github.com/coreos/pkg/README.md new file mode 100644 index 0000000..ca68a07 --- /dev/null +++ b/vendor/github.com/coreos/pkg/README.md @@ -0,0 +1,4 @@ +a collection of go utility packages + +[![Build Status](https://travis-ci.org/coreos/pkg.png?branch=master)](https://travis-ci.org/coreos/pkg) +[![Godoc](http://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/coreos/pkg) diff --git a/vendor/github.com/coreos/pkg/capnslog/README.md b/vendor/github.com/coreos/pkg/capnslog/README.md new file mode 100644 index 0000000..81efb1f --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/README.md @@ -0,0 +1,39 @@ +# capnslog, the CoreOS logging package + +There are far too many logging packages out there, with varying degrees of licenses, far too many features (colorization, all sorts of log frameworks) or are just a pain to use (lack of `Fatalln()`?). +capnslog provides a simple but consistent logging interface suitable for all kinds of projects. + +### Design Principles + +##### `package main` is the place where logging gets turned on and routed + +A library should not touch log options, only generate log entries. Libraries are silent until main lets them speak. + +##### All log options are runtime-configurable. + +Still the job of `main` to expose these configurations. `main` may delegate this to, say, a configuration webhook, but does so explicitly. + +##### There is one log object per package. It is registered under its repository and package name. + +`main` activates logging for its repository and any dependency repositories it would also like to have output in its logstream. `main` also dictates at which level each subpackage logs. + +##### There is *one* output stream, and it is an `io.Writer` composed with a formatter. + +Splitting streams is probably not the job of your program, but rather, your log aggregation framework. If you must split output streams, again, `main` configures this and you can write a very simple two-output struct that satisfies io.Writer. + +Fancy colorful formatting and JSON output are beyond the scope of a basic logging framework -- they're application/log-collector dependant. These are, at best, provided as options, but more likely, provided by your application. + +##### Log objects are an interface + +An object knows best how to print itself. Log objects can collect more interesting metadata if they wish, however, because text isn't going away anytime soon, they must all be marshalable to text. The simplest log object is a string, which returns itself. If you wish to do more fancy tricks for printing your log objects, see also JSON output -- introspect and write a formatter which can handle your advanced log interface. Making strings is the only thing guaranteed. + +##### Log levels have specific meanings: + + * Critical: Unrecoverable. Must fail. + * Error: Data has been lost, a request has failed for a bad reason, or a required resource has been lost + * Warning: (Hopefully) Temporary conditions that may cause errors, but may work fine. A replica disappearing (that may reconnect) is a warning. + * Notice: Normal, but important (uncommon) log information. + * Info: Normal, working log information, everything is fine, but helpful notices for auditing or common operations. + * Debug: Everything is still fine, but even common operations may be logged, and less helpful but more quantity of notices. + * Trace: Anything goes, from logging every function call as part of a common operation, to tracing execution of a query. + diff --git a/vendor/github.com/coreos/pkg/capnslog/formatters.go b/vendor/github.com/coreos/pkg/capnslog/formatters.go new file mode 100644 index 0000000..b305a84 --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/formatters.go @@ -0,0 +1,157 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capnslog + +import ( + "bufio" + "fmt" + "io" + "log" + "runtime" + "strings" + "time" +) + +type Formatter interface { + Format(pkg string, level LogLevel, depth int, entries ...interface{}) + Flush() +} + +func NewStringFormatter(w io.Writer) Formatter { + return &StringFormatter{ + w: bufio.NewWriter(w), + } +} + +type StringFormatter struct { + w *bufio.Writer +} + +func (s *StringFormatter) Format(pkg string, l LogLevel, i int, entries ...interface{}) { + now := time.Now().UTC() + s.w.WriteString(now.Format(time.RFC3339)) + s.w.WriteByte(' ') + writeEntries(s.w, pkg, l, i, entries...) + s.Flush() +} + +func writeEntries(w *bufio.Writer, pkg string, _ LogLevel, _ int, entries ...interface{}) { + if pkg != "" { + w.WriteString(pkg + ": ") + } + str := fmt.Sprint(entries...) + endsInNL := strings.HasSuffix(str, "\n") + w.WriteString(str) + if !endsInNL { + w.WriteString("\n") + } +} + +func (s *StringFormatter) Flush() { + s.w.Flush() +} + +func NewPrettyFormatter(w io.Writer, debug bool) Formatter { + return &PrettyFormatter{ + w: bufio.NewWriter(w), + debug: debug, + } +} + +type PrettyFormatter struct { + w *bufio.Writer + debug bool +} + +func (c *PrettyFormatter) Format(pkg string, l LogLevel, depth int, entries ...interface{}) { + now := time.Now() + ts := now.Format("2006-01-02 15:04:05") + c.w.WriteString(ts) + ms := now.Nanosecond() / 1000 + c.w.WriteString(fmt.Sprintf(".%06d", ms)) + if c.debug { + _, file, line, ok := runtime.Caller(depth) // It's always the same number of frames to the user's call. + if !ok { + file = "???" + line = 1 + } else { + slash := strings.LastIndex(file, "/") + if slash >= 0 { + file = file[slash+1:] + } + } + if line < 0 { + line = 0 // not a real line number + } + c.w.WriteString(fmt.Sprintf(" [%s:%d]", file, line)) + } + c.w.WriteString(fmt.Sprint(" ", l.Char(), " | ")) + writeEntries(c.w, pkg, l, depth, entries...) + c.Flush() +} + +func (c *PrettyFormatter) Flush() { + c.w.Flush() +} + +// LogFormatter emulates the form of the traditional built-in logger. +type LogFormatter struct { + logger *log.Logger + prefix string +} + +// NewLogFormatter is a helper to produce a new LogFormatter struct. It uses the +// golang log package to actually do the logging work so that logs look similar. +func NewLogFormatter(w io.Writer, prefix string, flag int) Formatter { + return &LogFormatter{ + logger: log.New(w, "", flag), // don't use prefix here + prefix: prefix, // save it instead + } +} + +// Format builds a log message for the LogFormatter. The LogLevel is ignored. +func (lf *LogFormatter) Format(pkg string, _ LogLevel, _ int, entries ...interface{}) { + str := fmt.Sprint(entries...) + prefix := lf.prefix + if pkg != "" { + prefix = fmt.Sprintf("%s%s: ", prefix, pkg) + } + lf.logger.Output(5, fmt.Sprintf("%s%v", prefix, str)) // call depth is 5 +} + +// Flush is included so that the interface is complete, but is a no-op. +func (lf *LogFormatter) Flush() { + // noop +} + +// NilFormatter is a no-op log formatter that does nothing. +type NilFormatter struct { +} + +// NewNilFormatter is a helper to produce a new LogFormatter struct. It logs no +// messages so that you can cause part of your logging to be silent. +func NewNilFormatter() Formatter { + return &NilFormatter{} +} + +// Format does nothing. +func (_ *NilFormatter) Format(_ string, _ LogLevel, _ int, _ ...interface{}) { + // noop +} + +// Flush is included so that the interface is complete, but is a no-op. +func (_ *NilFormatter) Flush() { + // noop +} diff --git a/vendor/github.com/coreos/pkg/capnslog/glog_formatter.go b/vendor/github.com/coreos/pkg/capnslog/glog_formatter.go new file mode 100644 index 0000000..426603e --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/glog_formatter.go @@ -0,0 +1,96 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capnslog + +import ( + "bufio" + "bytes" + "io" + "os" + "runtime" + "strconv" + "strings" + "time" +) + +var pid = os.Getpid() + +type GlogFormatter struct { + StringFormatter +} + +func NewGlogFormatter(w io.Writer) *GlogFormatter { + g := &GlogFormatter{} + g.w = bufio.NewWriter(w) + return g +} + +func (g GlogFormatter) Format(pkg string, level LogLevel, depth int, entries ...interface{}) { + g.w.Write(GlogHeader(level, depth+1)) + g.StringFormatter.Format(pkg, level, depth+1, entries...) +} + +func GlogHeader(level LogLevel, depth int) []byte { + // Lmmdd hh:mm:ss.uuuuuu threadid file:line] + now := time.Now().UTC() + _, file, line, ok := runtime.Caller(depth) // It's always the same number of frames to the user's call. + if !ok { + file = "???" + line = 1 + } else { + slash := strings.LastIndex(file, "/") + if slash >= 0 { + file = file[slash+1:] + } + } + if line < 0 { + line = 0 // not a real line number + } + buf := &bytes.Buffer{} + buf.Grow(30) + _, month, day := now.Date() + hour, minute, second := now.Clock() + buf.WriteString(level.Char()) + twoDigits(buf, int(month)) + twoDigits(buf, day) + buf.WriteByte(' ') + twoDigits(buf, hour) + buf.WriteByte(':') + twoDigits(buf, minute) + buf.WriteByte(':') + twoDigits(buf, second) + buf.WriteByte('.') + buf.WriteString(strconv.Itoa(now.Nanosecond() / 1000)) + buf.WriteByte('Z') + buf.WriteByte(' ') + buf.WriteString(strconv.Itoa(pid)) + buf.WriteByte(' ') + buf.WriteString(file) + buf.WriteByte(':') + buf.WriteString(strconv.Itoa(line)) + buf.WriteByte(']') + buf.WriteByte(' ') + return buf.Bytes() +} + +const digits = "0123456789" + +func twoDigits(b *bytes.Buffer, d int) { + c2 := digits[d%10] + d /= 10 + c1 := digits[d%10] + b.WriteByte(c1) + b.WriteByte(c2) +} diff --git a/vendor/github.com/coreos/pkg/capnslog/init.go b/vendor/github.com/coreos/pkg/capnslog/init.go new file mode 100644 index 0000000..44b8cd3 --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/init.go @@ -0,0 +1,49 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// +build !windows + +package capnslog + +import ( + "io" + "os" + "syscall" +) + +// Here's where the opinionation comes in. We need some sensible defaults, +// especially after taking over the log package. Your project (whatever it may +// be) may see things differently. That's okay; there should be no defaults in +// the main package that cannot be controlled or overridden programatically, +// otherwise it's a bug. Doing so is creating your own init_log.go file much +// like this one. + +func init() { + initHijack() + + // Go `log` pacakge uses os.Stderr. + SetFormatter(NewDefaultFormatter(os.Stderr)) + SetGlobalLogLevel(INFO) +} + +func NewDefaultFormatter(out io.Writer) Formatter { + if syscall.Getppid() == 1 { + // We're running under init, which may be systemd. + f, err := NewJournaldFormatter() + if err == nil { + return f + } + } + return NewPrettyFormatter(out, false) +} diff --git a/vendor/github.com/coreos/pkg/capnslog/init_windows.go b/vendor/github.com/coreos/pkg/capnslog/init_windows.go new file mode 100644 index 0000000..4553050 --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/init_windows.go @@ -0,0 +1,25 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capnslog + +import "os" + +func init() { + initHijack() + + // Go `log` package uses os.Stderr. + SetFormatter(NewPrettyFormatter(os.Stderr, false)) + SetGlobalLogLevel(INFO) +} diff --git a/vendor/github.com/coreos/pkg/capnslog/journald_formatter.go b/vendor/github.com/coreos/pkg/capnslog/journald_formatter.go new file mode 100644 index 0000000..72e0520 --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/journald_formatter.go @@ -0,0 +1,68 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// +build !windows + +package capnslog + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/coreos/go-systemd/journal" +) + +func NewJournaldFormatter() (Formatter, error) { + if !journal.Enabled() { + return nil, errors.New("No systemd detected") + } + return &journaldFormatter{}, nil +} + +type journaldFormatter struct{} + +func (j *journaldFormatter) Format(pkg string, l LogLevel, _ int, entries ...interface{}) { + var pri journal.Priority + switch l { + case CRITICAL: + pri = journal.PriCrit + case ERROR: + pri = journal.PriErr + case WARNING: + pri = journal.PriWarning + case NOTICE: + pri = journal.PriNotice + case INFO: + pri = journal.PriInfo + case DEBUG: + pri = journal.PriDebug + case TRACE: + pri = journal.PriDebug + default: + panic("Unhandled loglevel") + } + msg := fmt.Sprint(entries...) + tags := map[string]string{ + "PACKAGE": pkg, + "SYSLOG_IDENTIFIER": filepath.Base(os.Args[0]), + } + err := journal.Send(msg, pri, tags) + if err != nil { + fmt.Fprintln(os.Stderr, err) + } +} + +func (j *journaldFormatter) Flush() {} diff --git a/vendor/github.com/coreos/pkg/capnslog/log_hijack.go b/vendor/github.com/coreos/pkg/capnslog/log_hijack.go new file mode 100644 index 0000000..970086b --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/log_hijack.go @@ -0,0 +1,39 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capnslog + +import ( + "log" +) + +func initHijack() { + pkg := NewPackageLogger("log", "") + w := packageWriter{pkg} + log.SetFlags(0) + log.SetPrefix("") + log.SetOutput(w) +} + +type packageWriter struct { + pl *PackageLogger +} + +func (p packageWriter) Write(b []byte) (int, error) { + if p.pl.level < INFO { + return 0, nil + } + p.pl.internalLog(calldepth+2, INFO, string(b)) + return len(b), nil +} diff --git a/vendor/github.com/coreos/pkg/capnslog/logmap.go b/vendor/github.com/coreos/pkg/capnslog/logmap.go new file mode 100644 index 0000000..8495448 --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/logmap.go @@ -0,0 +1,240 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capnslog + +import ( + "errors" + "strings" + "sync" +) + +// LogLevel is the set of all log levels. +type LogLevel int8 + +const ( + // CRITICAL is the lowest log level; only errors which will end the program will be propagated. + CRITICAL LogLevel = iota - 1 + // ERROR is for errors that are not fatal but lead to troubling behavior. + ERROR + // WARNING is for errors which are not fatal and not errors, but are unusual. Often sourced from misconfigurations. + WARNING + // NOTICE is for normal but significant conditions. + NOTICE + // INFO is a log level for common, everyday log updates. + INFO + // DEBUG is the default hidden level for more verbose updates about internal processes. + DEBUG + // TRACE is for (potentially) call by call tracing of programs. + TRACE +) + +// Char returns a single-character representation of the log level. +func (l LogLevel) Char() string { + switch l { + case CRITICAL: + return "C" + case ERROR: + return "E" + case WARNING: + return "W" + case NOTICE: + return "N" + case INFO: + return "I" + case DEBUG: + return "D" + case TRACE: + return "T" + default: + panic("Unhandled loglevel") + } +} + +// String returns a multi-character representation of the log level. +func (l LogLevel) String() string { + switch l { + case CRITICAL: + return "CRITICAL" + case ERROR: + return "ERROR" + case WARNING: + return "WARNING" + case NOTICE: + return "NOTICE" + case INFO: + return "INFO" + case DEBUG: + return "DEBUG" + case TRACE: + return "TRACE" + default: + panic("Unhandled loglevel") + } +} + +// Update using the given string value. Fulfills the flag.Value interface. +func (l *LogLevel) Set(s string) error { + value, err := ParseLevel(s) + if err != nil { + return err + } + + *l = value + return nil +} + +// ParseLevel translates some potential loglevel strings into their corresponding levels. +func ParseLevel(s string) (LogLevel, error) { + switch s { + case "CRITICAL", "C": + return CRITICAL, nil + case "ERROR", "0", "E": + return ERROR, nil + case "WARNING", "1", "W": + return WARNING, nil + case "NOTICE", "2", "N": + return NOTICE, nil + case "INFO", "3", "I": + return INFO, nil + case "DEBUG", "4", "D": + return DEBUG, nil + case "TRACE", "5", "T": + return TRACE, nil + } + return CRITICAL, errors.New("couldn't parse log level " + s) +} + +type RepoLogger map[string]*PackageLogger + +type loggerStruct struct { + sync.Mutex + repoMap map[string]RepoLogger + formatter Formatter +} + +// logger is the global logger +var logger = new(loggerStruct) + +// SetGlobalLogLevel sets the log level for all packages in all repositories +// registered with capnslog. +func SetGlobalLogLevel(l LogLevel) { + logger.Lock() + defer logger.Unlock() + for _, r := range logger.repoMap { + r.setRepoLogLevelInternal(l) + } +} + +// GetRepoLogger may return the handle to the repository's set of packages' loggers. +func GetRepoLogger(repo string) (RepoLogger, error) { + logger.Lock() + defer logger.Unlock() + r, ok := logger.repoMap[repo] + if !ok { + return nil, errors.New("no packages registered for repo " + repo) + } + return r, nil +} + +// MustRepoLogger returns the handle to the repository's packages' loggers. +func MustRepoLogger(repo string) RepoLogger { + r, err := GetRepoLogger(repo) + if err != nil { + panic(err) + } + return r +} + +// SetRepoLogLevel sets the log level for all packages in the repository. +func (r RepoLogger) SetRepoLogLevel(l LogLevel) { + logger.Lock() + defer logger.Unlock() + r.setRepoLogLevelInternal(l) +} + +func (r RepoLogger) setRepoLogLevelInternal(l LogLevel) { + for _, v := range r { + v.level = l + } +} + +// ParseLogLevelConfig parses a comma-separated string of "package=loglevel", in +// order, and returns a map of the results, for use in SetLogLevel. +func (r RepoLogger) ParseLogLevelConfig(conf string) (map[string]LogLevel, error) { + setlist := strings.Split(conf, ",") + out := make(map[string]LogLevel) + for _, setstring := range setlist { + setting := strings.Split(setstring, "=") + if len(setting) != 2 { + return nil, errors.New("oddly structured `pkg=level` option: " + setstring) + } + l, err := ParseLevel(setting[1]) + if err != nil { + return nil, err + } + out[setting[0]] = l + } + return out, nil +} + +// SetLogLevel takes a map of package names within a repository to their desired +// loglevel, and sets the levels appropriately. Unknown packages are ignored. +// "*" is a special package name that corresponds to all packages, and will be +// processed first. +func (r RepoLogger) SetLogLevel(m map[string]LogLevel) { + logger.Lock() + defer logger.Unlock() + if l, ok := m["*"]; ok { + r.setRepoLogLevelInternal(l) + } + for k, v := range m { + l, ok := r[k] + if !ok { + continue + } + l.level = v + } +} + +// SetFormatter sets the formatting function for all logs. +func SetFormatter(f Formatter) { + logger.Lock() + defer logger.Unlock() + logger.formatter = f +} + +// NewPackageLogger creates a package logger object. +// This should be defined as a global var in your package, referencing your repo. +func NewPackageLogger(repo string, pkg string) (p *PackageLogger) { + logger.Lock() + defer logger.Unlock() + if logger.repoMap == nil { + logger.repoMap = make(map[string]RepoLogger) + } + r, rok := logger.repoMap[repo] + if !rok { + logger.repoMap[repo] = make(RepoLogger) + r = logger.repoMap[repo] + } + p, pok := r[pkg] + if !pok { + r[pkg] = &PackageLogger{ + pkg: pkg, + level: INFO, + } + p = r[pkg] + } + return +} diff --git a/vendor/github.com/coreos/pkg/capnslog/pkg_logger.go b/vendor/github.com/coreos/pkg/capnslog/pkg_logger.go new file mode 100644 index 0000000..612d55c --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/pkg_logger.go @@ -0,0 +1,177 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capnslog + +import ( + "fmt" + "os" +) + +type PackageLogger struct { + pkg string + level LogLevel +} + +const calldepth = 2 + +func (p *PackageLogger) internalLog(depth int, inLevel LogLevel, entries ...interface{}) { + logger.Lock() + defer logger.Unlock() + if inLevel != CRITICAL && p.level < inLevel { + return + } + if logger.formatter != nil { + logger.formatter.Format(p.pkg, inLevel, depth+1, entries...) + } +} + +func (p *PackageLogger) LevelAt(l LogLevel) bool { + logger.Lock() + defer logger.Unlock() + return p.level >= l +} + +// Log a formatted string at any level between ERROR and TRACE +func (p *PackageLogger) Logf(l LogLevel, format string, args ...interface{}) { + p.internalLog(calldepth, l, fmt.Sprintf(format, args...)) +} + +// Log a message at any level between ERROR and TRACE +func (p *PackageLogger) Log(l LogLevel, args ...interface{}) { + p.internalLog(calldepth, l, fmt.Sprint(args...)) +} + +// log stdlib compatibility + +func (p *PackageLogger) Println(args ...interface{}) { + p.internalLog(calldepth, INFO, fmt.Sprintln(args...)) +} + +func (p *PackageLogger) Printf(format string, args ...interface{}) { + p.Logf(INFO, format, args...) +} + +func (p *PackageLogger) Print(args ...interface{}) { + p.internalLog(calldepth, INFO, fmt.Sprint(args...)) +} + +// Panic and fatal + +func (p *PackageLogger) Panicf(format string, args ...interface{}) { + s := fmt.Sprintf(format, args...) + p.internalLog(calldepth, CRITICAL, s) + panic(s) +} + +func (p *PackageLogger) Panic(args ...interface{}) { + s := fmt.Sprint(args...) + p.internalLog(calldepth, CRITICAL, s) + panic(s) +} + +func (p *PackageLogger) Fatalf(format string, args ...interface{}) { + p.Logf(CRITICAL, format, args...) + os.Exit(1) +} + +func (p *PackageLogger) Fatal(args ...interface{}) { + s := fmt.Sprint(args...) + p.internalLog(calldepth, CRITICAL, s) + os.Exit(1) +} + +func (p *PackageLogger) Fatalln(args ...interface{}) { + s := fmt.Sprintln(args...) + p.internalLog(calldepth, CRITICAL, s) + os.Exit(1) +} + +// Error Functions + +func (p *PackageLogger) Errorf(format string, args ...interface{}) { + p.Logf(ERROR, format, args...) +} + +func (p *PackageLogger) Error(entries ...interface{}) { + p.internalLog(calldepth, ERROR, entries...) +} + +// Warning Functions + +func (p *PackageLogger) Warningf(format string, args ...interface{}) { + p.Logf(WARNING, format, args...) +} + +func (p *PackageLogger) Warning(entries ...interface{}) { + p.internalLog(calldepth, WARNING, entries...) +} + +// Notice Functions + +func (p *PackageLogger) Noticef(format string, args ...interface{}) { + p.Logf(NOTICE, format, args...) +} + +func (p *PackageLogger) Notice(entries ...interface{}) { + p.internalLog(calldepth, NOTICE, entries...) +} + +// Info Functions + +func (p *PackageLogger) Infof(format string, args ...interface{}) { + p.Logf(INFO, format, args...) +} + +func (p *PackageLogger) Info(entries ...interface{}) { + p.internalLog(calldepth, INFO, entries...) +} + +// Debug Functions + +func (p *PackageLogger) Debugf(format string, args ...interface{}) { + if p.level < DEBUG { + return + } + p.Logf(DEBUG, format, args...) +} + +func (p *PackageLogger) Debug(entries ...interface{}) { + if p.level < DEBUG { + return + } + p.internalLog(calldepth, DEBUG, entries...) +} + +// Trace Functions + +func (p *PackageLogger) Tracef(format string, args ...interface{}) { + if p.level < TRACE { + return + } + p.Logf(TRACE, format, args...) +} + +func (p *PackageLogger) Trace(entries ...interface{}) { + if p.level < TRACE { + return + } + p.internalLog(calldepth, TRACE, entries...) +} + +func (p *PackageLogger) Flush() { + logger.Lock() + defer logger.Unlock() + logger.formatter.Flush() +} diff --git a/vendor/github.com/coreos/pkg/capnslog/syslog_formatter.go b/vendor/github.com/coreos/pkg/capnslog/syslog_formatter.go new file mode 100644 index 0000000..4be5a1f --- /dev/null +++ b/vendor/github.com/coreos/pkg/capnslog/syslog_formatter.go @@ -0,0 +1,65 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// +build !windows + +package capnslog + +import ( + "fmt" + "log/syslog" +) + +func NewSyslogFormatter(w *syslog.Writer) Formatter { + return &syslogFormatter{w} +} + +func NewDefaultSyslogFormatter(tag string) (Formatter, error) { + w, err := syslog.New(syslog.LOG_DEBUG, tag) + if err != nil { + return nil, err + } + return NewSyslogFormatter(w), nil +} + +type syslogFormatter struct { + w *syslog.Writer +} + +func (s *syslogFormatter) Format(pkg string, l LogLevel, _ int, entries ...interface{}) { + for _, entry := range entries { + str := fmt.Sprint(entry) + switch l { + case CRITICAL: + s.w.Crit(str) + case ERROR: + s.w.Err(str) + case WARNING: + s.w.Warning(str) + case NOTICE: + s.w.Notice(str) + case INFO: + s.w.Info(str) + case DEBUG: + s.w.Debug(str) + case TRACE: + s.w.Debug(str) + default: + panic("Unhandled loglevel") + } + } +} + +func (s *syslogFormatter) Flush() { +} diff --git a/vendor/github.com/coreos/pkg/dlopen/dlopen.go b/vendor/github.com/coreos/pkg/dlopen/dlopen.go new file mode 100644 index 0000000..23774f6 --- /dev/null +++ b/vendor/github.com/coreos/pkg/dlopen/dlopen.go @@ -0,0 +1,82 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package dlopen provides some convenience functions to dlopen a library and +// get its symbols. +package dlopen + +// #cgo LDFLAGS: -ldl +// #include <stdlib.h> +// #include <dlfcn.h> +import "C" +import ( + "errors" + "fmt" + "unsafe" +) + +var ErrSoNotFound = errors.New("unable to open a handle to the library") + +// LibHandle represents an open handle to a library (.so) +type LibHandle struct { + Handle unsafe.Pointer + Libname string +} + +// GetHandle tries to get a handle to a library (.so), attempting to access it +// by the names specified in libs and returning the first that is successfully +// opened. Callers are responsible for closing the handler. If no library can +// be successfully opened, an error is returned. +func GetHandle(libs []string) (*LibHandle, error) { + for _, name := range libs { + libname := C.CString(name) + defer C.free(unsafe.Pointer(libname)) + handle := C.dlopen(libname, C.RTLD_LAZY) + if handle != nil { + h := &LibHandle{ + Handle: handle, + Libname: name, + } + return h, nil + } + } + return nil, ErrSoNotFound +} + +// GetSymbolPointer takes a symbol name and returns a pointer to the symbol. +func (l *LibHandle) GetSymbolPointer(symbol string) (unsafe.Pointer, error) { + sym := C.CString(symbol) + defer C.free(unsafe.Pointer(sym)) + + C.dlerror() + p := C.dlsym(l.Handle, sym) + e := C.dlerror() + if e != nil { + return nil, fmt.Errorf("error resolving symbol %q: %v", symbol, errors.New(C.GoString(e))) + } + + return p, nil +} + +// Close closes a LibHandle. +func (l *LibHandle) Close() error { + C.dlerror() + C.dlclose(l.Handle) + e := C.dlerror() + if e != nil { + return fmt.Errorf("error closing %v: %v", l.Libname, errors.New(C.GoString(e))) + } + + return nil +} diff --git a/vendor/github.com/coreos/pkg/dlopen/dlopen_example.go b/vendor/github.com/coreos/pkg/dlopen/dlopen_example.go new file mode 100644 index 0000000..48a6601 --- /dev/null +++ b/vendor/github.com/coreos/pkg/dlopen/dlopen_example.go @@ -0,0 +1,56 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// +build linux + +package dlopen + +// #include <string.h> +// #include <stdlib.h> +// +// int +// my_strlen(void *f, const char *s) +// { +// size_t (*strlen)(const char *); +// +// strlen = (size_t (*)(const char *))f; +// return strlen(s); +// } +import "C" + +import ( + "fmt" + "unsafe" +) + +func strlen(libs []string, s string) (int, error) { + h, err := GetHandle(libs) + if err != nil { + return -1, fmt.Errorf(`couldn't get a handle to the library: %v`, err) + } + defer h.Close() + + f := "strlen" + cs := C.CString(s) + defer C.free(unsafe.Pointer(cs)) + + strlen, err := h.GetSymbolPointer(f) + if err != nil { + return -1, fmt.Errorf(`couldn't get symbol %q: %v`, f, err) + } + + len := C.my_strlen(strlen, cs) + + return int(len), nil +} |