commit 4912a387918de7572b7f9c76f36b33e0619ac5ae Author: Lu zhiping Date: Thu Jul 28 16:28:18 2022 +0800 Import Upstream version 1.0.0~rc10 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..282e34e --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +vendor/pkg +/runc +/runc-* +contrib/cmd/recvtty/recvtty +man/man8 +release diff --git a/.pullapprove.yml b/.pullapprove.yml new file mode 100644 index 0000000..fc8c5d3 --- /dev/null +++ b/.pullapprove.yml @@ -0,0 +1,10 @@ +approve_by_comment: true +approve_regex: ^LGTM +reject_regex: ^Rejected +reset_on_push: true +author_approval: ignored +reviewers: + teams: + - runc-maintainers + name: default + required: 2 diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..5c2928d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,54 @@ +dist: bionic +language: go +go: + - 1.11.x + - 1.12.x + - tip + +matrix: + include: + - go: 1.12.x + env: + - RUNC_USE_SYSTEMD=1 + script: + - make BUILDTAGS="${BUILDTAGS}" all + - sudo PATH="$PATH" make localintegration RUNC_USE_SYSTEMD=1 + - go: 1.12.x + env: + - VIRTUALBOX_VERSION=6.0 + - VAGRANT_VERSION=2.2.6 + - FEDORA_VERSION=31 + before_install: + - cat /proc/cpuinfo + - wget -q https://www.virtualbox.org/download/oracle_vbox_2016.asc -O- | sudo apt-key add - && sudo sh -c "echo deb https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib >> /etc/apt/sources.list" && sudo apt-get update && sudo apt-get install -yq build-essential gcc make linux-headers-$(uname -r) virtualbox-${VIRTUALBOX_VERSION} && sudo usermod -aG vboxusers $(whoami) + - wget https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_$(uname -m).deb && sudo dpkg -i vagrant_${VAGRANT_VERSION}_$(uname -m).deb + - vagrant init bento/fedora-${FEDORA_VERSION} && vagrant up && mkdir -p ~/.ssh && vagrant ssh-config >> ~/.ssh/config + - ssh default sudo dnf install -y podman + script: + - ssh default sudo podman build -t test /vagrant + - ssh default 
sudo podman run --privileged --cgroupns=private test make localunittest + allow_failures: + - go: tip + +go_import_path: github.com/opencontainers/runc + +# `make ci` uses Docker. +sudo: required +services: + - docker + +env: + global: + - BUILDTAGS="seccomp apparmor selinux ambient" + +before_install: + - sudo apt-get -qq update + - sudo apt-get install -y libseccomp-dev + - go get -u golang.org/x/lint/golint + - go get -u github.com/vbatts/git-validation + - env | grep TRAVIS_ + +script: + - git-validation -run DCO,short-subject -v + - make BUILDTAGS="${BUILDTAGS}" + - make BUILDTAGS="${BUILDTAGS}" clean ci cross diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..3b674cf --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,124 @@ +## Contribution Guidelines + +### Security issues + +If you are reporting a security issue, do not create an issue or file a pull +request on GitHub. Instead, disclose the issue responsibly by sending an email +to security@opencontainers.org (which is inhabited only by the maintainers of +the various OCI projects). + +### Pull requests are always welcome + +We are always thrilled to receive pull requests, and do our best to +process them as fast as possible. Not sure if that typo is worth a pull +request? Do it! We will appreciate it. + +If your pull request is not accepted on the first try, don't be +discouraged! If there's a problem with the implementation, hopefully you +received feedback on what to improve. + +We're trying very hard to keep runc lean and focused. We don't want it +to do everything for everybody. This means that we might decide against +incorporating a new feature. However, there might be a way to implement +that feature *on top of* runc. 
+ + +### Conventions + +Fork the repo and make changes on your fork in a feature branch: + +- If it's a bugfix branch, name it XXX-something where XXX is the number of the + issue +- If it's a feature branch, create an enhancement issue to announce your + intentions, and name it XXX-something where XXX is the number of the issue. + +Submit unit tests for your changes. Go has a great test framework built in; use +it! Take a look at existing tests for inspiration. Run the full test suite on +your branch before submitting a pull request. + +Update the documentation when creating or modifying features. Test +your documentation changes for clarity, concision, and correctness, as +well as a clean documentation build. See ``docs/README.md`` for more +information on building the docs and how docs get released. + +Write clean code. Universally formatted code promotes ease of writing, reading, +and maintenance. Always run `gofmt -s -w file.go` on each changed file before +committing your changes. Most editors have plugins that do this automatically. + +Pull request descriptions should be as clear as possible and include a +reference to all the issues that they address. + +Pull requests must not contain commits from other users or branches. + +Commit messages must start with a capitalized and short summary (max. 50 +chars) written in the imperative, followed by an optional, more detailed +explanatory text which is separated from the summary by an empty line. + +Code review comments may be added to your pull request. Discuss, then make the +suggested modifications and push additional commits to your feature branch. Be +sure to post a comment after pushing. The new commits will show up in the pull +request automatically, but the reviewers will not be notified unless you +comment. + +Before the pull request is merged, make sure that you squash your commits into +logical units of work using `git rebase -i` and `git push -f`. After every +commit the test suite should be passing. 
Include documentation changes in the +same commit so that a revert would remove all traces of the feature or fix. + +Commits that fix or close an issue should include a reference like `Closes #XXX` +or `Fixes #XXX`, which will automatically close the issue when merged. + +### Sign your work + +The sign-off is a simple line at the end of the explanation for the +patch, which certifies that you wrote it or otherwise have the right to +pass it on as an open-source patch. The rules are pretty simple: if you +can certify the below (from +[developercertificate.org](http://developercertificate.org/)): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. 
+ +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +then you just add a line to every git commit message: + + Signed-off-by: Joe Smith + +using your real name (sorry, no pseudonyms or anonymous contributions.) + +You can add the sign off when creating the git commit via `git commit -s`. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5c65470 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,66 @@ +FROM golang:1.12-stretch + +RUN dpkg --add-architecture armel \ + && dpkg --add-architecture armhf \ + && dpkg --add-architecture arm64 \ + && dpkg --add-architecture ppc64el \ + && apt-get update && apt-get install -y \ + build-essential \ + curl \ + sudo \ + gawk \ + iptables \ + jq \ + pkg-config \ + libaio-dev \ + libcap-dev \ + libprotobuf-dev \ + libprotobuf-c0-dev \ + libnl-3-dev \ + libnet-dev \ + libseccomp2 \ + libseccomp-dev \ + protobuf-c-compiler \ + protobuf-compiler \ + python-minimal \ + uidmap \ + kmod \ + crossbuild-essential-armel crossbuild-essential-armhf crossbuild-essential-arm64 crossbuild-essential-ppc64el \ + libseccomp-dev:armel libseccomp-dev:armhf libseccomp-dev:arm64 libseccomp-dev:ppc64el \ + --no-install-recommends \ + && apt-get clean + +# Add a dummy user for the rootless integration tests. While runC does +# not require an entry in /etc/passwd to operate, one of the tests uses +# `git clone` -- and `git clone` does not allow you to clone a +# repository if the current uid does not have an entry in /etc/passwd. 
+RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless + +# install bats +RUN cd /tmp \ + && git clone https://github.com/sstephenson/bats.git \ + && cd bats \ + && git reset --hard 03608115df2071fff4eaaff1605768c275e5f81f \ + && ./install.sh /usr/local \ + && rm -rf /tmp/bats + +# install criu +ENV CRIU_VERSION v3.12 +RUN mkdir -p /usr/src/criu \ + && curl -sSL https://github.com/checkpoint-restore/criu/archive/${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \ + && cd /usr/src/criu \ + && make install-criu \ + && rm -rf /usr/src/criu + +# setup a playground for us to spawn containers in +ENV ROOTFS /busybox +RUN mkdir -p ${ROOTFS} + +COPY script/tmpmount / +WORKDIR /go/src/github.com/opencontainers/runc +ENTRYPOINT ["/tmpmount"] + +ADD . /go/src/github.com/opencontainers/runc + +RUN . tests/integration/multi-arch.bash \ + && curl -o- -sSL `get_busybox` | tar xfJC - ${ROOTFS} diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2744858 --- /dev/null +++ b/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2014 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/MAINTAINERS b/MAINTAINERS new file mode 100644 index 0000000..9fe08d3 --- /dev/null +++ b/MAINTAINERS @@ -0,0 +1,5 @@ +Michael Crosby (@crosbymichael) +Mrunal Patel (@mrunalp) +Daniel, Dao Quang Minh (@dqminh) +Qiang Huang (@hqhq) +Aleksa Sarai (@cyphar) diff --git a/MAINTAINERS_GUIDE.md b/MAINTAINERS_GUIDE.md new file mode 100644 index 0000000..7442103 --- /dev/null +++ b/MAINTAINERS_GUIDE.md @@ -0,0 +1,120 @@ +## Introduction + +Dear maintainer. Thank you for investing the time and energy to help +make runc as useful as possible. Maintaining a project is difficult, +sometimes unrewarding work. Sure, you will get to contribute cool +features to the project. But most of your time will be spent reviewing, +cleaning up, documenting, answering questions, justifying design +decisions - while everyone else has all the fun! But remember - the quality +of the maintainers' work is what distinguishes the good projects from the +great. So please be proud of your work, even the unglamorous parts, +and encourage a culture of appreciation and respect for *every* aspect +of improving the project - not just the hot new features. + +This document is a manual for maintainers old and new. It explains what +is expected of maintainers, how they should work, and what tools are +available to them. + +This is a living document - if you see something out of date or missing, +speak up! + +## What is a maintainer's responsibility? + +It is every maintainer's responsibility to: + +* 1) Expose a clear roadmap for improving their component. +* 2) Deliver prompt feedback and decisions on pull requests. +* 3) Be available to anyone with questions, bug reports, criticism etc. + on their component. This includes IRC and GitHub issues and pull requests. +* 4) Make sure their component respects the philosophy, design and + roadmap of the project. + +## How are decisions made? + +Short answer: with pull requests to the runc repository. 
+ +runc is an open-source project with an open design philosophy. This +means that the repository is the source of truth for EVERY aspect of the +project, including its philosophy, design, roadmap and APIs. *If it's +part of the project, it's in the repo. It's in the repo, it's part of +the project.* + +As a result, all decisions can be expressed as changes to the +repository. An implementation change is a change to the source code. An +API change is a change to the API specification. A philosophy change is +a change to the philosophy manifesto. And so on. + +All decisions affecting runc, big and small, follow the same 3 steps: + +* Step 1: Open a pull request. Anyone can do this. + +* Step 2: Discuss the pull request. Anyone can do this. + +* Step 3: Accept (`LGTM`) or refuse a pull request. The relevant maintainers do +this (see below "Who decides what?") + +*I'm a maintainer, should I make pull requests too?* + +Yes. Nobody should ever push to master directly. All changes should be +made through a pull request. + +## Who decides what? + +All decisions are pull requests, and the relevant maintainers make +decisions by accepting or refusing the pull request. Review and acceptance +by anyone is denoted by adding a comment in the pull request: `LGTM`. +However, only currently listed `MAINTAINERS` are counted towards the required +two LGTMs. + +Overall the maintainer system works because of mutual respect across the +maintainers of the project. The maintainers trust one another to make decisions +in the best interests of the project. Sometimes maintainers can disagree and +this is part of a healthy project to represent the points of view of various people. +In the case where maintainers cannot find agreement on a specific change the +role of a Chief Maintainer comes into play. + +The Chief Maintainer for the project is responsible for overall architecture +of the project to maintain conceptual integrity. 
Large decisions and +architecture changes should be reviewed by the chief maintainer. +The current chief maintainer for the project is Michael Crosby (@crosbymichael). + +Even though the maintainer system is built on trust, if there is a conflict +with the chief maintainer on a decision, their decision can be challenged +and brought to the technical oversight board if two-thirds of the +maintainers vote for an appeal. It is expected that this would be a +very exceptional event. + + +### How are maintainers added? + +The best maintainers have a vested interest in the project. Maintainers +are first and foremost contributors that have shown they are committed to +the long term success of the project. Contributors wanting to become +maintainers are expected to be deeply involved in contributing code, +pull request review, and triage of issues in the project for more than two months. + +Just contributing does not make you a maintainer, it is about building trust +with the current maintainers of the project and being a person that they can +depend on and trust to make decisions in the best interest of the project. The +final vote to add a new maintainer should be approved by over 66% of the current +maintainers with the chief maintainer having veto power. In case of a veto, +conflict resolution rules expressed above apply. The voting period is +five business days on the Pull Request to add the new maintainer. + + +### What is expected of maintainers? + +Part of a healthy project is to have active maintainers to support the community +in contributions and perform tasks to keep the project running. Maintainers are +expected to be able to respond in a timely manner if their help is required on specific +issues where they are pinged. Being a maintainer is a time consuming commitment and should +not be taken lightly. 
+ +When a maintainer is unable to perform the required duties they can be removed with +a vote by 66% of the current maintainers with the chief maintainer having veto power. +The voting period is ten business days. Issues related to a maintainer's performance should +be discussed with them among the other maintainers so that they are not surprised by +a pull request removing them. + + + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..81db9d9 --- /dev/null +++ b/Makefile @@ -0,0 +1,133 @@ +.PHONY: all shell dbuild man release \ + localtest localunittest localintegration \ + test unittest integration \ + cross localcross + +CONTAINER_ENGINE := docker +GO := go + +SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$') +PREFIX := $(DESTDIR)/usr/local +BINDIR := $(PREFIX)/sbin +GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null) +GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g") +RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN)) +PROJECT := github.com/opencontainers/runc +BUILDTAGS ?= seccomp +COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true) +COMMIT ?= $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}") + +MAN_DIR := $(CURDIR)/man/man8 +MAN_PAGES = $(shell ls $(MAN_DIR)/*.8) +MAN_PAGES_BASE = $(notdir $(MAN_PAGES)) +MAN_INSTALL_PATH := ${PREFIX}/share/man/man8/ + +RELEASE_DIR := $(CURDIR)/release + +VERSION := ${shell cat ./VERSION} + +SHELL := $(shell command -v bash 2>/dev/null) + +.DEFAULT: runc + +runc: $(SOURCES) + $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc . 
+ +all: runc recvtty + +recvtty: contrib/cmd/recvtty/recvtty + +contrib/cmd/recvtty/recvtty: $(SOURCES) + $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty + +static: $(SOURCES) + CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o runc . + CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty + +release: + script/release.sh -r release/$(VERSION) -v $(VERSION) + +dbuild: runcimage + $(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all + +lint: + $(GO) vet $(allpackages) + $(GO) fmt $(allpackages) + +man: + man/md2man-all.sh + +runcimage: + $(CONTAINER_ENGINE) build ${CONTAINER_ENGINE_BUILD_FLAGS} -t $(RUNC_IMAGE) . 
+ +test: + make unittest integration rootlessintegration + +localtest: + make localunittest localintegration localrootlessintegration + +unittest: runcimage + $(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest TESTFLAGS=${TESTFLAGS} + +localunittest: all + $(GO) test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v $(allpackages) + +integration: runcimage + $(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration TESTPATH=${TESTPATH} + +localintegration: all + bats -t tests/integration${TESTPATH} + +rootlessintegration: runcimage + $(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration + +localrootlessintegration: all + tests/rootless.sh + +shell: runcimage + $(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash + +install: + install -D -m0755 runc $(BINDIR)/runc + +install-bash: + install -D -m0644 contrib/completions/bash/runc $(PREFIX)/share/bash-completion/completions/runc + +install-man: + install -d -m 755 $(MAN_INSTALL_PATH) + install -m 644 $(MAN_PAGES) $(MAN_INSTALL_PATH) + +uninstall: + rm -f $(BINDIR)/runc + +uninstall-bash: + rm -f $(PREFIX)/share/bash-completion/completions/runc + +uninstall-man: + rm -f $(addprefix $(MAN_INSTALL_PATH),$(MAN_PAGES_BASE)) + +clean: + rm -f runc runc-* + rm -f contrib/cmd/recvtty/recvtty + rm -rf $(RELEASE_DIR) + rm -rf $(MAN_DIR) + +validate: + script/validate-gofmt + script/validate-c + $(GO) vet $(allpackages) + +ci: validate test release + +cross: runcimage + $(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -e BUILDTAGS="$(BUILDTAGS)" --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localcross + 
+localcross: + CGO_ENABLED=1 GOARCH=arm GOARM=6 CC=arm-linux-gnueabi-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armel . + CGO_ENABLED=1 GOARCH=arm GOARM=7 CC=arm-linux-gnueabihf-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armhf . + CGO_ENABLED=1 GOARCH=arm64 CC=aarch64-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-arm64 . + CGO_ENABLED=1 GOARCH=ppc64le CC=powerpc64le-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-ppc64le . + +# memoize allpackages, so that it's executed only once and only if used +_allpackages = $(shell $(GO) list ./... | grep -v vendor) +allpackages = $(if $(__allpackages),,$(eval __allpackages := $$(_allpackages)))$(__allpackages) diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..5c97abc --- /dev/null +++ b/NOTICE @@ -0,0 +1,17 @@ +runc + +Copyright 2012-2015 Docker, Inc. + +This product includes software developed at Docker, Inc. (http://www.docker.com). + +The following is courtesy of our legal counsel: + + +Use and transfer of Docker may be subject to certain restrictions by the +United States and other governments. +It is your responsibility to ensure that your use and/or transfer does not +violate applicable laws. + +For more information, please see http://www.bis.doc.gov + +See also http://www.apache.org/dev/crypto.html and/or seek legal counsel. 
diff --git a/PRINCIPLES.md b/PRINCIPLES.md new file mode 100644 index 0000000..fdcc373 --- /dev/null +++ b/PRINCIPLES.md @@ -0,0 +1,19 @@ +# runc principles + +In the design and development of runc and libcontainer we try to follow these principles: + +(Work in progress) + +* Don't try to replace every tool. Instead, be an ingredient to improve them. +* Less code is better. +* Fewer components are better. Do you really need to add one more class? +* 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand. +* Don't do later what you can do now. "//TODO: refactor" is not acceptable in new code. +* When hesitating between two options, choose the one that is easier to reverse. +* "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later. +* Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable. +* The fewer moving parts in a container, the better. +* Don't merge it unless you document it. +* Don't document it unless you can keep it up-to-date. +* Don't merge it unless you test it! +* Everyone's problem is slightly different. Focus on the part that is the same for everyone, and solve that. diff --git a/README.md b/README.md new file mode 100644 index 0000000..a806f27 --- /dev/null +++ b/README.md @@ -0,0 +1,280 @@ +# runc + +[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc) +[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc) +[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc) + +## Introduction + +`runc` is a CLI tool for spawning and running containers according to the OCI specification. 
+ +## Releases + +`runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository. +We will try to make sure that `runc` and the OCI specification major versions stay in lockstep. +This means that `runc` 1.0.0 should implement the 1.0 version of the specification. + +You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page. + +Currently, the following features are not considered to be production-ready: + +* Support for cgroup v2 + +## Security + +The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/). + +## Building + +`runc` currently supports the Linux platform with various architecture support. +It must be built with Go version 1.6 or higher in order for some features to function properly. + +In order to enable seccomp support you will need to install `libseccomp` on your platform. +> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu + +Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make. + +```bash +# create a 'github.com/opencontainers' in your GOPATH/src +cd github.com/opencontainers +git clone https://github.com/opencontainers/runc +cd runc + +make +sudo make install +``` + +You can also use `go get` to install to your `GOPATH`, assuming that you have a `github.com` parent folder already created under `src`: + +```bash +go get github.com/opencontainers/runc +cd $GOPATH/src/github.com/opencontainers/runc +make +sudo make install +``` + +`runc` will be installed to `/usr/local/sbin/runc` on your system. + + +#### Build Tags + +`runc` supports optional build tags for compiling support of various features. +To add build tags to the make option the `BUILDTAGS` variable must be set. 
+ +```bash +make BUILDTAGS='seccomp apparmor' +``` + +| Build Tag | Feature | Dependency | +|-----------|------------------------------------|-------------| +| seccomp | Syscall filtering | libseccomp | +| selinux | selinux process and mount labeling | | +| apparmor | apparmor profile support | | +| ambient | ambient capability support | kernel 4.3 | +| nokmem | disable kernel memory account | | + + +### Running the test suite + +`runc` currently supports running its test suite via Docker. +To run the suite just type `make test`. + +```bash +make test +``` + +There are additional make targets for running the tests outside of a container but this is not recommended as the tests are written with the expectation that they can write and remove anywhere. + +You can run a specific test case by setting the `TESTFLAGS` variable. + +```bash +# make test TESTFLAGS="-run=SomeTestFunction" +``` + +You can run a specific integration test by setting the `TESTPATH` variable. + +```bash +# make test TESTPATH="/checkpoint.bats" +``` + +You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables. + +```bash +# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/" +``` + +### Dependencies Management + +`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management. +Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update +new dependencies. + +## Using runc + +### Creating an OCI Bundle + +In order to use runc you must have your container in the format of an OCI bundle. +If you have Docker installed you can use its `export` method to acquire a root filesystem from an existing Docker container. 
+ +```bash +# create the top most bundle directory +mkdir /mycontainer +cd /mycontainer + +# create the rootfs directory +mkdir rootfs + +# export busybox via Docker into the rootfs directory +docker export $(docker create busybox) | tar -C rootfs -xvf - +``` + +After a root filesystem is populated you just generate a spec in the format of a `config.json` file inside your bundle. +`runc` provides a `spec` command to generate a base template spec that you are then able to edit. +To find features and documentation for fields in the spec please refer to the [specs](https://github.com/opencontainers/runtime-spec) repository. + +```bash +runc spec +``` + +### Running Containers + +Assuming you have an OCI bundle from the previous step you can execute the container in two different ways. + +The first way is to use the convenience command `run` that will handle creating, starting, and deleting the container after it exits. + +```bash +# run as root +cd /mycontainer +runc run mycontainerid +``` + +If you used the unmodified `runc spec` template this should give you a `sh` session inside the container. + +The second way to start a container is using the specs lifecycle operations. +This gives you more power over how the container is created and managed while it is running. +This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here. +Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`. 
+ + +```json + "process": { + "terminal": false, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ + "sleep", "5" + ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/", + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "inheritable": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "ambient": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ] + }, + "rlimits": [ + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ], + "noNewPrivileges": true + }, +``` + +Now we can go through the lifecycle operations in your shell. + + +```bash +# run as root +cd /mycontainer +runc create mycontainerid + +# view the container is created and in the "created" state +runc list + +# start the process inside the container +runc start mycontainerid + +# after 5 seconds view that the container has exited and is now in the stopped state +runc list + +# now delete the container +runc delete mycontainerid +``` + +This allows higher level systems to augment the containers creation logic with setup of various settings after the container is created and/or before it is deleted. For example, the container's network stack is commonly set up after `create` but before `start`. + +#### Rootless containers +`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. + +**Note:** In order to use this feature, "User Namespaces" must be compiled and enabled in your kernel. 
There are various ways to do this depending on your distribution: +- Confirm `CONFIG_USER_NS=y` is set in your kernel configuration (normally found in `/proc/config.gz`) +- Arch/Debian: `echo 1 > /proc/sys/kernel/unprivileged_userns_clone` +- RHEL/CentOS 7: `echo 28633 > /proc/sys/user/max_user_namespaces` + +Run the following commands as an ordinary user: +```bash +# Same as the first example +mkdir ~/mycontainer +cd ~/mycontainer +mkdir rootfs +docker export $(docker create busybox) | tar -C rootfs -xvf - + +# The --rootless parameter instructs runc spec to generate a configuration for a rootless container, which will allow you to run the container as a non-root user. +runc spec --rootless + +# The --root parameter tells runc where to store the container state. It must be writable by the user. +runc --root /tmp/runc run mycontainerid +``` + +#### Supervisors + +`runc` can be used with process supervisors and init systems to ensure that containers are restarted when they exit. +An example systemd unit file looks something like this. + +```systemd +[Unit] +Description=Start My Container + +[Service] +Type=forking +ExecStart=/usr/local/sbin/runc run -d --pid-file /run/mycontainerid.pid mycontainerid +ExecStopPost=/usr/local/sbin/runc delete mycontainerid +WorkingDirectory=/mycontainer +PIDFile=/run/mycontainerid.pid + +[Install] +WantedBy=multi-user.target +``` + +## License + +The code and docs are released under the [Apache 2.0 license](LICENSE). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..63a7438 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,3 @@ +# Security + +The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/). 
diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..950f8ca --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +1.0.0-rc10 diff --git a/checkpoint.go b/checkpoint.go new file mode 100644 index 0000000..ae01ea3 --- /dev/null +++ b/checkpoint.go @@ -0,0 +1,137 @@ +// +build linux + +package main + +import ( + "fmt" + "os" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "github.com/urfave/cli" + + "golang.org/x/sys/unix" +) + +var checkpointCommand = cli.Command{ + Name: "checkpoint", + Usage: "checkpoint a running container", + ArgsUsage: ` + +Where "" is the name for the instance of the container to be +checkpointed.`, + Description: `The checkpoint command saves the state of the container instance.`, + Flags: []cli.Flag{ + cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"}, + cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"}, + cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"}, + cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"}, + cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"}, + cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"}, + cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"}, + cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"}, + cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"}, + cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"}, + cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"}, + cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after 
this"}, + cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"}, + cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"}, + cli.BoolFlag{Name: "auto-dedup", Usage: "enable auto deduplication of memory images"}, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + // XXX: Currently this is untested with rootless containers. + if os.Geteuid() != 0 || system.RunningInUserNS() { + logrus.Warn("runc checkpoint is untested with rootless containers") + } + + container, err := getContainer(context) + if err != nil { + return err + } + status, err := container.Status() + if err != nil { + return err + } + if status == libcontainer.Created || status == libcontainer.Stopped { + fatalf("Container cannot be checkpointed in %s state", status.String()) + } + defer destroy(container) + options := criuOptions(context) + // these are the mandatory criu options for a container + setPageServer(context, options) + setManageCgroupsMode(context, options) + if err := setEmptyNsMask(context, options); err != nil { + return err + } + return container.Checkpoint(options) + }, +} + +func getCheckpointImagePath(context *cli.Context) string { + imagePath := context.String("image-path") + if imagePath == "" { + imagePath = getDefaultImagePath(context) + } + return imagePath +} + +func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) { + // xxx following criu opts are optional + // The dump image can be sent to a criu page server + if psOpt := context.String("page-server"); psOpt != "" { + addressPort := strings.Split(psOpt, ":") + if len(addressPort) != 2 { + fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server")) + } + portInt, err := strconv.Atoi(addressPort[1]) + if err != nil { + fatal(fmt.Errorf("Invalid port number")) + } + options.PageServer = 
libcontainer.CriuPageServerInfo{ + Address: addressPort[0], + Port: int32(portInt), + } + } +} + +func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) { + if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" { + switch cgOpt { + case "soft": + options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT + case "full": + options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL + case "strict": + options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT + default: + fatal(fmt.Errorf("Invalid manage cgroups mode")) + } + } +} + +var namespaceMapping = map[specs.LinuxNamespaceType]int{ + specs.NetworkNamespace: unix.CLONE_NEWNET, +} + +func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error { + /* Runc doesn't manage network devices and their configuration */ + nsmask := unix.CLONE_NEWNET + + for _, ns := range context.StringSlice("empty-ns") { + f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)] + if !exists { + return fmt.Errorf("namespace %q is not supported", ns) + } + nsmask |= f + } + + options.EmptyNs = uint32(nsmask) + return nil +} diff --git a/contrib/cmd/recvtty/recvtty.go b/contrib/cmd/recvtty/recvtty.go new file mode 100644 index 0000000..a658b8d --- /dev/null +++ b/contrib/cmd/recvtty/recvtty.go @@ -0,0 +1,238 @@ +/* + * Copyright 2016 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
// bail writes err to standard error, prefixed with "[recvtty] fatal error:",
// and terminates the process with exit status 1.
func bail(err error) {
	message := fmt.Sprintf("[recvtty] fatal error: %v\n", err)
	os.Stderr.WriteString(message)
	os.Exit(1)
}
+ unixconn, ok := conn.(*net.UnixConn) + if !ok { + return fmt.Errorf("failed to cast to unixconn") + } + + socket, err := unixconn.File() + if err != nil { + return err + } + defer socket.Close() + + // Get the master file descriptor from runC. + master, err := utils.RecvFd(socket) + if err != nil { + return err + } + c, err := console.ConsoleFromFile(master) + if err != nil { + return err + } + console.ClearONLCR(c.Fd()) + + // Copy from our stdio to the master fd. + quitChan := make(chan struct{}) + go func() { + io.Copy(os.Stdout, c) + quitChan <- struct{}{} + }() + go func() { + io.Copy(c, os.Stdin) + quitChan <- struct{}{} + }() + + // Only close the master fd once we've stopped copying. + <-quitChan + c.Close() + return nil +} + +func handleNull(path string) error { + // Open a socket. + ln, err := net.Listen("unix", path) + if err != nil { + return err + } + defer ln.Close() + + // As opposed to handleSingle we accept as many connections as we get, but + // we don't interact with Stdin at all (and we copy stdout to /dev/null). + for { + conn, err := ln.Accept() + if err != nil { + return err + } + go func(conn net.Conn) { + // Don't leave references lying around. + defer conn.Close() + + // Get the fd of the connection. + unixconn, ok := conn.(*net.UnixConn) + if !ok { + return + } + + socket, err := unixconn.File() + if err != nil { + return + } + defer socket.Close() + + // Get the master file descriptor from runC. + master, err := utils.RecvFd(socket) + if err != nil { + return + } + + // Just do a dumb copy to /dev/null. + devnull, err := os.OpenFile("/dev/null", os.O_RDWR, 0) + if err != nil { + // TODO: Handle this nicely. + return + } + + io.Copy(devnull, master) + devnull.Close() + }(conn) + } +} + +func main() { + app := cli.NewApp() + app.Name = "recvtty" + app.Usage = usage + + // Set version to be the same as runC. 
+ var v []string + if version != "" { + v = append(v, version) + } + if gitCommit != "" { + v = append(v, fmt.Sprintf("commit: %s", gitCommit)) + } + app.Version = strings.Join(v, "\n") + + // Set the flags. + app.Flags = []cli.Flag{ + cli.StringFlag{ + Name: "mode, m", + Value: "single", + Usage: "Mode of operation (single or null)", + }, + cli.StringFlag{ + Name: "pid-file", + Value: "", + Usage: "Path to write daemon process ID to", + }, + } + + app.Action = func(ctx *cli.Context) error { + args := ctx.Args() + if len(args) != 1 { + return fmt.Errorf("need to specify a single socket path") + } + path := ctx.Args()[0] + + pidPath := ctx.String("pid-file") + if pidPath != "" { + pid := fmt.Sprintf("%d\n", os.Getpid()) + if err := ioutil.WriteFile(pidPath, []byte(pid), 0644); err != nil { + return err + } + } + + switch ctx.String("mode") { + case "single": + if err := handleSingle(path); err != nil { + return err + } + case "null": + if err := handleNull(path); err != nil { + return err + } + default: + return fmt.Errorf("need to select a valid mode: %s", ctx.String("mode")) + } + return nil + } + if err := app.Run(os.Args); err != nil { + bail(err) + } +} diff --git a/contrib/completions/bash/runc b/contrib/completions/bash/runc new file mode 100644 index 0000000..9517a5b --- /dev/null +++ b/contrib/completions/bash/runc @@ -0,0 +1,826 @@ +#!/bin/bash +# +# bash completion file for runc command +# +# This script provides completion of: +# - commands and their options +# - filepaths +# +# To enable the completions either: +# - place this file in /usr/share/bash-completion/completions +# or +# - copy this file to e.g. ~/.runc-completion.sh and add the line +# below to your .bashrc after bash completion features are loaded +# . ~/.runc-completion.sh +# +# Configuration: +# + +# Note for developers: +# Please arrange options sorted alphabetically by long name with the short +# options immediately following their corresponding long form. 
# Completes the current word ($cur) with the container IDs printed by
# `runc list -q`.
__runc_list_all() {
	# $cur is quoted so that a partially typed word is passed to compgen as a
	# single argument and is never split or glob-expanded by the shell.
	COMPREPLY=($(compgen -W "$(runc list -q)" -- "$cur"))
}
# Completes the current word ($cur) with signal names. The names are taken
# from the output of `kill -l`, keeping only the tokens that contain "SIG".
__runc_list_signals() {
	# Pass "$cur" to compgen so that only signals matching what the user has
	# typed so far are offered, as every other completion in this file does.
	COMPREPLY=($(compgen -W "$(for i in $(kill -l | xargs); do echo "$i"; done | grep SIG)" -- "$cur"))
}
# Completion for `runc ps`: offers the output formats after --format/-f,
# the command's options when the current word starts with "-", and container
# IDs otherwise.
_runc_ps() {
	local boolean_options="
		--help
		-h
	"
	# One option per word: a "--format, -f" entry would make compgen offer the
	# literal word "--format," (with the comma).
	local options_with_args="
		--format
		-f
	"

	case "$prev" in
		--format | -f)
			# The value of --format is an output format, not a container ID.
			COMPREPLY=($(compgen -W 'table json' -- "$cur"))
			return
			;;
	esac

	case "$cur" in
		-*)
			COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
			;;
		*)
			__runc_list_all
			;;
	esac
}
# Completion for `runc kill`:
#   - a word starting with "-" is completed with the command's options;
#   - directly after `kill` (or after one of its boolean options) container
#     IDs are offered;
#   - in any other position signal names are offered.
_runc_kill() {
	local boolean_options="
		--help
		-h
		--all
		-a
	"

	# Options are handled first. Previously an unconditional return in the
	# positional logic made option completion unreachable.
	case "$cur" in
		-*)
			COMPREPLY=($(compgen -W "$boolean_options" -- "$cur"))
			return
			;;
	esac

	case "$prev" in
		kill | --all | -a)
			# Boolean options take no value, so the container ID is still
			# the next expected word.
			__runc_list_all
			;;
		*)
			__runc_list_signals
			;;
	esac
}
# Completion for `runc checkpoint`.
#
# The word before the cursor decides what is offered:
#   --manage-cgroups-mode          -> the three cgroup modes
#   --image-path/--work-path/
#   --parent-path                  -> "/" for an empty word, paths otherwise
#   --page-server                  -> nothing special, falls through below
#   any other option with a value  -> no completion
# Otherwise options are offered for words starting with "-" and container IDs
# for everything else.
_runc_checkpoint() {
	local switches="
		--help
		-h
		--leave-running
		--tcp-established
		--ext-unix-sk
		--shell-job
		--lazy-pages
		--file-locks
		--pre-dump
		--auto-dedup
	"

	local valued="
		--image-path
		--work-path
		--parent-path
		--status-fd
		--page-server
		--manage-cgroups-mode
		--empty-ns
	"

	case "$prev" in
		--manage-cgroups-mode)
			COMPREPLY=($(compgen -W "soft full strict" -- "$cur"))
			return
			;;

		--image-path | --work-path | --parent-path)
			if [[ $cur == *:* ]]; then
				# TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
				:
			elif [[ -z $cur ]]; then
				COMPREPLY=($(compgen -W '/' -- "$cur"))
				__runc_nospace
			else
				_filedir
				__runc_nospace
			fi
			return
			;;

		# Must stay ahead of the catch-all pattern below so that it keeps
		# falling through to the generic handling.
		--page-server) ;;

		$(__runc_to_extglob "$valued"))
			return
			;;
	esac

	if [[ $cur == -* ]]; then
		COMPREPLY=($(compgen -W "$switches $valued" -- "$cur"))
	else
		__runc_list_all
	fi
}
# Completion for `runc help`: when the cursor sits on the first non-flag word
# after `help`, offer the list of runc commands; in any other position offer
# nothing.
_runc_help() {
	local first_word_pos
	first_word_pos=$(__runc_pos_first_nonflag)

	# Nothing to complete unless the cursor is on that first word.
	if ! [ $cword -eq $first_word_pos ]; then
		return 0
	fi

	COMPREPLY=($(compgen -W "${commands[*]}" -- "$cur"))
}
+ __runc_list_all + ;; + esac +} +_runc_update() { + local boolean_options=" + --help + " + + local options_with_args=" + --blkio-weight + --cpu-period + --cpu-quota + --cpu-rt-period + --cpu-rt-runtime + --cpu-share + --cpuset-cpus + --cpuset-mems + --kernel-memory + --kernel-memory-tcp + --memory + --memory-reservation + --memory-swap + --pids-limit + --l3-cache-schema + --mem-bw-schema + " + + case "$prev" in + $(__runc_to_extglob "$options_with_args")) + return + ;; + esac + + case "$cur" in + -*) + COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur")) + ;; + *) + __runc_list_all + ;; + esac +} + +_runc() { + local previous_extglob_setting=$(shopt -p extglob) + shopt -s extglob + + local commands=( + checkpoint + create + delete + events + exec + init + kill + list + pause + ps + restore + resume + run + spec + start + state + update + help + h + ) + + # These options are valid as global options for all client commands + # and valid as command options for `runc daemon` + local global_boolean_options=" + --help -h + --version -v + " + + COMPREPLY=() + local cur prev words cword + _get_comp_words_by_ref -n : cur prev words cword + + local command='runc' command_pos=0 subcommand_pos + local counter=1 + while [ $counter -lt $cword ]; do + case "${words[$counter]}" in + -*) ;; + =) + ((counter++)) + ;; + *) + command="${words[$counter]}" + command_pos=$counter + break + ;; + esac + ((counter++)) + done + + local completions_func=_runc_${command} + declare -F $completions_func >/dev/null && $completions_func + + eval "$previous_extglob_setting" + return 0 +} + +eval "$__runc_previous_extglob_setting" +unset __runc_previous_extglob_setting + +complete -F _runc runc diff --git a/create.go b/create.go new file mode 100644 index 0000000..5f3ac60 --- /dev/null +++ b/create.go @@ -0,0 +1,74 @@ +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +var createCommand = cli.Command{ + Name: "create", + Usage: "create a container", + ArgsUsage: 
` + +Where "" is your name for the instance of the container that you +are starting. The name you provide for the container instance must be unique on +your host.`, + Description: `The create command creates an instance of a container for a bundle. The bundle +is a directory with a specification file named "` + specConfig + `" and a root +filesystem. + +The specification file includes an args parameter. The args parameter is used +to specify command(s) that get run when the container is started. To change the +command(s) that get executed on start, edit the args parameter of the spec. See +"runc spec --help" for more explanation.`, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "bundle, b", + Value: "", + Usage: `path to the root of the bundle directory, defaults to the current directory`, + }, + cli.StringFlag{ + Name: "console-socket", + Value: "", + Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal", + }, + cli.StringFlag{ + Name: "pid-file", + Value: "", + Usage: "specify the file to write the process id to", + }, + cli.BoolFlag{ + Name: "no-pivot", + Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk", + }, + cli.BoolFlag{ + Name: "no-new-keyring", + Usage: "do not create a new session keyring for the container. 
This will cause the container to inherit the calling processes session key", + }, + cli.IntFlag{ + Name: "preserve-fds", + Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + if err := revisePidFile(context); err != nil { + return err + } + spec, err := setupSpec(context) + if err != nil { + return err + } + status, err := startContainer(context, spec, CT_ACT_CREATE, nil) + if err != nil { + return err + } + // exit with the container's exit status so any external supervisor is + // notified of the exit with the correct exit status. + os.Exit(status) + return nil + }, +} diff --git a/delete.go b/delete.go new file mode 100644 index 0000000..fb6f38e --- /dev/null +++ b/delete.go @@ -0,0 +1,89 @@ +// +build !solaris + +package main + +import ( + "fmt" + "os" + "path/filepath" + "syscall" + "time" + + "github.com/opencontainers/runc/libcontainer" + "github.com/urfave/cli" + + "golang.org/x/sys/unix" +) + +func killContainer(container libcontainer.Container) error { + _ = container.Signal(unix.SIGKILL, false) + for i := 0; i < 100; i++ { + time.Sleep(100 * time.Millisecond) + if err := container.Signal(syscall.Signal(0), false); err != nil { + destroy(container) + return nil + } + } + return fmt.Errorf("container init still running") +} + +var deleteCommand = cli.Command{ + Name: "delete", + Usage: "delete any resources held by the container often used with detached container", + ArgsUsage: ` + +Where "" is the name for the instance of the container. 
+ +EXAMPLE: +For example, if the container id is "ubuntu01" and runc list currently shows the +status of "ubuntu01" as "stopped" the following will delete resources held for +"ubuntu01" removing "ubuntu01" from the runc list of containers: + + # runc delete ubuntu01`, + Flags: []cli.Flag{ + cli.BoolFlag{ + Name: "force, f", + Usage: "Forcibly deletes the container if it is still running (uses SIGKILL)", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + + id := context.Args().First() + force := context.Bool("force") + container, err := getContainer(context) + if err != nil { + if lerr, ok := err.(libcontainer.Error); ok && lerr.Code() == libcontainer.ContainerNotExists { + // if there was an aborted start or something of the sort then the container's directory could exist but + // libcontainer does not see it because the state.json file inside that directory was never created. + path := filepath.Join(context.GlobalString("root"), id) + if e := os.RemoveAll(path); e != nil { + fmt.Fprintf(os.Stderr, "remove %s: %v\n", path, e) + } + if force { + return nil + } + } + return err + } + s, err := container.Status() + if err != nil { + return err + } + switch s { + case libcontainer.Stopped: + destroy(container) + case libcontainer.Created: + return killContainer(container) + default: + if force { + return killContainer(container) + } + return fmt.Errorf("cannot delete container %s that is not stopped: %s\n", id, s) + } + + return nil + }, +} diff --git a/docs/checkpoint-restore.md b/docs/checkpoint-restore.md new file mode 100644 index 0000000..80ec46b --- /dev/null +++ b/docs/checkpoint-restore.md @@ -0,0 +1,50 @@ +# Checkpoint and Restore # + +For a basic description about checkpointing and restoring containers with +`runc` please see [runc-checkpoint(8)](../man/runc-checkpoint.8.md) and +[runc-restore(8)](../man/runc-restore.8.md). 
+ +## Checkpoint/Restore Annotations ## + +In addition to specifying options on the command-line like it is described +in the man-pages (see above), it is also possible to influence CRIU's +behaviour using CRIU configuration files. For details about CRIU's +configuration file support please see [CRIU's wiki](https://criu.org/Configuration_files). + +In addition to CRIU's default configuration files `runc` tells CRIU to +also evaluate the file `/etc/criu/runc.conf`. Using the annotation +`org.criu.config` it is, however, possible to change this additional +CRIU configuration file. + +If the annotation `org.criu.config` is set to an empty string `runc` +will not pass any additional configuration file to CRIU. With an empty +string it is therefore possible to disable the additional CRIU configuration +file. This can be used to make sure that no additional configuration file +changes CRIU's behaviour accidentally. + +If the annotation `org.criu.config` is set to a non-empty string `runc` will +pass that string to CRIU to be evaluated as an additional configuration file. +If CRIU cannot open this additional configuration file, it will ignore this +file and continue. + +### Annotation Example to disable additional CRIU configuration file ### + +``` +{ + "ociVersion": "1.0.0", + "annotations": { + "org.criu.config": "" + }, + "process": { +``` + +### Annotation Example to set a specific CRIU configuration file ### + +``` +{ + "ociVersion": "1.0.0", + "annotations": { + "org.criu.config": "/etc/special-runc-criu-options" + }, + "process": { +``` diff --git a/docs/terminals.md b/docs/terminals.md new file mode 100644 index 0000000..fc000e1 --- /dev/null +++ b/docs/terminals.md @@ -0,0 +1,314 @@ +# Terminals and Standard IO # + +*Note that the default configuration of `runc` (foreground, new terminal) is +generally the best option for most users. 
This document exists to help explain +what the purpose of the different modes is, and to try to steer users away from +common mistakes and misunderstandings.* + +In general, most processes on Unix (and Unix-like) operating systems have 3 +standard file descriptors provided at the start, collectively referred to as +"standard IO" (`stdio`): + +* `0`: standard-in (`stdin`), the input stream into the process +* `1`: standard-out (`stdout`), the output stream from the process +* `2`: standard-error (`stderr`), the error stream from the process + +When creating and running a container via `runc`, it is important to take care +to structure the `stdio` the new container's process receives. In some ways +containers are just regular processes, while in other ways they're an isolated +sub-partition of your machine (in a similar sense to a VM). This means that the +structure of IO is not as simple as with ordinary programs (which generally +just use the file descriptors you give them). + +## Other File Descriptors ## + +Before we continue, it is important to note that processes can have more file +descriptors than just `stdio`. By default in `runc` no other file descriptors +will be passed to the spawned container process. If you wish to explicitly pass +file descriptors to the container you have to use the `--preserve-fds` option. +These ancillary file descriptors don't have any of the strange semantics +discussed further in this document (those only apply to `stdio`) -- they are +passed untouched by `runc`. + +It should be noted that `--preserve-fds` does not take individual file +descriptors to preserve. Instead, it takes how many file descriptors (not +including `stdio` or `LISTEN_FDS`) should be passed to the container. In the +following example: + +``` +% runc run --preserve-fds 5 +``` + +`runc` will pass the first `5` file descriptors (`3`, `4`, `5`, `6`, and `7` -- +assuming that `LISTEN_FDS` has not been configured) to the container. 
+ +In addition to `--preserve-fds`, `LISTEN_FDS` file descriptors are passed +automatically to allow for `systemd`-style socket activation. To extend the +above example: + +``` +% LISTEN_PID=$pid_of_runc LISTEN_FDS=3 runc run --preserve-fds 5 +``` + +`runc` will now pass the first `8` file descriptors (and it will also pass +`LISTEN_FDS=3` and `LISTEN_PID=1` to the container). The first `3` (`3`, `4`, +and `5`) were passed due to `LISTEN_FDS` and the other `5` (`6`, `7`, `8`, `9`, +and `10`) were passed due to `--preserve-fds`. You should keep this in mind if +you use `runc` directly in something like a `systemd` unit file. To disable +this `LISTEN_FDS`-style passing just unset `LISTEN_FDS`. + +**Be very careful when passing file descriptors to a container process.** Due +to some Linux kernel (mis)features, a container with access to certain types of +file descriptors (such as `O_PATH` descriptors) outside of the container's root +file system can use these to break out of the container's pivoted mount +namespace. [This has resulted in CVEs in the past.][CVE-2016-9962] + +[CVE-2016-9962]: https://nvd.nist.gov/vuln/detail/CVE-2016-9962 + +## Terminal Modes ## + +`runc` supports two distinct methods for passing `stdio` to the container's +primary process: + +* [new terminal](#new-terminal) (`terminal: true`) +* [pass-through](#pass-through) (`terminal: false`) + +When first using `runc` these two modes will look incredibly similar, but this +can be quite deceptive as these different modes have quite different +characteristics. + +By default, `runc spec` will create a configuration that will create a new +terminal (`terminal: true`). However, if the `terminal: ...` line is not +present in `config.json` then pass-through is the default. + +*In general we recommend using new terminal, because it means that tools like +`sudo` will work inside your container. 
But pass-through can be useful if you +know what you're doing, or if you're using `runc` as part of a non-interactive +pipeline.* + +### New Terminal ### + +In new terminal mode, `runc` will create a brand-new "console" (or more +precisely, a new pseudo-terminal using the container's namespaced +`/dev/pts/ptmx`) for your contained process to use as its `stdio`. + +When you start a process in new terminal mode, `runc` will do the following: + +1. Create a new pseudo-terminal. +2. Pass the slave end to the container's primary process as its `stdio`. +3. Send the master end to a process to interact with the `stdio` for the + container's primary process ([details below](#runc-modes)). + +It should be noted that since a new pseudo-terminal is being used for +communication with the container, some strange properties of pseudo-terminals +might surprise you. For instance, by default, all new pseudo-terminals +translate the byte `'\n'` to the sequence `'\r\n'` on both `stdout` and +`stderr`. In addition there are [a whole range of `ioctls(2)` that can only +interact with pseudo-terminal `stdio`][tty_ioctl(4)]. + +> **NOTE**: In new terminal mode, all three `stdio` file descriptors are the +> same underlying file. The reason for this is to match how a shell's `stdio` +> looks to a process (as well as remove race condition issues with having to +> deal with multiple master pseudo-terminal file descriptors). However this +> means that it is not really possible to uniquely distinguish between `stdout` +> and `stderr` from the caller's perspective. + +[tty_ioctl(4)]: https://linux.die.net/man/4/tty_ioctl + +### Pass-Through ### + +If you have already set up some file handles that you wish your contained +process to use as its `stdio`, then you can ask `runc` to pass them through to +the contained process (this is not necessarily the same as `--preserve-fds`'s +passing of file descriptors -- [details below](#runc-modes)). 
As an example +(assuming that `terminal: false` is set in `config.json`): + +``` +% echo input | runc run some_container > /tmp/log.out 2> /tmp/log.err +``` + +Here the container's various `stdio` file descriptors will be substituted with +the following: + +* `stdin` will be sourced from the `echo input` pipeline. +* `stdout` will be output into `/tmp/log.out` on the host. +* `stderr` will be output into `/tmp/log.err` on the host. + +It should be noted that the actual file handles seen inside the container may +be different [based on the mode `runc` is being used in](#runc-modes) (for +instance, the file referenced by `1` could be `/tmp/log.out` directly or a pipe +which `runc` is using to buffer output, based on the mode). However the net +result will be the same in either case. In principle you could use the [new +terminal mode](#new-terminal) in a pipeline, but the difference will become +more clear when you are introduced to [`runc`'s detached mode](#runc-modes). + +## `runc` Modes ## + +`runc` itself runs in two modes: + +* [foreground](#foreground) +* [detached](#detached) + +You can use either [terminal mode](#terminal-modes) with either `runc` mode. +However, there are considerations that may indicate preference for one mode +over another. It should be noted that while two types of modes (terminal and +`runc`) are conceptually independent from each other, you should be aware of +the intricacies of which combination you are using. + +*In general we recommend using foreground because it's the most +straight-forward to use, with the only downside being that you will have a +long-running `runc` process. Detached mode is difficult to get right and +generally requires having your own `stdio` management.* + +### Foreground ### + +The default (and most straight-forward) mode of `runc`. In this mode, your +`runc` command remains in the foreground with the container process as a child. 
+All `stdio` is buffered through the foreground `runc` process (irrespective of +which terminal mode you are using). This is conceptually quite similar to +running a normal process interactively in a shell (and if you are using `runc` +in a shell interactively, this is what you should use). + +Because the `stdio` will be buffered in this mode, some very important +peculiarities of this mode should be kept in mind: + +* With [new terminal mode](#new-terminal), the container will see a + pseudo-terminal as its `stdio` (as you might expect). However, the `stdio` of + the foreground `runc` process will remain the `stdio` that the process was + started with -- and `runc` will copy all `stdio` between its `stdio` and the + container's `stdio`. This means that while a new pseudo-terminal has been + created, the foreground `runc` process manages it over the lifetime of the + container. + +* With [pass-through mode](#pass-through), the foreground `runc`'s `stdio` is + **not** passed to the container. Instead, the container's `stdio` is a set of + pipes which are used to copy data between `runc`'s `stdio` and the + container's `stdio`. This means that the container never has direct access to + host file descriptors (aside from the pipes created by the container runtime, + but that shouldn't be an issue). + +The main drawback of the foreground mode of operation is that it requires a +long-running foreground `runc` process. If you kill the foreground `runc` +process then you will no longer have access to the `stdio` of the container +(and in most cases this will result in the container dying abnormally due to +`SIGPIPE` or some other error). By extension this means that any bug in the +long-running foreground `runc` process (such as a memory leak) or a stray +OOM-kill sweep could result in your container being killed **through no fault +of the user**. 
In addition, there is no way in foreground mode of passing a +file descriptor directly to the container process as its `stdio` (like +`--preserve-fds` does). + +These shortcomings are obviously sub-optimal and are the reason that `runc` has +an additional mode called "detached mode". + +### Detached ### + +In contrast to foreground mode, in detached mode there is no long-running +foreground `runc` process once the container has started. In fact, there is no +long-running `runc` process at all. However, this means that it is up to the +caller to handle the `stdio` after `runc` has set it up for you. In a shell +this means that the `runc` command will exit and control will return to the +shell, after the container has been set up. + +You can run `runc` in detached mode in one of the following ways: + +* `runc run -d ...` which operates similar to `runc run` but is detached. +* `runc create` followed by `runc start` which is the standard container + lifecycle defined by the OCI runtime specification (`runc create` sets up the + container completely, waiting for `runc start` to begin execution of user + code). + +The main use-case of detached mode is for higher-level tools that want to be +wrappers around `runc`. By running `runc` in detached mode, those tools have +far more control over the container's `stdio` without `runc` getting in the +way (most wrappers around `runc` like `cri-o` or `containerd` use detached mode +for this reason). + +Unfortunately using detached mode is a bit more complicated and requires more +care than the foreground mode -- mainly because it is now up to the caller to +handle the `stdio` of the container. + +#### Detached Pass-Through #### + +In detached mode, pass-through actually does what it says on the tin -- the +`stdio` file descriptors of the `runc` process are passed through (untouched) +to the container's `stdio`. 
The purpose of this option is to allow a user to +set up `stdio` for a container themselves and then force `runc` to just use +their pre-prepared `stdio` (without any pseudo-terminal funny business). *If +you don't see why this would be useful, don't use this option.* + +**You must be incredibly careful when using detached pass-through (especially +in a shell).** The reason for this is that by using detached pass-through you +are passing host file descriptors to the container. In the case of a shell, +usually your `stdio` is going to be a pseudo-terminal (on your host). A +malicious container could take advantage of TTY-specific `ioctls` like +`TIOCSTI` to fake input into the **host** shell (remember that in detached +mode, control is returned to your shell and so the terminal you've given the +container is being read by a shell prompt). + +There are also several other issues with running non-malicious containers in a +shell with detached pass-through (where you pass your shell's `stdio` to the +container): + +* Output from the container will be interleaved with output from your shell (in + a non-deterministic way), without any real way of distinguishing where a + particular piece of output came from. + +* Any input to `stdin` will be non-deterministically split and given to either + the container or the shell (because both are blocked on a `read(2)` of the + same FIFO-style file descriptor). + +They are all related to the fact that there is going to be a race when either +your host or the container tries to read from (or write to) `stdio`. This +problem is especially obvious when in a shell, where usually the terminal has +been put into raw mode (where each individual key-press should cause `read(2)` +to return). + +> **NOTE**: There is also currently a [known problem][issue-1721] where using +> detached pass-through will result in the container hanging if the `stdout` or +> `stderr` is a pipe (though this should be a temporary issue). 
+ +[issue-1721]: https://github.com/opencontainers/runc/issues/1721 + +#### Detached New Terminal #### + +When creating a new pseudo-terminal in detached mode, a fairly obvious +problem appears -- how do we use the new terminal that `runc` created? Unlike +in pass-through, `runc` has created a new set of file descriptors that need to +be used by *something* in order for container communication to work. + +The way this problem is resolved is through the use of Unix domain sockets. +There is a feature of Unix sockets called `SCM_RIGHTS` which allows a file +descriptor to be sent through a Unix socket to a completely separate process +(which can then use that file descriptor as though they opened it). When using +`runc` in detached new terminal mode, this is how a user gets access to the +pseudo-terminal's master file descriptor. + +To this end, there is a new option (which is required if you want to use `runc` +in detached new terminal mode): `--console-socket`. This option takes the path +to a Unix domain socket which `runc` will connect to and send the +pseudo-terminal master file descriptor down. The general process for getting +the pseudo-terminal master is as follows: + +1. Create a Unix domain socket at some path, `$socket_path`. +2. Call `runc run` or `runc create` with the argument `--console-socket + $socket_path`. +3. Using `recvmsg(2)` retrieve the file descriptor sent using `SCM_RIGHTS` by + `runc`. +4. Now the manager can interact with the `stdio` of the container, using the + retrieved pseudo-terminal master. + +After `runc` exits, the only process with a copy of the pseudo-terminal master +file descriptor is whoever read the file descriptor from the socket. + +> **NOTE**: Currently `runc` doesn't support abstract socket addresses (due to +> it not being possible to pass an `argv` with a null-byte as the first +> character). In the future this may change, but currently you must use a valid +> path name. 
+ +In order to help users make use of detached new terminal mode, we have provided +a [Go implementation in the `go-runc` bindings][containerd/go-runc.Socket], as +well as [a simple client][recvtty]. + +[containerd/go-runc.Socket]: https://godoc.org/github.com/containerd/go-runc#Socket +[recvtty]: /contrib/cmd/recvtty diff --git a/events.go b/events.go new file mode 100644 index 0000000..fb3f630 --- /dev/null +++ b/events.go @@ -0,0 +1,215 @@ +// +build linux + +package main + +import ( + "encoding/json" + "fmt" + "os" + "sync" + "time" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/types" + + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +var eventsCommand = cli.Command{ + Name: "events", + Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics", + ArgsUsage: ` + +Where "" is the name for the instance of the container.`, + Description: `The events command displays information about the container. 
By default the +information is displayed once every 5 seconds.`, + Flags: []cli.Flag{ + cli.DurationFlag{Name: "interval", Value: 5 * time.Second, Usage: "set the stats collection interval"}, + cli.BoolFlag{Name: "stats", Usage: "display the container's stats then exit"}, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + container, err := getContainer(context) + if err != nil { + return err + } + duration := context.Duration("interval") + if duration <= 0 { + return fmt.Errorf("duration interval must be greater than 0") + } + status, err := container.Status() + if err != nil { + return err + } + if status == libcontainer.Stopped { + return fmt.Errorf("container with id %s is not running", container.ID()) + } + var ( + stats = make(chan *libcontainer.Stats, 1) + events = make(chan *types.Event, 1024) + group = &sync.WaitGroup{} + ) + group.Add(1) + go func() { + defer group.Done() + enc := json.NewEncoder(os.Stdout) + for e := range events { + if err := enc.Encode(e); err != nil { + logrus.Error(err) + } + } + }() + if context.Bool("stats") { + s, err := container.Stats() + if err != nil { + return err + } + events <- &types.Event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)} + close(events) + group.Wait() + return nil + } + go func() { + for range time.Tick(context.Duration("interval")) { + s, err := container.Stats() + if err != nil { + logrus.Error(err) + continue + } + stats <- s + } + }() + n, err := container.NotifyOOM() + if err != nil { + return err + } + for { + select { + case _, ok := <-n: + if ok { + // this means an oom event was received, if it is !ok then + // the channel was closed because the container stopped and + // the cgroups no longer exist. 
+ events <- &types.Event{Type: "oom", ID: container.ID()} + } else { + n = nil + } + case s := <-stats: + events <- &types.Event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)} + } + if n == nil { + close(events) + break + } + } + group.Wait() + return nil + }, +} + +func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats { + cg := ls.CgroupStats + if cg == nil { + return nil + } + var s types.Stats + s.Pids.Current = cg.PidsStats.Current + s.Pids.Limit = cg.PidsStats.Limit + + s.CPU.Usage.Kernel = cg.CpuStats.CpuUsage.UsageInKernelmode + s.CPU.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode + s.CPU.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage + s.CPU.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage + s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods + s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods + s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime + + s.Memory.Cache = cg.MemoryStats.Cache + s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage) + s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage) + s.Memory.Swap = convertMemoryEntry(cg.MemoryStats.SwapUsage) + s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage) + s.Memory.Raw = cg.MemoryStats.Stats + + s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive) + s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive) + s.Blkio.IoQueuedRecursive = convertBlkioEntry(cg.BlkioStats.IoQueuedRecursive) + s.Blkio.IoServiceTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceTimeRecursive) + s.Blkio.IoWaitTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoWaitTimeRecursive) + s.Blkio.IoMergedRecursive = convertBlkioEntry(cg.BlkioStats.IoMergedRecursive) + s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive) + s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive) + + s.Hugetlb = 
make(map[string]types.Hugetlb) + for k, v := range cg.HugetlbStats { + s.Hugetlb[k] = convertHugtlb(v) + } + + if is := ls.IntelRdtStats; is != nil { + if intelrdt.IsCatEnabled() { + s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo) + s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot + s.IntelRdt.L3CacheSchema = is.L3CacheSchema + } + if intelrdt.IsMbaEnabled() { + s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo) + s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot + s.IntelRdt.MemBwSchema = is.MemBwSchema + } + } + + s.NetworkInterfaces = ls.Interfaces + return &s +} + +func convertHugtlb(c cgroups.HugetlbStats) types.Hugetlb { + return types.Hugetlb{ + Usage: c.Usage, + Max: c.MaxUsage, + Failcnt: c.Failcnt, + } +} + +func convertMemoryEntry(c cgroups.MemoryData) types.MemoryEntry { + return types.MemoryEntry{ + Limit: c.Limit, + Usage: c.Usage, + Max: c.MaxUsage, + Failcnt: c.Failcnt, + } +} + +func convertBlkioEntry(c []cgroups.BlkioStatEntry) []types.BlkioEntry { + var out []types.BlkioEntry + for _, e := range c { + out = append(out, types.BlkioEntry{ + Major: e.Major, + Minor: e.Minor, + Op: e.Op, + Value: e.Value, + }) + } + return out +} + +func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *types.L3CacheInfo { + return &types.L3CacheInfo{ + CbmMask: i.CbmMask, + MinCbmBits: i.MinCbmBits, + NumClosids: i.NumClosids, + } +} + +func convertMemBwInfo(i *intelrdt.MemBwInfo) *types.MemBwInfo { + return &types.MemBwInfo{ + BandwidthGran: i.BandwidthGran, + DelayLinear: i.DelayLinear, + MinBandwidth: i.MinBandwidth, + NumClosids: i.NumClosids, + } +} diff --git a/exec.go b/exec.go new file mode 100644 index 0000000..b963d68 --- /dev/null +++ b/exec.go @@ -0,0 +1,235 @@ +// +build linux + +package main + +import ( + "encoding/json" + "fmt" + "os" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/urfave/cli" +) 
+ +var execCommand = cli.Command{ + Name: "exec", + Usage: "execute new process inside the container", + ArgsUsage: ` [command options] || -p process.json + +Where "" is the name for the instance of the container and +"" is the command to be executed in the container. +"" can't be empty unless a "-p" flag provided. + +EXAMPLE: +For example, if the container is configured to run the linux ps command the +following will output a list of processes running in the container: + + # runc exec ps`, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "console-socket", + Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal", + }, + cli.StringFlag{ + Name: "cwd", + Usage: "current working directory in the container", + }, + cli.StringSliceFlag{ + Name: "env, e", + Usage: "set environment variables", + }, + cli.BoolFlag{ + Name: "tty, t", + Usage: "allocate a pseudo-TTY", + }, + cli.StringFlag{ + Name: "user, u", + Usage: "UID (format: [:])", + }, + cli.Int64SliceFlag{ + Name: "additional-gids, g", + Usage: "additional gids", + }, + cli.StringFlag{ + Name: "process, p", + Usage: "path to the process.json", + }, + cli.BoolFlag{ + Name: "detach,d", + Usage: "detach from the container's process", + }, + cli.StringFlag{ + Name: "pid-file", + Value: "", + Usage: "specify the file to write the process id to", + }, + cli.StringFlag{ + Name: "process-label", + Usage: "set the asm process label for the process commonly used with selinux", + }, + cli.StringFlag{ + Name: "apparmor", + Usage: "set the apparmor profile for the process", + }, + cli.BoolFlag{ + Name: "no-new-privs", + Usage: "set the no new privileges value for the process", + }, + cli.StringSliceFlag{ + Name: "cap, c", + Value: &cli.StringSlice{}, + Usage: "add a capability to the bounding set for the process", + }, + cli.BoolFlag{ + Name: "no-subreaper", + Usage: "disable the use of the subreaper used to reap reparented processes", + Hidden: true, + }, 
+ cli.IntFlag{ + Name: "preserve-fds", + Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, minArgs); err != nil { + return err + } + if err := revisePidFile(context); err != nil { + return err + } + status, err := execProcess(context) + if err == nil { + os.Exit(status) + } + return fmt.Errorf("exec failed: %v", err) + }, + SkipArgReorder: true, +} + +func execProcess(context *cli.Context) (int, error) { + container, err := getContainer(context) + if err != nil { + return -1, err + } + status, err := container.Status() + if err != nil { + return -1, err + } + if status == libcontainer.Stopped { + return -1, fmt.Errorf("cannot exec a container that has stopped") + } + path := context.String("process") + if path == "" && len(context.Args()) == 1 { + return -1, fmt.Errorf("process args cannot be empty") + } + detach := context.Bool("detach") + state, err := container.State() + if err != nil { + return -1, err + } + bundle := utils.SearchLabels(state.Config.Labels, "bundle") + p, err := getProcess(context, bundle) + if err != nil { + return -1, err + } + + logLevel := "info" + if context.GlobalBool("debug") { + logLevel = "debug" + } + + r := &runner{ + enableSubreaper: false, + shouldDestroy: false, + container: container, + consoleSocket: context.String("console-socket"), + detach: detach, + pidFile: context.String("pid-file"), + action: CT_ACT_RUN, + init: false, + preserveFDs: context.Int("preserve-fds"), + logLevel: logLevel, + } + return r.run(p) +} + +func getProcess(context *cli.Context, bundle string) (*specs.Process, error) { + if path := context.String("process"); path != "" { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + var p specs.Process + if err := json.NewDecoder(f).Decode(&p); err != nil { + return nil, err + } + return &p, validateProcessSpec(&p) + } + // process via cli flags 
+ if err := os.Chdir(bundle); err != nil { + return nil, err + } + spec, err := loadSpec(specConfig) + if err != nil { + return nil, err + } + p := spec.Process + p.Args = context.Args()[1:] + // override the cwd, if passed + if context.String("cwd") != "" { + p.Cwd = context.String("cwd") + } + if ap := context.String("apparmor"); ap != "" { + p.ApparmorProfile = ap + } + if l := context.String("process-label"); l != "" { + p.SelinuxLabel = l + } + if caps := context.StringSlice("cap"); len(caps) > 0 { + for _, c := range caps { + p.Capabilities.Bounding = append(p.Capabilities.Bounding, c) + p.Capabilities.Inheritable = append(p.Capabilities.Inheritable, c) + p.Capabilities.Effective = append(p.Capabilities.Effective, c) + p.Capabilities.Permitted = append(p.Capabilities.Permitted, c) + p.Capabilities.Ambient = append(p.Capabilities.Ambient, c) + } + } + // append the passed env variables + p.Env = append(p.Env, context.StringSlice("env")...) + + // set the tty + if context.IsSet("tty") { + p.Terminal = context.Bool("tty") + } + if context.IsSet("no-new-privs") { + p.NoNewPrivileges = context.Bool("no-new-privs") + } + // override the user, if passed + if context.String("user") != "" { + u := strings.SplitN(context.String("user"), ":", 2) + if len(u) > 1 { + gid, err := strconv.Atoi(u[1]) + if err != nil { + return nil, fmt.Errorf("parsing %s as int for gid failed: %v", u[1], err) + } + p.User.GID = uint32(gid) + } + uid, err := strconv.Atoi(u[0]) + if err != nil { + return nil, fmt.Errorf("parsing %s as int for uid failed: %v", u[0], err) + } + p.User.UID = uint32(uid) + } + for _, gid := range context.Int64Slice("additional-gids") { + if gid < 0 { + return nil, fmt.Errorf("additional-gids must be a positive number %d", gid) + } + p.User.AdditionalGids = append(p.User.AdditionalGids, uint32(gid)) + } + return p, validateProcessSpec(p) +} diff --git a/init.go b/init.go new file mode 100644 index 0000000..08351fd --- /dev/null +++ b/init.go @@ -0,0 +1,50 @@ 
+package main + +import ( + "fmt" + "os" + "runtime" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/logs" + _ "github.com/opencontainers/runc/libcontainer/nsenter" + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +func init() { + if len(os.Args) > 1 && os.Args[1] == "init" { + runtime.GOMAXPROCS(1) + runtime.LockOSThread() + + level := os.Getenv("_LIBCONTAINER_LOGLEVEL") + logLevel, err := logrus.ParseLevel(level) + if err != nil { + panic(fmt.Sprintf("libcontainer: failed to parse log level: %q: %v", level, err)) + } + + err = logs.ConfigureLogging(logs.Config{ + LogPipeFd: os.Getenv("_LIBCONTAINER_LOGPIPE"), + LogFormat: "json", + LogLevel: logLevel, + }) + if err != nil { + panic(fmt.Sprintf("libcontainer: failed to configure logging: %v", err)) + } + logrus.Debug("child process in init()") + } +} + +var initCommand = cli.Command{ + Name: "init", + Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`, + Action: func(context *cli.Context) error { + factory, _ := libcontainer.New("") + if err := factory.StartInitialization(); err != nil { + // as the error is sent back to the parent there is no need to log + // or write it to stderr because the parent process will handle this + os.Exit(1) + } + panic("libcontainer: container init failed to exec") + }, +} diff --git a/kill.go b/kill.go new file mode 100644 index 0000000..c2d7929 --- /dev/null +++ b/kill.go @@ -0,0 +1,68 @@ +// +build linux + +package main + +import ( + "fmt" + "strconv" + "strings" + "syscall" + + "github.com/urfave/cli" +) + +var killCommand = cli.Command{ + Name: "kill", + Usage: "kill sends the specified signal (default: SIGTERM) to the container's init process", + ArgsUsage: ` [signal] + +Where "" is the name for the instance of the container and +"[signal]" is the signal to be sent to the init process. 
+ +EXAMPLE: +For example, if the container id is "ubuntu01" the following will send a "KILL" +signal to the init process of the "ubuntu01" container: + + # runc kill ubuntu01 KILL`, + Flags: []cli.Flag{ + cli.BoolFlag{ + Name: "all, a", + Usage: "send the specified signal to all processes inside the container", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, minArgs); err != nil { + return err + } + if err := checkArgs(context, 2, maxArgs); err != nil { + return err + } + container, err := getContainer(context) + if err != nil { + return err + } + + sigstr := context.Args().Get(1) + if sigstr == "" { + sigstr = "SIGTERM" + } + + signal, err := parseSignal(sigstr) + if err != nil { + return err + } + return container.Signal(signal, context.Bool("all")) + }, +} + +func parseSignal(rawSignal string) (syscall.Signal, error) { + s, err := strconv.Atoi(rawSignal) + if err == nil { + return syscall.Signal(s), nil + } + signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")] + if !ok { + return -1, fmt.Errorf("unknown signal %q", rawSignal) + } + return signal, nil +} diff --git a/libcontainer/README.md b/libcontainer/README.md new file mode 100644 index 0000000..a791ca2 --- /dev/null +++ b/libcontainer/README.md @@ -0,0 +1,331 @@ +# libcontainer + +[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer) + +Libcontainer provides a native Go implementation for creating containers +with namespaces, cgroups, capabilities, and filesystem access controls. +It allows you to manage the lifecycle of the container performing additional operations +after the container is created. + + +#### Container +A container is a self contained execution environment that shares the kernel of the +host system and which is (optionally) isolated from other containers in the system. 
+ +#### Using libcontainer + +Because containers are spawned in a two-step process you will need a binary that +will be executed as the init process for the container. In libcontainer, we use +the current binary (/proc/self/exe) to be executed as the init process, and use +arg "init". We call the first step process "bootstrap", so you always need an "init" +function as the entry of "bootstrap". + +In addition to the go init function the early stage bootstrap is handled by importing +[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md). + +```go +import ( + _ "github.com/opencontainers/runc/libcontainer/nsenter" +) + +func init() { + if len(os.Args) > 1 && os.Args[1] == "init" { + runtime.GOMAXPROCS(1) + runtime.LockOSThread() + factory, _ := libcontainer.New("") + if err := factory.StartInitialization(); err != nil { + logrus.Fatal(err) + } + panic("--this line should have never been executed, congratulations--") + } +} +``` + +Then to create a container you first have to initialize an instance of a factory +that will handle the creation and initialization for a container. + +```go +factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init")) +if err != nil { + logrus.Fatal(err) + return +} +``` + +Once you have an instance of the factory created we can create a configuration +struct describing how the container is to be created.
A sample would look similar to this: + +```go +defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV +config := &configs.Config{ + Rootfs: "/your/path/to/rootfs", + Capabilities: &configs.Capabilities{ + Bounding: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Effective: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Inheritable: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Permitted: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Ambient: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + }, + Namespaces: configs.Namespaces([]configs.Namespace{ + {Type: configs.NEWNS}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + {Type: configs.NEWPID}, + {Type: configs.NEWUSER}, + {Type: configs.NEWNET}, + {Type: configs.NEWCGROUP}, + }), + Cgroups: &configs.Cgroup{ + Name: "test-container", + Parent: "system", + Resources: &configs.Resources{ + MemorySwappiness: nil, + AllowAllDevices: 
nil, + AllowedDevices: configs.DefaultAllowedDevices, + }, + }, + MaskPaths: []string{ + "/proc/kcore", + "/sys/firmware", + }, + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, + Devices: configs.DefaultAutoCreatedDevices, + Hostname: "testing", + Mounts: []*configs.Mount{ + { + Source: "proc", + Destination: "/proc", + Device: "proc", + Flags: defaultMountFlags, + }, + { + Source: "tmpfs", + Destination: "/dev", + Device: "tmpfs", + Flags: unix.MS_NOSUID | unix.MS_STRICTATIME, + Data: "mode=755", + }, + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + Flags: unix.MS_NOSUID | unix.MS_NOEXEC, + Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", + }, + { + Device: "tmpfs", + Source: "shm", + Destination: "/dev/shm", + Data: "mode=1777,size=65536k", + Flags: defaultMountFlags, + }, + { + Source: "mqueue", + Destination: "/dev/mqueue", + Device: "mqueue", + Flags: defaultMountFlags, + }, + { + Source: "sysfs", + Destination: "/sys", + Device: "sysfs", + Flags: defaultMountFlags | unix.MS_RDONLY, + }, + }, + UidMappings: []configs.IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 65536, + }, + }, + GidMappings: []configs.IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 65536, + }, + }, + Networks: []*configs.Network{ + { + Type: "loopback", + Address: "127.0.0.1/0", + Gateway: "localhost", + }, + }, + Rlimits: []configs.Rlimit{ + { + Type: unix.RLIMIT_NOFILE, + Hard: uint64(1025), + Soft: uint64(1025), + }, + }, +} +``` + +Once you have the configuration populated you can create a container: + +```go +container, err := factory.Create("container-id", config) +if err != nil { + logrus.Fatal(err) + return +} +``` + +To spawn bash as the initial process inside the container and have the +processes pid returned in order to wait, signal, or kill the process: + +```go +process := &libcontainer.Process{ + Args: []string{"/bin/bash"}, + Env: []string{"PATH=/bin"}, + User: "daemon", + Stdin: os.Stdin, + 
+ Stdout: os.Stdout, + Stderr: os.Stderr, + Init: true, +} + +err := container.Run(process) +if err != nil { + container.Destroy() + logrus.Fatal(err) + return +} + +// wait for the process to finish. +_, err := process.Wait() +if err != nil { + logrus.Fatal(err) +} + +// destroy the container. +container.Destroy() +``` + +Additional ways to interact with a running container are: + +```go +// return all the pids for all processes running inside the container. +processes, err := container.Processes() + +// get detailed cpu, memory, io, and network statistics for the container and +// its processes. +stats, err := container.Stats() + +// pause all processes inside the container. +container.Pause() + +// resume all paused processes. +container.Resume() + +// send signal to container's init process. +container.Signal(signal) + +// update container resource constraints. +container.Set(config) + +// get current status of the container. +status, err := container.Status() + +// get current container's state information. +state, err := container.State() +``` + + +#### Checkpoint & Restore + +libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers. +This lets you save the state of a process running inside a container to disk, and then restore +that state into a new process, on the same machine or on another machine. + +`criu` version 1.5.2 or higher is required to use checkpoint and restore. +If you don't already have `criu` installed, you can build it from source, following the +[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image +generated when building libcontainer with docker. + + +## Copyright and license + +Code and documentation copyright 2014 Docker, inc. +The code and documentation are released under the [Apache 2.0 license](../LICENSE). +The documentation is also released under Creative Commons Attribution 4.0 International License.
+You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/. diff --git a/libcontainer/SPEC.md b/libcontainer/SPEC.md new file mode 100644 index 0000000..07ebdc1 --- /dev/null +++ b/libcontainer/SPEC.md @@ -0,0 +1,465 @@ +## Container Specification - v1 + +This is the standard configuration for version 1 containers. It includes +namespaces, standard filesystem setup, a default Linux capability set, and +information about resource reservations. It also has information about any +populated environment settings for the processes running inside a container. + +Along with the configuration of how a container is created the standard also +discusses actions that can be performed on a container to manage and inspect +information about the processes running inside. + +The v1 profile is meant to be able to accommodate the majority of applications +with a strong security configuration. + +### System Requirements and Compatibility + +Minimum requirements: +* Kernel version - 3.10 recommended 2.6.2x minimum(with backported patches) +* Mounted cgroups with each subsystem in its own hierarchy + + +### Namespaces + +| Flag | Enabled | +| --------------- | ------- | +| CLONE_NEWPID | 1 | +| CLONE_NEWUTS | 1 | +| CLONE_NEWIPC | 1 | +| CLONE_NEWNET | 1 | +| CLONE_NEWNS | 1 | +| CLONE_NEWUSER | 1 | +| CLONE_NEWCGROUP | 1 | + +Namespaces are created for the container via the `unshare` syscall. + + +### Filesystem + +A root filesystem must be provided to a container for execution. The container +will use this root filesystem (rootfs) to jail and spawn processes inside where +the binaries and system libraries are local to that directory. Any binaries +to be executed must be contained within this rootfs. + +Mounts that happen inside the container are automatically cleaned up when the +container exits as the mount namespace is destroyed and the kernel will +unmount all the mounts that were setup within that namespace. 
+ +For a container to execute properly there are certain filesystems that +are required to be mounted within the rootfs that the runtime will set up. + +| Path | Type | Flags | Data | +| ----------- | ------ | -------------------------------------- | ---------------------------------------- | +| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | +| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | +| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 | +| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | + + +After a container's filesystems are mounted within the newly created +mount namespace `/dev` will need to be populated with a set of device nodes. +It is expected that a rootfs does not need to have any device nodes specified +for `/dev` within the rootfs as the container will set up the correct devices +that are required for executing a container's process. + +| Path | Mode | Access | +| ------------ | ---- | ---------- | +| /dev/null | 0666 | rwm | +| /dev/zero | 0666 | rwm | +| /dev/full | 0666 | rwm | +| /dev/tty | 0666 | rwm | +| /dev/random | 0666 | rwm | +| /dev/urandom | 0666 | rwm | + + +**ptmx** +`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within +the container. + +The use of a pseudo TTY is optional within a container and it should support both. +If a pseudo TTY is provided to the container `/dev/console` will need to be +set up by binding the console in `/dev/` after it has been populated and mounted +in tmpfs. + +| Source | Destination | UID GID | Mode | Type | +| --------------- | ------------ | ------- | ---- | ---- | +| *pty host path* | /dev/console | 0 0 | 0600 | bind | + + +After `/dev/null` has been set up we check for any external links between +the container's io, STDIN, STDOUT, STDERR.
If the container's io is pointing +to `/dev/null` outside the container we close and `dup2` the `/dev/null` +that is local to the container's rootfs. + + +After the container has `/proc` mounted a few standard symlinks are set up +within `/dev/` for the io. + +| Source | Destination | +| --------------- | ----------- | +| /proc/self/fd | /dev/fd | +| /proc/self/fd/0 | /dev/stdin | +| /proc/self/fd/1 | /dev/stdout | +| /proc/self/fd/2 | /dev/stderr | + +A `pivot_root` is used to change the root for the process, effectively +jailing the process inside the rootfs. + +```c +put_old = mkdir(...); +pivot_root(rootfs, put_old); +chdir("/"); +unmount(put_old, MS_DETACH); +rmdir(put_old); +``` + +For containers running with a rootfs inside `ramfs` a `MS_MOVE` combined +with a `chroot` is required as `pivot_root` is not supported in `ramfs`. + +```c +mount(rootfs, "/", NULL, MS_MOVE, NULL); +chroot("."); +chdir("/"); +``` + +The `umask` is set back to `0022` after the filesystem setup has been completed. + +### Resources + +Cgroups are used to handle resource allocation for containers. This includes +system resources like cpu, memory, and device access. + +| Subsystem | Enabled | +| ---------- | ------- | +| devices | 1 | +| memory | 1 | +| cpu | 1 | +| cpuacct | 1 | +| cpuset | 1 | +| blkio | 1 | +| perf_event | 1 | +| freezer | 1 | +| hugetlb | 1 | +| pids | 1 | + + +All cgroup subsystems are joined so that statistics can be collected from +each of the subsystems. Freezer does not expose any stats but is joined +so that containers can be paused and resumed. + +The parent process of the container's init must place the init pid inside +the correct cgroups before the initialization begins. This is done so +that no processes or threads escape the cgroups. This sync is +done via a pipe ( specified in the runtime section below ) that the container's +init process will block waiting for the parent to finish setup.
+ +### IntelRdt + +Intel platforms with new Xeon CPU support Resource Director Technology (RDT). +Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are +two sub-features of RDT. + +Cache Allocation Technology (CAT) provides a way for the software to restrict +cache allocation to a defined 'subset' of L3 cache which may be overlapping +with other 'subsets'. The different subsets are identified by class of +service (CLOS) and each CLOS has a capacity bitmask (CBM). + +Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle +over memory bandwidth for the software. A user controls the resource by +indicating the percentage of maximum memory bandwidth or memory bandwidth limit +in MBps unit if MBA Software Controller is enabled. + +It can be used to handle L3 cache and memory bandwidth resources allocation +for containers if hardware and kernel support Intel RDT CAT and MBA features. + +In Linux 4.10 kernel or newer, the interface is defined and exposed via +"resource control" filesystem, which is a "cgroup-like" interface. + +Comparing with cgroups, it has similar process management lifecycle and +interfaces in a container. But unlike cgroups' hierarchy, it has single level +filesystem layout. + +CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via +"resource control" filesystem. + +Intel RDT "resource control" filesystem hierarchy: +``` +mount -t resctrl resctrl /sys/fs/resctrl +tree /sys/fs/resctrl +/sys/fs/resctrl/ +|-- info +| |-- L3 +| | |-- cbm_mask +| | |-- min_cbm_bits +| | |-- num_closids +| |-- MB +| |-- bandwidth_gran +| |-- delay_linear +| |-- min_bandwidth +| |-- num_closids +|-- ... +|-- schemata +|-- tasks +|-- <container_id> + |-- ... + |-- schemata + |-- tasks +``` + +For runc, we can make use of `tasks` and `schemata` configuration for L3 +cache and memory bandwidth resources constraints. + +The file `tasks` has a list of tasks that belong to this group (e.g., +"<container_id>" group).
Tasks can be added to a group by writing the task ID +to the "tasks" file (which will automatically remove them from the previous +group to which they belonged). New tasks created by fork(2) and clone(2) are +added to the same group as their parent. + +The file `schemata` has a list of all the resources available to this group. +Each resource (L3 cache, memory bandwidth) has its own line and format. + +L3 cache schema: +It has allocation bitmasks/values for L3 cache on each socket, which +contains L3 cache id and capacity bitmask (CBM). +``` + Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." +``` +For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" +which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + +The valid L3 cache CBM is a *contiguous bits set* and number of bits that can +be set is less than the max bit. The maximum number of bits in the CBM varies among +supported Intel CPU models. Kernel will check if it is valid when writing. +e.g., default value 0xfffff in root indicates the max bits of CBM is 20 +bits, which maps to the entire L3 cache capacity. Some valid CBM values to +set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc. + +Memory bandwidth schema: +It has allocation values for memory bandwidth on each socket, which contains +L3 cache id and memory bandwidth. +``` + Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." +``` +For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" + +The minimum bandwidth percentage value for each CPU model is predefined and +can be looked up through "info/MB/min_bandwidth". The bandwidth granularity +that is allocated is also dependent on the CPU model and can be looked up at +"info/MB/bandwidth_gran". The available bandwidth control steps are: +min_bw + N * bw_gran. Intermediate values are rounded to the next control +step available on the hardware.
+ +If MBA Software Controller is enabled through mount option "-o mba_MBps" +mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl +We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit +instead of "percentages". The kernel underneath would use a software feedback +mechanism or a "Software Controller" which reads the actual bandwidth using +MBM counters and adjust the memory bandwidth percentages to ensure: +"actual memory bandwidth < user specified memory bandwidth". + +For example, on a two-socket machine, the schema line could be +"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 +and 7000 MBps memory bandwidth limit on socket 1. + +For more information about Intel RDT kernel interface: +https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt + +``` +An example for runc: +Consider a two-socket machine with two L3 caches where the default CBM is +0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% +with a memory bandwidth granularity of 10%. + +Tasks inside the container only have access to the "upper" 7/11 of L3 cache +on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a +maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. + +"linux": { + "intelRdt": { + "closID": "guaranteed_group", + "l3CacheSchema": "L3:0=7f0;1=1f", + "memBwSchema": "MB:0=20;1=70" + } +} +``` + +### Security + +The standard set of Linux capabilities that are set in a container +provide a good default for security and flexibility for the applications. 
+ + +| Capability | Enabled | +| -------------------- | ------- | +| CAP_NET_RAW | 1 | +| CAP_NET_BIND_SERVICE | 1 | +| CAP_AUDIT_READ | 1 | +| CAP_AUDIT_WRITE | 1 | +| CAP_DAC_OVERRIDE | 1 | +| CAP_SETFCAP | 1 | +| CAP_SETPCAP | 1 | +| CAP_SETGID | 1 | +| CAP_SETUID | 1 | +| CAP_MKNOD | 1 | +| CAP_CHOWN | 1 | +| CAP_FOWNER | 1 | +| CAP_FSETID | 1 | +| CAP_KILL | 1 | +| CAP_SYS_CHROOT | 1 | +| CAP_NET_BROADCAST | 0 | +| CAP_SYS_MODULE | 0 | +| CAP_SYS_RAWIO | 0 | +| CAP_SYS_PACCT | 0 | +| CAP_SYS_ADMIN | 0 | +| CAP_SYS_NICE | 0 | +| CAP_SYS_RESOURCE | 0 | +| CAP_SYS_TIME | 0 | +| CAP_SYS_TTY_CONFIG | 0 | +| CAP_AUDIT_CONTROL | 0 | +| CAP_MAC_OVERRIDE | 0 | +| CAP_MAC_ADMIN | 0 | +| CAP_NET_ADMIN | 0 | +| CAP_SYSLOG | 0 | +| CAP_DAC_READ_SEARCH | 0 | +| CAP_LINUX_IMMUTABLE | 0 | +| CAP_IPC_LOCK | 0 | +| CAP_IPC_OWNER | 0 | +| CAP_SYS_PTRACE | 0 | +| CAP_SYS_BOOT | 0 | +| CAP_LEASE | 0 | +| CAP_WAKE_ALARM | 0 | +| CAP_BLOCK_SUSPEND | 0 | + + +Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor) +and [selinux](http://selinuxproject.org/page/Main_Page) can be used with +the containers. A container should support setting an apparmor profile or +selinux process and mount labels if provided in the configuration. 
+ +Standard apparmor profile: +```c +#include <tunables/global> +profile <profile_name> flags=(attach_disconnected,mediate_deleted) { + #include <abstractions/base> + network, + capability, + file, + umount, + + deny @{PROC}/sys/fs/** wklx, + deny @{PROC}/sysrq-trigger rwklx, + deny @{PROC}/mem rwklx, + deny @{PROC}/kmem rwklx, + deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, + deny @{PROC}/sys/kernel/*/** wklx, + + deny mount, + + deny /sys/[^f]*/** wklx, + deny /sys/f[^s]*/** wklx, + deny /sys/fs/[^c]*/** wklx, + deny /sys/fs/c[^g]*/** wklx, + deny /sys/fs/cg[^r]*/** wklx, + deny /sys/firmware/efi/efivars/** rwklx, + deny /sys/kernel/security/** rwklx, +} +``` + +*TODO: seccomp work is being done to find a good default config* + +### Runtime and Init Process + +During container creation the parent process needs to talk to the container's init +process and have a form of synchronization. This is accomplished by creating +a pipe that is passed to the container's init. When the init process first spawns +it will block on its side of the pipe until the parent closes its side. This +allows the parent to have time to set the new process inside a cgroup hierarchy +and/or write any uid/gid mappings required for user namespaces. +The pipe is passed to the init process via FD 3. + +The application consuming libcontainer should be compiled statically. libcontainer +does not define any init process and the arguments provided are used to `exec` the +process inside the application. There should be no long running init within the +container spec. + +If a pseudo tty is provided to a container it will open and `dup2` the console +as the container's STDIN, STDOUT, STDERR as well as mounting the console +as `/dev/console`. + +An extra set of mounts is provided to a container and set up for use. A container's +rootfs can contain some non portable files inside that can cause side effects during +execution of a process. These files are usually created and populated with the container +specific information via the runtime.
+ +**Extra runtime files:** +* /etc/hosts +* /etc/resolv.conf +* /etc/hostname +* /etc/localtime + + +#### Defaults + +There are a few defaults that can be overridden by users, but in their omission +these apply to processes within a container. + +| Type | Value | +| ------------------- | ------------------------------ | +| Parent Death Signal | SIGKILL | +| UID | 0 | +| GID | 0 | +| GROUPS | 0, NULL | +| CWD | "/" | +| $HOME | Current user's home dir or "/" | +| Readonly rootfs | false | +| Pseudo TTY | false | + + +## Actions + +After a container is created there is a standard set of actions that can +be done to the container. These actions are part of the public API for +a container. + +| Action | Description | +| -------------- | ------------------------------------------------------------------ | +| Get processes | Return all the pids for processes running inside a container | +| Get Stats | Return resource statistics for the container as a whole | +| Wait | Waits on the container's init process ( pid 1 ) | +| Wait Process | Wait on any of the container's processes returning the exit status | +| Destroy | Kill the container's init process and remove any filesystem state | +| Signal | Send a signal to the container's init process | +| Signal Process | Send a signal to any of the container's processes | +| Pause | Pause all processes inside the container | +| Resume | Resume all processes inside the container if paused | +| Exec | Execute a new process inside of the container ( requires setns ) | +| Set | Setup configs of the container after it's created | + +### Execute a new process inside of a running container + +User can execute a new process inside of a running container. Any binaries to be +executed must be accessible within the container's rootfs. + +The started process will run inside the container's rootfs. Any changes +made by the process to the container's filesystem will persist after the +process finished executing. 
+ +The started process will join all the container's existing namespaces. When the +container is paused, the process will also be paused and will resume when +the container is unpaused. The started process will only run when the container's +primary process (PID 1) is running, and will not be restarted when the container +is restarted. + +#### Planned additions + +The started process will have its own cgroups nested inside the container's +cgroups. This is used for process tracking and optionally resource allocation +handling for the new process. Freezer cgroup is required, the rest of the cgroups +are optional. The process executor must place its pid inside the correct +cgroups before starting the process. This is done so that no child processes or +threads can escape the cgroups. + +When the process is stopped, the process executor will try (in a best-effort way) +to stop all its children and remove the sub-cgroups. diff --git a/libcontainer/apparmor/apparmor.go b/libcontainer/apparmor/apparmor.go new file mode 100644 index 0000000..debfc1e --- /dev/null +++ b/libcontainer/apparmor/apparmor.go @@ -0,0 +1,60 @@ +// +build apparmor,linux + +package apparmor + +import ( + "fmt" + "io/ioutil" + "os" + + "github.com/opencontainers/runc/libcontainer/utils" +) + +// IsEnabled returns true if apparmor is enabled for the host. 
+func IsEnabled() bool { + if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" { + if _, err = os.Stat("/sbin/apparmor_parser"); err == nil { + buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") + return err == nil && len(buf) > 1 && buf[0] == 'Y' + } + } + return false +} + +func setProcAttr(attr, value string) error { + // Under AppArmor you can only change your own attr, so use /proc/self/ + // instead of /proc// like libapparmor does + path := fmt.Sprintf("/proc/self/attr/%s", attr) + + f, err := os.OpenFile(path, os.O_WRONLY, 0) + if err != nil { + return err + } + defer f.Close() + + if err := utils.EnsureProcHandle(f); err != nil { + return err + } + + _, err = fmt.Fprintf(f, "%s", value) + return err +} + +// changeOnExec reimplements aa_change_onexec from libapparmor in Go +func changeOnExec(name string) error { + value := "exec " + name + if err := setProcAttr("exec", value); err != nil { + return fmt.Errorf("apparmor failed to apply profile: %s", err) + } + return nil +} + +// ApplyProfile will apply the profile with the specified name to the process after +// the next exec. 
+func ApplyProfile(name string) error { + if name == "" { + return nil + } + + return changeOnExec(name) +} diff --git a/libcontainer/apparmor/apparmor_disabled.go b/libcontainer/apparmor/apparmor_disabled.go new file mode 100644 index 0000000..d4110cf --- /dev/null +++ b/libcontainer/apparmor/apparmor_disabled.go @@ -0,0 +1,20 @@ +// +build !apparmor !linux + +package apparmor + +import ( + "errors" +) + +var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported") + +func IsEnabled() bool { + return false +} + +func ApplyProfile(name string) error { + if name != "" { + return ErrApparmorNotEnabled + } + return nil +} diff --git a/libcontainer/capabilities_linux.go b/libcontainer/capabilities_linux.go new file mode 100644 index 0000000..9daef29 --- /dev/null +++ b/libcontainer/capabilities_linux.go @@ -0,0 +1,117 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/syndtr/gocapability/capability" +) + +const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS + +var capabilityMap map[string]capability.Cap + +func init() { + capabilityMap = make(map[string]capability.Cap) + last := capability.CAP_LAST_CAP + // workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap + if last == capability.Cap(63) { + last = capability.CAP_BLOCK_SUSPEND + } + for _, cap := range capability.List() { + if cap > last { + continue + } + capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String())) + capabilityMap[capKey] = cap + } +} + +func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) { + bounding := []capability.Cap{} + for _, c := range capConfig.Bounding { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + bounding = append(bounding, v) + } + effective := []capability.Cap{} + for _, c := range capConfig.Effective { + v, ok := 
capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + effective = append(effective, v) + } + inheritable := []capability.Cap{} + for _, c := range capConfig.Inheritable { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + inheritable = append(inheritable, v) + } + permitted := []capability.Cap{} + for _, c := range capConfig.Permitted { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + permitted = append(permitted, v) + } + ambient := []capability.Cap{} + for _, c := range capConfig.Ambient { + v, ok := capabilityMap[c] + if !ok { + return nil, fmt.Errorf("unknown capability %q", c) + } + ambient = append(ambient, v) + } + pid, err := capability.NewPid2(0) + if err != nil { + return nil, err + } + err = pid.Load() + if err != nil { + return nil, err + } + return &containerCapabilities{ + bounding: bounding, + effective: effective, + inheritable: inheritable, + permitted: permitted, + ambient: ambient, + pid: pid, + }, nil +} + +type containerCapabilities struct { + pid capability.Capabilities + bounding []capability.Cap + effective []capability.Cap + inheritable []capability.Cap + permitted []capability.Cap + ambient []capability.Cap +} + +// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist. +func (c *containerCapabilities) ApplyBoundingSet() error { + c.pid.Clear(capability.BOUNDS) + c.pid.Set(capability.BOUNDS, c.bounding...) + return c.pid.Apply(capability.BOUNDS) +} + +// Apply sets all the capabilities for the current process in the config. +func (c *containerCapabilities) ApplyCaps() error { + c.pid.Clear(allCapabilityTypes) + c.pid.Set(capability.BOUNDS, c.bounding...) + c.pid.Set(capability.PERMITTED, c.permitted...) + c.pid.Set(capability.INHERITABLE, c.inheritable...) + c.pid.Set(capability.EFFECTIVE, c.effective...) + c.pid.Set(capability.AMBIENT, c.ambient...) 
+ return c.pid.Apply(allCapabilityTypes) +} diff --git a/libcontainer/cgroups/cgroups.go b/libcontainer/cgroups/cgroups.go new file mode 100644 index 0000000..c0a9659 --- /dev/null +++ b/libcontainer/cgroups/cgroups.go @@ -0,0 +1,74 @@ +// +build linux + +package cgroups + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager interface { + // Applies cgroup configuration to the process with the specified pid + Apply(pid int) error + + // Returns the PIDs inside the cgroup set + GetPids() ([]int, error) + + // Returns the PIDs inside the cgroup set & all sub-cgroups + GetAllPids() ([]int, error) + + // Returns statistics for the cgroup set + GetStats() (*Stats, error) + + // Toggles the freezer cgroup according with specified state + Freeze(state configs.FreezerState) error + + // Destroys the cgroup set + Destroy() error + + // The option func SystemdCgroups() and Cgroupfs() require following attributes: + // Paths map[string]string + // Cgroups *configs.Cgroup + // Paths maps cgroup subsystem to path at which it is mounted. + // Cgroups specifies specific cgroup settings for the various subsystems + + // Returns cgroup paths to save in a state file and to be able to + // restore the object later. + GetPaths() map[string]string + + // GetUnifiedPath returns the unified path when running in unified mode. + // The value corresponds to the all values of GetPaths() map. + // + // GetUnifiedPath returns error when running in hybrid mode as well as + // in legacy mode. + GetUnifiedPath() (string, error) + + // Sets the cgroup as configured. + Set(container *configs.Config) error + + // Gets the cgroup as configured. 
// NotFoundError is returned when no mountpoint could be found for a cgroup
// subsystem.
type NotFoundError struct {
	// Subsystem is the name of the subsystem whose mountpoint is missing.
	Subsystem string
}

// Error implements the error interface.
func (e *NotFoundError) Error() string {
	return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}

// NewNotFoundError builds a *NotFoundError for the subsystem sub.
func NewNotFoundError(sub string) error {
	return &NotFoundError{Subsystem: sub}
}

// IsNotFound reports whether err is a *NotFoundError. A nil error is never
// "not found".
func IsNotFound(err error) bool {
	switch err.(type) {
	case *NotFoundError:
		return true
	default:
		return false
	}
}
"github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +const ( + // license string format is same as kernel MODULE_LICENSE macro + license = "Apache" +) + +// DeviceFilter returns eBPF device filter program and its license string +func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) { + p := &program{} + p.init() + for i := len(devices) - 1; i >= 0; i-- { + if err := p.appendDevice(devices[i]); err != nil { + return nil, "", err + } + } + insts, err := p.finalize() + return insts, license, err +} + +type program struct { + insts asm.Instructions + hasWildCard bool + blockID int +} + +func (p *program) init() { + // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 + /* + u32 access_type + u32 major + u32 minor + */ + // R2 <- type (lower 16 bit of u32 access_type at R1[0]) + p.insts = append(p.insts, + asm.LoadMem(asm.R2, asm.R1, 0, asm.Half)) + + // R3 <- access (upper 16 bit of u32 access_type at R1[0]) + p.insts = append(p.insts, + asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), + // RSh: bitwise shift right + asm.RSh.Imm32(asm.R3, 16)) + + // R4 <- major (u32 major at R1[4]) + p.insts = append(p.insts, + asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) + + // R5 <- minor (u32 minor at R1[8]) + p.insts = append(p.insts, + asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) +} + +// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element. 
+func (p *program) appendDevice(dev *configs.Device) error { + if p.blockID < 0 { + return errors.New("the program is finalized") + } + if p.hasWildCard { + // All entries after wildcard entry are ignored + return nil + } + + bpfType := int32(-1) + hasType := true + switch dev.Type { + case 'c': + bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) + case 'b': + bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) + case 'a': + hasType = false + default: + // if not specified in OCI json, typ is set to DeviceTypeAll + return errors.Errorf("invalid DeviceType %q", string(dev.Type)) + } + if dev.Major > math.MaxUint32 { + return errors.Errorf("invalid major %d", dev.Major) + } + if dev.Minor > math.MaxUint32 { + return errors.Errorf("invalid minor %d", dev.Major) + } + hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1 + hasMinor := dev.Minor >= 0 + bpfAccess := int32(0) + for _, r := range dev.Permissions { + switch r { + case 'r': + bpfAccess |= unix.BPF_DEVCG_ACC_READ + case 'w': + bpfAccess |= unix.BPF_DEVCG_ACC_WRITE + case 'm': + bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD + default: + return errors.Errorf("unknown device access %v", r) + } + } + // If the access is rwm, skip the check. 
+ hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) + + blockSym := fmt.Sprintf("block-%d", p.blockID) + nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1) + prevBlockLastIdx := len(p.insts) - 1 + if hasType { + p.insts = append(p.insts, + // if (R2 != bpfType) goto next + asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), + ) + } + if hasAccess { + p.insts = append(p.insts, + // if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next + asm.Mov.Reg32(asm.R1, asm.R3), + asm.And.Imm32(asm.R1, bpfAccess), + asm.JEq.Imm(asm.R1, 0, nextBlockSym), + ) + } + if hasMajor { + p.insts = append(p.insts, + // if (R4 != major) goto next + asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym), + ) + } + if hasMinor { + p.insts = append(p.insts, + // if (R5 != minor) goto next + asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym), + ) + } + if !hasType && !hasAccess && !hasMajor && !hasMinor { + p.hasWildCard = true + } + p.insts = append(p.insts, acceptBlock(dev.Allow)...) 
+ // set blockSym to the first instruction we added in this iteration + p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) + p.blockID++ + return nil +} + +func (p *program) finalize() (asm.Instructions, error) { + if p.hasWildCard { + // acceptBlock with asm.Return() is already inserted + return p.insts, nil + } + blockSym := fmt.Sprintf("block-%d", p.blockID) + p.insts = append(p.insts, + // R0 <- 0 + asm.Mov.Imm32(asm.R0, 0).Sym(blockSym), + asm.Return(), + ) + p.blockID = -1 + return p.insts, nil +} + +func acceptBlock(accept bool) asm.Instructions { + v := int32(0) + if accept { + v = 1 + } + return []asm.Instruction{ + // R0 <- v + asm.Mov.Imm32(asm.R0, v), + asm.Return(), + } +} diff --git a/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go b/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go new file mode 100644 index 0000000..59ff4b4 --- /dev/null +++ b/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go @@ -0,0 +1,258 @@ +package devicefilter + +import ( + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/specconv" +) + +func hash(s, comm string) string { + var res []string + for _, l := range strings.Split(s, "\n") { + trimmed := strings.TrimSpace(l) + if trimmed == "" || strings.HasPrefix(trimmed, comm) { + continue + } + res = append(res, trimmed) + } + return strings.Join(res, "\n") +} + +func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr string) { + insts, _, err := DeviceFilter(devices) + if err != nil { + t.Fatalf("%s: %v (devices: %+v)", t.Name(), err, devices) + } + s := insts.String() + t.Logf("%s: devices: %+v\n%s", t.Name(), devices, s) + if expectedStr != "" { + hashed := hash(s, "//") + expectedHashed := hash(expectedStr, "//") + if expectedHashed != hashed { + t.Fatalf("expected:\n%q\ngot\n%q", expectedHashed, hashed) + } + } +} + +func TestDeviceFilter_Nil(t *testing.T) { + expected := ` +// 
load parameters into registers + 0: LdXMemH dst: r2 src: r1 off: 0 imm: 0 + 1: LdXMemW dst: r3 src: r1 off: 0 imm: 0 + 2: RSh32Imm dst: r3 imm: 16 + 3: LdXMemW dst: r4 src: r1 off: 4 imm: 0 + 4: LdXMemW dst: r5 src: r1 off: 8 imm: 0 +block-0: +// return 0 (reject) + 5: Mov32Imm dst: r0 imm: 0 + 6: Exit + ` + testDeviceFilter(t, nil, expected) +} + +func TestDeviceFilter_BuiltInAllowList(t *testing.T) { + expected := ` +// load parameters into registers + 0: LdXMemH dst: r2 src: r1 off: 0 imm: 0 + 1: LdXMemW dst: r3 src: r1 off: 0 imm: 0 + 2: RSh32Imm dst: r3 imm: 16 + 3: LdXMemW dst: r4 src: r1 off: 4 imm: 0 + 4: LdXMemW dst: r5 src: r1 off: 8 imm: 0 +block-0: +// tuntap (c, 10, 200, rwm, allow) + 5: JNEImm dst: r2 off: -1 imm: 2 + 6: JNEImm dst: r4 off: -1 imm: 10 + 7: JNEImm dst: r5 off: -1 imm: 200 + 8: Mov32Imm dst: r0 imm: 1 + 9: Exit +block-1: + 10: JNEImm dst: r2 off: -1 imm: 2 + 11: JNEImm dst: r4 off: -1 imm: 5 + 12: JNEImm dst: r5 off: -1 imm: 2 + 13: Mov32Imm dst: r0 imm: 1 + 14: Exit +block-2: +// /dev/pts (c, 136, wildcard, rwm, true) + 15: JNEImm dst: r2 off: -1 imm: 2 + 16: JNEImm dst: r4 off: -1 imm: 136 + 17: Mov32Imm dst: r0 imm: 1 + 18: Exit +block-3: + 19: JNEImm dst: r2 off: -1 imm: 2 + 20: JNEImm dst: r4 off: -1 imm: 5 + 21: JNEImm dst: r5 off: -1 imm: 1 + 22: Mov32Imm dst: r0 imm: 1 + 23: Exit +block-4: + 24: JNEImm dst: r2 off: -1 imm: 2 + 25: JNEImm dst: r4 off: -1 imm: 1 + 26: JNEImm dst: r5 off: -1 imm: 9 + 27: Mov32Imm dst: r0 imm: 1 + 28: Exit +block-5: + 29: JNEImm dst: r2 off: -1 imm: 2 + 30: JNEImm dst: r4 off: -1 imm: 1 + 31: JNEImm dst: r5 off: -1 imm: 5 + 32: Mov32Imm dst: r0 imm: 1 + 33: Exit +block-6: + 34: JNEImm dst: r2 off: -1 imm: 2 + 35: JNEImm dst: r4 off: -1 imm: 5 + 36: JNEImm dst: r5 off: -1 imm: 0 + 37: Mov32Imm dst: r0 imm: 1 + 38: Exit +block-7: + 39: JNEImm dst: r2 off: -1 imm: 2 + 40: JNEImm dst: r4 off: -1 imm: 1 + 41: JNEImm dst: r5 off: -1 imm: 7 + 42: Mov32Imm dst: r0 imm: 1 + 43: Exit +block-8: + 44: JNEImm 
dst: r2 off: -1 imm: 2 + 45: JNEImm dst: r4 off: -1 imm: 1 + 46: JNEImm dst: r5 off: -1 imm: 8 + 47: Mov32Imm dst: r0 imm: 1 + 48: Exit +block-9: + 49: JNEImm dst: r2 off: -1 imm: 2 + 50: JNEImm dst: r4 off: -1 imm: 1 + 51: JNEImm dst: r5 off: -1 imm: 3 + 52: Mov32Imm dst: r0 imm: 1 + 53: Exit +block-10: +// (b, wildcard, wildcard, m, true) + 54: JNEImm dst: r2 off: -1 imm: 1 + 55: Mov32Reg dst: r1 src: r3 + 56: And32Imm dst: r1 imm: 1 + 57: JEqImm dst: r1 off: -1 imm: 0 + 58: Mov32Imm dst: r0 imm: 1 + 59: Exit +block-11: +// (c, wildcard, wildcard, m, true) + 60: JNEImm dst: r2 off: -1 imm: 2 + 61: Mov32Reg dst: r1 src: r3 + 62: And32Imm dst: r1 imm: 1 + 63: JEqImm dst: r1 off: -1 imm: 0 + 64: Mov32Imm dst: r0 imm: 1 + 65: Exit +block-12: + 66: Mov32Imm dst: r0 imm: 0 + 67: Exit +` + testDeviceFilter(t, specconv.AllowedDevices, expected) +} + +func TestDeviceFilter_Privileged(t *testing.T) { + devices := []*configs.Device{ + { + Type: 'a', + Major: -1, + Minor: -1, + Permissions: "rwm", + Allow: true, + }, + } + expected := + ` +// load parameters into registers + 0: LdXMemH dst: r2 src: r1 off: 0 imm: 0 + 1: LdXMemW dst: r3 src: r1 off: 0 imm: 0 + 2: RSh32Imm dst: r3 imm: 16 + 3: LdXMemW dst: r4 src: r1 off: 4 imm: 0 + 4: LdXMemW dst: r5 src: r1 off: 8 imm: 0 +block-0: +// return 1 (accept) + 5: Mov32Imm dst: r0 imm: 1 + 6: Exit + ` + testDeviceFilter(t, devices, expected) +} + +func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) { + devices := []*configs.Device{ + { + Type: 'a', + Major: -1, + Minor: -1, + Permissions: "rwm", + Allow: true, + }, + { + Type: 'b', + Major: 8, + Minor: 0, + Permissions: "rwm", + Allow: false, + }, + } + expected := ` +// load parameters into registers + 0: LdXMemH dst: r2 src: r1 off: 0 imm: 0 + 1: LdXMemW dst: r3 src: r1 off: 0 imm: 0 + 2: RSh32Imm dst: r3 imm: 16 + 3: LdXMemW dst: r4 src: r1 off: 4 imm: 0 + 4: LdXMemW dst: r5 src: r1 off: 8 imm: 0 +block-0: +// return 0 (reject) if type==b && major == 8 && minor == 0 
+ 5: JNEImm dst: r2 off: -1 imm: 1 + 6: JNEImm dst: r4 off: -1 imm: 8 + 7: JNEImm dst: r5 off: -1 imm: 0 + 8: Mov32Imm dst: r0 imm: 0 + 9: Exit +block-1: +// return 1 (accept) + 10: Mov32Imm dst: r0 imm: 1 + 11: Exit +` + testDeviceFilter(t, devices, expected) +} + +func TestDeviceFilter_Weird(t *testing.T) { + devices := []*configs.Device{ + { + Type: 'b', + Major: 8, + Minor: 1, + Permissions: "rwm", + Allow: false, + }, + { + Type: 'a', + Major: -1, + Minor: -1, + Permissions: "rwm", + Allow: true, + }, + { + Type: 'b', + Major: 8, + Minor: 2, + Permissions: "rwm", + Allow: false, + }, + } + // 8/1 is allowed, 8/2 is not allowed. + // This conforms to runc v1.0.0-rc.9 (cgroup1) behavior. + expected := ` +// load parameters into registers + 0: LdXMemH dst: r2 src: r1 off: 0 imm: 0 + 1: LdXMemW dst: r3 src: r1 off: 0 imm: 0 + 2: RSh32Imm dst: r3 imm: 16 + 3: LdXMemW dst: r4 src: r1 off: 4 imm: 0 + 4: LdXMemW dst: r5 src: r1 off: 8 imm: 0 +block-0: +// return 0 (reject) if type==b && major == 8 && minor == 2 + 5: JNEImm dst: r2 off: -1 imm: 1 + 6: JNEImm dst: r4 off: -1 imm: 8 + 7: JNEImm dst: r5 off: -1 imm: 2 + 8: Mov32Imm dst: r0 imm: 0 + 9: Exit +block-1: +// return 1 (accept) + 10: Mov32Imm dst: r0 imm: 1 + 11: Exit +` + testDeviceFilter(t, devices, expected) +} diff --git a/libcontainer/cgroups/ebpf/ebpf.go b/libcontainer/cgroups/ebpf/ebpf.go new file mode 100644 index 0000000..4795e0a --- /dev/null +++ b/libcontainer/cgroups/ebpf/ebpf.go @@ -0,0 +1,45 @@ +package ebpf + +import ( + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/ directory. +// +// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 . 
+// +// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92 +func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) { + nilCloser := func() error { + return nil + } + // Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167). + // This limit is not inherited into the container. + memlockLimit := &unix.Rlimit{ + Cur: unix.RLIM_INFINITY, + Max: unix.RLIM_INFINITY, + } + _ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit) + spec := &ebpf.ProgramSpec{ + Type: ebpf.CGroupDevice, + Instructions: insts, + License: license, + } + prog, err := ebpf.NewProgram(spec) + if err != nil { + return nilCloser, err + } + if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil { + return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)") + } + closer := func() error { + if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil { + return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)") + } + return nil + } + return closer, nil +} diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go new file mode 100644 index 0000000..ec148b4 --- /dev/null +++ b/libcontainer/cgroups/fs/apply_raw.go @@ -0,0 +1,411 @@ +// +build linux + +package fs + +import ( + "fmt" + "io" + "os" + "path/filepath" + "sync" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" + "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +var ( + subsystemsLegacy = subsystemSet{ + &CpusetGroup{}, + &DevicesGroup{}, + &MemoryGroup{}, + &CpuGroup{}, + &CpuacctGroup{}, + &PidsGroup{}, + &BlkioGroup{}, + &HugetlbGroup{}, + &NetClsGroup{}, + &NetPrioGroup{}, + &PerfEventGroup{}, + &FreezerGroup{}, + 
&NameGroup{GroupName: "name=systemd", Join: true}, + } + HugePageSizes, _ = cgroups.GetHugePageSize() +) + +var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist") + +type subsystemSet []subsystem + +func (s subsystemSet) Get(name string) (subsystem, error) { + for _, ss := range s { + if ss.Name() == name { + return ss, nil + } + } + return nil, errSubsystemDoesNotExist +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'cgroupData'. + Remove(*cgroupData) error + // Creates and joins the cgroup represented by 'cgroupData'. + Apply(*cgroupData) error + // Set the cgroup represented by cgroup. + Set(path string, cgroup *configs.Cgroup) error +} + +type Manager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Rootless bool // ignore permission-related errors + Paths map[string]string +} + +// The absolute path to the root of the cgroup hierarchies. +var cgroupRootLock sync.Mutex +var cgroupRoot string + +// Gets the cgroupRoot. +func getCgroupRoot() (string, error) { + cgroupRootLock.Lock() + defer cgroupRootLock.Unlock() + + if cgroupRoot != "" { + return cgroupRoot, nil + } + + root, err := cgroups.FindCgroupMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + cgroupRoot = root + return cgroupRoot, nil +} + +type cgroupData struct { + root string + innerPath string + config *configs.Cgroup + pid int +} + +// isIgnorableError returns whether err is a permission error (in the loose +// sense of the word). This includes EROFS (which for an unprivileged user is +// basically a permission error) and EACCES (for similar reasons) as well as +// the normal EPERM. +func isIgnorableError(rootless bool, err error) bool { + // We do not ignore errors if we are root. 
+ if !rootless { + return false + } + // Is it an ordinary EPERM? + if os.IsPermission(errors.Cause(err)) { + return true + } + + // Try to handle other errnos. + var errno error + switch err := errors.Cause(err).(type) { + case *os.PathError: + errno = err.Err + case *os.LinkError: + errno = err.Err + case *os.SyscallError: + errno = err.Err + } + return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES +} + +func (m *Manager) getSubsystems() subsystemSet { + return subsystemsLegacy +} + +func (m *Manager) Apply(pid int) (err error) { + if m.Cgroups == nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + + var c = m.Cgroups + + d, err := getCgroupData(m.Cgroups, pid) + if err != nil { + return err + } + + m.Paths = make(map[string]string) + if c.Paths != nil { + for name, path := range c.Paths { + _, err := d.path(name) + if err != nil { + if cgroups.IsNotFound(err) { + continue + } + return err + } + m.Paths[name] = path + } + return cgroups.EnterPid(m.Paths, pid) + } + + for _, sys := range m.getSubsystems() { + // TODO: Apply should, ideally, be reentrant or be broken up into a separate + // create and join phase so that the cgroup hierarchy for a container can be + // created then join consists of writing the process pids to cgroup.procs + p, err := d.path(sys.Name()) + if err != nil { + // The non-presence of the devices subsystem is + // considered fatal for security reasons. + if cgroups.IsNotFound(err) && sys.Name() != "devices" { + continue + } + return err + } + m.Paths[sys.Name()] = p + + if err := sys.Apply(d); err != nil { + // In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't + // been set, we don't bail on error in case of permission problems. + // Cases where limits have been set (and we couldn't create our own + // cgroup) are handled by Set. 
+ if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" { + delete(m.Paths, sys.Name()) + continue + } + return err + } + + } + return nil +} + +func (m *Manager) Destroy() error { + if m.Cgroups == nil || m.Cgroups.Paths != nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func (m *Manager) GetUnifiedPath() (string, error) { + return "", errors.New("unified path is only supported when running in unified mode") +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, err := m.getSubsystems().Get(name) + if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + return stats, nil +} + +func (m *Manager) Set(container *configs.Config) error { + if container.Cgroups == nil { + return nil + } + + // If Paths are set, then we are just joining cgroups paths + // and there is no need to set any values. + if m.Cgroups != nil && m.Cgroups.Paths != nil { + return nil + } + + paths := m.GetPaths() + for _, sys := range m.getSubsystems() { + path := paths[sys.Name()] + if err := sys.Set(path, container.Cgroups); err != nil { + if m.Rootless && sys.Name() == "devices" { + continue + } + // When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work. + // However, errors from other subsystems are not ignored. + // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" + if path == "" { + // We never created a path for this cgroup, so we cannot set + // limits for it (though we have already tried at this point). 
+ return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name()) + } + return err + } + } + + if m.Paths["cpu"] != "" { + if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { + return err + } + } + return nil +} + +// Freeze toggles the container's freezer cgroup depending on the state +// provided +func (m *Manager) Freeze(state configs.FreezerState) error { + if m.Cgroups == nil { + return errors.New("cannot toggle freezer: cgroups not configured for container") + } + + paths := m.GetPaths() + dir := paths["freezer"] + prevState := m.Cgroups.Resources.Freezer + m.Cgroups.Resources.Freezer = state + freezer, err := m.getSubsystems().Get("freezer") + if err != nil { + return err + } + err = freezer.Set(dir, m.Cgroups) + if err != nil { + m.Cgroups.Resources.Freezer = prevState + return err + } + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + paths := m.GetPaths() + return cgroups.GetPids(paths["devices"]) +} + +func (m *Manager) GetAllPids() ([]int, error) { + paths := m.GetPaths() + return cgroups.GetAllPids(paths["devices"]) +} + +func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { + root, err := getCgroupRoot() + if err != nil { + return nil, err + } + + if (c.Name != "" || c.Parent != "") && c.Path != "" { + return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used") + } + + // XXX: Do not remove this code. Path safety is important! 
-- cyphar + cgPath := libcontainerUtils.CleanPath(c.Path) + cgParent := libcontainerUtils.CleanPath(c.Parent) + cgName := libcontainerUtils.CleanPath(c.Name) + + innerPath := cgPath + if innerPath == "" { + innerPath = filepath.Join(cgParent, cgName) + } + + return &cgroupData{ + root: root, + innerPath: innerPath, + config: c, + pid: pid, + }, nil +} + +func (raw *cgroupData) path(subsystem string) (string, error) { + mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem) + // If we didn't mount the subsystem, there is no point we make the path. + if err != nil { + return "", err + } + + // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. + if filepath.IsAbs(raw.innerPath) { + // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. + return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil + } + + // Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating + // process could in container and shared pid namespace with host, and + // /proc/1/cgroup could point to whole other world of cgroups. 
// removePath passes a non-nil err straight through; otherwise it recursively
// removes p, treating an empty p as nothing to do.
func removePath(p string, err error) error {
	switch {
	case err != nil:
		return err
	case p == "":
		return nil
	}
	return os.RemoveAll(p)
}

// CheckCpushares compares the requested cpu shares c with the value read back
// from the cpu.shares file under path.
//
// A request of 0 is not checked. Otherwise an error is returned when the file
// cannot be opened or parsed, or when the value in the file differs from c:
// a smaller value is reported as the maximum allowed, a larger one as the
// minimum allowed. An empty file reads as 0.
func CheckCpushares(path string, c uint64) error {
	if c == 0 {
		return nil
	}

	f, err := os.Open(filepath.Join(path, "cpu.shares"))
	if err != nil {
		return err
	}
	defer f.Close()

	var actual uint64
	if _, err := fmt.Fscanf(f, "%d", &actual); err != nil && err != io.EOF {
		return err
	}

	switch {
	case c > actual:
		return fmt.Errorf("The maximum allowed cpu-shares is %d", actual)
	case c < actual:
		return fmt.Errorf("The minimum allowed cpu-shares is %d", actual)
	}
	return nil
}
getCgroupData(config, 0) + if err != nil { + t.Errorf("couldn't get cgroup data: %v", err) + } + + // Make sure the final innerPath doesn't go outside the cgroup mountpoint. + if strings.HasPrefix(data.innerPath, "..") { + t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") + } + + // Double-check, using an actual cgroup. + deviceRoot := filepath.Join(root, "devices") + devicePath, err := data.path("devices") + if err != nil { + t.Errorf("couldn't get cgroup path: %v", err) + } + if !strings.HasPrefix(devicePath, deviceRoot) { + t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") + } +} + +func TestInvalidAbsoluteCgroupPath(t *testing.T) { + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + root, err := getCgroupRoot() + if err != nil { + t.Errorf("couldn't get cgroup root: %v", err) + } + + config := &configs.Cgroup{ + Path: "/../../../../../../../../../../some/path", + } + + data, err := getCgroupData(config, 0) + if err != nil { + t.Errorf("couldn't get cgroup data: %v", err) + } + + // Make sure the final innerPath doesn't go outside the cgroup mountpoint. + if strings.HasPrefix(data.innerPath, "..") { + t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") + } + + // Double-check, using an actual cgroup. + deviceRoot := filepath.Join(root, "devices") + devicePath, err := data.path("devices") + if err != nil { + t.Errorf("couldn't get cgroup path: %v", err) + } + if !strings.HasPrefix(devicePath, deviceRoot) { + t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") + } +} + +// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. 
+func TestInvalidCgroupParent(t *testing.T) { + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + root, err := getCgroupRoot() + if err != nil { + t.Errorf("couldn't get cgroup root: %v", err) + } + + config := &configs.Cgroup{ + Parent: "../../../../../../../../../../some/path", + Name: "name", + } + + data, err := getCgroupData(config, 0) + if err != nil { + t.Errorf("couldn't get cgroup data: %v", err) + } + + // Make sure the final innerPath doesn't go outside the cgroup mountpoint. + if strings.HasPrefix(data.innerPath, "..") { + t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") + } + + // Double-check, using an actual cgroup. + deviceRoot := filepath.Join(root, "devices") + devicePath, err := data.path("devices") + if err != nil { + t.Errorf("couldn't get cgroup path: %v", err) + } + if !strings.HasPrefix(devicePath, deviceRoot) { + t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") + } +} + +// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. +func TestInvalidAbsoluteCgroupParent(t *testing.T) { + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + root, err := getCgroupRoot() + if err != nil { + t.Errorf("couldn't get cgroup root: %v", err) + } + + config := &configs.Cgroup{ + Parent: "/../../../../../../../../../../some/path", + Name: "name", + } + + data, err := getCgroupData(config, 0) + if err != nil { + t.Errorf("couldn't get cgroup data: %v", err) + } + + // Make sure the final innerPath doesn't go outside the cgroup mountpoint. + if strings.HasPrefix(data.innerPath, "..") { + t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") + } + + // Double-check, using an actual cgroup. 
+ deviceRoot := filepath.Join(root, "devices") + devicePath, err := data.path("devices") + if err != nil { + t.Errorf("couldn't get cgroup path: %v", err) + } + if !strings.HasPrefix(devicePath, deviceRoot) { + t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") + } +} + +// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. +func TestInvalidCgroupName(t *testing.T) { + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + root, err := getCgroupRoot() + if err != nil { + t.Errorf("couldn't get cgroup root: %v", err) + } + + config := &configs.Cgroup{ + Parent: "parent", + Name: "../../../../../../../../../../some/path", + } + + data, err := getCgroupData(config, 0) + if err != nil { + t.Errorf("couldn't get cgroup data: %v", err) + } + + // Make sure the final innerPath doesn't go outside the cgroup mountpoint. + if strings.HasPrefix(data.innerPath, "..") { + t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") + } + + // Double-check, using an actual cgroup. + deviceRoot := filepath.Join(root, "devices") + devicePath, err := data.path("devices") + if err != nil { + t.Errorf("couldn't get cgroup path: %v", err) + } + if !strings.HasPrefix(devicePath, deviceRoot) { + t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") + } + +} + +// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. +func TestInvalidAbsoluteCgroupName(t *testing.T) { + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + root, err := getCgroupRoot() + if err != nil { + t.Errorf("couldn't get cgroup root: %v", err) + } + + config := &configs.Cgroup{ + Parent: "parent", + Name: "/../../../../../../../../../../some/path", + } + + data, err := getCgroupData(config, 0) + if err != nil { + t.Errorf("couldn't get cgroup data: %v", err) + } + + // Make sure the final innerPath doesn't go outside the cgroup mountpoint. 
+ if strings.HasPrefix(data.innerPath, "..") { + t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") + } + + // Double-check, using an actual cgroup. + deviceRoot := filepath.Join(root, "devices") + devicePath, err := data.path("devices") + if err != nil { + t.Errorf("couldn't get cgroup path: %v", err) + } + if !strings.HasPrefix(devicePath, deviceRoot) { + t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") + } +} + +// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. +func TestInvalidCgroupNameAndParent(t *testing.T) { + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + root, err := getCgroupRoot() + if err != nil { + t.Errorf("couldn't get cgroup root: %v", err) + } + + config := &configs.Cgroup{ + Parent: "../../../../../../../../../../some/path", + Name: "../../../../../../../../../../some/path", + } + + data, err := getCgroupData(config, 0) + if err != nil { + t.Errorf("couldn't get cgroup data: %v", err) + } + + // Make sure the final innerPath doesn't go outside the cgroup mountpoint. + if strings.HasPrefix(data.innerPath, "..") { + t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") + } + + // Double-check, using an actual cgroup. + deviceRoot := filepath.Join(root, "devices") + devicePath, err := data.path("devices") + if err != nil { + t.Errorf("couldn't get cgroup path: %v", err) + } + if !strings.HasPrefix(devicePath, deviceRoot) { + t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") + } +} + +// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent. 
+func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) { + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + root, err := getCgroupRoot() + if err != nil { + t.Errorf("couldn't get cgroup root: %v", err) + } + + config := &configs.Cgroup{ + Parent: "/../../../../../../../../../../some/path", + Name: "/../../../../../../../../../../some/path", + } + + data, err := getCgroupData(config, 0) + if err != nil { + t.Errorf("couldn't get cgroup data: %v", err) + } + + // Make sure the final innerPath doesn't go outside the cgroup mountpoint. + if strings.HasPrefix(data.innerPath, "..") { + t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!") + } + + // Double-check, using an actual cgroup. + deviceRoot := filepath.Join(root, "devices") + devicePath, err := data.path("devices") + if err != nil { + t.Errorf("couldn't get cgroup path: %v", err) + } + if !strings.HasPrefix(devicePath, deviceRoot) { + t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!") + } +} diff --git a/libcontainer/cgroups/fs/blkio.go b/libcontainer/cgroups/fs/blkio.go new file mode 100644 index 0000000..52c118d --- /dev/null +++ b/libcontainer/cgroups/fs/blkio.go @@ -0,0 +1,238 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type BlkioGroup struct { +} + +func (s *BlkioGroup) Name() string { + return "blkio" +} + +func (s *BlkioGroup) Apply(d *cgroupData) error { + _, err := d.join("blkio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.BlkioWeight != 0 { + if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err 
!= nil { + return err + } + } + + if cgroup.Resources.BlkioLeafWeight != 0 { + if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil { + return err + } + } + for _, wd := range cgroup.Resources.BlkioWeightDevice { + if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil { + return err + } + if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice { + if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice { + if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice { + if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice { + if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { + return err + } + } + + return nil +} + +func (s *BlkioGroup) Remove(d *cgroupData) error { + return removePath(d.path("blkio")) +} + +/* +examples: + + blkio.sectors + 8:0 6792 + + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 + + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 + + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ + +func splitBlkioStatLine(r rune) bool { + return r == ' ' || r == ':' +} + +func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) { + var blkioStats []cgroups.BlkioStatEntry + f, err := 
os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return blkioStats, nil + } + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + // format: dev type amount + fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) + if len(fields) < 3 { + if len(fields) == 2 && fields[0] == "Total" { + // skip total line + continue + } else { + return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) + } + } + + v, err := strconv.ParseUint(fields[0], 10, 64) + if err != nil { + return nil, err + } + major := v + + v, err = strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return nil, err + } + minor := v + + op := "" + valueField := 2 + if len(fields) == 4 { + op = fields[2] + valueField = 3 + } + v, err = strconv.ParseUint(fields[valueField], 10, 64) + if err != nil { + return nil, err + } + blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) + } + + return blkioStats, nil +} + +func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { + // Try to read CFQ stats available on all CFQ enabled kernels first + if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil { + return getCFQStats(path, stats) + } + return getStats(path, stats) // Use generic stats as fallback +} + +func getCFQStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil { + return err + } + stats.BlkioStats.SectorsRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil { + return err + } + 
stats.BlkioStats.IoServicedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil { + return err + } + stats.BlkioStats.IoQueuedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoWaitTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil { + return err + } + stats.BlkioStats.IoMergedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoTimeRecursive = blkioStats + + return nil +} + +func getStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil { + return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + return nil +} diff --git a/libcontainer/cgroups/fs/blkio_test.go b/libcontainer/cgroups/fs/blkio_test.go new file mode 100644 index 0000000..5ba60fa --- /dev/null +++ b/libcontainer/cgroups/fs/blkio_test.go @@ -0,0 +1,637 @@ +// +build linux + +package fs + +import ( + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +const ( + sectorsRecursiveContents = `8:0 1024` + serviceBytesRecursiveContents = `8:0 Read 100 +8:0 Write 200 +8:0 Sync 300 
+8:0 Async 500 +8:0 Total 500 +Total 500` + servicedRecursiveContents = `8:0 Read 10 +8:0 Write 40 +8:0 Sync 20 +8:0 Async 30 +8:0 Total 50 +Total 50` + queuedRecursiveContents = `8:0 Read 1 +8:0 Write 4 +8:0 Sync 2 +8:0 Async 3 +8:0 Total 5 +Total 5` + serviceTimeRecursiveContents = `8:0 Read 173959 +8:0 Write 0 +8:0 Sync 0 +8:0 Async 173959 +8:0 Total 17395 +Total 17395` + waitTimeRecursiveContents = `8:0 Read 15571 +8:0 Write 0 +8:0 Sync 0 +8:0 Async 15571 +8:0 Total 15571` + mergedRecursiveContents = `8:0 Read 5 +8:0 Write 10 +8:0 Sync 0 +8:0 Async 0 +8:0 Total 15 +Total 15` + timeRecursiveContents = `8:0 8` + throttleServiceBytes = `8:0 Read 11030528 +8:0 Write 23 +8:0 Sync 42 +8:0 Async 11030528 +8:0 Total 11030528 +252:0 Read 11030528 +252:0 Write 23 +252:0 Sync 42 +252:0 Async 11030528 +252:0 Total 11030528 +Total 22061056` + throttleServiced = `8:0 Read 164 +8:0 Write 23 +8:0 Sync 42 +8:0 Async 164 +8:0 Total 164 +252:0 Read 164 +252:0 Write 23 +252:0 Sync 42 +252:0 Async 164 +252:0 Total 164 +Total 328` +) + +func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) { + *blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op}) +} + +func TestBlkioSetWeight(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + weightBefore = 100 + weightAfter = 200 + ) + + helper.writeFileContents(map[string]string{ + "blkio.weight": strconv.Itoa(weightBefore), + }) + + helper.CgroupData.config.Resources.BlkioWeight = weightAfter + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "blkio.weight") + if err != nil { + t.Fatalf("Failed to parse blkio.weight - %s", err) + } + + if value != weightAfter { + t.Fatal("Got the wrong value, set blkio.weight failed.") + } +} + +func 
TestBlkioSetWeightDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + weightDeviceBefore = "8:0 400" + ) + + wd := configs.NewWeightDevice(8, 0, 500, 0) + weightDeviceAfter := wd.WeightString() + + helper.writeFileContents(map[string]string{ + "blkio.weight_device": weightDeviceBefore, + }) + + helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd} + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.weight_device") + if err != nil { + t.Fatalf("Failed to parse blkio.weight_device - %s", err) + } + + if value != weightDeviceAfter { + t.Fatal("Got the wrong value, set blkio.weight_device failed.") + } +} + +// regression #274 +func TestBlkioSetMultipleWeightDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + weightDeviceBefore = "8:0 400" + ) + + wd1 := configs.NewWeightDevice(8, 0, 500, 0) + wd2 := configs.NewWeightDevice(8, 16, 500, 0) + // we cannot actually set and check both because normal ioutil.WriteFile + // when writing to cgroup file will overwrite the whole file content instead + // of updating it as the kernel is doing. Just check the second device + // is present will suffice for the test to ensure multiple writes are done. 
+ weightDeviceAfter := wd2.WeightString() + + helper.writeFileContents(map[string]string{ + "blkio.weight_device": weightDeviceBefore, + }) + + helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd1, wd2} + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.weight_device") + if err != nil { + t.Fatalf("Failed to parse blkio.weight_device - %s", err) + } + + if value != weightDeviceAfter { + t.Fatal("Got the wrong value, set blkio.weight_device failed.") + } +} + +func TestBlkioStats(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + + // Verify expected stats. 
+ expectedStats := cgroups.BlkioStats{} + appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "") + + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total") + + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total") + + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read") + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write") + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync") + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async") + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total") + + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read") + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write") + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync") + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async") + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total") + + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read") + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write") + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync") + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 
8, 0, 15571, "Async") + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total") + + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read") + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write") + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync") + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async") + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total") + + appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "") + + expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) +} + +func TestBlkioStatsNoSectorsFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoServiceBytesFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := 
&BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoServicedFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoQueuedFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoServiceTimeFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": 
serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoWaitTimeFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoMergedFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + 
"blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoTimeFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": "8:0 Read 100 100", + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestBlkioStatsUnexpectedFieldType(t *testing.T) { + helper := 
NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": "8:0 Read Write", + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestNonCFQBlkioStats(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": "", + "blkio.io_serviced_recursive": "", + "blkio.io_queued_recursive": "", + "blkio.sectors_recursive": "", + "blkio.io_service_time_recursive": "", + "blkio.io_wait_time_recursive": "", + "blkio.io_merged_recursive": "", + "blkio.time_recursive": "", + "blkio.throttle.io_service_bytes": throttleServiceBytes, + "blkio.throttle.io_serviced": throttleServiced, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + + // Verify expected stats. 
+ expectedStats := cgroups.BlkioStats{} + + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total") + + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total") + + expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) +} + +func TestBlkioSetThrottleReadBpsDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + throttleBefore = `8:0 1024` + ) + + td := configs.NewThrottleDevice(8, 0, 2048) + 
throttleAfter := td.String() + + helper.writeFileContents(map[string]string{ + "blkio.throttle.read_bps_device": throttleBefore, + }) + + helper.CgroupData.config.Resources.BlkioThrottleReadBpsDevice = []*configs.ThrottleDevice{td} + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device") + if err != nil { + t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err) + } + + if value != throttleAfter { + t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.") + } +} +func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + throttleBefore = `8:0 1024` + ) + + td := configs.NewThrottleDevice(8, 0, 2048) + throttleAfter := td.String() + + helper.writeFileContents(map[string]string{ + "blkio.throttle.write_bps_device": throttleBefore, + }) + + helper.CgroupData.config.Resources.BlkioThrottleWriteBpsDevice = []*configs.ThrottleDevice{td} + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device") + if err != nil { + t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err) + } + + if value != throttleAfter { + t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.") + } +} +func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + throttleBefore = `8:0 1024` + ) + + td := configs.NewThrottleDevice(8, 0, 2048) + throttleAfter := td.String() + + helper.writeFileContents(map[string]string{ + "blkio.throttle.read_iops_device": throttleBefore, + }) + + helper.CgroupData.config.Resources.BlkioThrottleReadIOPSDevice = 
[]*configs.ThrottleDevice{td} + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device") + if err != nil { + t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err) + } + + if value != throttleAfter { + t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.") + } +} +func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + throttleBefore = `8:0 1024` + ) + + td := configs.NewThrottleDevice(8, 0, 2048) + throttleAfter := td.String() + + helper.writeFileContents(map[string]string{ + "blkio.throttle.write_iops_device": throttleBefore, + }) + + helper.CgroupData.config.Resources.BlkioThrottleWriteIOPSDevice = []*configs.ThrottleDevice{td} + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device") + if err != nil { + t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err) + } + + if value != throttleAfter { + t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.") + } +} diff --git a/libcontainer/cgroups/fs/cpu.go b/libcontainer/cgroups/fs/cpu.go new file mode 100644 index 0000000..4db7b64 --- /dev/null +++ b/libcontainer/cgroups/fs/cpu.go @@ -0,0 +1,118 @@ +// +build linux + +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type CpuGroup struct { +} + +func (s *CpuGroup) Name() string { + return "cpu" +} + +func (s *CpuGroup) Apply(d *cgroupData) error { + // We always want to join the cpu 
group, to allow fair cpu scheduling + // on a container basis + path, err := d.path("cpu") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(path, d.config, d.pid) +} + +func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpu cgroup mounted. + // Just do nothing and don't fail. + if path == "" { + return nil + } + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + // We should set the real-Time group scheduling settings before moving + // in the process because if the process is already in SCHED_RR mode + // and no RT bandwidth is set, adding it will fail. + if err := s.SetRtSched(path, cgroup); err != nil { + return err + } + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + return cgroups.WriteCgroupProc(path, pid) +} + +func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuRtPeriod != 0 { + if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuRtRuntime != 0 { + if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil { + return err + } + } + return nil +} + +func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuShares != 0 { + if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuPeriod != 0 { + if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil { + return err + } + } + if cgroup.Resources.CpuQuota != 0 { + if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil { + return err + } + } + 
return s.SetRtSched(path, cgroup) +} + +func (s *CpuGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpu")) +} + +func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { + f, err := os.Open(filepath.Join(path, "cpu.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text()) + if err != nil { + return err + } + switch t { + case "nr_periods": + stats.CpuStats.ThrottlingData.Periods = v + + case "nr_throttled": + stats.CpuStats.ThrottlingData.ThrottledPeriods = v + + case "throttled_time": + stats.CpuStats.ThrottlingData.ThrottledTime = v + } + } + return nil +} diff --git a/libcontainer/cgroups/fs/cpu_test.go b/libcontainer/cgroups/fs/cpu_test.go new file mode 100644 index 0000000..2eeb489 --- /dev/null +++ b/libcontainer/cgroups/fs/cpu_test.go @@ -0,0 +1,210 @@ +// +build linux + +package fs + +import ( + "fmt" + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" +) + +func TestCpuSetShares(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + const ( + sharesBefore = 1024 + sharesAfter = 512 + ) + + helper.writeFileContents(map[string]string{ + "cpu.shares": strconv.Itoa(sharesBefore), + }) + + helper.CgroupData.config.Resources.CpuShares = sharesAfter + cpu := &CpuGroup{} + if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.shares") + if err != nil { + t.Fatalf("Failed to parse cpu.shares - %s", err) + } + + if value != sharesAfter { + t.Fatal("Got the wrong value, set cpu.shares failed.") + } +} + +func TestCpuSetBandWidth(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + const ( + quotaBefore = 8000 + quotaAfter = 
5000 + periodBefore = 10000 + periodAfter = 7000 + rtRuntimeBefore = 8000 + rtRuntimeAfter = 5000 + rtPeriodBefore = 10000 + rtPeriodAfter = 7000 + ) + + helper.writeFileContents(map[string]string{ + "cpu.cfs_quota_us": strconv.Itoa(quotaBefore), + "cpu.cfs_period_us": strconv.Itoa(periodBefore), + "cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore), + "cpu.rt_period_us": strconv.Itoa(rtPeriodBefore), + }) + + helper.CgroupData.config.Resources.CpuQuota = quotaAfter + helper.CgroupData.config.Resources.CpuPeriod = periodAfter + helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter + helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter + cpu := &CpuGroup{} + if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + quota, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us") + if err != nil { + t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err) + } + if quota != quotaAfter { + t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.") + } + + period, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us") + if err != nil { + t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err) + } + if period != periodAfter { + t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.") + } + rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us") + if err != nil { + t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err) + } + if rtRuntime != rtRuntimeAfter { + t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.") + } + rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us") + if err != nil { + t.Fatalf("Failed to parse cpu.rt_period_us - %s", err) + } + if rtPeriod != rtPeriodAfter { + t.Fatal("Got the wrong value, set cpu.rt_period_us failed.") + } +} + +func TestCpuStats(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + const ( + nrPeriods = 2000 + nrThrottled = 200 + 
throttledTime = uint64(18446744073709551615) + ) + + cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n", + nrPeriods, nrThrottled, throttledTime) + helper.writeFileContents(map[string]string{ + "cpu.stat": cpuStatContent, + }) + + cpu := &CpuGroup{} + actualStats := *cgroups.NewStats() + err := cpu.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + + expectedStats := cgroups.ThrottlingData{ + Periods: nrPeriods, + ThrottledPeriods: nrThrottled, + ThrottledTime: throttledTime} + + expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData) +} + +func TestNoCpuStatFile(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + cpu := &CpuGroup{} + actualStats := *cgroups.NewStats() + err := cpu.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal("Expected not to fail, but did") + } +} + +func TestInvalidCpuStat(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + cpuStatContent := `nr_periods 2000 + nr_throttled 200 + throttled_time fortytwo` + helper.writeFileContents(map[string]string{ + "cpu.stat": cpuStatContent, + }) + + cpu := &CpuGroup{} + actualStats := *cgroups.NewStats() + err := cpu.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failed stat parsing.") + } +} + +func TestCpuSetRtSchedAtApply(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + const ( + rtRuntimeBefore = 0 + rtRuntimeAfter = 5000 + rtPeriodBefore = 0 + rtPeriodAfter = 7000 + ) + + helper.writeFileContents(map[string]string{ + "cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore), + "cpu.rt_period_us": strconv.Itoa(rtPeriodBefore), + }) + + helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter + helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter + cpu := &CpuGroup{} + if err := cpu.ApplyDir(helper.CgroupPath, helper.CgroupData.config, 1234); err != 
nil { + t.Fatal(err) + } + + rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us") + if err != nil { + t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err) + } + if rtRuntime != rtRuntimeAfter { + t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.") + } + rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us") + if err != nil { + t.Fatalf("Failed to parse cpu.rt_period_us - %s", err) + } + if rtPeriod != rtPeriodAfter { + t.Fatal("Got the wrong value, set cpu.rt_period_us failed.") + } + pid, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cgroup.procs") + if err != nil { + t.Fatalf("Failed to parse cgroup.procs - %s", err) + } + if pid != 1234 { + t.Fatal("Got the wrong value, set cgroup.procs failed.") + } +} diff --git a/libcontainer/cgroups/fs/cpuacct.go b/libcontainer/cgroups/fs/cpuacct.go new file mode 100644 index 0000000..95dc9a1 --- /dev/null +++ b/libcontainer/cgroups/fs/cpuacct.go @@ -0,0 +1,122 @@ +// +build linux + +package fs + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +const ( + cgroupCpuacctStat = "cpuacct.stat" + nanosecondsInSecond = 1000000000 +) + +var clockTicks = uint64(system.GetClockTicks()) + +type CpuacctGroup struct { +} + +func (s *CpuacctGroup) Name() string { + return "cpuacct" +} + +func (s *CpuacctGroup) Apply(d *cgroupData) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) { + return err + } + + return nil +} + +func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *CpuacctGroup) Remove(d *cgroupData) error { + return 
removePath(d.path("cpuacct")) +} + +func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { + userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) + if err != nil { + return err + } + + totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage") + if err != nil { + return err + } + + percpuUsage, err := getPercpuUsage(path) + if err != nil { + return err + } + + stats.CpuStats.CpuUsage.TotalUsage = totalUsage + stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage + stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage + stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage + return nil +} + +// Returns user and kernel usage breakdown in nanoseconds. +func getCpuUsageBreakdown(path string) (uint64, uint64, error) { + userModeUsage := uint64(0) + kernelModeUsage := uint64(0) + const ( + userField = "user" + systemField = "system" + ) + + // Expected format: + // user + // system + data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat)) + if err != nil { + return 0, 0, err + } + fields := strings.Fields(string(data)) + if len(fields) < 4 { + return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat)) + } + if fields[0] != userField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField) + } + if fields[2] != systemField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField) + } + if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { + return 0, 0, err + } + if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { + return 0, 0, err + } + + return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil +} + +func getPercpuUsage(path string) ([]uint64, error) { + percpuUsage := []uint64{} + data, err := ioutil.ReadFile(filepath.Join(path, 
"cpuacct.usage_percpu")) + if err != nil { + return percpuUsage, err + } + for _, value := range strings.Fields(string(data)) { + value, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) + } + percpuUsage = append(percpuUsage, value) + } + return percpuUsage, nil +} diff --git a/libcontainer/cgroups/fs/cpuset.go b/libcontainer/cgroups/fs/cpuset.go new file mode 100644 index 0000000..bfc900e --- /dev/null +++ b/libcontainer/cgroups/fs/cpuset.go @@ -0,0 +1,160 @@ +// +build linux + +package fs + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" +) + +type CpusetGroup struct { +} + +func (s *CpusetGroup) Name() string { + return "cpuset" +} + +func (s *CpusetGroup) Apply(d *cgroupData) error { + dir, err := d.path("cpuset") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(dir, d.config, d.pid) +} + +func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpusetCpus != "" { + if err := fscommon.WriteFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil { + return err + } + } + if cgroup.Resources.CpusetMems != "" { + if err := fscommon.WriteFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) Remove(d *cgroupData) error { + return removePath(d.path("cpuset")) +} + +func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} + +func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpuset cgroup mounted. + // Just do nothing and don't fail. 
+ if dir == "" { + return nil + } + mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo") + if err != nil { + return err + } + root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo))) + // 'ensureParent' start with parent because we don't want to + // explicitly inherit from parent, it could conflict with + // 'cpuset.cpu_exclusive'. + if err := s.ensureParent(filepath.Dir(dir), root); err != nil { + return err + } + if err := os.MkdirAll(dir, 0755); err != nil { + return err + } + // We didn't inherit cpuset configs from parent, but we have + // to ensure cpuset configs are set before moving task into the + // cgroup. + // The logic is, if user specified cpuset configs, use these + // specified configs, otherwise, inherit from parent. This makes + // cpuset configs work correctly with 'cpuset.cpu_exclusive', and + // keep backward compatibility. + if err := s.ensureCpusAndMems(dir, cgroup); err != nil { + return err + } + + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + return cgroups.WriteCgroupProc(dir, pid) +} + +func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { + return + } + return cpus, mems, nil +} + +// ensureParent makes sure that the parent directory of current is created +// and populated with the proper cpus and mems files copied from +// it's parent. +func (s *CpusetGroup) ensureParent(current, root string) error { + parent := filepath.Dir(current) + if libcontainerUtils.CleanPath(parent) == root { + return nil + } + // Avoid infinite recursion. 
+ if parent == current { + return fmt.Errorf("cpuset: cgroup parent path outside cgroup root") + } + if err := s.ensureParent(parent, root); err != nil { + return err + } + if err := os.MkdirAll(current, 0755); err != nil { + return err + } + return s.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (s *CpusetGroup) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + + if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { + return err + } + if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { + return err + } + + if s.isEmpty(currentCpus) { + if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil { + return err + } + } + if s.isEmpty(currentMems) { + if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} + +func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error { + if err := s.Set(path, cgroup); err != nil { + return err + } + return s.copyIfNeeded(path, filepath.Dir(path)) +} diff --git a/libcontainer/cgroups/fs/cpuset_test.go b/libcontainer/cgroups/fs/cpuset_test.go new file mode 100644 index 0000000..927e631 --- /dev/null +++ b/libcontainer/cgroups/fs/cpuset_test.go @@ -0,0 +1,67 @@ +// +build linux + +package fs + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" +) + +func TestCpusetSetCpus(t *testing.T) { + helper := NewCgroupTestUtil("cpuset", t) + defer helper.cleanup() + + const ( + cpusBefore = "0" + cpusAfter = "1-3" + ) + + helper.writeFileContents(map[string]string{ + "cpuset.cpus": cpusBefore, + }) + + 
helper.CgroupData.config.Resources.CpusetCpus = cpusAfter + cpuset := &CpusetGroup{} + if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.cpus") + if err != nil { + t.Fatalf("Failed to parse cpuset.cpus - %s", err) + } + + if value != cpusAfter { + t.Fatal("Got the wrong value, set cpuset.cpus failed.") + } +} + +func TestCpusetSetMems(t *testing.T) { + helper := NewCgroupTestUtil("cpuset", t) + defer helper.cleanup() + + const ( + memsBefore = "0" + memsAfter = "1" + ) + + helper.writeFileContents(map[string]string{ + "cpuset.mems": memsBefore, + }) + + helper.CgroupData.config.Resources.CpusetMems = memsAfter + cpuset := &CpusetGroup{} + if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.mems") + if err != nil { + t.Fatalf("Failed to parse cpuset.mems - %s", err) + } + + if value != memsAfter { + t.Fatal("Got the wrong value, set cpuset.mems failed.") + } +} diff --git a/libcontainer/cgroups/fs/devices.go b/libcontainer/cgroups/fs/devices.go new file mode 100644 index 0000000..036c8db --- /dev/null +++ b/libcontainer/cgroups/fs/devices.go @@ -0,0 +1,81 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +type DevicesGroup struct { +} + +func (s *DevicesGroup) Name() string { + return "devices" +} + +func (s *DevicesGroup) Apply(d *cgroupData) error { + _, err := d.join("devices") + if err != nil { + // We will return error even it's `not found` error, devices + // cgroup is hard requirement for container's security. 
+ return err + } + return nil +} + +func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { + if system.RunningInUserNS() { + return nil + } + + devices := cgroup.Resources.Devices + if len(devices) > 0 { + for _, dev := range devices { + file := "devices.deny" + if dev.Allow { + file = "devices.allow" + } + if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil { + return err + } + } + return nil + } + if cgroup.Resources.AllowAllDevices != nil { + if *cgroup.Resources.AllowAllDevices == false { + if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil { + return err + } + + for _, dev := range cgroup.Resources.AllowedDevices { + if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil { + return err + } + } + return nil + } + + if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil { + return err + } + } + + for _, dev := range cgroup.Resources.DeniedDevices { + if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *DevicesGroup) Remove(d *cgroupData) error { + return removePath(d.path("devices")) +} + +func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/libcontainer/cgroups/fs/devices_test.go b/libcontainer/cgroups/fs/devices_test.go new file mode 100644 index 0000000..648f4a2 --- /dev/null +++ b/libcontainer/cgroups/fs/devices_test.go @@ -0,0 +1,99 @@ +// +build linux + +package fs + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + allowedDevices = []*configs.Device{ + { + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, + }, + } + allowedList = "c 1:5 rwm" + deniedDevices = []*configs.Device{ + { + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 
0666, + }, + } + deniedList = "c 1:3 rwm" +) + +func TestDevicesSetAllow(t *testing.T) { + helper := NewCgroupTestUtil("devices", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "devices.deny": "a", + }) + allowAllDevices := false + helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices + helper.CgroupData.config.Resources.AllowedDevices = allowedDevices + devices := &DevicesGroup{} + if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow") + if err != nil { + t.Fatalf("Failed to parse devices.allow - %s", err) + } + + if value != allowedList { + t.Fatal("Got the wrong value, set devices.allow failed.") + } + + // When AllowAllDevices is nil, devices.allow file should not be modified. + helper.CgroupData.config.Resources.AllowAllDevices = nil + if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow") + if err != nil { + t.Fatalf("Failed to parse devices.allow - %s", err) + } + if value != allowedList { + t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.") + } +} + +func TestDevicesSetDeny(t *testing.T) { + helper := NewCgroupTestUtil("devices", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "devices.allow": "a", + }) + + allowAllDevices := true + helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices + helper.CgroupData.config.Resources.DeniedDevices = deniedDevices + devices := &DevicesGroup{} + if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny") + if err != nil { + t.Fatalf("Failed to parse devices.deny - %s", err) + } + + if value != deniedList { + t.Fatal("Got the wrong 
value, set devices.deny failed.") + } +} diff --git a/libcontainer/cgroups/fs/freezer.go b/libcontainer/cgroups/fs/freezer.go new file mode 100644 index 0000000..9dc81bd --- /dev/null +++ b/libcontainer/cgroups/fs/freezer.go @@ -0,0 +1,67 @@ +// +build linux + +package fs + +import ( + "fmt" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type FreezerGroup struct { +} + +func (s *FreezerGroup) Name() string { + return "freezer" +} + +func (s *FreezerGroup) Apply(d *cgroupData) error { + _, err := d.join("freezer") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { + switch cgroup.Resources.Freezer { + case configs.Frozen, configs.Thawed: + for { + // In case this loop does not exit because it doesn't get the expected + // state, let's write again this state, hoping it's going to be properly + // set this time. Otherwise, this loop could run infinitely, waiting for + // a state change that would never happen. 
+ if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { + return err + } + + state, err := fscommon.ReadFile(path, "freezer.state") + if err != nil { + return err + } + if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { + break + } + + time.Sleep(1 * time.Millisecond) + } + case configs.Undefined: + return nil + default: + return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer)) + } + + return nil +} + +func (s *FreezerGroup) Remove(d *cgroupData) error { + return removePath(d.path("freezer")) +} + +func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/libcontainer/cgroups/fs/freezer_test.go b/libcontainer/cgroups/fs/freezer_test.go new file mode 100644 index 0000000..ad80261 --- /dev/null +++ b/libcontainer/cgroups/fs/freezer_test.go @@ -0,0 +1,48 @@ +// +build linux + +package fs + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +func TestFreezerSetState(t *testing.T) { + helper := NewCgroupTestUtil("freezer", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "freezer.state": string(configs.Frozen), + }) + + helper.CgroupData.config.Resources.Freezer = configs.Thawed + freezer := &FreezerGroup{} + if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "freezer.state") + if err != nil { + t.Fatalf("Failed to parse freezer.state - %s", err) + } + if value != string(configs.Thawed) { + t.Fatal("Got the wrong value, set freezer.state failed.") + } +} + +func TestFreezerSetInvalidState(t *testing.T) { + helper := NewCgroupTestUtil("freezer", t) + defer helper.cleanup() + + const ( + invalidArg configs.FreezerState = "Invalid" + ) + + helper.CgroupData.config.Resources.Freezer = 
invalidArg + freezer := &FreezerGroup{} + if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil { + t.Fatal("Failed to return invalid argument error") + } +} diff --git a/libcontainer/cgroups/fs/fs_unsupported.go b/libcontainer/cgroups/fs/fs_unsupported.go new file mode 100644 index 0000000..3ef9e03 --- /dev/null +++ b/libcontainer/cgroups/fs/fs_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package fs diff --git a/libcontainer/cgroups/fs/hugetlb.go b/libcontainer/cgroups/fs/hugetlb.go new file mode 100644 index 0000000..68719c2 --- /dev/null +++ b/libcontainer/cgroups/fs/hugetlb.go @@ -0,0 +1,72 @@ +// +build linux + +package fs + +import ( + "fmt" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type HugetlbGroup struct { +} + +func (s *HugetlbGroup) Name() string { + return "hugetlb" +} + +func (s *HugetlbGroup) Apply(d *cgroupData) error { + _, err := d.join("hugetlb") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, hugetlb := range cgroup.Resources.HugetlbLimit { + if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil { + return err + } + } + + return nil +} + +func (s *HugetlbGroup) Remove(d *cgroupData) error { + return removePath(d.path("hugetlb")) +} + +func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { + hugetlbStats := cgroups.HugetlbStats{} + for _, pageSize := range HugePageSizes { + usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".") + value, err := fscommon.GetCgroupParamUint(path, usage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", usage, err) + } + hugetlbStats.Usage = value + 
+ maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".") + value, err = fscommon.GetCgroupParamUint(path, maxUsage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + hugetlbStats.MaxUsage = value + + failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".") + value, err = fscommon.GetCgroupParamUint(path, failcnt) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pageSize] = hugetlbStats + } + + return nil +} diff --git a/libcontainer/cgroups/fs/hugetlb_test.go b/libcontainer/cgroups/fs/hugetlb_test.go new file mode 100644 index 0000000..9ddacfe --- /dev/null +++ b/libcontainer/cgroups/fs/hugetlb_test.go @@ -0,0 +1,155 @@ +// +build linux + +package fs + +import ( + "fmt" + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +const ( + hugetlbUsageContents = "128\n" + hugetlbMaxUsageContents = "256\n" + hugetlbFailcnt = "100\n" +) + +var ( + usage = "hugetlb.%s.usage_in_bytes" + limit = "hugetlb.%s.limit_in_bytes" + maxUsage = "hugetlb.%s.max_usage_in_bytes" + failcnt = "hugetlb.%s.failcnt" +) + +func TestHugetlbSetHugetlb(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + + const ( + hugetlbBefore = 256 + hugetlbAfter = 512 + ) + + for _, pageSize := range HugePageSizes { + helper.writeFileContents(map[string]string{ + fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore), + }) + } + + for _, pageSize := range HugePageSizes { + helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{ + { + Pagesize: pageSize, + Limit: hugetlbAfter, + }, + } + hugetlb := &HugetlbGroup{} + if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + } + + for _, pageSize 
:= range HugePageSizes { + limit := fmt.Sprintf(limit, pageSize) + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, limit) + if err != nil { + t.Fatalf("Failed to parse %s - %s", limit, err) + } + if value != hugetlbAfter { + t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value) + } + } +} + +func TestHugetlbStats(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + for _, pageSize := range HugePageSizes { + helper.writeFileContents(map[string]string{ + fmt.Sprintf(usage, pageSize): hugetlbUsageContents, + fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents, + fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt, + }) + } + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100} + for _, pageSize := range HugePageSizes { + expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize]) + } +} + +func TestHugetlbStatsNoUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + maxUsage: hugetlbMaxUsageContents, + }) + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestHugetlbStatsNoMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + for _, pageSize := range HugePageSizes { + helper.writeFileContents(map[string]string{ + fmt.Sprintf(usage, pageSize): hugetlbUsageContents, + }) + } + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestHugetlbStatsBadUsageFile(t *testing.T) { + helper := 
NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + for _, pageSize := range HugePageSizes { + helper.writeFileContents(map[string]string{ + fmt.Sprintf(usage, pageSize): "bad", + maxUsage: hugetlbMaxUsageContents, + }) + } + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestHugetlbStatsBadMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + usage: hugetlbUsageContents, + maxUsage: "bad", + }) + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} diff --git a/libcontainer/cgroups/fs/kmem.go b/libcontainer/cgroups/fs/kmem.go new file mode 100644 index 0000000..69b5a19 --- /dev/null +++ b/libcontainer/cgroups/fs/kmem.go @@ -0,0 +1,62 @@ +// +build linux,!nokmem + +package fs + +import ( + "errors" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "syscall" // for Errno type only + + "github.com/opencontainers/runc/libcontainer/cgroups" + "golang.org/x/sys/unix" +) + +const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes" + +func EnableKernelMemoryAccounting(path string) error { + // Ensure that kernel memory is available in this kernel build. If it + // isn't, we just ignore it because EnableKernelMemoryAccounting is + // automatically called for all memory limits. + if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) { + return nil + } + // We have to limit the kernel memory here as it won't be accounted at all + // until a limit is set on the cgroup and limit cannot be set once the + // cgroup has children, or if there are already tasks in the cgroup. 
+ for _, i := range []int64{1, -1} { + if err := setKernelMemory(path, i); err != nil { + return err + } + } + return nil +} + +func setKernelMemory(path string, kernelMemoryLimit int64) error { + if path == "" { + return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit) + } + if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) { + // We have specifically been asked to set a kmem limit. If the kernel + // doesn't support it we *must* error out. + return errors.New("kernel memory accounting not supported by this kernel") + } + if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil { + // Check if the error number returned by the syscall is "EBUSY" + // The EBUSY error is returned on attempts to write to the + // memory.kmem.limit_in_bytes file if the cgroup has children or + // once tasks have been attached to the cgroup + if pathErr, ok := err.(*os.PathError); ok { + if errNo, ok := pathErr.Err.(syscall.Errno); ok { + if errNo == unix.EBUSY { + return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit) + } + } + } + return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err) + } + return nil +} diff --git a/libcontainer/cgroups/fs/kmem_disabled.go b/libcontainer/cgroups/fs/kmem_disabled.go new file mode 100644 index 0000000..ac290fd --- /dev/null +++ b/libcontainer/cgroups/fs/kmem_disabled.go @@ -0,0 +1,15 @@ +// +build linux,nokmem + +package fs + +import ( + "errors" +) + +func EnableKernelMemoryAccounting(path string) error { + return nil +} + +func setKernelMemory(path string, kernelMemoryLimit int64) error { + return errors.New("kernel memory accounting disabled in this runc build") +} diff --git a/libcontainer/cgroups/fs/memory.go b/libcontainer/cgroups/fs/memory.go new file mode 100644 index 0000000..f81ed05 --- /dev/null +++
b/libcontainer/cgroups/fs/memory.go @@ -0,0 +1,271 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +const ( + cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes" + cgroupMemoryLimit = "memory.limit_in_bytes" +) + +type MemoryGroup struct { +} + +func (s *MemoryGroup) Name() string { + return "memory" +} + +func (s *MemoryGroup) Apply(d *cgroupData) (err error) { + path, err := d.path("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } else if path == "" { + return nil + } + if memoryAssigned(d.config) { + if _, err := os.Stat(path); os.IsNotExist(err) { + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + // Only enable kernel memory accounting when this cgroup + // is created by libcontainer, otherwise we might get + // error when people use `cgroupsPath` to join an existing + // cgroup whose kernel memory is not initialized. + if err := EnableKernelMemoryAccounting(path); err != nil { + return err + } + } + } + defer func() { + if err != nil { + os.RemoveAll(path) + } + }() + + // We need to join memory cgroup after setting memory limits, because + // kmem.limit_in_bytes can only be set when the cgroup is empty. + _, err = d.join("memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error { + // If the memory update is set to -1 we should also + // set swap to -1, it means unlimited memory. + if cgroup.Resources.Memory == -1 { + // Only set swap if it's enabled in kernel + if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) { + cgroup.Resources.MemorySwap = -1 + } + } + + // When memory and swap memory are both set, we need to handle the cases + // for updating container.
+ if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 { + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + + // When updating the memory limit, we should adapt the write sequence + // for memory and swap memory, so it won't fail because the new + // value and the old value don't fit the kernel's validation. + if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) { + if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } else { + if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + } else { + if cgroup.Resources.Memory != 0 { + if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } + if cgroup.Resources.MemorySwap != 0 { + if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + } + + return nil +} + +func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { + if err := setMemoryAndSwap(path, cgroup); err != nil { + return err + } + + if cgroup.Resources.KernelMemory != 0 { + if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil { + return err + } + } + + if cgroup.Resources.MemoryReservation != 0 { + if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil { + return err + } + } + + if cgroup.Resources.KernelMemoryTCP != 0 { + if err := fscommon.WriteFile(path,
"memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil { + return err + } + } + if cgroup.Resources.OomKillDisable { + if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil { + return err + } + } + if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 { + return nil + } else if *cgroup.Resources.MemorySwappiness <= 100 { + if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil { + return err + } + } else { + return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness) + } + + return nil +} + +func (s *MemoryGroup) Remove(d *cgroupData) error { + return removePath(d.path("memory")) +} + +func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { + // Set stats from memory.stat. + statsFile, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text()) + if err != nil { + return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) + } + stats.MemoryStats.Stats[t] = v + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] + + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryData(path, "memsw") + if err != nil { + return err + } + stats.MemoryStats.SwapUsage = swapUsage + kernelUsage, err := getMemoryData(path, "kmem") + if err != nil { + return err + } + stats.MemoryStats.KernelUsage = kernelUsage + kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") + if err != nil { + return err + } + stats.MemoryStats.KernelTCPUsage = kernelTCPUsage + + useHierarchy := strings.Join([]string{"memory", 
"use_hierarchy"}, ".") + value, err := fscommon.GetCgroupParamUint(path, useHierarchy) + if err != nil { + return err + } + if value == 1 { + stats.MemoryStats.UseHierarchy = true + } + return nil +} + +func memoryAssigned(cgroup *configs.Cgroup) bool { + return cgroup.Resources.Memory != 0 || + cgroup.Resources.MemoryReservation != 0 || + cgroup.Resources.MemorySwap > 0 || + cgroup.Resources.KernelMemory > 0 || + cgroup.Resources.KernelMemoryTCP > 0 || + cgroup.Resources.OomKillDisable || + (cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1) +} + +func getMemoryData(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".") + maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".") + failcnt := strings.Join([]string{moduleName, "failcnt"}, ".") + limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".") + + value, err := fscommon.GetCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) + } + memoryData.Usage = value + value, err = fscommon.GetCgroupParamUint(path, maxUsage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + memoryData.MaxUsage = value + value, err = fscommon.GetCgroupParamUint(path, failcnt) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + memoryData.Failcnt = value + value, err = fscommon.GetCgroupParamUint(path, limit) + if 
err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err) + } + memoryData.Limit = value + + return memoryData, nil +} diff --git a/libcontainer/cgroups/fs/memory_test.go b/libcontainer/cgroups/fs/memory_test.go new file mode 100644 index 0000000..62de563 --- /dev/null +++ b/libcontainer/cgroups/fs/memory_test.go @@ -0,0 +1,456 @@ +// +build linux + +package fs + +import ( + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" +) + +const ( + memoryStatContents = `cache 512 +rss 1024` + memoryUsageContents = "2048\n" + memoryMaxUsageContents = "4096\n" + memoryFailcnt = "100\n" + memoryLimitContents = "8192\n" + memoryUseHierarchyContents = "1\n" +) + +func TestMemorySetMemory(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + memoryBefore = 314572800 // 300M + memoryAfter = 524288000 // 500M + reservationBefore = 209715200 // 200M + reservationAfter = 314572800 // 300M + ) + + helper.writeFileContents(map[string]string{ + "memory.limit_in_bytes": strconv.Itoa(memoryBefore), + "memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore), + }) + + helper.CgroupData.config.Resources.Memory = memoryAfter + helper.CgroupData.config.Resources.MemoryReservation = reservationAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) + } + if value != memoryAfter { + t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") + } + + value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes") + if err != nil { + t.Fatalf("Failed to 
parse memory.soft_limit_in_bytes - %s", err) + } + if value != reservationAfter { + t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.") + } +} + +func TestMemorySetMemoryswap(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + memoryswapBefore = 314572800 // 300M + memoryswapAfter = 524288000 // 500M + ) + + helper.writeFileContents(map[string]string{ + "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), + }) + + helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) + } + if value != memoryswapAfter { + t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") + } +} + +func TestMemorySetMemoryLargerThanSwap(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + memoryBefore = 314572800 // 300M + memoryswapBefore = 524288000 // 500M + memoryAfter = 629145600 // 600M + memoryswapAfter = 838860800 // 800M + ) + + helper.writeFileContents(map[string]string{ + "memory.limit_in_bytes": strconv.Itoa(memoryBefore), + "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), + // Set will call getMemoryData when memory and swap memory are + // both set, fake these fields so we don't get error. 
+ "memory.usage_in_bytes": "0", + "memory.max_usage_in_bytes": "0", + "memory.failcnt": "0", + }) + + helper.CgroupData.config.Resources.Memory = memoryAfter + helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) + } + if value != memoryAfter { + t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") + } + value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) + } + if value != memoryswapAfter { + t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") + } +} + +func TestMemorySetSwapSmallerThanMemory(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + memoryBefore = 629145600 // 600M + memoryswapBefore = 838860800 // 800M + memoryAfter = 314572800 // 300M + memoryswapAfter = 524288000 // 500M + ) + + helper.writeFileContents(map[string]string{ + "memory.limit_in_bytes": strconv.Itoa(memoryBefore), + "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), + // Set will call getMemoryData when memory and swap memory are + // both set, fake these fields so we don't get error. 
+ "memory.usage_in_bytes": "0", + "memory.max_usage_in_bytes": "0", + "memory.failcnt": "0", + }) + + helper.CgroupData.config.Resources.Memory = memoryAfter + helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) + } + if value != memoryAfter { + t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") + } + value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) + } + if value != memoryswapAfter { + t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") + } +} + +func TestMemorySetKernelMemory(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + kernelMemoryBefore = 314572800 // 300M + kernelMemoryAfter = 524288000 // 500M + ) + + helper.writeFileContents(map[string]string{ + "memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore), + }) + + helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err) + } + if value != kernelMemoryAfter { + t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.") + } +} + +func TestMemorySetKernelMemoryTCP(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + kernelMemoryTCPBefore = 314572800 // 300M + kernelMemoryTCPAfter = 524288000 // 500M + ) + + 
helper.writeFileContents(map[string]string{ + "memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore), + }) + + helper.CgroupData.config.Resources.KernelMemoryTCP = kernelMemoryTCPAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err) + } + if value != kernelMemoryTCPAfter { + t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.") + } +} + +func TestMemorySetMemorySwappinessDefault(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + swappinessBefore := 60 //default is 60 + swappinessAfter := uint64(0) + + helper.writeFileContents(map[string]string{ + "memory.swappiness": strconv.Itoa(swappinessBefore), + }) + + helper.CgroupData.config.Resources.MemorySwappiness = &swappinessAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.swappiness") + if err != nil { + t.Fatalf("Failed to parse memory.swappiness - %s", err) + } + if value != swappinessAfter { + t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter) + } +} + +func TestMemoryStats(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.limit_in_bytes": memoryLimitContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + "memory.failcnt": memoryFailcnt, + "memory.memsw.usage_in_bytes": memoryUsageContents, + "memory.memsw.max_usage_in_bytes": memoryMaxUsageContents, + "memory.memsw.failcnt": memoryFailcnt, + 
"memory.memsw.limit_in_bytes": memoryLimitContents, + "memory.kmem.usage_in_bytes": memoryUsageContents, + "memory.kmem.max_usage_in_bytes": memoryMaxUsageContents, + "memory.kmem.failcnt": memoryFailcnt, + "memory.kmem.limit_in_bytes": memoryLimitContents, + "memory.use_hierarchy": memoryUseHierarchyContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}, UseHierarchy: true} + expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats) +} + +func TestMemoryStatsNoStatFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + "memory.limit_in_bytes": memoryLimitContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } +} + +func TestMemoryStatsNoUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + "memory.limit_in_bytes": memoryLimitContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsNoMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", 
t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.limit_in_bytes": memoryLimitContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsNoLimitInBytesFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadStatFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": "rss rss", + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + "memory.limit_in_bytes": memoryLimitContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": "bad", + "memory.max_usage_in_bytes": memoryMaxUsageContents, + "memory.limit_in_bytes": memoryLimitContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadMaxUsageFile(t *testing.T) { + 
helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": "bad", + "memory.limit_in_bytes": memoryLimitContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadLimitInBytesFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + "memory.limit_in_bytes": "bad", + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemorySetOomControl(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + oomKillDisable = 1 // disable oom killer, default is 0 + ) + + helper.writeFileContents(map[string]string{ + "memory.oom_control": strconv.Itoa(oomKillDisable), + }) + + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.oom_control") + if err != nil { + t.Fatalf("Failed to parse memory.oom_control - %s", err) + } + + if value != oomKillDisable { + t.Fatalf("Got the wrong value, set memory.oom_control failed.") + } +} diff --git a/libcontainer/cgroups/fs/name.go b/libcontainer/cgroups/fs/name.go new file mode 100644 index 0000000..d8cf1d8 --- /dev/null +++ b/libcontainer/cgroups/fs/name.go @@ -0,0 +1,40 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + 
"github.com/opencontainers/runc/libcontainer/configs" +) + +type NameGroup struct { + GroupName string + Join bool +} + +func (s *NameGroup) Name() string { + return s.GroupName +} + +func (s *NameGroup) Apply(d *cgroupData) error { + if s.Join { + // ignore errors if the named cgroup does not exist + d.join(s.GroupName) + } + return nil +} + +func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *NameGroup) Remove(d *cgroupData) error { + if s.Join { + removePath(d.path(s.GroupName)) + } + return nil +} + +func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/libcontainer/cgroups/fs/net_cls.go b/libcontainer/cgroups/fs/net_cls.go new file mode 100644 index 0000000..0212015 --- /dev/null +++ b/libcontainer/cgroups/fs/net_cls.go @@ -0,0 +1,44 @@ +// +build linux + +package fs + +import ( + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetClsGroup struct { +} + +func (s *NetClsGroup) Name() string { + return "net_cls" +} + +func (s *NetClsGroup) Apply(d *cgroupData) error { + _, err := d.join("net_cls") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.NetClsClassid != 0 { + if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil { + return err + } + } + + return nil +} + +func (s *NetClsGroup) Remove(d *cgroupData) error { + return removePath(d.path("net_cls")) +} + +func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/libcontainer/cgroups/fs/net_cls_test.go b/libcontainer/cgroups/fs/net_cls_test.go new file mode 100644 index 0000000..602133a --- /dev/null +++ 
b/libcontainer/cgroups/fs/net_cls_test.go @@ -0,0 +1,41 @@ +// +build linux + +package fs + +import ( + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" +) + +const ( + classidBefore = 0x100002 + classidAfter = 0x100001 +) + +func TestNetClsSetClassid(t *testing.T) { + helper := NewCgroupTestUtil("net_cls", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "net_cls.classid": strconv.FormatUint(classidBefore, 10), + }) + + helper.CgroupData.config.Resources.NetClsClassid = classidAfter + netcls := &NetClsGroup{} + if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + // As we are in a mock environment, we can't get the correct value of classid from + // net_cls.classid. + // So we just check whether the classid was successfully written to the file. + value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "net_cls.classid") + if err != nil { + t.Fatalf("Failed to parse net_cls.classid - %s", err) + } + if value != classidAfter { + t.Fatal("Got the wrong value, set net_cls.classid failed.") + } +} diff --git a/libcontainer/cgroups/fs/net_prio.go b/libcontainer/cgroups/fs/net_prio.go new file mode 100644 index 0000000..2bdeedf --- /dev/null +++ b/libcontainer/cgroups/fs/net_prio.go @@ -0,0 +1,42 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetPrioGroup struct { +} + +func (s *NetPrioGroup) Name() string { + return "net_prio" +} + +func (s *NetPrioGroup) Apply(d *cgroupData) error { + _, err := d.join("net_prio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, prioMap := range cgroup.Resources.NetPrioIfpriomap { + if err := fscommon.WriteFile(path, "net_prio.ifpriomap", 
prioMap.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *NetPrioGroup) Remove(d *cgroupData) error { + return removePath(d.path("net_prio")) +} + +func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/libcontainer/cgroups/fs/net_prio_test.go b/libcontainer/cgroups/fs/net_prio_test.go new file mode 100644 index 0000000..2ce8e19 --- /dev/null +++ b/libcontainer/cgroups/fs/net_prio_test.go @@ -0,0 +1,39 @@ +// +build linux + +package fs + +import ( + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + prioMap = []*configs.IfPrioMap{ + { + Interface: "test", + Priority: 5, + }, + } +) + +func TestNetPrioSetIfPrio(t *testing.T) { + helper := NewCgroupTestUtil("net_prio", t) + defer helper.cleanup() + + helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap + netPrio := &NetPrioGroup{} + if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap") + if err != nil { + t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err) + } + if !strings.Contains(value, "test 5") { + t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.") + } +} diff --git a/libcontainer/cgroups/fs/perf_event.go b/libcontainer/cgroups/fs/perf_event.go new file mode 100644 index 0000000..5693676 --- /dev/null +++ b/libcontainer/cgroups/fs/perf_event.go @@ -0,0 +1,35 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PerfEventGroup struct { +} + +func (s *PerfEventGroup) Name() string { + return "perf_event" +} + +func (s *PerfEventGroup) Apply(d *cgroupData) error { + // we just want to join this group even though we don't set anything + if _, err := 
d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *PerfEventGroup) Remove(d *cgroupData) error { + return removePath(d.path("perf_event")) +} + +func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/libcontainer/cgroups/fs/pids.go b/libcontainer/cgroups/fs/pids.go new file mode 100644 index 0000000..7bf6801 --- /dev/null +++ b/libcontainer/cgroups/fs/pids.go @@ -0,0 +1,74 @@ +// +build linux + +package fs + +import ( + "fmt" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PidsGroup struct { +} + +func (s *PidsGroup) Name() string { + return "pids" +} + +func (s *PidsGroup) Apply(d *cgroupData) error { + _, err := d.join("pids") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Resources.PidsLimit != 0 { + // "max" is the fallback value. 
+ limit := "max" + + if cgroup.Resources.PidsLimit > 0 { + limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10) + } + + if err := fscommon.WriteFile(path, "pids.max", limit); err != nil { + return err + } + } + + return nil +} + +func (s *PidsGroup) Remove(d *cgroupData) error { + return removePath(d.path("pids")) +} + +func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error { + current, err := fscommon.GetCgroupParamUint(path, "pids.current") + if err != nil { + return fmt.Errorf("failed to parse pids.current - %s", err) + } + + maxString, err := fscommon.GetCgroupParamString(path, "pids.max") + if err != nil { + return fmt.Errorf("failed to parse pids.max - %s", err) + } + + // Default if pids.max == "max" is 0 -- which represents "no limit". + var max uint64 + if maxString != "max" { + max, err = fscommon.ParseUint(maxString, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max")) + } + } + + stats.PidsStats.Current = current + stats.PidsStats.Limit = max + return nil +} diff --git a/libcontainer/cgroups/fs/pids_test.go b/libcontainer/cgroups/fs/pids_test.go new file mode 100644 index 0000000..66f3aa3 --- /dev/null +++ b/libcontainer/cgroups/fs/pids_test.go @@ -0,0 +1,112 @@ +// +build linux + +package fs + +import ( + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" +) + +const ( + maxUnlimited = -1 + maxLimited = 1024 +) + +func TestPidsSetMax(t *testing.T) { + helper := NewCgroupTestUtil("pids", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "pids.max": "max", + }) + + helper.CgroupData.config.Resources.PidsLimit = maxLimited + pids := &PidsGroup{} + if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := 
fscommon.GetCgroupParamUint(helper.CgroupPath, "pids.max") + if err != nil { + t.Fatalf("Failed to parse pids.max - %s", err) + } + + if value != maxLimited { + t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value) + } +} + +func TestPidsSetUnlimited(t *testing.T) { + helper := NewCgroupTestUtil("pids", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "pids.max": strconv.Itoa(maxLimited), + }) + + helper.CgroupData.config.Resources.PidsLimit = maxUnlimited + pids := &PidsGroup{} + if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "pids.max") + if err != nil { + t.Fatalf("Failed to parse pids.max - %s", err) + } + + if value != "max" { + t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value) + } +} + +func TestPidsStats(t *testing.T) { + helper := NewCgroupTestUtil("pids", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "pids.current": strconv.Itoa(1337), + "pids.max": strconv.Itoa(maxLimited), + }) + + pids := &PidsGroup{} + stats := *cgroups.NewStats() + if err := pids.GetStats(helper.CgroupPath, &stats); err != nil { + t.Fatal(err) + } + + if stats.PidsStats.Current != 1337 { + t.Fatalf("Expected %d, got %d for pids.current", 1337, stats.PidsStats.Current) + } + + if stats.PidsStats.Limit != maxLimited { + t.Fatalf("Expected %d, got %d for pids.max", maxLimited, stats.PidsStats.Limit) + } +} + +func TestPidsStatsUnlimited(t *testing.T) { + helper := NewCgroupTestUtil("pids", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "pids.current": strconv.Itoa(4096), + "pids.max": "max", + }) + + pids := &PidsGroup{} + stats := *cgroups.NewStats() + if err := pids.GetStats(helper.CgroupPath, &stats); err != nil { + t.Fatal(err) + } + + if stats.PidsStats.Current != 4096 { + t.Fatalf("Expected %d, got %d for pids.current", 
4096, stats.PidsStats.Current) + } + + if stats.PidsStats.Limit != 0 { + t.Fatalf("Expected %d, got %d for pids.max", 0, stats.PidsStats.Limit) + } +} diff --git a/libcontainer/cgroups/fs/stats_util_test.go b/libcontainer/cgroups/fs/stats_util_test.go new file mode 100644 index 0000000..c5a8d18 --- /dev/null +++ b/libcontainer/cgroups/fs/stats_util_test.go @@ -0,0 +1,123 @@ +// +build linux + +package fs + +import ( + "fmt" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + + "github.com/sirupsen/logrus" +) + +func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error { + if len(expected) != len(actual) { + return fmt.Errorf("blkioStatEntries length do not match") + } + for i, expValue := range expected { + actValue := actual[i] + if expValue != actValue { + return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue) + } + } + return nil +} + +func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) { + if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil { + logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil { + logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil { + logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil { + logrus.Printf("blkio SectorsRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil { + logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err) + t.Fail() + } + + if err := 
blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil { + logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil { + logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil { + logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err) + t.Fail() + } +} + +func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) { + if expected != actual { + logrus.Printf("Expected throttling data %v but found %v\n", expected, actual) + t.Fail() + } +} + +func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) { + if expected != actual { + logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual) + t.Fail() + } +} + +func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) { + expectMemoryDataEquals(t, expected.Usage, actual.Usage) + expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage) + expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage) + + if expected.UseHierarchy != actual.UseHierarchy { + logrus.Printf("Expected memory use hierarchy %v, but found %v\n", expected.UseHierarchy, actual.UseHierarchy) + t.Fail() + } + + for key, expValue := range expected.Stats { + actValue, ok := actual.Stats[key] + if !ok { + logrus.Printf("Expected memory stat key %s not found\n", key) + t.Fail() + } + if expValue != actValue { + logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue) + t.Fail() + } + } +} + +func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) { + if expected.Usage != actual.Usage { + logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, 
actual.Usage) + t.Fail() + } + if expected.MaxUsage != actual.MaxUsage { + logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage) + t.Fail() + } + if expected.Failcnt != actual.Failcnt { + logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt) + t.Fail() + } + if expected.Limit != actual.Limit { + logrus.Printf("Expected memory limit %d but found %d\n", expected.Limit, actual.Limit) + t.Fail() + } +} diff --git a/libcontainer/cgroups/fs/util_test.go b/libcontainer/cgroups/fs/util_test.go new file mode 100644 index 0000000..2c50d6f --- /dev/null +++ b/libcontainer/cgroups/fs/util_test.go @@ -0,0 +1,68 @@ +// +build linux + +/* +Utility for testing cgroup operations. + +Creates a mock of the cgroup filesystem for the duration of the test. +*/ +package fs + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type cgroupTestUtil struct { + // cgroup data to use in tests. + CgroupData *cgroupData + + // Path to the mock cgroup directory. + CgroupPath string + + // Temporary directory to store mock cgroup filesystem. + tempDir string + t *testing.T +} + +// Creates a new test util for the specified subsystem +func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil { + d := &cgroupData{ + config: &configs.Cgroup{}, + } + d.config.Resources = &configs.Resources{} + tempDir, err := ioutil.TempDir("", "cgroup_test") + if err != nil { + t.Fatal(err) + } + d.root = tempDir + testCgroupPath := filepath.Join(d.root, subsystem) + if err != nil { + t.Fatal(err) + } + + // Ensure the full mock cgroup path exists. 
+ err = os.MkdirAll(testCgroupPath, 0755) + if err != nil { + t.Fatal(err) + } + return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t} +} + +func (c *cgroupTestUtil) cleanup() { + os.RemoveAll(c.tempDir) +} + +// Write the specified contents on the mock of the specified cgroup files. +func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) { + for file, contents := range fileContents { + err := fscommon.WriteFile(c.CgroupPath, file, contents) + if err != nil { + c.t.Fatal(err) + } + } +} diff --git a/libcontainer/cgroups/fs2/cpu.go b/libcontainer/cgroups/fs2/cpu.go new file mode 100644 index 0000000..f0f5df0 --- /dev/null +++ b/libcontainer/cgroups/fs2/cpu.go @@ -0,0 +1,56 @@ +// +build linux + +package fs2 + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +func setCpu(dirPath string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpuWeight != 0 { + if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(cgroup.Resources.CpuWeight, 10)); err != nil { + return err + } + } + + if cgroup.Resources.CpuMax != "" { + if err := fscommon.WriteFile(dirPath, "cpu.max", cgroup.Resources.CpuMax); err != nil { + return err + } + } + + return nil +} +func statCpu(dirPath string, stats *cgroups.Stats) error { + f, err := os.Open(filepath.Join(dirPath, "cpu.stat")) + if err != nil { + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text()) + if err != nil { + return err + } + switch t { + case "usage_usec": + stats.CpuStats.CpuUsage.TotalUsage = v * 1000 + + case "user_usec": + stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000 + + case "system_usec": + stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000 + } + } + return nil +} diff 
--git a/libcontainer/cgroups/fs2/cpuset.go b/libcontainer/cgroups/fs2/cpuset.go new file mode 100644 index 0000000..6492ac9 --- /dev/null +++ b/libcontainer/cgroups/fs2/cpuset.go @@ -0,0 +1,22 @@ +// +build linux + +package fs2 + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +func setCpuset(dirPath string, cgroup *configs.Cgroup) error { + if cgroup.Resources.CpusetCpus != "" { + if err := fscommon.WriteFile(dirPath, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil { + return err + } + } + if cgroup.Resources.CpusetMems != "" { + if err := fscommon.WriteFile(dirPath, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil { + return err + } + } + return nil +} diff --git a/libcontainer/cgroups/fs2/defaultpath.go b/libcontainer/cgroups/fs2/defaultpath.go new file mode 100644 index 0000000..e84b33f --- /dev/null +++ b/libcontainer/cgroups/fs2/defaultpath.go @@ -0,0 +1,99 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package fs2 + +import ( + "bufio" + "io" + "os" + "path/filepath" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" + "github.com/pkg/errors" +) + +const UnifiedMountpoint = "/sys/fs/cgroup" + +func defaultDirPath(c *configs.Cgroup) (string, error) { + if (c.Name != "" || c.Parent != "") && c.Path != "" { + return "", errors.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c) + } + if len(c.Paths) != 0 { + // never set by specconv + return "", errors.Errorf("cgroup: Paths is unsupported, use Path, got %+v", c) + } + + // XXX: Do not remove this code. Path safety is important! -- cyphar + cgPath := libcontainerUtils.CleanPath(c.Path) + cgParent := libcontainerUtils.CleanPath(c.Parent) + cgName := libcontainerUtils.CleanPath(c.Name) + + ownCgroup, err := parseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName, ownCgroup) +} + +func _defaultDirPath(root, cgPath, cgParent, cgName, ownCgroup string) (string, error) { + if (cgName != "" || cgParent != "") && cgPath != "" { + return "", errors.New("cgroup: either Path or Name and Parent should be used") + } + innerPath := cgPath + if innerPath == "" { + innerPath = filepath.Join(cgParent, cgName) + } + if filepath.IsAbs(innerPath) { + return filepath.Join(root, innerPath), nil + } + return filepath.Join(root, ownCgroup, innerPath), nil +} + +// parseCgroupFile parses /proc/PID/cgroup file and return string +func parseCgroupFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + return parseCgroupFromReader(f) +} + +func parseCgroupFromReader(r io.Reader) (string, error) { + var ( + s = bufio.NewScanner(r) + ) + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + var ( + text = s.Text() + parts = strings.SplitN(text, ":", 3) + ) + 
if len(parts) < 3 { + return "", errors.Errorf("invalid cgroup entry: %q", text) + } + // text is like "0::/user.slice/user-1001.slice/session-1.scope" + if parts[0] == "0" && parts[1] == "" { + return parts[2], nil + } + } + return "", errors.New("cgroup path not found") +} diff --git a/libcontainer/cgroups/fs2/defaultpath_test.go b/libcontainer/cgroups/fs2/defaultpath_test.go new file mode 100644 index 0000000..6d5d117 --- /dev/null +++ b/libcontainer/cgroups/fs2/defaultpath_test.go @@ -0,0 +1,76 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package fs2 + +import ( + "strings" + "testing" +) + +func TestParseCgroupFromReader(t *testing.T) { + cases := map[string]string{ + "0::/user.slice/user-1001.slice/session-1.scope\n": "/user.slice/user-1001.slice/session-1.scope", + "2:cpuset:/foo\n1:name=systemd:/\n": "", + "2:cpuset:/foo\n1:name=systemd:/\n0::/user.slice/user-1001.slice/session-1.scope\n": "/user.slice/user-1001.slice/session-1.scope", + } + for s, expected := range cases { + g, err := parseCgroupFromReader(strings.NewReader(s)) + if expected != "" { + if string(g) != expected { + t.Errorf("expected %q, got %q", expected, string(g)) + } + if err != nil { + t.Error(err) + } + } else { + if err == nil { + t.Error("error is expected") + } + } + } +} + +func TestDefaultDirPath(t *testing.T) { + root := "/sys/fs/cgroup" + cases := []struct { + cgPath string + cgParent string + cgName string + ownCgroup string + expected string + }{ + { + cgPath: "/foo/bar", + ownCgroup: "/apple/banana", + expected: "/sys/fs/cgroup/foo/bar", + }, + { + cgPath: "foo/bar", + ownCgroup: "/apple/banana", + expected: "/sys/fs/cgroup/apple/banana/foo/bar", + }, + } + for _, c := range cases { + got, err := _defaultDirPath(root, c.cgPath, c.cgParent, c.cgName, c.ownCgroup) + if err != nil { + t.Fatal(err) + } + if got != c.expected { + t.Fatalf("expected %q, got %q", c.expected, got) + } + } +} diff --git a/libcontainer/cgroups/fs2/devices.go b/libcontainer/cgroups/fs2/devices.go new file mode 100644 index 0000000..e0fd685 --- /dev/null +++ b/libcontainer/cgroups/fs2/devices.go @@ -0,0 +1,73 @@ +// +build linux + +package fs2 + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups/ebpf" + "github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +func isRWM(cgroupPermissions string) bool { + r := false + w := false + m := false + for _, rn := range cgroupPermissions { + switch rn { 
+ case 'r': + r = true + case 'w': + w = true + case 'm': + m = true + } + } + return r && w && m +} + +// the logic is from crun +// https://github.com/containers/crun/blob/0.10.2/src/libcrun/cgroup.c#L1644-L1652 +func canSkipEBPFError(cgroup *configs.Cgroup) bool { + for _, dev := range cgroup.Resources.Devices { + if dev.Allow || !isRWM(dev.Permissions) { + return false + } + } + return true +} + +func setDevices(dirPath string, cgroup *configs.Cgroup) error { + devices := cgroup.Devices + if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil { + // never set by OCI specconv, but *allowAllDevices=false is still used by the integration test + if *allowAllDevices == true { + return errors.New("libcontainer AllowAllDevices is not supported, use Devices") + } + for _, ad := range cgroup.Resources.AllowedDevices { + d := *ad + d.Allow = true + devices = append(devices, &d) + } + } + if len(cgroup.Resources.DeniedDevices) != 0 { + // never set by OCI specconv + return errors.New("libcontainer DeniedDevices is not supported, use Devices") + } + insts, license, err := devicefilter.DeviceFilter(devices) + if err != nil { + return err + } + dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0600) + if err != nil { + return errors.Errorf("cannot get dir FD for %s", dirPath) + } + defer unix.Close(dirFD) + if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { + if !canSkipEBPFError(cgroup) { + return err + } + } + return nil +} diff --git a/libcontainer/cgroups/fs2/freezer.go b/libcontainer/cgroups/fs2/freezer.go new file mode 100644 index 0000000..130c63f --- /dev/null +++ b/libcontainer/cgroups/fs2/freezer.go @@ -0,0 +1,53 @@ +// +build linux + +package fs2 + +import ( + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" +) + +func setFreezer(dirPath string, state configs.FreezerState) 
error { + var desired int + switch state { + case configs.Undefined: + return nil + case configs.Frozen: + desired = 1 + case configs.Thawed: + desired = 0 + default: + return errors.Errorf("unknown freezer state %+v", state) + } + supportedErr := supportsFreezer(dirPath) + if supportedErr != nil && desired != 0 { + // the error can only be ignored when thawing (desired == 0) + return errors.Wrap(supportedErr, "freezer not supported") + } + return freezeWithInt(dirPath, desired) +} + +func supportsFreezer(dirPath string) error { + _, err := fscommon.ReadFile(dirPath, "cgroup.freeze") + return err +} + +// freezeWithInt writes desired int to "cgroup.freeze" and reads it back to verify. +func freezeWithInt(dirPath string, desired int) error { + desiredS := strconv.Itoa(desired) + if err := fscommon.WriteFile(dirPath, "cgroup.freeze", desiredS); err != nil { + return err + } + got, err := fscommon.ReadFile(dirPath, "cgroup.freeze") + if err != nil { + return err + } + if gotS := strings.TrimSpace(string(got)); gotS != desiredS { + return errors.Errorf("expected \"cgroup.freeze\" in %q to be %q, got %q", dirPath, desiredS, gotS) + } + return nil +} diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go new file mode 100644 index 0000000..4bb7091 --- /dev/null +++ b/libcontainer/cgroups/fs2/fs2.go @@ -0,0 +1,214 @@ +// +build linux + +package fs2 + +import ( + "io/ioutil" + "os" + "path/filepath" + "strings" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" +) + +// NewManager creates a manager for cgroup v2 unified hierarchy. +// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope". +// If dirPath is empty, it is automatically set using config. 
+func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) { + if config == nil { + config = &configs.Cgroup{} + } + if dirPath != "" { + if filepath.Clean(dirPath) != dirPath || !filepath.IsAbs(dirPath) { + return nil, errors.Errorf("invalid dir path %q", dirPath) + } + } else { + var err error + dirPath, err = defaultDirPath(config) + if err != nil { + return nil, err + } + } + controllers, err := detectControllers(dirPath) + if err != nil && !rootless { + return nil, err + } + + m := &manager{ + config: config, + dirPath: dirPath, + controllers: controllers, + rootless: rootless, + } + return m, nil +} + +func detectControllers(dirPath string) (map[string]struct{}, error) { + if err := os.MkdirAll(dirPath, 0755); err != nil { + return nil, err + } + controllersPath, err := securejoin.SecureJoin(dirPath, "cgroup.controllers") + if err != nil { + return nil, err + } + controllersData, err := ioutil.ReadFile(controllersPath) + if err != nil { + return nil, err + } + controllersFields := strings.Fields(string(controllersData)) + controllers := make(map[string]struct{}, len(controllersFields)) + for _, c := range controllersFields { + controllers[c] = struct{}{} + } + return controllers, nil +} + +type manager struct { + config *configs.Cgroup + // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" + dirPath string + // controllers is content of "cgroup.controllers" file. + // excludes pseudo-controllers ("devices" and "freezer"). 
+ controllers map[string]struct{} + rootless bool +} + +func (m *manager) Apply(pid int) error { + if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless { + return err + } + return nil +} + +func (m *manager) GetPids() ([]int, error) { + return cgroups.GetPids(m.dirPath) +} + +func (m *manager) GetAllPids() ([]int, error) { + return cgroups.GetAllPids(m.dirPath) +} + +func (m *manager) GetStats() (*cgroups.Stats, error) { + var ( + st cgroups.Stats + errs []error + ) + // pids (since kernel 4.5) + if _, ok := m.controllers["pids"]; ok { + if err := statPids(m.dirPath, &st); err != nil { + errs = append(errs, err) + } + } else { + if err := statPidsWithoutController(m.dirPath, &st); err != nil { + errs = append(errs, err) + } + } + // memory (since kernel 4.5) + if _, ok := m.controllers["memory"]; ok { + if err := statMemory(m.dirPath, &st); err != nil { + errs = append(errs, err) + } + } + // io (since kernel 4.5) + if _, ok := m.controllers["io"]; ok { + if err := statIo(m.dirPath, &st); err != nil { + errs = append(errs, err) + } + } + // cpu (since kernel 4.15) + if _, ok := m.controllers["cpu"]; ok { + if err := statCpu(m.dirPath, &st); err != nil { + errs = append(errs, err) + } + } + if len(errs) > 0 && !m.rootless { + return &st, errors.Errorf("error while statting cgroup v2: %+v", errs) + } + return &st, nil +} + +func (m *manager) Freeze(state configs.FreezerState) error { + if err := setFreezer(m.dirPath, state); err != nil { + return err + } + m.config.Resources.Freezer = state + return nil +} + +func (m *manager) Destroy() error { + return os.RemoveAll(m.dirPath) +} + +// GetPaths is for compatibility purpose and should be removed in future +func (m *manager) GetPaths() map[string]string { + paths := map[string]string{ + // pseudo-controller for compatibility + "devices": m.dirPath, + "freezer": m.dirPath, + } + for c := range m.controllers { + paths[c] = m.dirPath + } + return paths +} + +func (m *manager) GetUnifiedPath() 
(string, error) { + return m.dirPath, nil +} + +func (m *manager) Set(container *configs.Config) error { + if container == nil || container.Cgroups == nil { + return nil + } + var errs []error + // pids (since kernel 4.5) + if _, ok := m.controllers["pids"]; ok { + if err := setPids(m.dirPath, container.Cgroups); err != nil { + errs = append(errs, err) + } + } + // memory (since kernel 4.5) + if _, ok := m.controllers["memory"]; ok { + if err := setMemory(m.dirPath, container.Cgroups); err != nil { + errs = append(errs, err) + } + } + // io (since kernel 4.5) + if _, ok := m.controllers["io"]; ok { + if err := setIo(m.dirPath, container.Cgroups); err != nil { + errs = append(errs, err) + } + } + // cpu (since kernel 4.15) + if _, ok := m.controllers["cpu"]; ok { + if err := setCpu(m.dirPath, container.Cgroups); err != nil { + errs = append(errs, err) + } + } + // devices (since kernel 4.15, pseudo-controller) + if err := setDevices(m.dirPath, container.Cgroups); err != nil { + errs = append(errs, err) + } + // cpuset (since kernel 5.0) + if _, ok := m.controllers["cpuset"]; ok { + if err := setCpuset(m.dirPath, container.Cgroups); err != nil { + errs = append(errs, err) + } + } + // freezer (since kernel 5.2, pseudo-controller) + if err := setFreezer(m.dirPath, container.Cgroups.Freezer); err != nil { + errs = append(errs, err) + } + if len(errs) > 0 && !m.rootless { + return errors.Errorf("error while setting cgroup v2: %+v", errs) + } + m.config = container.Cgroups + return nil +} + +func (m *manager) GetCgroups() (*configs.Cgroup, error) { + return m.config, nil +} diff --git a/libcontainer/cgroups/fs2/io.go b/libcontainer/cgroups/fs2/io.go new file mode 100644 index 0000000..9a07308 --- /dev/null +++ b/libcontainer/cgroups/fs2/io.go @@ -0,0 +1,124 @@ +// +build linux + +package fs2 + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + 
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +func setIo(dirPath string, cgroup *configs.Cgroup) error { + if cgroup.Resources.BlkioWeight != 0 { + filename := "io.bfq.weight" + if err := fscommon.WriteFile(dirPath, filename, strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil { + return err + } + } + + for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice { + if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice { + if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice { + if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil { + return err + } + } + for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice { + if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil { + return err + } + } + + return nil +} + +func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) { + ret := map[string][]string{} + p := filepath.Join(dirPath, name) + f, err := os.Open(p) + if err != nil { + return nil, err + } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(line) + if len(parts) < 2 { + continue + } + ret[parts[0]] = parts[1:] + } + if err := scanner.Err(); err != nil { + return nil, err + } + return ret, nil +} + +func statIo(dirPath string, stats *cgroups.Stats) error { + // more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt + var ioServiceBytesRecursive []cgroups.BlkioStatEntry + values, err := readCgroup2MapFile(dirPath, "io.stat") + if err != nil { + return err + } + for k, v := range values { + d := strings.Split(k, ":") + if 
len(d) != 2 { + continue + } + minor, err := strconv.ParseUint(d[0], 10, 0) + if err != nil { + return err + } + major, err := strconv.ParseUint(d[1], 10, 0) + if err != nil { + return err + } + + for _, item := range v { + d := strings.Split(item, "=") + if len(d) != 2 { + continue + } + op := d[0] + + // Accommodate the cgroup v1 naming + switch op { + case "rbytes": + op = "read" + case "wbytes": + op = "write" + } + + value, err := strconv.ParseUint(d[1], 10, 0) + if err != nil { + return err + } + + entry := cgroups.BlkioStatEntry{ + Op: op, + Major: major, + Minor: minor, + Value: value, + } + ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry) + } + } + stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive} + return nil +} diff --git a/libcontainer/cgroups/fs2/memory.go b/libcontainer/cgroups/fs2/memory.go new file mode 100644 index 0000000..23eccbe --- /dev/null +++ b/libcontainer/cgroups/fs2/memory.go @@ -0,0 +1,103 @@ +// +build linux + +package fs2 + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" +) + +func setMemory(dirPath string, cgroup *configs.Cgroup) error { + if cgroup.Resources.MemorySwap != 0 { + if err := fscommon.WriteFile(dirPath, "memory.swap.max", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + return err + } + } + if cgroup.Resources.Memory != 0 { + if err := fscommon.WriteFile(dirPath, "memory.max", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + return err + } + } + + // cgroup.Resources.KernelMemory is ignored + + if cgroup.Resources.MemoryReservation != 0 { + if err := fscommon.WriteFile(dirPath, "memory.low", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil { + return err + } + } + + return nil +} + +func 
statMemory(dirPath string, stats *cgroups.Stats) error { + // Set stats from memory.stat. + statsFile, err := os.Open(filepath.Join(dirPath, "memory.stat")) + if err != nil { + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text()) + if err != nil { + return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text()) + } + stats.MemoryStats.Stats[t] = v + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] + + memoryUsage, err := getMemoryDataV2(dirPath, "") + if err != nil { + return err + } + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryDataV2(dirPath, "swap") + if err != nil { + return err + } + stats.MemoryStats.SwapUsage = swapUsage + + stats.MemoryStats.UseHierarchy = true + return nil +} + +func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "current"}, ".") + limit := strings.Join([]string{moduleName, "max"}, ".") + + value, err := fscommon.GetCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage) + } + memoryData.Usage = value + + value, err = fscommon.GetCgroupParamUint(path, limit) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit) + } + memoryData.Limit = value + + return memoryData, nil +} diff --git a/libcontainer/cgroups/fs2/pids.go b/libcontainer/cgroups/fs2/pids.go new file mode 100644 index 0000000..db2d7ac --- /dev/null +++ b/libcontainer/cgroups/fs2/pids.go @@ -0,0 +1,90 @@ +// +build linux + +package fs2 + +import 
( + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +func setPids(dirPath string, cgroup *configs.Cgroup) error { + if cgroup.Resources.PidsLimit != 0 { + // "max" is the fallback value. + limit := "max" + + if cgroup.Resources.PidsLimit > 0 { + limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10) + } + + if err := fscommon.WriteFile(dirPath, "pids.max", limit); err != nil { + return err + } + } + + return nil +} + +func isNOTSUP(err error) bool { + switch err := err.(type) { + case *os.PathError: + return err.Err == unix.ENOTSUP + default: + return false + } +} + +func statPidsWithoutController(dirPath string, stats *cgroups.Stats) error { + // if the controller is not enabled, let's read PIDS from cgroups.procs + // (or threads if cgroup.threads is enabled) + contents, err := ioutil.ReadFile(filepath.Join(dirPath, "cgroup.procs")) + if err != nil && isNOTSUP(err) { + contents, err = ioutil.ReadFile(filepath.Join(dirPath, "cgroup.threads")) + } + if err != nil { + return err + } + pids := make(map[string]string) + for _, i := range strings.Split(string(contents), "\n") { + if i != "" { + pids[i] = i + } + } + stats.PidsStats.Current = uint64(len(pids)) + stats.PidsStats.Limit = 0 + return nil +} + +func statPids(dirPath string, stats *cgroups.Stats) error { + current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current") + if err != nil { + return errors.Wrap(err, "failed to parse pids.current") + } + + maxString, err := fscommon.GetCgroupParamString(dirPath, "pids.max") + if err != nil { + return errors.Wrap(err, "failed to parse pids.max") + } + + // Default if pids.max == "max" is 0 -- which represents "no limit". 
+ var max uint64 + if maxString != "max" { + max, err = fscommon.ParseUint(maxString, 10, 64) + if err != nil { + return errors.Wrapf(err, "failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", + maxString, filepath.Join(dirPath, "pids.max")) + } + } + + stats.PidsStats.Current = current + stats.PidsStats.Limit = max + return nil +} diff --git a/libcontainer/cgroups/fscommon/fscommon.go b/libcontainer/cgroups/fscommon/fscommon.go new file mode 100644 index 0000000..dd92e8c --- /dev/null +++ b/libcontainer/cgroups/fscommon/fscommon.go @@ -0,0 +1,36 @@ +// +build linux + +package fscommon + +import ( + "io/ioutil" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/pkg/errors" +) + +func WriteFile(dir, file, data string) error { + if dir == "" { + return errors.Errorf("no directory specified for %s", file) + } + path, err := securejoin.SecureJoin(dir, file) + if err != nil { + return err + } + if err := ioutil.WriteFile(path, []byte(data), 0700); err != nil { + return errors.Wrapf(err, "failed to write %q to %q", data, path) + } + return nil +} + +func ReadFile(dir, file string) (string, error) { + if dir == "" { + return "", errors.Errorf("no directory specified for %s", file) + } + path, err := securejoin.SecureJoin(dir, file) + if err != nil { + return "", err + } + data, err := ioutil.ReadFile(path) + return string(data), err +} diff --git a/libcontainer/cgroups/fscommon/utils.go b/libcontainer/cgroups/fscommon/utils.go new file mode 100644 index 0000000..46c3c77 --- /dev/null +++ b/libcontainer/cgroups/fscommon/utils.go @@ -0,0 +1,83 @@ +// +build linux + +package fscommon + +import ( + "errors" + "fmt" + "io/ioutil" + "math" + "path/filepath" + "strconv" + "strings" +) + +var ( + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// Saturates negative values at zero and returns a uint64. +// Due to kernel bugs, some of the memory cgroup stats can be negative. 
func ParseUint(s string, base, bitSize int) (uint64, error) {
	parsed, uintErr := strconv.ParseUint(s, base, bitSize)
	if uintErr == nil {
		return parsed, nil
	}

	// The input is not a valid unsigned number. Re-parse it as a signed one:
	// a negative value is clamped to zero instead of being reported as an
	// error.
	signed, intErr := strconv.ParseInt(s, base, bitSize)
	if signed < 0 {
		switch {
		case intErr == nil:
			// Negative value that fits into the signed range.
			return 0, nil
		case intErr.(*strconv.NumError).Err == strconv.ErrRange:
			// Negative value below the signed minimum: ParseInt reports a
			// range error and yields the minimum value.
			return 0, nil
		}
	}

	// Anything else keeps the result and error of the unsigned parse.
	return parsed, uintErr
}
// GetCgroupParamString reads the file cgroupFile inside cgroupPath and
// returns its content with surrounding white space removed. A read failure is
// returned unchanged together with an empty string.
func GetCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
	fileName := filepath.Join(cgroupPath, cgroupFile)

	raw, err := ioutil.ReadFile(fileName)
	if err == nil {
		return strings.TrimSpace(string(raw)), nil
	}
	return "", err
}
+ err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755) + if err != nil { + t.Fatal(err) + } + value, err = GetCgroupParamUint(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != floatValue { + t.Fatalf("Expected %d to equal %f", value, floatValue) + } + + // Success with negative values + err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755) + if err != nil { + t.Fatal(err) + } + value, err = GetCgroupParamUint(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != 0 { + t.Fatalf("Expected %d to equal %d", value, 0) + } + + // Success with negative values lesser than min int64 + s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64) + err = ioutil.WriteFile(tempFile, []byte(s), 0755) + if err != nil { + t.Fatal(err) + } + value, err = GetCgroupParamUint(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != 0 { + t.Fatalf("Expected %d to equal %d", value, 0) + } + + // Not a float. + err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755) + if err != nil { + t.Fatal(err) + } + _, err = GetCgroupParamUint(tempDir, cgroupFile) + if err == nil { + t.Fatal("Expecting error, got none") + } + + // Unknown file. + err = os.Remove(tempFile) + if err != nil { + t.Fatal(err) + } + _, err = GetCgroupParamUint(tempDir, cgroupFile) + if err == nil { + t.Fatal("Expecting error, got none") + } +} diff --git a/libcontainer/cgroups/stats.go b/libcontainer/cgroups/stats.go new file mode 100644 index 0000000..8eeedc5 --- /dev/null +++ b/libcontainer/cgroups/stats.go @@ -0,0 +1,108 @@ +// +build linux + +package cgroups + +type ThrottlingData struct { + // Number of periods with throttling active + Periods uint64 `json:"periods,omitempty"` + // Number of periods when the container hit its throttling limit. + ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` + // Aggregate time the container was throttled for in nanoseconds. 
// MemoryData holds the counters of one memory resource of a cgroup.
// MemoryStats uses it for memory, memory+swap, kernel memory and kernel TCP
// memory.
type MemoryData struct {
	// Current usage. The cgroup v2 reader (getMemoryDataV2 in package fs2)
	// takes it from the resource's "current" file.
	Usage uint64 `json:"usage,omitempty"`
	// NOTE(review): presumably the highest usage recorded so far; the cgroup
	// v2 reader does not set it -- confirm against the cgroup v1 code.
	MaxUsage uint64 `json:"max_usage,omitempty"`
	// NOTE(review): presumably the number of times the limit was hit; the
	// cgroup v2 reader does not set it -- confirm against the cgroup v1 code.
	Failcnt uint64 `json:"failcnt"`
	// Configured limit. The cgroup v2 reader takes it from the resource's
	// "max" file.
	Limit uint64 `json:"limit"`
}
// BlkioStats groups the block I/O statistics of a cgroup. Every field is a
// list of BlkioStatEntry values, each naming a device (major/minor), an
// operation and a counter value.
type BlkioStats struct {
	// number of bytes transferred to and from the block device
	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
	IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
	// NOTE: the JSON key is "io_queue_recursive", without the "d" of the
	// field name.
	IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
	IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
	IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
	IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
	IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
	SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
}
+ Failcnt uint64 `json:"failcnt"` +} + +type Stats struct { + CpuStats CpuStats `json:"cpu_stats,omitempty"` + MemoryStats MemoryStats `json:"memory_stats,omitempty"` + PidsStats PidsStats `json:"pids_stats,omitempty"` + BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + // the map is in the format "size of hugepage: stats of the hugepage" + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` +} + +func NewStats() *Stats { + memoryStats := MemoryStats{Stats: make(map[string]uint64)} + hugetlbStats := make(map[string]HugetlbStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} +} diff --git a/libcontainer/cgroups/systemd/apply_nosystemd.go b/libcontainer/cgroups/systemd/apply_nosystemd.go new file mode 100644 index 0000000..ef0db5a --- /dev/null +++ b/libcontainer/cgroups/systemd/apply_nosystemd.go @@ -0,0 +1,67 @@ +// +build !linux + +package systemd + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager struct { + Cgroups *configs.Cgroup + Paths map[string]string +} + +func UseSystemd() bool { + return false +} + +func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Apply(pid int) error { + return fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPids() ([]int, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetAllPids() ([]int, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Destroy() error { + return fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPaths() map[string]string { + return nil +} + +func (m *Manager) GetUnifiedPath() (string, error) { + return "", fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + return nil, fmt.Errorf("Systemd not 
// LegacyManager is the cgroups.Manager that NewSystemdCgroupsManager hands
// out when the host is not in cgroup v2 unified mode.
type LegacyManager struct {
	// mu is held by Destroy, GetPaths and GetStats while they use Paths.
	mu sync.Mutex
	// Cgroups is the cgroup configuration the manager operates on.
	Cgroups *configs.Cgroup
	// Paths maps a subsystem name to the path of its cgroup directory; Apply
	// fills it in and Destroy resets it to an empty map.
	Paths map[string]string
}
+ Set(path string, cgroup *configs.Cgroup) error +} + +var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") + +type subsystemSet []subsystem + +func (s subsystemSet) Get(name string) (subsystem, error) { + for _, ss := range s { + if ss.Name() == name { + return ss, nil + } + } + return nil, errSubsystemDoesNotExist +} + +var legacySubsystems = subsystemSet{ + &fs.CpusetGroup{}, + &fs.DevicesGroup{}, + &fs.MemoryGroup{}, + &fs.CpuGroup{}, + &fs.CpuacctGroup{}, + &fs.PidsGroup{}, + &fs.BlkioGroup{}, + &fs.HugetlbGroup{}, + &fs.PerfEventGroup{}, + &fs.FreezerGroup{}, + &fs.NetPrioGroup{}, + &fs.NetClsGroup{}, + &fs.NameGroup{GroupName: "name=systemd"}, +} + +const ( + testScopeWait = 4 + testSliceWait = 4 +) + +var ( + connLock sync.Mutex + theConn *systemdDbus.Conn +) + +func newProp(name string, units interface{}) systemdDbus.Property { + return systemdDbus.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +// NOTE: This function comes from package github.com/coreos/go-systemd/util +// It was borrowed here to avoid a dependency on cgo. +// +// IsRunningSystemd checks whether the host was booted with systemd as its init +// system. This functions similarly to systemd's `sd_booted(3)`: internally, it +// checks whether /run/systemd/system/ exists and is a directory. 
+// http://www.freedesktop.org/software/systemd/man/sd_booted.html +func isRunningSystemd() bool { + fi, err := os.Lstat("/run/systemd/system") + if err != nil { + return false + } + return fi.IsDir() +} + +func UseSystemd() bool { + if !isRunningSystemd() { + return false + } + + connLock.Lock() + defer connLock.Unlock() + + if theConn == nil { + var err error + theConn, err = systemdDbus.New() + if err != nil { + return false + } + } + return true +} + +func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) { + if !isRunningSystemd() { + return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager") + } + if cgroups.IsCgroup2UnifiedMode() { + return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &UnifiedManager{ + Cgroups: config, + Paths: paths, + } + }, nil + } + return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &LegacyManager{ + Cgroups: config, + Paths: paths, + } + }, nil +} + +func (m *LegacyManager) Apply(pid int) error { + var ( + c = m.Cgroups + unitName = getUnitName(c) + slice = "system.slice" + properties []systemdDbus.Property + ) + + if c.Paths != nil { + paths := make(map[string]string) + for name, path := range c.Paths { + _, err := getSubsystemPath(m.Cgroups, name) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[name] = path + } + m.Paths = paths + return cgroups.EnterPid(m.Paths, pid) + } + + if c.Parent != "" { + slice = c.Parent + } + + properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) + + // if we create a slice, the parent is defined via a Wants= + if strings.HasSuffix(unitName, ".slice") { + properties = append(properties, systemdDbus.PropWants(slice)) + } else { + // otherwise, we use Slice= + properties = 
append(properties, systemdDbus.PropSlice(slice)) + } + + // only add pid if its valid, -1 is used w/ general slice creation. + if pid != -1 { + properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) + } + + // Check if we can delegate. This is only supported on systemd versions 218 and above. + if !strings.HasSuffix(unitName, ".slice") { + // Assume scopes always support delegation. + properties = append(properties, newProp("Delegate", true)) + } + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. + properties = append(properties, + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("BlockIOAccounting", true)) + + // Assume DefaultDependencies= will always work (the check for it was previously broken.) + properties = append(properties, + newProp("DefaultDependencies", false)) + + if c.Resources.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", uint64(c.Resources.Memory))) + } + + if c.Resources.CpuShares != 0 { + properties = append(properties, + newProp("CPUShares", c.Resources.CpuShares)) + } + + // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. + if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 { + // corresponds to USEC_INFINITY in systemd + // if USEC_INFINITY is provided, CPUQuota is left unbound by systemd + // always setting a property value ensures we can apply a quota and remove it later + cpuQuotaPerSecUSec := uint64(math.MaxUint64) + if c.Resources.CpuQuota > 0 { + // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota + // (integer percentage of CPU) internally. This means that if a fractional percent of + // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest + // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. 
+ cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod + if cpuQuotaPerSecUSec%10000 != 0 { + cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 + } + } + properties = append(properties, + newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) + } + + if c.Resources.BlkioWeight != 0 { + properties = append(properties, + newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight))) + } + + if c.Resources.PidsLimit > 0 { + properties = append(properties, + newProp("TasksAccounting", true), + newProp("TasksMax", uint64(c.Resources.PidsLimit))) + } + + // We have to set kernel memory here, as we can't change it once + // processes have been attached to the cgroup. + if c.Resources.KernelMemory != 0 { + if err := setKernelMemory(c); err != nil { + return err + } + } + + statusChan := make(chan string, 1) + if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil { + select { + case <-statusChan: + case <-time.After(time.Second): + logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. 
Continuing...", unitName) + } + } else if !isUnitExists(err) { + return err + } + + if err := joinCgroups(c, pid); err != nil { + return err + } + + paths := make(map[string]string) + for _, s := range legacySubsystems { + subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name()) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[s.Name()] = subsystemPath + } + m.Paths = paths + return nil +} + +func (m *LegacyManager) Destroy() error { + if m.Cgroups.Paths != nil { + return nil + } + m.mu.Lock() + defer m.mu.Unlock() + theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil) + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *LegacyManager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func (m *LegacyManager) GetUnifiedPath() (string, error) { + return "", errors.New("unified path is only supported when running in unified mode") +} + +func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { + path, err := getSubsystemPath(c, subsystem) + if err != nil { + return "", err + } + + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + if err := cgroups.WriteCgroupProc(path, pid); err != nil { + return "", err + } + return path, nil +} + +func joinCgroups(c *configs.Cgroup, pid int) error { + for _, sys := range legacySubsystems { + name := sys.Name() + switch name { + case "name=systemd": + // let systemd handle this + case "cpuset": + path, err := getSubsystemPath(c, name) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + s := &fs.CpusetGroup{} + if err := s.ApplyDir(path, c, pid); err != nil { + return err + } + default: + _, err := join(c, name, pid) + if err != nil { + // Even if it's `not found` error, we'll return err + // because devices cgroup is hard 
// ExpandSlice turns a systemd slice name into the path of that slice in the
// slice hierarchy. systemd encodes the hierarchy with "-", so
// "test-a-b.slice" expands to "/test.slice/test-a.slice/test-a-b.slice", and
// the root slice "-.slice" expands to "/".
//
// An error is returned for names that do not end in ".slice", contain a "/",
// or have an empty component (".slice", "test--a.slice", "-test.slice").
func ExpandSlice(slice string) (string, error) {
	const suffix = ".slice"

	invalid := func() (string, error) {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	switch {
	case len(slice) < len(suffix), !strings.HasSuffix(slice, suffix):
		// The name has to end with ".slice".
		return invalid()
	case strings.Contains(slice, "/"):
		// Path separators are not allowed.
		return invalid()
	}

	base := strings.TrimSuffix(slice, suffix)
	if base == "-" {
		// "-.slice" is the root slice.
		return "/", nil
	}

	var (
		expanded strings.Builder
		prefix   string
	)
	for _, component := range strings.Split(base, "-") {
		// An empty component comes from a leading, trailing or doubled
		// "-", or from a bare ".slice".
		if component == "" {
			return invalid()
		}

		// Each level is named after all components seen so far.
		prefix += component
		expanded.WriteString("/" + prefix + suffix)
		prefix += "-"
	}
	return expanded.String(), nil
}
Set(container *configs.Config) error { + // If Paths are set, then we are just joining cgroups paths + // and there is no need to set any values. + if m.Cgroups.Paths != nil { + return nil + } + for _, sys := range legacySubsystems { + // Get the subsystem path, but don't error out for not found cgroups. + path, err := getSubsystemPath(container.Cgroups, sys.Name()) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + if m.Paths["cpu"] != "" { + if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil { + return err + } + } + return nil +} + +func getUnitName(c *configs.Cgroup) string { + // by default, we create a scope unless the user explicitly asks for a slice. + if !strings.HasSuffix(c.Name, ".slice") { + return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name) + } + return c.Name +} + +func setKernelMemory(c *configs.Cgroup) error { + path, err := getSubsystemPath(c, "memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + // do not try to enable the kernel memory if we already have + // tasks in the cgroup. + content, err := ioutil.ReadFile(filepath.Join(path, "tasks")) + if err != nil { + return err + } + if len(content) > 0 { + return nil + } + return fs.EnableKernelMemoryAccounting(path) +} + +// isUnitExists returns true if the error is that a systemd unit already exists. 
+func isUnitExists(err error) bool { + if err != nil { + if dbusError, ok := err.(dbus.Error); ok { + return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") + } + } + return false +} + +func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) { + return m.Cgroups, nil +} diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go new file mode 100644 index 0000000..6605099 --- /dev/null +++ b/libcontainer/cgroups/systemd/unified_hierarchy.go @@ -0,0 +1,312 @@ +// +build linux + +package systemd + +import ( + "fmt" + "io/ioutil" + "math" + "os" + "path/filepath" + "strings" + "sync" + "time" + + systemdDbus "github.com/coreos/go-systemd/dbus" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs2" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +type UnifiedManager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +func (m *UnifiedManager) Apply(pid int) error { + var ( + c = m.Cgroups + unitName = getUnitName(c) + slice = "system.slice" + properties []systemdDbus.Property + ) + + if c.Paths != nil { + paths := make(map[string]string) + for name, path := range c.Paths { + _, err := getSubsystemPath(m.Cgroups, name) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[name] = path + } + m.Paths = paths + return cgroups.EnterPid(m.Paths, pid) + } + + if c.Parent != "" { + slice = c.Parent + } + + properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) + + // if we create a slice, the parent is defined via a Wants= + if strings.HasSuffix(unitName, ".slice") { + properties = append(properties, systemdDbus.PropWants(slice)) + } else { + // otherwise, we use Slice= + properties = 
append(properties, systemdDbus.PropSlice(slice)) + } + + // only add pid if its valid, -1 is used w/ general slice creation. + if pid != -1 { + properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) + } + + // Check if we can delegate. This is only supported on systemd versions 218 and above. + if !strings.HasSuffix(unitName, ".slice") { + // Assume scopes always support delegation. + properties = append(properties, newProp("Delegate", true)) + } + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. + properties = append(properties, + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("BlockIOAccounting", true)) + + // Assume DefaultDependencies= will always work (the check for it was previously broken.) + properties = append(properties, + newProp("DefaultDependencies", false)) + + if c.Resources.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", uint64(c.Resources.Memory))) + } + + if c.Resources.CpuShares != 0 { + properties = append(properties, + newProp("CPUShares", c.Resources.CpuShares)) + } + + // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. + if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 { + // corresponds to USEC_INFINITY in systemd + // if USEC_INFINITY is provided, CPUQuota is left unbound by systemd + // always setting a property value ensures we can apply a quota and remove it later + cpuQuotaPerSecUSec := uint64(math.MaxUint64) + if c.Resources.CpuQuota > 0 { + // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota + // (integer percentage of CPU) internally. This means that if a fractional percent of + // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest + // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. 
+ cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod + if cpuQuotaPerSecUSec%10000 != 0 { + cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 + } + } + properties = append(properties, + newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) + } + + if c.Resources.BlkioWeight != 0 { + properties = append(properties, + newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight))) + } + + if c.Resources.PidsLimit > 0 { + properties = append(properties, + newProp("TasksAccounting", true), + newProp("TasksMax", uint64(c.Resources.PidsLimit))) + } + + // We have to set kernel memory here, as we can't change it once + // processes have been attached to the cgroup. + if c.Resources.KernelMemory != 0 { + if err := setKernelMemory(c); err != nil { + return err + } + } + + statusChan := make(chan string, 1) + if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil { + select { + case <-statusChan: + case <-time.After(time.Second): + logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. 
// createCgroupsv2Path creates every missing directory between the cgroup v2
// root (/sys/fs/cgroup) and path. For each cgroup above the final one it
// enables all controllers listed in the root's cgroup.controllers file by
// writing "+<controller> ..." to that cgroup's cgroup.subtree_control.
//
// path must be the cgroup root itself or lie below it; any other path is
// rejected with an "invalid cgroup path" error before the filesystem is
// touched. When an error is returned, the directories created by this call
// are removed again, deepest first.
func createCgroupsv2Path(path string) (Err error) {
	const cgroupRoot = "/sys/fs/cgroup"

	// Normalize first so that "..", doubled or trailing slashes can neither
	// defeat the containment check nor skew the element indexes used below.
	// The check compares whole path components: "/sys/fs/cgroupfoo" is not
	// inside the cgroup root.
	cleaned := filepath.Clean(path)
	if cleaned != cgroupRoot && !strings.HasPrefix(cleaned, cgroupRoot+"/") {
		return fmt.Errorf("invalid cgroup path %s", path)
	}

	content, err := ioutil.ReadFile(filepath.Join(cgroupRoot, "cgroup.controllers"))
	if err != nil {
		return err
	}

	// Build the "+ctrl1 +ctrl2 ..." request. Fields (rather than splitting
	// on a single space) yields no element at all for an empty file.
	controllers := strings.Fields(string(content))
	for i, c := range controllers {
		controllers[i] = "+" + c
	}
	resByte := []byte(strings.Join(controllers, " "))

	// elements[0] is always "cgroup" (the root itself), which must already
	// exist; only the elements after it are created.
	current := "/sys/fs"
	elements := strings.Split(cleaned, "/")[3:]
	for i, e := range elements {
		current = filepath.Join(current, e)
		if i > 0 {
			if err := os.Mkdir(current, 0755); err != nil {
				if !os.IsExist(err) {
					return err
				}
			} else {
				// If the directory was created, be sure it is not left
				// around on errors. Capture this iteration's path: the
				// closure runs after the loop, when current has moved on.
				created := current
				defer func() {
					if Err != nil {
						os.Remove(created)
					}
				}()
			}
		}
		// Controllers are delegated to children, so the leaf cgroup itself
		// is left alone. Nothing is written when no controller is available.
		if i < len(elements)-1 && len(resByte) > 0 {
			if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), resByte, 0755); err != nil {
				return err
			}
		}
	}
	return nil
}
"io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "syscall" + "time" + + units "github.com/docker/go-units" + "golang.org/x/sys/unix" +) + +const ( + CgroupNamePrefix = "name=" + CgroupProcesses = "cgroup.procs" + unifiedMountpoint = "/sys/fs/cgroup" +) + +var ( + isUnifiedOnce sync.Once + isUnified bool +) + +// HugePageSizeUnitList is a list of the units used by the linux kernel when +// naming the HugePage control files. +// https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt +// TODO Since the kernel only use KB, MB and GB; TB and PB should be removed, +// depends on https://github.com/docker/go-units/commit/a09cd47f892041a4fac473133d181f5aea6fa393 +var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"} + +// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. +func IsCgroup2UnifiedMode() bool { + isUnifiedOnce.Do(func() { + var st syscall.Statfs_t + if err := syscall.Statfs(unifiedMountpoint, &st); err != nil { + panic("cannot statfs cgroup root") + } + isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC + }) + return isUnified +} + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt +func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { + if IsCgroup2UnifiedMode() { + return unifiedMountpoint, nil + } + mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) + return mnt, err +} + +func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) { + // We are not using mount.GetMounts() because it's super-inefficient, + // parsing it directly sped up x10 times because of not using Sscanf. + // It was one of two major performance drawbacks in container start. 
// GetClosestMountpointAncestor returns the deepest mount point found in
// mountinfo that is either dir itself or one of its parent directories.
//
// mountinfo is text with one entry per line in which the mount point is the
// fifth whitespace-separated field (the layout of /proc/[pid]/mountinfo);
// lines with fewer than five fields are skipped. The empty string is
// returned when no entry contains dir.
func GetClosestMountpointAncestor(dir, mountinfo string) string {
	deepestMountPoint := ""
	for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
		mountInfoParts := strings.Fields(mountInfoEntry)
		if len(mountInfoParts) < 5 {
			continue
		}
		mountPoint := mountInfoParts[4]

		// Compare whole path components, not raw string prefixes:
		// "/foo/bar/baz" is not an ancestor of "/foo/bar/bazza".
		// TrimSuffix makes the root mount point "/" match every absolute dir.
		isAncestor := dir == mountPoint ||
			strings.HasPrefix(dir, strings.TrimSuffix(mountPoint, "/")+"/")

		// All ancestors of dir are nested inside one another, so the
		// longest one is the deepest.
		if isAncestor && len(mountPoint) >= len(deepestMountPoint) {
			deepestMountPoint = mountPoint
		}
	}
	return deepestMountPoint
}
bufio.NewScanner(f) + for scanner.Scan() { + text := scanner.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for "cgroup" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" { + // Check that the mount is properly formatted. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + return filepath.Dir(fields[4]), nil + } + } + if err := scanner.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError("cgroup") +} + +type Mount struct { + Mountpoint string + Root string + Subsystems []string +} + +func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { + if len(m.Subsystems) == 0 { + return "", fmt.Errorf("no subsystem for mount") + } + + return getControllerPath(m.Subsystems[0], cgroups) +} + +func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) { + res := make([]Mount, 0, len(ss)) + scanner := bufio.NewScanner(mi) + numFound := 0 + for scanner.Scan() && numFound < len(ss) { + txt := scanner.Text() + sepIdx := strings.Index(txt, " - ") + if sepIdx == -1 { + return nil, fmt.Errorf("invalid mountinfo format") + } + if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" { + continue + } + fields := strings.Split(txt, " ") + m := Mount{ + Mountpoint: fields[4], + Root: fields[3], + } + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + seen, known := ss[opt] + if !known || (!all && seen) { + continue + } + ss[opt] = true + if strings.HasPrefix(opt, CgroupNamePrefix) { + opt = opt[len(CgroupNamePrefix):] + } + m.Subsystems = 
append(m.Subsystems, opt) + numFound++ + } + if len(m.Subsystems) > 0 || all { + res = append(res, m) + } + } + if err := scanner.Err(); err != nil { + return nil, err + } + return res, nil +} + +// GetCgroupMounts returns the mounts for the cgroup subsystems. +// all indicates whether to return just the first instance or all the mounts. +func GetCgroupMounts(all bool) ([]Mount, error) { + if IsCgroup2UnifiedMode() { + availableControllers, err := GetAllSubsystems() + if err != nil { + return nil, err + } + m := Mount{ + Mountpoint: unifiedMountpoint, + Root: unifiedMountpoint, + Subsystems: availableControllers, + } + return []Mount{m}, nil + } + + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer f.Close() + + allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + allMap := make(map[string]bool) + for s := range allSubsystems { + allMap[s] = false + } + return getCgroupMountsHelper(allMap, f, all) +} + +// GetAllSubsystems returns all the cgroup subsystems supported by the kernel +func GetAllSubsystems() ([]string, error) { + // /proc/cgroups is meaningless for v2 + // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features + if IsCgroup2UnifiedMode() { + // "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers. + // - devices: implemented in kernel 4.15 + // - freezer: implemented in kernel 5.2 + // We assume these are always available, as it is hard to detect availability. + pseudo := []string{"devices", "freezer"} + data, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers") + if err != nil { + return nil, err + } + subsystems := append(pseudo, strings.Fields(string(data))...) 
+ return subsystems, nil + } + f, err := os.Open("/proc/cgroups") + if err != nil { + return nil, err + } + defer f.Close() + + subsystems := []string{} + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + if text[0] != '#' { + parts := strings.Fields(text) + if len(parts) >= 4 && parts[3] != "0" { + subsystems = append(subsystems, parts[0]) + } + } + } + if err := s.Err(); err != nil { + return nil, err + } + return subsystems, nil +} + +// GetOwnCgroup returns the relative path to the cgroup docker is running in. +func GetOwnCgroup(subsystem string) (string, error) { + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetOwnCgroupPath(subsystem string) (string, error) { + cgroup, err := GetOwnCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func GetInitCgroup(subsystem string) (string, error) { + cgroups, err := ParseCgroupFile("/proc/1/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetInitCgroupPath(subsystem string) (string, error) { + cgroup, err := GetInitCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func getCgroupPathHelper(subsystem, cgroup string) (string, error) { + mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) + if err != nil { + return "", err + } + + // This is needed for nested containers, because in /proc/self/cgroup we + // see paths from host, which don't exist in container. 
+ relCgroup, err := filepath.Rel(root, cgroup) + if err != nil { + return "", err + } + + return filepath.Join(mnt, relCgroup), nil +} + +func readProcsFile(dir string) ([]int, error) { + f, err := os.Open(filepath.Join(dir, CgroupProcesses)) + if err != nil { + return nil, err + } + defer f.Close() + + var ( + s = bufio.NewScanner(f) + out = []int{} + ) + + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + return out, nil +} + +// ParseCgroupFile parses the given cgroup file, typically from +// /proc//cgroup, into a map of subgroups to cgroup names. +func ParseCgroupFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + return parseCgroupFromReader(f) +} + +// helper function for ParseCgroupFile to make testing easier +func parseCgroupFromReader(r io.Reader) (map[string]string, error) { + s := bufio.NewScanner(r) + cgroups := make(map[string]string) + + for s.Scan() { + text := s.Text() + // from cgroups(7): + // /proc/[pid]/cgroup + // ... + // For each cgroup hierarchy ... 
// PathExists reports whether os.Stat succeeds for path. Any Stat failure,
// not only a missing file, is reported as false.
func PathExists(path string) bool {
	_, statErr := os.Stat(path)
	return statErr == nil
}
// RemovePaths removes every directory tree named by the values of paths.
//
// Up to five passes are made over the remaining entries, sleeping 10ms
// before the second pass and doubling the delay before each later one. An
// entry is deleted from paths (the caller's map is modified) as soon as its
// path no longer exists. nil is returned once the map is empty; otherwise
// the error lists the entries that could not be removed.
func RemovePaths(paths map[string]string) (err error) {
	const attempts = 5
	delay := 10 * time.Millisecond
	for i := 0; i < attempts; i++ {
		if i != 0 {
			time.Sleep(delay)
			delay *= 2
		}
		for s, p := range paths {
			// The result of RemoveAll is deliberately ignored: it almost
			// always returns an error, even on already removed cgroups, so
			// existence is probed with Stat instead.
			// TODO: here probably should be logging
			os.RemoveAll(p)
			if _, statErr := os.Stat(p); os.IsNotExist(statErr) {
				delete(paths, s)
			}
		}
		if len(paths) == 0 {
			return nil
		}
	}
	return fmt.Errorf("Failed to remove paths: %v", paths)
}
+ return nil + }) + return pids, err +} + +// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file +func WriteCgroupProc(dir string, pid int) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. + if dir == "" { + return fmt.Errorf("no such directory for %s", CgroupProcesses) + } + + // Dont attach any pid to the cgroup if -1 is specified as a pid + if pid == -1 { + return nil + } + + cgroupProcessesFile, err := os.OpenFile(filepath.Join(dir, CgroupProcesses), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0700) + if err != nil { + return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) + } + defer cgroupProcessesFile.Close() + + for i := 0; i < 5; i++ { + _, err = cgroupProcessesFile.WriteString(strconv.Itoa(pid)) + if err == nil { + return nil + } + + // EINVAL might mean that the task being added to cgroup.procs is in state + // TASK_NEW. We should attempt to do so again. 
+ if isEINVAL(err) { + time.Sleep(30 * time.Millisecond) + continue + } + + return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) + } + return err +} + +func isEINVAL(err error) bool { + switch err := err.(type) { + case *os.PathError: + return err.Err == unix.EINVAL + default: + return false + } +} diff --git a/libcontainer/cgroups/utils_test.go b/libcontainer/cgroups/utils_test.go new file mode 100644 index 0000000..3214b9d --- /dev/null +++ b/libcontainer/cgroups/utils_test.go @@ -0,0 +1,459 @@ +// +build linux + +package cgroups + +import ( + "bytes" + "errors" + "fmt" + "reflect" + "strings" + "testing" +) + +const fedoraMountinfo = `15 35 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw +16 35 0:14 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel +17 35 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8056484k,nr_inodes=2014121,mode=755 +18 16 0:15 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw +19 16 0:13 / /sys/fs/selinux rw,relatime shared:8 - selinuxfs selinuxfs rw +20 17 0:16 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel +21 17 0:10 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000 +22 35 0:17 / /run rw,nosuid,nodev shared:21 - tmpfs tmpfs rw,seclabel,mode=755 +23 16 0:18 / /sys/fs/cgroup rw,nosuid,nodev,noexec shared:9 - tmpfs tmpfs rw,seclabel,mode=755 +24 23 0:19 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd +25 16 0:20 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw +26 23 0:21 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,cpuset,clone_children +27 23 0:22 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu,clone_children +28 23 
0:23 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,memory,clone_children +29 23 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,devices,clone_children +30 23 0:25 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,freezer,clone_children +31 23 0:26 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,net_cls,clone_children +32 23 0:27 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,blkio,clone_children +33 23 0:28 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,perf_event,clone_children +34 23 0:29 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,hugetlb,clone_children +35 1 253:2 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root--f20 rw,seclabel,data=ordered +36 15 0:30 / /proc/sys/fs/binfmt_misc rw,relatime shared:22 - autofs systemd-1 rw,fd=38,pgrp=1,timeout=300,minproto=5,maxproto=5,direct +37 17 0:12 / /dev/mqueue rw,relatime shared:23 - mqueue mqueue rw,seclabel +38 35 0:31 / /tmp rw shared:24 - tmpfs tmpfs rw,seclabel +39 17 0:32 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel +40 16 0:7 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw +41 16 0:33 / /sys/kernel/config rw,relatime shared:27 - configfs configfs rw +42 35 0:34 / /var/lib/nfs/rpc_pipefs rw,relatime shared:28 - rpc_pipefs sunrpc rw +43 15 0:35 / /proc/fs/nfsd rw,relatime shared:29 - nfsd sunrpc rw +45 35 8:17 / /boot rw,relatime shared:30 - ext4 /dev/sdb1 rw,seclabel,data=ordered +46 35 253:4 / /home rw,relatime shared:31 - ext4 /dev/mapper/ssd-home rw,seclabel,data=ordered +47 35 253:5 / /var/lib/libvirt/images rw,noatime,nodiratime shared:32 - ext4 /dev/mapper/ssd-virt rw,seclabel,discard,data=ordered +48 35 253:12 / /mnt/old rw,relatime shared:33 - ext4 /dev/mapper/HelpDeskRHEL6-FedoraRoot 
rw,seclabel,data=ordered +121 22 0:36 / /run/user/1000/gvfs rw,nosuid,nodev,relatime shared:104 - fuse.gvfsd-fuse gvfsd-fuse rw,user_id=1000,group_id=1000 +124 16 0:37 / /sys/fs/fuse/connections rw,relatime shared:107 - fusectl fusectl rw +165 38 253:3 / /tmp/mnt rw,relatime shared:147 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered +167 35 253:15 / /var/lib/docker/devicemapper/mnt/aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,relatime shared:149 - ext4 /dev/mapper/docker-253:2-425882-aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,seclabel,discard,stripe=16,data=ordered +171 35 253:16 / /var/lib/docker/devicemapper/mnt/c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,relatime shared:153 - ext4 /dev/mapper/docker-253:2-425882-c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,seclabel,discard,stripe=16,data=ordered +175 35 253:17 / /var/lib/docker/devicemapper/mnt/1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,relatime shared:157 - ext4 /dev/mapper/docker-253:2-425882-1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,seclabel,discard,stripe=16,data=ordered +179 35 253:18 / /var/lib/docker/devicemapper/mnt/d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,relatime shared:161 - ext4 /dev/mapper/docker-253:2-425882-d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,seclabel,discard,stripe=16,data=ordered +183 35 253:19 / /var/lib/docker/devicemapper/mnt/6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,relatime shared:165 - ext4 /dev/mapper/docker-253:2-425882-6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,seclabel,discard,stripe=16,data=ordered +187 35 253:20 / /var/lib/docker/devicemapper/mnt/8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,relatime shared:169 - ext4 
/dev/mapper/docker-253:2-425882-8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,seclabel,discard,stripe=16,data=ordered +191 35 253:21 / /var/lib/docker/devicemapper/mnt/c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,relatime shared:173 - ext4 /dev/mapper/docker-253:2-425882-c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,seclabel,discard,stripe=16,data=ordered +195 35 253:22 / /var/lib/docker/devicemapper/mnt/2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,relatime shared:177 - ext4 /dev/mapper/docker-253:2-425882-2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,seclabel,discard,stripe=16,data=ordered +199 35 253:23 / /var/lib/docker/devicemapper/mnt/37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,relatime shared:181 - ext4 /dev/mapper/docker-253:2-425882-37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,seclabel,discard,stripe=16,data=ordered +203 35 253:24 / /var/lib/docker/devicemapper/mnt/aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,relatime shared:185 - ext4 /dev/mapper/docker-253:2-425882-aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,seclabel,discard,stripe=16,data=ordered +207 35 253:25 / /var/lib/docker/devicemapper/mnt/928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,relatime shared:189 - ext4 /dev/mapper/docker-253:2-425882-928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,seclabel,discard,stripe=16,data=ordered +211 35 253:26 / /var/lib/docker/devicemapper/mnt/0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,relatime shared:193 - ext4 /dev/mapper/docker-253:2-425882-0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,seclabel,discard,stripe=16,data=ordered +215 35 253:27 / /var/lib/docker/devicemapper/mnt/d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,relatime shared:197 - ext4 
/dev/mapper/docker-253:2-425882-d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,seclabel,discard,stripe=16,data=ordered +219 35 253:28 / /var/lib/docker/devicemapper/mnt/bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,relatime shared:201 - ext4 /dev/mapper/docker-253:2-425882-bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,seclabel,discard,stripe=16,data=ordered +223 35 253:29 / /var/lib/docker/devicemapper/mnt/7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,relatime shared:205 - ext4 /dev/mapper/docker-253:2-425882-7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,seclabel,discard,stripe=16,data=ordered +227 35 253:30 / /var/lib/docker/devicemapper/mnt/c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,relatime shared:209 - ext4 /dev/mapper/docker-253:2-425882-c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,seclabel,discard,stripe=16,data=ordered +231 35 253:31 / /var/lib/docker/devicemapper/mnt/8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,relatime shared:213 - ext4 /dev/mapper/docker-253:2-425882-8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,seclabel,discard,stripe=16,data=ordered +235 35 253:32 / /var/lib/docker/devicemapper/mnt/1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,relatime shared:217 - ext4 /dev/mapper/docker-253:2-425882-1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,seclabel,discard,stripe=16,data=ordered +239 35 253:33 / /var/lib/docker/devicemapper/mnt/e9aa60c60128cad1 rw,relatime shared:221 - ext4 /dev/mapper/docker-253:2-425882-e9aa60c60128cad1 rw,seclabel,discard,stripe=16,data=ordered +243 35 253:34 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,relatime shared:225 - ext4 
/dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,seclabel,discard,stripe=16,data=ordered +247 35 253:35 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,relatime shared:229 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,seclabel,discard,stripe=16,data=ordered +31 21 0:23 / /DATA/foo_bla_bla rw,relatime - cifs //foo/BLA\040BLA\040BLA/ rw,sec=ntlm,cache=loose,unc=\\foo\BLA BLA BLA,username=my_login,domain=mydomain.com,uid=12345678,forceuid,gid=12345678,forcegid,addr=10.1.30.10,file_mode=0755,dir_mode=0755,nounix,rsize=61440,wsize=65536,actimeo=1` + +const systemdMountinfo = `115 83 0:32 / / rw,relatime - aufs none rw,si=c0bd3d3,dio,dirperm1 +116 115 0:35 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw +117 115 0:36 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755 +118 117 0:37 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666 +119 115 0:38 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw +120 119 0:39 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755 +121 120 0:19 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd +122 120 0:20 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices +123 120 0:21 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer +124 120 0:22 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup 
rw,memory +125 120 0:23 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio +126 120 0:24 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio +127 120 0:25 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children +128 120 0:26 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct +129 120 0:27 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event,release_agent=/run/cgmanager/agents/cgm-release-agent.perf_event +130 115 43:0 /var/lib/docker/volumes/a44a712176377f57c094397330ee04387284c478364eb25f4c3d25f775f25c26/_data /var/lib/docker rw,relatime - ext4 /dev/nbd0 rw,data=ordered +131 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/nbd0 rw,data=ordered +132 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hostname /etc/hostname rw,relatime - ext4 /dev/nbd0 rw,data=ordered +133 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hosts /etc/hosts rw,relatime - ext4 /dev/nbd0 rw,data=ordered +134 117 0:33 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k +135 117 0:13 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw +136 117 0:12 /1 /dev/console rw,nosuid,noexec,relatime - devpts none 
rw,gid=5,mode=620,ptmxmode=000 +84 115 0:40 / /tmp rw,relatime - tmpfs none rw` + +const bedrockMountinfo = `120 17 0:28 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755 +124 28 0:28 / /bedrock/strata/arch/sys/fs/cgroup rw,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755 +123 53 0:28 / /bedrock/strata/fallback/sys/fs/cgroup rw,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755 +122 71 0:28 / /bedrock/strata/gentoo/sys/fs/cgroup rw,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755 +121 89 0:28 / /bedrock/strata/kde/sys/fs/cgroup rw,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755 +125 120 0:29 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd +129 124 0:29 / /bedrock/strata/arch/sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd +128 123 0:29 / /bedrock/strata/fallback/sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd +127 122 0:29 / /bedrock/strata/gentoo/sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd +126 121 0:29 / /bedrock/strata/kde/sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd +140 120 0:32 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio +144 124 0:32 / /bedrock/strata/arch/sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio +143 123 0:32 / /bedrock/strata/fallback/sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio +142 122 0:32 
/ /bedrock/strata/gentoo/sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio +141 121 0:32 / /bedrock/strata/kde/sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio +145 120 0:33 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio +149 124 0:33 / /bedrock/strata/arch/sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio +148 123 0:33 / /bedrock/strata/fallback/sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio +147 122 0:33 / /bedrock/strata/gentoo/sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio +146 121 0:33 / /bedrock/strata/kde/sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio +150 120 0:34 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct +154 124 0:34 / /bedrock/strata/arch/sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct +153 123 0:34 / /bedrock/strata/fallback/sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct +152 122 0:34 / /bedrock/strata/gentoo/sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct +151 121 0:34 / /bedrock/strata/kde/sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct +155 120 0:35 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset +159 124 0:35 / /bedrock/strata/arch/sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset +158 123 0:35 / /bedrock/strata/fallback/sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset +157 122 0:35 / /bedrock/strata/gentoo/sys/fs/cgroup/cpuset 
rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset +156 121 0:35 / /bedrock/strata/kde/sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset +160 120 0:36 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices +164 124 0:36 / /bedrock/strata/arch/sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices +163 123 0:36 / /bedrock/strata/fallback/sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices +162 122 0:36 / /bedrock/strata/gentoo/sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices +161 121 0:36 / /bedrock/strata/kde/sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices +165 120 0:37 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory +169 124 0:37 / /bedrock/strata/arch/sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory +168 123 0:37 / /bedrock/strata/fallback/sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory +167 122 0:37 / /bedrock/strata/gentoo/sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory +166 121 0:37 / /bedrock/strata/kde/sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory +170 120 0:38 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer +174 124 0:38 / /bedrock/strata/arch/sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer +173 123 0:38 / /bedrock/strata/fallback/sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer +172 122 0:38 / /bedrock/strata/gentoo/sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer +171 121 0:38 / /bedrock/strata/kde/sys/fs/cgroup/freezer 
rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer +175 120 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids +179 124 0:39 / /bedrock/strata/arch/sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids +178 123 0:39 / /bedrock/strata/fallback/sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids +177 122 0:39 / /bedrock/strata/gentoo/sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids +176 121 0:39 / /bedrock/strata/kde/sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids +180 120 0:40 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event +184 124 0:40 / /bedrock/strata/arch/sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event +183 123 0:40 / /bedrock/strata/fallback/sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event +182 122 0:40 / /bedrock/strata/gentoo/sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event +181 121 0:40 / /bedrock/strata/kde/sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event` + +const cgroup2Mountinfo = `18 64 0:18 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel +19 64 0:4 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw +20 64 0:6 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8171204k,nr_inodes=2042801,mode=755 +21 18 0:19 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw +22 20 0:20 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel +23 20 0:21 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000 +24 64 0:22 / /run rw,nosuid,nodev shared:24 - tmpfs tmpfs rw,seclabel,mode=755 +25 18 
0:23 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:8 - tmpfs tmpfs ro,seclabel,mode=755 +26 25 0:24 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:9 - cgroup2 cgroup rw +27 18 0:25 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw,seclabel +28 18 0:26 / /sys/firmware/efi/efivars rw,nosuid,nodev,noexec,relatime shared:21 - efivarfs efivarfs rw +29 25 0:27 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,cpu,cpuacct +30 25 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,memory +31 25 0:29 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,net_cls,net_prio +32 25 0:30 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,blkio +33 25 0:31 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,perf_event +34 25 0:32 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,hugetlb +35 25 0:33 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,freezer +36 25 0:34 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,cpuset +37 25 0:35 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices +38 25 0:36 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,pids +61 18 0:37 / /sys/kernel/config rw,relatime shared:22 - configfs configfs rw +64 0 253:0 / / rw,relatime shared:1 - ext4 /dev/mapper/fedora_dhcp--16--129-root rw,seclabel,data=ordered +39 18 0:17 / /sys/fs/selinux rw,relatime shared:23 - selinuxfs selinuxfs rw +40 20 0:16 / /dev/mqueue rw,relatime shared:25 - mqueue mqueue rw,seclabel +41 20 0:39 / /dev/hugepages rw,relatime shared:26 - hugetlbfs hugetlbfs rw,seclabel +` + +func TestGetCgroupMounts(t *testing.T) { + type testData struct { + mountInfo string + root string + 
subsystems map[string]bool + } + testTable := []testData{ + { + mountInfo: fedoraMountinfo, + root: "/", + subsystems: map[string]bool{ + "cpuset": false, + "cpu": false, + "cpuacct": false, + "memory": false, + "devices": false, + "freezer": false, + "net_cls": false, + "blkio": false, + "perf_event": false, + "hugetlb": false, + }, + }, + { + mountInfo: systemdMountinfo, + root: "/system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope", + subsystems: map[string]bool{ + "cpuset": false, + "cpu": false, + "cpuacct": false, + "memory": false, + "devices": false, + "freezer": false, + "net_cls": false, + "blkio": false, + "perf_event": false, + }, + }, + { + mountInfo: bedrockMountinfo, + root: "/", + subsystems: map[string]bool{ + "cpuset": false, + "cpu": false, + "cpuacct": false, + "memory": false, + "devices": false, + "freezer": false, + "net_cls": false, + "blkio": false, + "perf_event": false, + }, + }, + } + for _, td := range testTable { + mi := bytes.NewBufferString(td.mountInfo) + cgMounts, err := getCgroupMountsHelper(td.subsystems, mi, false) + if err != nil { + t.Fatal(err) + } + cgMap := make(map[string]Mount) + for _, m := range cgMounts { + for _, ss := range m.Subsystems { + cgMap[ss] = m + } + } + for ss := range td.subsystems { + m, ok := cgMap[ss] + if !ok { + t.Fatalf("%s not found", ss) + } + if m.Root != td.root { + t.Fatalf("unexpected root for %s: %s", ss, m.Root) + } + if !strings.HasPrefix(m.Mountpoint, "/sys/fs/cgroup/") && !strings.Contains(m.Mountpoint, ss) { + t.Fatalf("unexpected mountpoint for %s: %s", ss, m.Mountpoint) + } + var ssFound bool + for _, mss := range m.Subsystems { + if mss == ss { + ssFound = true + break + } + } + if !ssFound { + t.Fatalf("subsystem %s not found in Subsystems field %v", ss, m.Subsystems) + } + } + } +} + +func BenchmarkGetCgroupMounts(b *testing.B) { + subsystems := map[string]bool{ + "cpuset": false, + "cpu": false, + "cpuacct": false, + "memory": false, + 
"devices": false, + "freezer": false, + "net_cls": false, + "blkio": false, + "perf_event": false, + "hugetlb": false, + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + mi := bytes.NewBufferString(fedoraMountinfo) + b.StartTimer() + if _, err := getCgroupMountsHelper(subsystems, mi, false); err != nil { + b.Fatal(err) + } + } +} + +func TestParseCgroupString(t *testing.T) { + testCases := []struct { + input string + expectedError error + expectedOutput map[string]string + }{ + { + // Taken from a CoreOS instance running systemd 225 with CPU/Mem + // accounting enabled in systemd + input: `9:blkio:/ +8:freezer:/ +7:perf_event:/ +6:devices:/system.slice/system-sshd.slice +5:cpuset:/ +4:cpu,cpuacct:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service +3:net_cls,net_prio:/ +2:memory:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service +1:name=systemd:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service`, + expectedOutput: map[string]string{ + "name=systemd": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service", + "blkio": "/", + "freezer": "/", + "perf_event": "/", + "devices": "/system.slice/system-sshd.slice", + "cpuset": "/", + "cpu": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service", + "cpuacct": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service", + "net_cls": "/", + "net_prio": "/", + "memory": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service", + }, + }, + { + input: `malformed input`, + expectedError: fmt.Errorf(`invalid cgroup entry: must contain at least two colons: malformed input`), + }, + } + + for ndx, testCase := range testCases { + out, err := parseCgroupFromReader(strings.NewReader(testCase.input)) + if err != nil { + if testCase.expectedError == nil || testCase.expectedError.Error() != 
err.Error() { + t.Errorf("%v: expected error %v, got error %v", ndx, testCase.expectedError, err) + } + } else { + if !reflect.DeepEqual(testCase.expectedOutput, out) { + t.Errorf("%v: expected output %v, got error %v", ndx, testCase.expectedOutput, out) + } + } + } + +} + +func TestIgnoreCgroup2Mount(t *testing.T) { + subsystems := map[string]bool{ + "cpuset": false, + "cpu": false, + "cpuacct": false, + "memory": false, + "devices": false, + "freezer": false, + "net_cls": false, + "blkio": false, + "perf_event": false, + "pids": false, + "name=systemd": false, + } + + mi := bytes.NewBufferString(cgroup2Mountinfo) + cgMounts, err := getCgroupMountsHelper(subsystems, mi, false) + if err != nil { + t.Fatal(err) + } + for _, m := range cgMounts { + if m.Mountpoint == "/sys/fs/cgroup/systemd" { + t.Errorf("parsed a cgroup2 mount at /sys/fs/cgroup/systemd instead of ignoring it") + } + } +} + +func TestGetClosestMountpointAncestor(t *testing.T) { + fakeMountInfo := ` 18 24 0:17 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw +100 99 1:31 / /foo/bar rw,relatime - fake fake rw,fake +100 99 1:31 / /foo/bar/baz2 rw,relatime - fake fake rw,fake +100 99 1:31 / /foo/bar/baz rw,relatime - fake fake rw,fake +100 99 1:31 / /foo/bar/bazza rw,relatime - fake fake rw,fake +100 99 1:31 / /foo/bar/baz3 rw,relatime - fake fake rw,fake +100 99 1:31 / /foo rw,relatime - fake fake rw,fake +100 99 1:31 / /unrelated rw,relatime - fake fake rw,fake +100 99 1:31 / / rw,relatime - fake fake rw,fake +` + testCases := []struct { + input string + output string + }{ + {input: "/foo/bar/baz/a/b/c", output: "/foo/bar/baz"}, + {input: "/foo/bar/baz", output: "/foo/bar/baz"}, + {input: "/foo/bar/bazza", output: "/foo/bar/bazza"}, + {input: "/a/b/c/d", output: "/"}, + } + + for _, c := range testCases { + mountpoint := GetClosestMountpointAncestor(c.input, fakeMountInfo) + if mountpoint != c.output { + t.Errorf("expected %s, got %s", c.output, mountpoint) + } + } +} + +func 
TestFindCgroupMountpointAndRoot(t *testing.T) { + fakeMountInfo := ` +35 27 0:29 / /foo rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices +35 27 0:29 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices +` + testCases := []struct { + cgroupPath string + output string + }{ + {cgroupPath: "/sys/fs", output: "/sys/fs/cgroup/devices"}, + {cgroupPath: "", output: "/foo"}, + } + + for _, c := range testCases { + mountpoint, _, _ := findCgroupMountpointAndRootFromReader(strings.NewReader(fakeMountInfo), c.cgroupPath, "devices") + if mountpoint != c.output { + t.Errorf("expected %s, got %s", c.output, mountpoint) + } + } +} + +func TestGetHugePageSizeImpl(t *testing.T) { + + testCases := []struct { + inputFiles []string + outputPageSizes []string + err error + }{ + { + inputFiles: []string{"hugepages-1048576kB", "hugepages-2048kB", "hugepages-32768kB", "hugepages-64kB"}, + outputPageSizes: []string{"1GB", "2MB", "32MB", "64KB"}, + err: nil, + }, + { + inputFiles: []string{}, + outputPageSizes: []string{}, + err: nil, + }, + { + inputFiles: []string{"hugepages-a"}, + outputPageSizes: []string{}, + err: errors.New("invalid size: 'a'"), + }, + } + + for _, c := range testCases { + pageSizes, err := getHugePageSizeFromFilenames(c.inputFiles) + if len(pageSizes) != 0 && len(c.outputPageSizes) != 0 && !reflect.DeepEqual(pageSizes, c.outputPageSizes) { + t.Errorf("expected %s, got %s", c.outputPageSizes, pageSizes) + } + if err != nil && err.Error() != c.err.Error() { + t.Errorf("expected error %s, got %s", c.err, err) + } + } +} diff --git a/libcontainer/configs/blkio_device.go b/libcontainer/configs/blkio_device.go new file mode 100644 index 0000000..fa195bf --- /dev/null +++ b/libcontainer/configs/blkio_device.go @@ -0,0 +1,66 @@ +package configs + +import "fmt" + +// blockIODevice holds major:minor format supported in blkio cgroup +type blockIODevice struct { + // Major is the device's major number + Major int64 
`json:"major"` + // Minor is the device's minor number + Minor int64 `json:"minor"` +} + +// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair +type WeightDevice struct { + blockIODevice + // Weight is the bandwidth rate for the device, range is from 10 to 1000 + Weight uint16 `json:"weight"` + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + LeafWeight uint16 `json:"leafWeight"` +} + +// NewWeightDevice returns a configured WeightDevice pointer +func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice { + wd := &WeightDevice{} + wd.Major = major + wd.Minor = minor + wd.Weight = weight + wd.LeafWeight = leafWeight + return wd +} + +// WeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) WeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) +} + +// LeafWeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) LeafWeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) +} + +// ThrottleDevice struct holds a `major:minor rate_per_second` pair +type ThrottleDevice struct { + blockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// NewThrottleDevice returns a configured ThrottleDevice pointer +func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { + td := &ThrottleDevice{} + td.Major = major + td.Minor = minor + td.Rate = rate + return td +} + +// String formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) String() string { + return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) +} + +// StringName formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) StringName(name string) string { + return fmt.Sprintf("%d:%d %s=%d", 
td.Major, td.Minor, name, td.Rate) +} diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go new file mode 100644 index 0000000..58ed19c --- /dev/null +++ b/libcontainer/configs/cgroup_linux.go @@ -0,0 +1,130 @@ +package configs + +type FreezerState string + +const ( + Undefined FreezerState = "" + Frozen FreezerState = "FROZEN" + Thawed FreezerState = "THAWED" +) + +type Cgroup struct { + // Deprecated, use Path instead + Name string `json:"name,omitempty"` + + // name of parent of cgroup or slice + // Deprecated, use Path instead + Parent string `json:"parent,omitempty"` + + // Path specifies the path to cgroups that are created and/or joined by the container. + // The path is assumed to be relative to the host system cgroup mountpoint. + Path string `json:"path"` + + // ScopePrefix describes prefix for the scope name + ScopePrefix string `json:"scope_prefix"` + + // Paths represent the absolute cgroups paths to join. + // This takes precedence over Path. + Paths map[string]string + + // Resources contains various cgroups settings to apply + *Resources +} + +type Resources struct { + // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. 
+ // Deprecated + AllowAllDevices *bool `json:"allow_all_devices,omitempty"` + // Deprecated + AllowedDevices []*Device `json:"allowed_devices,omitempty"` + // Deprecated + DeniedDevices []*Device `json:"denied_devices,omitempty"` + + Devices []*Device `json:"devices"` + + // Memory limit (in bytes) + Memory int64 `json:"memory"` + + // Memory reservation or soft_limit (in bytes) + MemoryReservation int64 `json:"memory_reservation"` + + // Total memory usage (memory + swap); set `-1` to enable unlimited swap + MemorySwap int64 `json:"memory_swap"` + + // Kernel memory limit (in bytes) + KernelMemory int64 `json:"kernel_memory"` + + // Kernel memory limit for TCP use (in bytes) + KernelMemoryTCP int64 `json:"kernel_memory_tcp"` + + // CPU shares (relative weight vs. other containers) + CpuShares uint64 `json:"cpu_shares"` + + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + CpuQuota int64 `json:"cpu_quota"` + + // CPU period to be used for hardcapping (in usecs). 0 to use system default. + CpuPeriod uint64 `json:"cpu_period"` + + // How much time the CPU will use in realtime scheduling (in usecs). + CpuRtRuntime int64 `json:"cpu_rt_quota"` + + // CPU period to be used for realtime scheduling (in usecs). + CpuRtPeriod uint64 `json:"cpu_rt_period"` + + // CPU to use + CpusetCpus string `json:"cpuset_cpus"` + + // MEM to use + CpusetMems string `json:"cpuset_mems"` + + // Process limit; set <= `0` to disable limit. + PidsLimit int64 `json:"pids_limit"` + + // Specifies per cgroup weight, range is from 10 to 1000. + BlkioWeight uint16 `json:"blkio_weight"` + + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` + + // Weight per cgroup per device, can override BlkioWeight. + BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` + + // IO read rate limit per cgroup per device, bytes per second.
+ BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` + + // IO write rate limit per cgroup per device, bytes per second. + BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` + + // IO read rate limit per cgroup per device, IO per second. + BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` + + // IO write rate limit per cgroup per device, IO per second. + BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` + + // set the freeze value for the process + Freezer FreezerState `json:"freezer"` + + // Hugetlb limit (in bytes) + HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` + + // Whether to disable OOM Killer + OomKillDisable bool `json:"oom_kill_disable"` + + // Tuning swappiness behaviour per cgroup + MemorySwappiness *uint64 `json:"memory_swappiness"` + + // Set priority of network traffic for container + NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` + + // Set class identifier for container's network packets + NetClsClassid uint32 `json:"net_cls_classid_u"` + + // Used on cgroups v2: + + // CpuWeight sets a proportional bandwidth limit. + CpuWeight uint64 `json:"cpu_weight"` + + // CpuMax sets the maximum bandwidth limit (format: max period). + CpuMax string `json:"cpu_max"` +} diff --git a/libcontainer/configs/cgroup_unsupported.go b/libcontainer/configs/cgroup_unsupported.go new file mode 100644 index 0000000..c0c23d7 --- /dev/null +++ b/libcontainer/configs/cgroup_unsupported.go @@ -0,0 +1,8 @@ +// +build !linux + +package configs + +// TODO Windows: This can ultimately be entirely factored out on Windows as +// cgroups are a Unix-specific construct.
+type Cgroup struct { +} diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go new file mode 100644 index 0000000..24989e9 --- /dev/null +++ b/libcontainer/configs/config.go @@ -0,0 +1,354 @@ +package configs + +import ( + "bytes" + "encoding/json" + "fmt" + "os/exec" + "time" + + "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/sirupsen/logrus" +) + +type Rlimit struct { + Type int `json:"type"` + Hard uint64 `json:"hard"` + Soft uint64 `json:"soft"` +} + +// IDMap represents UID/GID Mappings for User Namespaces. +type IDMap struct { + ContainerID int `json:"container_id"` + HostID int `json:"host_id"` + Size int `json:"size"` +} + +// Seccomp represents syscall restrictions +// By default, only the native architecture of the kernel is allowed to be used +// for syscalls. Additional architectures can be added by specifying them in +// Architectures. +type Seccomp struct { + DefaultAction Action `json:"default_action"` + Architectures []string `json:"architectures"` + Syscalls []*Syscall `json:"syscalls"` +} + +// Action is taken upon rule match in Seccomp +type Action int + +const ( + Kill Action = iota + 1 + Errno + Trap + Allow + Trace + Log +) + +// Operator is a comparison operator to be used when matching syscall arguments in Seccomp +type Operator int + +const ( + EqualTo Operator = iota + 1 + NotEqualTo + GreaterThan + GreaterThanOrEqualTo + LessThan + LessThanOrEqualTo + MaskEqualTo +) + +// Arg is a rule to match a specific syscall argument in Seccomp +type Arg struct { + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"value_two"` + Op Operator `json:"op"` +} + +// Syscall is a rule to match a syscall in Seccomp +type Syscall struct { + Name string `json:"name"` + Action Action `json:"action"` + Args []*Arg `json:"args"` +} + +// TODO Windows. Many of these fields should be factored out into those parts +// which are common across platforms, and those which are platform specific. 
+ +// Config defines configuration options for executing a process inside a contained environment. +type Config struct { + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root"` + + // ParentDeathSignal specifies the signal that is sent to the container's process in the case + // that the parent process dies. + ParentDeathSignal int `json:"parent_death_signal"` + + // Path to a directory containing the container's root filesystem. + Rootfs string `json:"rootfs"` + + // Readonlyfs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writable. + Readonlyfs bool `json:"readonlyfs"` + + // Specifies the mount propagation flags to be applied to /. + RootPropagation int `json:"rootPropagation"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts []*Mount `json:"mounts"` + + // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! 
+ Devices []*Device `json:"devices"` + + MountLabel string `json:"mount_label"` + + // Hostname optionally sets the container's hostname if provided + Hostname string `json:"hostname"` + + // Namespaces specifies the container's namespaces that it should setup when cloning the init process + // If a namespace is not provided that namespace is shared from the container's parent process + Namespaces Namespaces `json:"namespaces"` + + // Capabilities specify the capabilities to keep when executing the process inside the container + // All capabilities not specified will be dropped from the processes capability mask + Capabilities *Capabilities `json:"capabilities"` + + // Networks specifies the container's network setup to be created + Networks []*Network `json:"networks"` + + // Routes can be specified to create entries in the route table as the container is started + Routes []*Route `json:"routes"` + + // Cgroups specifies specific cgroup settings for the various subsystems that the container is + // placed into to limit the resources the container has available + Cgroups *Cgroup `json:"cgroups"` + + // AppArmorProfile specifies the profile to apply to the process running in the container and is + // change at the time the process is execed + AppArmorProfile string `json:"apparmor_profile,omitempty"` + + // ProcessLabel specifies the label to apply to the process running in the container. It is + // commonly used by selinux + ProcessLabel string `json:"process_label,omitempty"` + + // Rlimits specifies the resource limits, such as max open files, to set in the container + // If Rlimits are not set, the container will inherit rlimits from the parent process + Rlimits []Rlimit `json:"rlimits,omitempty"` + + // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores + // for a process. Valid values are between the range [-1000, '1000'], where processes with + // higher scores are preferred for being killed. 
If it is unset then we don't touch the current + // value. + // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ + OomScoreAdj *int `json:"oom_score_adj,omitempty"` + + // UidMappings is an array of User ID mappings for User Namespaces + UidMappings []IDMap `json:"uid_mappings"` + + // GidMappings is an array of Group ID mappings for User Namespaces + GidMappings []IDMap `json:"gid_mappings"` + + // MaskPaths specifies paths within the container's rootfs to mask over with a bind + // mount pointing to /dev/null as to prevent reads of the file. + MaskPaths []string `json:"mask_paths"` + + // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only + // so that these files prevent any writes. + ReadonlyPaths []string `json:"readonly_paths"` + + // Sysctl is a map of properties and their values. It is the equivalent of using + // sysctl -w my.property.name value in Linux. + Sysctl map[string]string `json:"sysctl"` + + // Seccomp allows actions to be taken whenever a syscall is made within the container. + // A number of rules are given, each having an action to be taken if a syscall matches it. + // A default action to be taken if no rules match is also given. + Seccomp *Seccomp `json:"seccomp"` + + // NoNewPrivileges controls whether processes in the container can gain additional privileges. + NoNewPrivileges bool `json:"no_new_privileges,omitempty"` + + // Hooks are a collection of actions to perform at various container lifecycle events. + // CommandHooks are serialized to JSON, but other hooks are not. + Hooks *Hooks + + // Version is the version of opencontainer specification that is supported. + Version string `json:"version"` + + // Labels are user defined metadata that is stored in the config and populated on the state + Labels []string `json:"labels"` + + // NoNewKeyring will not allocate a new session keyring for the container. It will use the + // caller's keyring in this case. 
+ NoNewKeyring bool `json:"no_new_keyring"` + + // IntelRdt specifies settings for Intel RDT group that the container is placed into + // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available + IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` + + // RootlessEUID is set when the runc was launched with non-zero EUID. + // Note that RootlessEUID is set to false when launched with EUID=0 in userns. + // When RootlessEUID is set, runc creates a new userns for the container. + // (config.json needs to contain userns settings) + RootlessEUID bool `json:"rootless_euid,omitempty"` + + // RootlessCgroups is set when unlikely to have the full access to cgroups. + // When RootlessCgroups is set, cgroups errors are ignored. + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` +} + +type Hooks struct { + // Prestart commands are executed after the container namespaces are created, + // but before the user supplied command is executed from init. + Prestart []Hook + + // Poststart commands are executed after the container init process starts. + Poststart []Hook + + // Poststop commands are executed after the container init process exits. + Poststop []Hook +} + +type Capabilities struct { + // Bounding is the set of capabilities checked by the kernel. + Bounding []string + // Effective is the set of capabilities checked by the kernel. + Effective []string + // Inheritable is the capabilities preserved across execve. + Inheritable []string + // Permitted is the limiting superset for effective capabilities. + Permitted []string + // Ambient is the ambient set of capabilities that are kept. 
+ Ambient []string +} + +func (hooks *Hooks) UnmarshalJSON(b []byte) error { + var state struct { + Prestart []CommandHook + Poststart []CommandHook + Poststop []CommandHook + } + + if err := json.Unmarshal(b, &state); err != nil { + return err + } + + deserialize := func(shooks []CommandHook) (hooks []Hook) { + for _, shook := range shooks { + hooks = append(hooks, shook) + } + + return hooks + } + + hooks.Prestart = deserialize(state.Prestart) + hooks.Poststart = deserialize(state.Poststart) + hooks.Poststop = deserialize(state.Poststop) + return nil +} + +func (hooks Hooks) MarshalJSON() ([]byte, error) { + serialize := func(hooks []Hook) (serializableHooks []CommandHook) { + for _, hook := range hooks { + switch chook := hook.(type) { + case CommandHook: + serializableHooks = append(serializableHooks, chook) + default: + logrus.Warnf("cannot serialize hook of type %T, skipping", hook) + } + } + + return serializableHooks + } + + return json.Marshal(map[string]interface{}{ + "prestart": serialize(hooks.Prestart), + "poststart": serialize(hooks.Poststart), + "poststop": serialize(hooks.Poststop), + }) +} + +type Hook interface { + // Run executes the hook with the provided state. + Run(*specs.State) error +} + +// NewFunctionHook will call the provided function when the hook is run. +func NewFunctionHook(f func(*specs.State) error) FuncHook { + return FuncHook{ + run: f, + } +} + +type FuncHook struct { + run func(*specs.State) error +} + +func (f FuncHook) Run(s *specs.State) error { + return f.run(s) +} + +type Command struct { + Path string `json:"path"` + Args []string `json:"args"` + Env []string `json:"env"` + Dir string `json:"dir"` + Timeout *time.Duration `json:"timeout"` +} + +// NewCommandHook will execute the provided command when the hook is run. 
+func NewCommandHook(cmd Command) CommandHook { + return CommandHook{ + Command: cmd, + } +} + +type CommandHook struct { + Command +} + +func (c Command) Run(s *specs.State) error { + b, err := json.Marshal(s) + if err != nil { + return err + } + var stdout, stderr bytes.Buffer + cmd := exec.Cmd{ + Path: c.Path, + Args: c.Args, + Env: c.Env, + Stdin: bytes.NewReader(b), + Stdout: &stdout, + Stderr: &stderr, + } + if err := cmd.Start(); err != nil { + return err + } + errC := make(chan error, 1) + go func() { + err := cmd.Wait() + if err != nil { + err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) + } + errC <- err + }() + var timerCh <-chan time.Time + if c.Timeout != nil { + timer := time.NewTimer(*c.Timeout) + defer timer.Stop() + timerCh = timer.C + } + select { + case err := <-errC: + return err + case <-timerCh: + cmd.Process.Kill() + cmd.Wait() + return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds()) + } +} diff --git a/libcontainer/configs/config_linux.go b/libcontainer/configs/config_linux.go new file mode 100644 index 0000000..07da108 --- /dev/null +++ b/libcontainer/configs/config_linux.go @@ -0,0 +1,61 @@ +package configs + +import "fmt" + +// HostUID gets the translated uid for the process on host which could be +// different when user namespaces are enabled. +func (c Config) HostUID(containerId int) (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.UidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.") + } + id, found := c.hostIDFromMapping(containerId, c.UidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.") + } + return id, nil + } + // Return unchanged id. + return containerId, nil +} + +// HostRootUID gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. 
+func (c Config) HostRootUID() (int, error) { + return c.HostUID(0) +} + +// HostGID gets the translated gid for the process on host which could be +// different when user namespaces are enabled. +func (c Config) HostGID(containerId int) (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.GidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + } + id, found := c.hostIDFromMapping(containerId, c.GidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.") + } + return id, nil + } + // Return unchanged id. + return containerId, nil +} + +// HostRootGID gets the root gid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c Config) HostRootGID() (int, error) { + return c.HostGID(0) +} + +// Utility function that gets a host ID for a container ID from user namespace map +// if that ID is present in the map. +func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false +} diff --git a/libcontainer/configs/config_linux_test.go b/libcontainer/configs/config_linux_test.go new file mode 100644 index 0000000..9c5f0fe --- /dev/null +++ b/libcontainer/configs/config_linux_test.go @@ -0,0 +1,130 @@ +package configs + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "testing" +) + +func loadConfig(name string) (*Config, error) { + f, err := os.Open(filepath.Join("../sample_configs", name)) + if err != nil { + return nil, err + } + defer f.Close() + + var container *Config + if err := json.NewDecoder(f).Decode(&container); err != nil { + return nil, err + } + + // Check that a config doesn't contain extra fields + var configMap, abstractMap map[string]interface{} + + if _, err := f.Seek(0, 0); err 
!= nil { + return nil, err + } + + if err := json.NewDecoder(f).Decode(&abstractMap); err != nil { + return nil, err + } + + configData, err := json.Marshal(&container) + if err != nil { + return nil, err + } + + if err := json.Unmarshal(configData, &configMap); err != nil { + return nil, err + } + + for k := range configMap { + delete(abstractMap, k) + } + + if len(abstractMap) != 0 { + return nil, fmt.Errorf("unknown fields: %s", abstractMap) + } + + return container, nil +} + +func TestRemoveNamespace(t *testing.T) { + ns := Namespaces{ + {Type: NEWNET}, + } + if !ns.Remove(NEWNET) { + t.Fatal("NEWNET was not removed") + } + if len(ns) != 0 { + t.Fatalf("namespaces should have 0 items but reports %d", len(ns)) + } +} + +func TestHostRootUIDNoUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{}, + } + uid, err := config.HostRootUID() + if err != nil { + t.Fatal(err) + } + if uid != 0 { + t.Fatalf("expected uid 0 with no USERNS but received %d", uid) + } +} + +func TestHostRootUIDWithUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{{Type: NEWUSER}}, + UidMappings: []IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 1, + }, + }, + } + uid, err := config.HostRootUID() + if err != nil { + t.Fatal(err) + } + if uid != 1000 { + t.Fatalf("expected uid 1000 with no USERNS but received %d", uid) + } +} + +func TestHostRootGIDNoUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{}, + } + uid, err := config.HostRootGID() + if err != nil { + t.Fatal(err) + } + if uid != 0 { + t.Fatalf("expected gid 0 with no USERNS but received %d", uid) + } +} + +func TestHostRootGIDWithUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{{Type: NEWUSER}}, + GidMappings: []IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 1, + }, + }, + } + uid, err := config.HostRootGID() + if err != nil { + t.Fatal(err) + } + if uid != 1000 { + t.Fatalf("expected gid 1000 with no USERNS but received %d", uid) + } +} diff --git 
a/libcontainer/configs/config_test.go b/libcontainer/configs/config_test.go new file mode 100644 index 0000000..c89a764 --- /dev/null +++ b/libcontainer/configs/config_test.go @@ -0,0 +1,195 @@ +package configs_test + +import ( + "encoding/json" + "fmt" + "os" + "reflect" + "testing" + "time" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestUnmarshalHooks(t *testing.T) { + timeout := time.Second + + prestartCmd := configs.NewCommandHook(configs.Command{ + Path: "/var/vcap/hooks/prestart", + Args: []string{"--pid=123"}, + Env: []string{"FOO=BAR"}, + Dir: "/var/vcap", + Timeout: &timeout, + }) + prestart, err := json.Marshal(prestartCmd.Command) + if err != nil { + t.Fatal(err) + } + + hook := configs.Hooks{} + err = hook.UnmarshalJSON([]byte(fmt.Sprintf(`{"Prestart" :[%s]}`, prestart))) + if err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(hook.Prestart[0], prestartCmd) { + t.Errorf("Expected prestart to equal %+v but it was %+v", + prestartCmd, hook.Prestart[0]) + } +} + +func TestUnmarshalHooksWithInvalidData(t *testing.T) { + hook := configs.Hooks{} + err := hook.UnmarshalJSON([]byte(`{invalid-json}`)) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestMarshalHooks(t *testing.T) { + timeout := time.Second + + prestartCmd := configs.NewCommandHook(configs.Command{ + Path: "/var/vcap/hooks/prestart", + Args: []string{"--pid=123"}, + Env: []string{"FOO=BAR"}, + Dir: "/var/vcap", + Timeout: &timeout, + }) + + hook := configs.Hooks{ + Prestart: []configs.Hook{prestartCmd}, + } + hooks, err := hook.MarshalJSON() + if err != nil { + t.Fatal(err) + } + + h := `{"poststart":null,"poststop":null,"prestart":[{"path":"/var/vcap/hooks/prestart","args":["--pid=123"],"env":["FOO=BAR"],"dir":"/var/vcap","timeout":1000000000}]}` + if string(hooks) != h { + t.Errorf("Expected hooks %s to equal %s", string(hooks), h) + } +} + +func TestMarshalUnmarshalHooks(t 
*testing.T) { + timeout := time.Second + + prestart := configs.NewCommandHook(configs.Command{ + Path: "/var/vcap/hooks/prestart", + Args: []string{"--pid=123"}, + Env: []string{"FOO=BAR"}, + Dir: "/var/vcap", + Timeout: &timeout, + }) + + hook := configs.Hooks{ + Prestart: []configs.Hook{prestart}, + } + hooks, err := hook.MarshalJSON() + if err != nil { + t.Fatal(err) + } + + umMhook := configs.Hooks{} + err = umMhook.UnmarshalJSON(hooks) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(umMhook.Prestart[0], prestart) { + t.Errorf("Expected hooks to be equal after mashaling -> unmarshaling them: %+v, %+v", umMhook.Prestart[0], prestart) + } +} + +func TestMarshalHooksWithUnexpectedType(t *testing.T) { + fHook := configs.NewFunctionHook(func(*specs.State) error { + return nil + }) + hook := configs.Hooks{ + Prestart: []configs.Hook{fHook}, + } + hooks, err := hook.MarshalJSON() + if err != nil { + t.Fatal(err) + } + + h := `{"poststart":null,"poststop":null,"prestart":null}` + if string(hooks) != h { + t.Errorf("Expected hooks %s to equal %s", string(hooks), h) + } +} + +func TestFuncHookRun(t *testing.T) { + state := &specs.State{ + Version: "1", + ID: "1", + Status: "created", + Pid: 1, + Bundle: "/bundle", + } + + fHook := configs.NewFunctionHook(func(s *specs.State) error { + if !reflect.DeepEqual(state, s) { + t.Errorf("Expected state %+v to equal %+v", state, s) + } + return nil + }) + + fHook.Run(state) +} + +func TestCommandHookRun(t *testing.T) { + state := &specs.State{ + Version: "1", + ID: "1", + Status: "created", + Pid: 1, + Bundle: "/bundle", + } + timeout := time.Second + + cmdHook := configs.NewCommandHook(configs.Command{ + Path: os.Args[0], + Args: []string{os.Args[0], "-test.run=TestHelperProcess"}, + Env: []string{"FOO=BAR"}, + Dir: "/", + Timeout: &timeout, + }) + + err := cmdHook.Run(state) + if err != nil { + t.Errorf(fmt.Sprintf("Expected error to not occur but it was %+v", err)) + } +} + +func TestCommandHookRunTimeout(t 
*testing.T) { + state := &specs.State{ + Version: "1", + ID: "1", + Status: "created", + Pid: 1, + Bundle: "/bundle", + } + timeout := (10 * time.Millisecond) + + cmdHook := configs.NewCommandHook(configs.Command{ + Path: os.Args[0], + Args: []string{os.Args[0], "-test.run=TestHelperProcessWithTimeout"}, + Env: []string{"FOO=BAR"}, + Dir: "/", + Timeout: &timeout, + }) + + err := cmdHook.Run(state) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestHelperProcess(*testing.T) { + fmt.Println("Helper Process") + os.Exit(0) +} +func TestHelperProcessWithTimeout(*testing.T) { + time.Sleep(time.Second) +} diff --git a/libcontainer/configs/config_windows_test.go b/libcontainer/configs/config_windows_test.go new file mode 100644 index 0000000..1a0c8fa --- /dev/null +++ b/libcontainer/configs/config_windows_test.go @@ -0,0 +1,3 @@ +package configs + +// All current tests are for Unix-specific functionality diff --git a/libcontainer/configs/device.go b/libcontainer/configs/device.go new file mode 100644 index 0000000..8701bb2 --- /dev/null +++ b/libcontainer/configs/device.go @@ -0,0 +1,57 @@ +package configs + +import ( + "fmt" + "os" +) + +const ( + Wildcard = -1 +) + +// TODO Windows: This can be factored out in the future + +type Device struct { + // Device type, block, char, etc. + Type rune `json:"type"` + + // Path to the device. + Path string `json:"path"` + + // Major is the device's major number. + Major int64 `json:"major"` + + // Minor is the device's minor number. + Minor int64 `json:"minor"` + + // Cgroup permissions format, rwm. + Permissions string `json:"permissions"` + + // FileMode permission bits for the device. + FileMode os.FileMode `json:"file_mode"` + + // Uid of the device. + Uid uint32 `json:"uid"` + + // Gid of the device. 
+ Gid uint32 `json:"gid"` + + // Write the file to the allowed list + Allow bool `json:"allow"` +} + +func (d *Device) CgroupString() string { + return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) +} + +func (d *Device) Mkdev() int { + return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) +} + +// deviceNumberString converts the device number to a string return result. +func deviceNumberString(number int64) string { + if number == Wildcard { + return "*" + } + return fmt.Sprint(number) +} diff --git a/libcontainer/configs/device_defaults.go b/libcontainer/configs/device_defaults.go new file mode 100644 index 0000000..e4f423c --- /dev/null +++ b/libcontainer/configs/device_defaults.go @@ -0,0 +1,111 @@ +// +build linux + +package configs + +var ( + // DefaultSimpleDevices are devices that are to be both allowed and created. + DefaultSimpleDevices = []*Device{ + // /dev/null and zero + { + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, + }, + + { + Path: "/dev/full", + Type: 'c', + Major: 1, + Minor: 7, + Permissions: "rwm", + FileMode: 0666, + }, + + // consoles and ttys + { + Path: "/dev/tty", + Type: 'c', + Major: 5, + Minor: 0, + Permissions: "rwm", + FileMode: 0666, + }, + + // /dev/urandom,/dev/random + { + Path: "/dev/urandom", + Type: 'c', + Major: 1, + Minor: 9, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/random", + Type: 'c', + Major: 1, + Minor: 8, + Permissions: "rwm", + FileMode: 0666, + }, + } + DefaultAllowedDevices = append([]*Device{ + // allow mknod for any device + { + Type: 'c', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + { + Type: 'b', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + + { + Path: "/dev/console", + Type: 'c', + Major: 5, + Minor: 1, + 
Permissions: "rwm", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Path: "", + Type: 'c', + Major: 136, + Minor: Wildcard, + Permissions: "rwm", + }, + { + Path: "", + Type: 'c', + Major: 5, + Minor: 2, + Permissions: "rwm", + }, + + // tuntap + { + Path: "", + Type: 'c', + Major: 10, + Minor: 200, + Permissions: "rwm", + }, + }, DefaultSimpleDevices...) + DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...) +) diff --git a/libcontainer/configs/hugepage_limit.go b/libcontainer/configs/hugepage_limit.go new file mode 100644 index 0000000..d302163 --- /dev/null +++ b/libcontainer/configs/hugepage_limit.go @@ -0,0 +1,9 @@ +package configs + +type HugepageLimit struct { + // which type of hugepage to limit. + Pagesize string `json:"page_size"` + + // usage limit for hugepage. + Limit uint64 `json:"limit"` +} diff --git a/libcontainer/configs/intelrdt.go b/libcontainer/configs/intelrdt.go new file mode 100644 index 0000000..57e9f03 --- /dev/null +++ b/libcontainer/configs/intelrdt.go @@ -0,0 +1,13 @@ +package configs + +type IntelRdt struct { + // The schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:=;=;..." + L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The schema of memory bandwidth per L3 cache id + // Format: "MB:=bandwidth0;=bandwidth1;..." + // The unit of memory bandwidth is specified in "percentages" by + // default, and in "MBps" if MBA Software Controller is enabled. 
+ MemBwSchema string `json:"memBwSchema,omitempty"` +} diff --git a/libcontainer/configs/interface_priority_map.go b/libcontainer/configs/interface_priority_map.go new file mode 100644 index 0000000..9a0395e --- /dev/null +++ b/libcontainer/configs/interface_priority_map.go @@ -0,0 +1,14 @@ +package configs + +import ( + "fmt" +) + +type IfPrioMap struct { + Interface string `json:"interface"` + Priority int64 `json:"priority"` +} + +func (i *IfPrioMap) CgroupString() string { + return fmt.Sprintf("%s %d", i.Interface, i.Priority) +} diff --git a/libcontainer/configs/mount.go b/libcontainer/configs/mount.go new file mode 100644 index 0000000..670757d --- /dev/null +++ b/libcontainer/configs/mount.go @@ -0,0 +1,39 @@ +package configs + +const ( + // EXT_COPYUP is a directive to copy up the contents of a directory when + // a tmpfs is mounted over it. + EXT_COPYUP = 1 << iota +) + +type Mount struct { + // Source path for the mount. + Source string `json:"source"` + + // Destination path for the mount inside the container. + Destination string `json:"destination"` + + // Device the mount is for. + Device string `json:"device"` + + // Mount flags. + Flags int `json:"flags"` + + // Propagation Flags + PropagationFlags []int `json:"propagation_flags"` + + // Mount data applied to the mount. + Data string `json:"data"` + + // Relabel source if set, "z" indicates shared, "Z" indicates unshared. + Relabel string `json:"relabel"` + + // Extensions are additional flags that are specific to runc. + Extensions int `json:"extensions"` + + // Optional Command to be run before Source is mounted. + PremountCmds []Command `json:"premount_cmds"` + + // Optional Command to be run after Source is mounted. 
+ PostmountCmds []Command `json:"postmount_cmds"` +} diff --git a/libcontainer/configs/namespaces.go b/libcontainer/configs/namespaces.go new file mode 100644 index 0000000..a3329a3 --- /dev/null +++ b/libcontainer/configs/namespaces.go @@ -0,0 +1,5 @@ +package configs + +type NamespaceType string + +type Namespaces []Namespace diff --git a/libcontainer/configs/namespaces_linux.go b/libcontainer/configs/namespaces_linux.go new file mode 100644 index 0000000..1bbaef9 --- /dev/null +++ b/libcontainer/configs/namespaces_linux.go @@ -0,0 +1,126 @@ +package configs + +import ( + "fmt" + "os" + "sync" +) + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" + NEWCGROUP NamespaceType = "NEWCGROUP" +) + +var ( + nsLock sync.Mutex + supportedNamespaces = make(map[NamespaceType]bool) +) + +// NsName converts the namespace type to its filename +func NsName(ns NamespaceType) string { + switch ns { + case NEWNET: + return "net" + case NEWNS: + return "mnt" + case NEWPID: + return "pid" + case NEWIPC: + return "ipc" + case NEWUSER: + return "user" + case NEWUTS: + return "uts" + case NEWCGROUP: + return "cgroup" + } + return "" +} + +// IsNamespaceSupported returns whether a namespace is available or +// not +func IsNamespaceSupported(ns NamespaceType) bool { + nsLock.Lock() + defer nsLock.Unlock() + supported, ok := supportedNamespaces[ns] + if ok { + return supported + } + nsFile := NsName(ns) + // if the namespace type is unknown, just return false + if nsFile == "" { + return false + } + _, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile)) + // a namespace is supported if it exists and we have permissions to read it + supported = err == nil + supportedNamespaces[ns] = supported + return supported +} + +func NamespaceTypes() []NamespaceType { + return []NamespaceType{ + NEWUSER, // Keep user NS always first, 
don't move it. + NEWIPC, + NEWUTS, + NEWNET, + NEWPID, + NEWNS, + NEWCGROUP, + } +} + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Type NamespaceType `json:"type"` + Path string `json:"path"` +} + +func (n *Namespace) GetPath(pid int) string { + return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type)) +} + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...) + return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 +} + +func (n *Namespaces) PathOf(t NamespaceType) string { + i := n.index(t) + if i == -1 { + return "" + } + return (*n)[i].Path +} diff --git a/libcontainer/configs/namespaces_syscall.go b/libcontainer/configs/namespaces_syscall.go new file mode 100644 index 0000000..2dc7adf --- /dev/null +++ b/libcontainer/configs/namespaces_syscall.go @@ -0,0 +1,32 @@ +// +build linux + +package configs + +import "golang.org/x/sys/unix" + +func (n *Namespace) Syscall() int { + return namespaceInfo[n.Type] +} + +var namespaceInfo = map[NamespaceType]int{ + NEWNET: unix.CLONE_NEWNET, + NEWNS: unix.CLONE_NEWNS, + NEWUSER: unix.CLONE_NEWUSER, + NEWIPC: unix.CLONE_NEWIPC, + NEWUTS: unix.CLONE_NEWUTS, + NEWPID: unix.CLONE_NEWPID, + NEWCGROUP: unix.CLONE_NEWCGROUP, +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This function returns flags only for new namespaces. 
+func (n *Namespaces) CloneFlags() uintptr { + var flag int + for _, v := range *n { + if v.Path != "" { + continue + } + flag |= namespaceInfo[v.Type] + } + return uintptr(flag) +} diff --git a/libcontainer/configs/namespaces_syscall_unsupported.go b/libcontainer/configs/namespaces_syscall_unsupported.go new file mode 100644 index 0000000..5d9a5c8 --- /dev/null +++ b/libcontainer/configs/namespaces_syscall_unsupported.go @@ -0,0 +1,13 @@ +// +build !linux,!windows + +package configs + +func (n *Namespace) Syscall() int { + panic("No namespace syscall support") +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This function returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + panic("No namespace syscall support") +} diff --git a/libcontainer/configs/namespaces_unsupported.go b/libcontainer/configs/namespaces_unsupported.go new file mode 100644 index 0000000..19bf713 --- /dev/null +++ b/libcontainer/configs/namespaces_unsupported.go @@ -0,0 +1,8 @@ +// +build !linux + +package configs + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { +} diff --git a/libcontainer/configs/network.go b/libcontainer/configs/network.go new file mode 100644 index 0000000..ccdb228 --- /dev/null +++ b/libcontainer/configs/network.go @@ -0,0 +1,72 @@ +package configs + +// Network defines configuration for a container's networking stack +// +// The network configuration can be omitted from a container causing the +// container to be setup with the host's networking stack +type Network struct { + // Type sets the networks type, commonly veth and loopback + Type string `json:"type"` + + // Name of the network interface + Name string `json:"name"` + + // The bridge to use. 
+ Bridge string `json:"bridge"` + + // MacAddress contains the MAC address to set on the network interface + MacAddress string `json:"mac_address"` + + // Address contains the IPv4 and mask to set on the network interface + Address string `json:"address"` + + // Gateway sets the gateway address that is used as the default for the interface + Gateway string `json:"gateway"` + + // IPv6Address contains the IPv6 and mask to set on the network interface + IPv6Address string `json:"ipv6_address"` + + // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface + IPv6Gateway string `json:"ipv6_gateway"` + + // Mtu sets the mtu value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + Mtu int `json:"mtu"` + + // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + TxQueueLen int `json:"txqueuelen"` + + // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the + // container. + HostInterfaceName string `json:"host_interface_name"` + + // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface + // bridge port in the case of type veth + // Note: This is unsupported on some systems. + // Note: This does not apply to loopback interfaces. + HairpinMode bool `json:"hairpin_mode"` +} + +// Routes can be specified to create entries in the route table as the container is started +// +// All of destination, source, and gateway should be either IPv4 or IPv6. +// One of the three options must be present, and omitted entries will use their +// IP family default for the route table. 
For IPv4 for example, setting the +// gateway to 1.2.3.4 and the interface to eth0 will set up a standard +// destination of 0.0.0.0(or *) when viewed in the route table. +type Route struct { + // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 + Destination string `json:"destination"` + + // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 + Source string `json:"source"` + + // Sets the gateway. Accepts IPv4 and IPv6 + Gateway string `json:"gateway"` + + // The device to set this route up for, for example: eth0 + InterfaceName string `json:"interface_name"` +} diff --git a/libcontainer/configs/validate/rootless.go b/libcontainer/configs/validate/rootless.go new file mode 100644 index 0000000..393d9e8 --- /dev/null +++ b/libcontainer/configs/validate/rootless.go @@ -0,0 +1,89 @@ +package validate + +import ( + "fmt" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +// rootlessEUID makes sure that the config can be applied when runc +// is being executed as a non-root user (euid != 0) in the current user namespace. +func (v *ConfigValidator) rootlessEUID(config *configs.Config) error { + if err := rootlessEUIDMappings(config); err != nil { + return err + } + if err := rootlessEUIDMount(config); err != nil { + return err + } + + // XXX: We currently can't verify the user config at all, because + // configs.Config doesn't store the user-related configs. So this + // has to be verified by setupUser() in init_linux.go. 
+ + return nil +} + +func hasIDMapping(id int, mappings []configs.IDMap) bool { + for _, m := range mappings { + if id >= m.ContainerID && id < m.ContainerID+m.Size { + return true + } + } + return false +} + +func rootlessEUIDMappings(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWUSER) { + return fmt.Errorf("rootless container requires user namespaces") + } + + if len(config.UidMappings) == 0 { + return fmt.Errorf("rootless containers requires at least one UID mapping") + } + if len(config.GidMappings) == 0 { + return fmt.Errorf("rootless containers requires at least one GID mapping") + } + return nil +} + +// rootlessEUIDMount checks the mounts of a rootless container. It only inspects +// the mount data: every parseable `uid=` or `gid=` option must name an ID that is +// covered by the configured UidMappings or GidMappings, respectively. +func rootlessEUIDMount(config *configs.Config) error { + // XXX: We could whitelist allowed devices at this point, but I'm not + // convinced that's a good idea. The kernel is the best arbiter of + // access control. + + for _, mount := range config.Mounts { + // Check that the options list doesn't contain any uid= or gid= entries + // that are not covered by an ID mapping. + for _, opt := range strings.Split(mount.Data, ",") { + if strings.HasPrefix(opt, "uid=") { + var uid int + n, err := fmt.Sscanf(opt, "uid=%d", &uid) + if n != 1 || err != nil { + // Ignore unknown mount options. + continue + } + if !hasIDMapping(uid, config.UidMappings) { + return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers") + } + } + + if strings.HasPrefix(opt, "gid=") { + var gid int + n, err := fmt.Sscanf(opt, "gid=%d", &gid) + if n != 1 || err != nil { + // Ignore unknown mount options.
+ continue + } + if !hasIDMapping(gid, config.GidMappings) { + return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers") + } + } + } + } + + return nil +} diff --git a/libcontainer/configs/validate/rootless_test.go b/libcontainer/configs/validate/rootless_test.go new file mode 100644 index 0000000..59d1557 --- /dev/null +++ b/libcontainer/configs/validate/rootless_test.go @@ -0,0 +1,155 @@ +package validate + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +func rootlessEUIDConfig() *configs.Config { + return &configs.Config{ + Rootfs: "/var", + RootlessEUID: true, + RootlessCgroups: true, + Namespaces: configs.Namespaces( + []configs.Namespace{ + {Type: configs.NEWUSER}, + }, + ), + UidMappings: []configs.IDMap{ + { + HostID: 1337, + ContainerID: 0, + Size: 1, + }, + }, + GidMappings: []configs.IDMap{ + { + HostID: 7331, + ContainerID: 0, + Size: 1, + }, + }, + } +} + +func TestValidateRootlessEUID(t *testing.T) { + validator := New() + + config := rootlessEUIDConfig() + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur: %+v", err) + } +} + +/* rootlessEUIDMappings */ + +func TestValidateRootlessEUIDUserns(t *testing.T) { + validator := New() + + config := rootlessEUIDConfig() + config.Namespaces = nil + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if user namespaces not set") + } +} + +func TestValidateRootlessEUIDMappingUid(t *testing.T) { + validator := New() + + config := rootlessEUIDConfig() + config.UidMappings = nil + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if no uid mappings provided") + } +} + +func TestValidateNonZeroEUIDMappingGid(t *testing.T) { + validator := New() + + config := rootlessEUIDConfig() + config.GidMappings = nil + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if no gid mappings provided") + } +} + 
+/* rootlessEUIDMount() */ + +func TestValidateRootlessEUIDMountUid(t *testing.T) { + config := rootlessEUIDConfig() + validator := New() + + config.Mounts = []*configs.Mount{ + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + }, + } + + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when uid= not set in mount options: %+v", err) + } + + config.Mounts[0].Data = "uid=5" + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur when setting uid=5 in mount options") + } + + config.Mounts[0].Data = "uid=0" + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err) + } + + config.Mounts[0].Data = "uid=2" + config.UidMappings[0].Size = 10 + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when setting uid=2 in mount options and UidMapping[0].size is 10") + } + + config.Mounts[0].Data = "uid=20" + config.UidMappings[0].Size = 10 + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur when setting uid=20 in mount options and UidMapping[0].size is 10") + } +} + +func TestValidateRootlessEUIDMountGid(t *testing.T) { + config := rootlessEUIDConfig() + validator := New() + + config.Mounts = []*configs.Mount{ + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + }, + } + + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when gid= not set in mount options: %+v", err) + } + + config.Mounts[0].Data = "gid=5" + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur when setting gid=5 in mount options") + } + + config.Mounts[0].Data = "gid=0" + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err) + } + + config.Mounts[0].Data = "gid=5" + 
config.GidMappings[0].Size = 10 + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when setting gid=5 in mount options and GidMapping[0].size is 10") + } + + config.Mounts[0].Data = "gid=11" + config.GidMappings[0].Size = 10 + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur when setting gid=11 in mount options and GidMapping[0].size is 10") + } +} diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go new file mode 100644 index 0000000..3b42f30 --- /dev/null +++ b/libcontainer/configs/validate/validator.go @@ -0,0 +1,245 @@ +package validate + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" + selinux "github.com/opencontainers/selinux/go-selinux" +) + +type Validator interface { + Validate(*configs.Config) error +} + +func New() Validator { + return &ConfigValidator{} +} + +type ConfigValidator struct { +} + +func (v *ConfigValidator) Validate(config *configs.Config) error { + if err := v.rootfs(config); err != nil { + return err + } + if err := v.network(config); err != nil { + return err + } + if err := v.hostname(config); err != nil { + return err + } + if err := v.security(config); err != nil { + return err + } + if err := v.usernamespace(config); err != nil { + return err + } + if err := v.cgroupnamespace(config); err != nil { + return err + } + if err := v.sysctl(config); err != nil { + return err + } + if err := v.intelrdt(config); err != nil { + return err + } + if config.RootlessEUID { + if err := v.rootlessEUID(config); err != nil { + return err + } + } + return nil +} + +// rootfs validates if the rootfs is an absolute path and is not a symlink +// to the container's root filesystem. 
+func (v *ConfigValidator) rootfs(config *configs.Config) error { + if _, err := os.Stat(config.Rootfs); err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs) + } + return err + } + cleaned, err := filepath.Abs(config.Rootfs) + if err != nil { + return err + } + if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { + return err + } + if filepath.Clean(config.Rootfs) != cleaned { + return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) + } + return nil +} + +func (v *ConfigValidator) network(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWNET) { + if len(config.Networks) > 0 || len(config.Routes) > 0 { + return fmt.Errorf("unable to apply network settings without a private NET namespace") + } + } + return nil +} + +func (v *ConfigValidator) hostname(config *configs.Config) error { + if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { + return fmt.Errorf("unable to set hostname without a private UTS namespace") + } + return nil +} + +func (v *ConfigValidator) security(config *configs.Config) error { + // restrict sys without mount namespace + if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && + !config.Namespaces.Contains(configs.NEWNS) { + return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") + } + if config.ProcessLabel != "" && !selinux.GetEnabled() { + return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported") + } + + return nil +} + +func (v *ConfigValidator) usernamespace(config *configs.Config) error { + if config.Namespaces.Contains(configs.NEWUSER) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + return fmt.Errorf("USER namespaces aren't enabled in the kernel") + } + } else { + if config.UidMappings != nil || config.GidMappings != nil { + return fmt.Errorf("User namespace mappings specified, but USER namespace isn't 
enabled in the config") + } + } + return nil +} + +func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error { + if config.Namespaces.Contains(configs.NEWCGROUP) { + if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { + return fmt.Errorf("cgroup namespaces aren't enabled in the kernel") + } + } + return nil +} + +// sysctl validates that the specified sysctl keys are valid or not. +// /proc/sys isn't completely namespaced and depending on which namespaces +// are specified, a subset of sysctls are permitted. +func (v *ConfigValidator) sysctl(config *configs.Config) error { + validSysctlMap := map[string]bool{ + "kernel.msgmax": true, + "kernel.msgmnb": true, + "kernel.msgmni": true, + "kernel.sem": true, + "kernel.shmall": true, + "kernel.shmmax": true, + "kernel.shmmni": true, + "kernel.shm_rmid_forced": true, + } + + for s := range config.Sysctl { + if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") { + if config.Namespaces.Contains(configs.NEWIPC) { + continue + } else { + return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s) + } + } + if strings.HasPrefix(s, "net.") { + if config.Namespaces.Contains(configs.NEWNET) { + if path := config.Namespaces.PathOf(configs.NEWNET); path != "" { + if err := checkHostNs(s, path); err != nil { + return err + } + } + continue + } else { + return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s) + } + } + if config.Namespaces.Contains(configs.NEWUTS) { + switch s { + case "kernel.domainname": + // This is namespaced and there's no explicit OCI field for it. + continue + case "kernel.hostname": + // This is namespaced but there's a conflicting (dedicated) OCI field for it. 
+ return fmt.Errorf("sysctl %q is not allowed as it conflicts with the OCI %q field", s, "hostname") + } + } + return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s) + } + + return nil +} + +func (v *ConfigValidator) intelrdt(config *configs.Config) error { + if config.IntelRdt != nil { + if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() { + return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled") + } + + if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" { + return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled") + } + if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" { + return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled") + } + + if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" { + return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty") + } + if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" { + return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty") + } + } + + return nil +} + +func isSymbolicLink(path string) (bool, error) { + fi, err := os.Lstat(path) + if err != nil { + return false, err + } + + return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil +} + +// checkHostNs checks whether network sysctl is used in host namespace. 
+func checkHostNs(sysctlConfig string, path string) error { + var currentProcessNetns = "/proc/self/ns/net" + // readlink on the current processes network namespace + destOfCurrentProcess, err := os.Readlink(currentProcessNetns) + if err != nil { + return fmt.Errorf("read soft link %q error", currentProcessNetns) + } + + // First check if the provided path is a symbolic link + symLink, err := isSymbolicLink(path) + if err != nil { + return fmt.Errorf("could not check that %q is a symlink: %v", path, err) + } + + if symLink == false { + // The provided namespace is not a symbolic link, + // it is not the host namespace. + return nil + } + + // readlink on the path provided in the struct + destOfContainer, err := os.Readlink(path) + if err != nil { + return fmt.Errorf("read soft link %q error", path) + } + if destOfContainer == destOfCurrentProcess { + return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig) + } + return nil +} diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go new file mode 100644 index 0000000..f6826fb --- /dev/null +++ b/libcontainer/configs/validate/validator_test.go @@ -0,0 +1,267 @@ +package validate_test + +import ( + "os" + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/configs/validate" +) + +func TestValidate(t *testing.T) { + config := &configs.Config{ + Rootfs: "/var", + } + + validator := validate.New() + err := validator.Validate(config) + if err != nil { + t.Errorf("Expected error to not occur: %+v", err) + } +} + +func TestValidateWithInvalidRootfs(t *testing.T) { + dir := "rootfs" + os.Symlink("/var", dir) + defer os.Remove(dir) + + config := &configs.Config{ + Rootfs: dir, + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestValidateNetworkWithoutNETNamespace(t 
*testing.T) { + network := &configs.Network{Type: "loopback"} + config := &configs.Config{ + Rootfs: "/var", + Namespaces: []configs.Namespace{}, + Networks: []*configs.Network{network}, + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestValidateNetworkRoutesWithoutNETNamespace(t *testing.T) { + route := &configs.Route{Gateway: "255.255.255.0"} + config := &configs.Config{ + Rootfs: "/var", + Namespaces: []configs.Namespace{}, + Routes: []*configs.Route{route}, + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestValidateHostname(t *testing.T) { + config := &configs.Config{ + Rootfs: "/var", + Hostname: "runc", + Namespaces: configs.Namespaces( + []configs.Namespace{ + {Type: configs.NEWUTS}, + }, + ), + } + + validator := validate.New() + err := validator.Validate(config) + if err != nil { + t.Errorf("Expected error to not occur: %+v", err) + } +} + +func TestValidateHostnameWithoutUTSNamespace(t *testing.T) { + config := &configs.Config{ + Rootfs: "/var", + Hostname: "runc", + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestValidateSecurityWithMaskPaths(t *testing.T) { + config := &configs.Config{ + Rootfs: "/var", + MaskPaths: []string{"/proc/kcore"}, + Namespaces: configs.Namespaces( + []configs.Namespace{ + {Type: configs.NEWNS}, + }, + ), + } + + validator := validate.New() + err := validator.Validate(config) + if err != nil { + t.Errorf("Expected error to not occur: %+v", err) + } +} + +func TestValidateSecurityWithROPaths(t *testing.T) { + config := &configs.Config{ + Rootfs: "/var", + ReadonlyPaths: []string{"/proc/sys"}, + Namespaces: configs.Namespaces( + []configs.Namespace{ + {Type: configs.NEWNS}, + }, + ), + } + + validator := 
validate.New() + err := validator.Validate(config) + if err != nil { + t.Errorf("Expected error to not occur: %+v", err) + } +} + +func TestValidateSecurityWithoutNEWNS(t *testing.T) { + config := &configs.Config{ + Rootfs: "/var", + MaskPaths: []string{"/proc/kcore"}, + ReadonlyPaths: []string{"/proc/sys"}, + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestValidateUsernamespace(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + config := &configs.Config{ + Rootfs: "/var", + Namespaces: configs.Namespaces( + []configs.Namespace{ + {Type: configs.NEWUSER}, + }, + ), + } + + validator := validate.New() + err := validator.Validate(config) + if err != nil { + t.Errorf("expected error to not occur %+v", err) + } +} + +func TestValidateUsernamespaceWithoutUserNS(t *testing.T) { + uidMap := configs.IDMap{ContainerID: 123} + config := &configs.Config{ + Rootfs: "/var", + UidMappings: []configs.IDMap{uidMap}, + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestValidateSysctl(t *testing.T) { + sysctl := map[string]string{ + "fs.mqueue.ctl": "ctl", + "net.ctl": "ctl", + "kernel.ctl": "ctl", + } + + for k, v := range sysctl { + config := &configs.Config{ + Rootfs: "/var", + Sysctl: map[string]string{k: v}, + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } + } +} + +func TestValidateValidSysctl(t *testing.T) { + sysctl := map[string]string{ + "fs.mqueue.ctl": "ctl", + "net.ctl": "ctl", + "kernel.msgmax": "ctl", + } + + for k, v := range sysctl { + config := &configs.Config{ + Rootfs: "/var", + Sysctl: map[string]string{k: v}, + Namespaces: []configs.Namespace{ + { + Type: configs.NEWNET, + }, + { 
+ Type: configs.NEWIPC, + }, + }, + } + + validator := validate.New() + err := validator.Validate(config) + if err != nil { + t.Errorf("Expected error to not occur with {%s=%s} but got: %q", k, v, err) + } + } +} + +func TestValidateSysctlWithSameNs(t *testing.T) { + config := &configs.Config{ + Rootfs: "/var", + Sysctl: map[string]string{"net.ctl": "ctl"}, + Namespaces: configs.Namespaces( + []configs.Namespace{ + { + Type: configs.NEWNET, + Path: "/proc/self/ns/net", + }, + }, + ), + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} + +func TestValidateSysctlWithoutNETNamespace(t *testing.T) { + config := &configs.Config{ + Rootfs: "/var", + Sysctl: map[string]string{"net.ctl": "ctl"}, + Namespaces: []configs.Namespace{}, + } + + validator := validate.New() + err := validator.Validate(config) + if err == nil { + t.Error("Expected error to occur but it was nil") + } +} diff --git a/libcontainer/console_linux.go b/libcontainer/console_linux.go new file mode 100644 index 0000000..9997e93 --- /dev/null +++ b/libcontainer/console_linux.go @@ -0,0 +1,41 @@ +package libcontainer + +import ( + "os" + + "golang.org/x/sys/unix" +) + +// mountConsole creates /dev/console (with the umask cleared for the duration of the call) +// and bind-mounts slavePath on top of it. +func mountConsole(slavePath string) error { + oldMask := unix.Umask(0000) + defer unix.Umask(oldMask) + f, err := os.Create("/dev/console") + if err != nil && !os.IsExist(err) { + return err + } + if f != nil { + f.Close() + } + return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "") +} + +// dupStdio opens the slavePath for the console and dups the fds to the current +// process's stdio, fd 0,1,2.
+func dupStdio(slavePath string) error { + fd, err := unix.Open(slavePath, unix.O_RDWR, 0) + if err != nil { + return &os.PathError{ + Op: "open", + Path: slavePath, + Err: err, + } + } + for _, i := range []int{0, 1, 2} { + if err := unix.Dup3(fd, i, 0); err != nil { + return err + } + } + return nil +} diff --git a/libcontainer/container.go b/libcontainer/container.go new file mode 100644 index 0000000..ba7541c --- /dev/null +++ b/libcontainer/container.go @@ -0,0 +1,173 @@ +// Package libcontainer provides a native Go implementation for creating containers +// with namespaces, cgroups, capabilities, and filesystem access controls. +// It allows you to manage the lifecycle of the container performing additional operations +// after the container is created. +package libcontainer + +import ( + "os" + "time" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" +) + +// Status is the status of a container. +type Status int + +const ( + // Created is the status that denotes the container exists but has not been run yet. + Created Status = iota + // Running is the status that denotes the container exists and is running. + Running + // Pausing is the status that denotes the container exists, it is in the process of being paused. + Pausing + // Paused is the status that denotes the container exists, but all its processes are paused. + Paused + // Stopped is the status that denotes the container does not have a created or running process. + Stopped +) + +func (s Status) String() string { + switch s { + case Created: + return "created" + case Running: + return "running" + case Pausing: + return "pausing" + case Paused: + return "paused" + case Stopped: + return "stopped" + default: + return "unknown" + } +} + +// BaseState represents the platform agnostic pieces relating to a +// running container's state +type BaseState struct { + // ID is the container ID. 
+ ID string `json:"id"` + + // InitProcessPid is the init process id in the parent namespace. + InitProcessPid int `json:"init_process_pid"` + + // InitProcessStartTime is the init process start time in clock cycles since boot time. + InitProcessStartTime uint64 `json:"init_process_start"` + + // Created is the unix timestamp for the creation time of the container in UTC + Created time.Time `json:"created"` + + // Config is the container's configuration. + Config configs.Config `json:"config"` +} + +// BaseContainer is a libcontainer container object. +// +// Each container is thread-safe within the same process. Since a container can +// be destroyed by a separate process, any function may return that the container +// was not found. BaseContainer includes methods that are platform agnostic. +type BaseContainer interface { + // Returns the ID of the container + ID() string + + // Returns the current status of the container. + // + // errors: + // ContainerNotExists - Container no longer exists, + // Systemerror - System error. + Status() (Status, error) + + // State returns the current container's state information. + // + // errors: + // SystemError - System error. + State() (*State, error) + + // OCIState returns the current container's state information. + // + // errors: + // SystemError - System error. + OCIState() (*specs.State, error) + + // Returns the current config of the container. + Config() configs.Config + + // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. + // + // errors: + // ContainerNotExists - Container no longer exists, + // Systemerror - System error. + // + // Some of the returned PIDs may no longer refer to processes in the Container, unless + // the Container state is PAUSED in which case every PID in the slice is valid. + Processes() ([]int, error) + + // Returns statistics for the container. 
+ // + // errors: + // ContainerNotExists - Container no longer exists, + // Systemerror - System error. + Stats() (*Stats, error) + + // Set resources of container as configured + // + // We can use this to change resources when containers are running. + // + // errors: + // SystemError - System error. + Set(config configs.Config) error + + // Start a process inside the container. Returns error if process fails to + // start. You can track process lifecycle with passed Process structure. + // + // errors: + // ContainerNotExists - Container no longer exists, + // ConfigInvalid - config is invalid, + // ContainerPaused - Container is paused, + // SystemError - System error. + Start(process *Process) (err error) + + // Run immediately starts the process inside the container. Returns error if process + // fails to start. It does not block waiting for the exec fifo after start returns but + // opens the fifo after start returns. + // + // errors: + // ContainerNotExists - Container no longer exists, + // ConfigInvalid - config is invalid, + // ContainerPaused - Container is paused, + // SystemError - System error. + Run(process *Process) (err error) + + // Destroys the container, if its in a valid state, after killing any + // remaining running processes. + // + // Any event registrations are removed before the container is destroyed. + // No error is returned if the container is already destroyed. + // + // Running containers must first be stopped using Signal(..). + // Paused containers must first be resumed using Resume(..). + // + // errors: + // ContainerNotStopped - Container is still running, + // ContainerPaused - Container is paused, + // SystemError - System error. + Destroy() error + + // Signal sends the provided signal code to the container's initial process. + // + // If all is specified the signal is sent to all processes in the container + // including the initial process. + // + // errors: + // SystemError - System error. 
+ Signal(s os.Signal, all bool) error + + // Exec signals the container to exec the users process at the end of the init. + // + // errors: + // SystemError - System error. + Exec() error +} diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go new file mode 100644 index 0000000..fe70c93 --- /dev/null +++ b/libcontainer/container_linux.go @@ -0,0 +1,2060 @@ +// +build linux + +package libcontainer + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "net" + "os" + "os/exec" + "path/filepath" + "reflect" + "strings" + "sync" + "syscall" // only for SysProcAttr and Signal + "time" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/runtime-spec/specs-go" + + criurpc "github.com/checkpoint-restore/go-criu/rpc" + "github.com/golang/protobuf/proto" + "github.com/sirupsen/logrus" + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +const stdioFdCount = 3 + +type linuxContainer struct { + id string + root string + config *configs.Config + cgroupManager cgroups.Manager + intelRdtManager intelrdt.Manager + initPath string + initArgs []string + initProcess parentProcess + initProcessStartTime uint64 + criuPath string + newuidmapPath string + newgidmapPath string + m sync.Mutex + criuVersion int + state containerState + created time.Time +} + +// State represents a running container's state +type State struct { + BaseState + + // Platform specific fields below here + + // Specified if the container was started under the rootless mode. 
+ // Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups + Rootless bool `json:"rootless"` + + // Path to all the cgroups setup for a container. Key is cgroup subsystem name + // with the value as the path. + CgroupPaths map[string]string `json:"cgroup_paths"` + + // NamespacePaths are filepaths to the container's namespaces. Key is the namespace type + // with the value as the path. + NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"` + + // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore + ExternalDescriptors []string `json:"external_descriptors,omitempty"` + + // Intel RDT "resource control" filesystem path + IntelRdtPath string `json:"intel_rdt_path"` +} + +// Container is a libcontainer container object. +// +// Each container is thread-safe within the same process. Since a container can +// be destroyed by a separate process, any function may return that the container +// was not found. +type Container interface { + BaseContainer + + // Methods below here are platform specific + + // Checkpoint checkpoints the running container's state to disk using the criu(8) utility. + // + // errors: + // Systemerror - System error. + Checkpoint(criuOpts *CriuOpts) error + + // Restore restores the checkpointed container to a running state using the criu(8) utility. + // + // errors: + // Systemerror - System error. + Restore(process *Process, criuOpts *CriuOpts) error + + // If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses + // the execution of any user processes. Asynchronously, when the container finished being paused the + // state is changed to PAUSED. + // If the Container state is PAUSED, do nothing. + // + // errors: + // ContainerNotExists - Container no longer exists, + // ContainerNotRunning - Container not running or created, + // Systemerror - System error. 
+ Pause() error + + // If the Container state is PAUSED, resumes the execution of any user processes in the + // Container before setting the Container state to RUNNING. + // If the Container state is RUNNING, do nothing. + // + // errors: + // ContainerNotExists - Container no longer exists, + // ContainerNotPaused - Container is not paused, + // Systemerror - System error. + Resume() error + + // NotifyOOM returns a read-only channel signaling when the container receives an OOM notification. + // + // errors: + // Systemerror - System error. + NotifyOOM() (<-chan struct{}, error) + + // NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level + // + // errors: + // Systemerror - System error. + NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) +} + +// ID returns the container's unique ID +func (c *linuxContainer) ID() string { + return c.id +} + +// Config returns the container's configuration +func (c *linuxContainer) Config() configs.Config { + return *c.config +} + +func (c *linuxContainer) Status() (Status, error) { + c.m.Lock() + defer c.m.Unlock() + return c.currentStatus() +} + +func (c *linuxContainer) State() (*State, error) { + c.m.Lock() + defer c.m.Unlock() + return c.currentState() +} + +func (c *linuxContainer) OCIState() (*specs.State, error) { + c.m.Lock() + defer c.m.Unlock() + return c.currentOCIState() +} + +func (c *linuxContainer) Processes() ([]int, error) { + pids, err := c.cgroupManager.GetAllPids() + if err != nil { + return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups") + } + return pids, nil +} + +func (c *linuxContainer) Stats() (*Stats, error) { + var ( + err error + stats = &Stats{} + ) + if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { + return stats, newSystemErrorWithCause(err, "getting container stats from cgroups") + } + if c.intelRdtManager != nil { + if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); 
err != nil { + return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats") + } + } + for _, iface := range c.config.Networks { + switch iface.Type { + case "veth": + istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) + if err != nil { + return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName) + } + stats.Interfaces = append(stats.Interfaces, istats) + } + } + return stats, nil +} + +func (c *linuxContainer) Set(config configs.Config) error { + c.m.Lock() + defer c.m.Unlock() + status, err := c.currentStatus() + if err != nil { + return err + } + if status == Stopped { + return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning) + } + if err := c.cgroupManager.Set(&config); err != nil { + // Set configs back + if err2 := c.cgroupManager.Set(c.config); err2 != nil { + logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2) + } + return err + } + if c.intelRdtManager != nil { + if err := c.intelRdtManager.Set(&config); err != nil { + // Set configs back + if err2 := c.intelRdtManager.Set(c.config); err2 != nil { + logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2) + } + return err + } + } + // After config setting succeed, update config and states + c.config = &config + _, err = c.updateState(nil) + return err +} + +func (c *linuxContainer) Start(process *Process) error { + c.m.Lock() + defer c.m.Unlock() + if process.Init { + if err := c.createExecFifo(); err != nil { + return err + } + } + if err := c.start(process); err != nil { + if process.Init { + c.deleteExecFifo() + } + return err + } + return nil +} + +func (c *linuxContainer) Run(process *Process) error { + if err := c.Start(process); err != nil { + return err + } + if process.Init { + return c.exec() + } + return nil +} + +func (c 
// readFromExecFifo drains execFifo and reports whether anything was read.
//
// It returns the read error if reading fails, an error if the reader
// yielded no data at all, and nil when at least one byte was read.
func readFromExecFifo(execFifo io.Reader) error {
	payload, readErr := ioutil.ReadAll(execFifo)
	switch {
	case readErr != nil:
		return readErr
	case len(payload) == 0:
		return fmt.Errorf("cannot start an already running container")
	default:
		return nil
	}
}
err != nil { + return newSystemErrorWithCause(err, "creating new parent process") + } + parent.forwardChildLogs() + if err := parent.start(); err != nil { + // terminate the process to ensure that it properly is reaped. + if err := ignoreTerminateErrors(parent.terminate()); err != nil { + logrus.Warn(err) + } + return newSystemErrorWithCause(err, "starting container process") + } + // generate a timestamp indicating when the container was started + c.created = time.Now().UTC() + if process.Init { + c.state = &createdState{ + c: c, + } + state, err := c.updateState(parent) + if err != nil { + return err + } + c.initProcessStartTime = state.InitProcessStartTime + + if c.config.Hooks != nil { + s, err := c.currentOCIState() + if err != nil { + return err + } + for i, hook := range c.config.Hooks.Poststart { + if err := hook.Run(s); err != nil { + if err := ignoreTerminateErrors(parent.terminate()); err != nil { + logrus.Warn(err) + } + return newSystemErrorWithCausef(err, "running poststart hook %d", i) + } + } + } + } + return nil +} + +func (c *linuxContainer) Signal(s os.Signal, all bool) error { + if all { + return signalAllProcesses(c.cgroupManager, s) + } + status, err := c.currentStatus() + if err != nil { + return err + } + // to avoid a PID reuse attack + if status == Running || status == Created || status == Paused { + if err := c.initProcess.signal(s); err != nil { + return newSystemErrorWithCause(err, "signaling init process") + } + return nil + } + return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning) +} + +func (c *linuxContainer) createExecFifo() error { + rootuid, err := c.Config().HostRootUID() + if err != nil { + return err + } + rootgid, err := c.Config().HostRootGID() + if err != nil { + return err + } + + fifoName := filepath.Join(c.root, execFifoFilename) + if _, err := os.Stat(fifoName); err == nil { + return fmt.Errorf("exec fifo %s already exists", fifoName) + } + oldMask := unix.Umask(0000) + if err := 
unix.Mkfifo(fifoName, 0622); err != nil { + unix.Umask(oldMask) + return err + } + unix.Umask(oldMask) + return os.Chown(fifoName, rootuid, rootgid) +} + +func (c *linuxContainer) deleteExecFifo() { + fifoName := filepath.Join(c.root, execFifoFilename) + os.Remove(fifoName) +} + +// includeExecFifo opens the container's execfifo as a pathfd, so that the +// container cannot access the statedir (and the FIFO itself remains +// un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited +// fd, with _LIBCONTAINER_FIFOFD set to its fd number. +func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error { + fifoName := filepath.Join(c.root, execFifoFilename) + fifoFd, err := unix.Open(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + return err + } + + cmd.ExtraFiles = append(cmd.ExtraFiles, os.NewFile(uintptr(fifoFd), fifoName)) + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_FIFOFD=%d", stdioFdCount+len(cmd.ExtraFiles)-1)) + return nil +} + +func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) { + parentInitPipe, childInitPipe, err := utils.NewSockPair("init") + if err != nil { + return nil, newSystemErrorWithCause(err, "creating new init pipe") + } + messageSockPair := filePair{parentInitPipe, childInitPipe} + + parentLogPipe, childLogPipe, err := os.Pipe() + if err != nil { + return nil, fmt.Errorf("Unable to create the log pipe: %s", err) + } + logFilePair := filePair{parentLogPipe, childLogPipe} + + cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe) + if err != nil { + return nil, newSystemErrorWithCause(err, "creating new command template") + } + if !p.Init { + return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair) + } + + // We only set up fifoFd if we're not doing a `runc exec`. The historic + // reason for this is that previously we would pass a dirfd that allowed + // for container rootfs escape (and not doing it in `runc exec` avoided + // that problem), but we no longer do that. 
However, there's no need to do + // this for `runc exec` so we just keep it this way to be safe. + if err := c.includeExecFifo(cmd); err != nil { + return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup") + } + return c.newInitProcess(p, cmd, messageSockPair, logFilePair) +} + +func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) (*exec.Cmd, error) { + cmd := exec.Command(c.initPath, c.initArgs[1:]...) + cmd.Args[0] = c.initArgs[0] + cmd.Stdin = p.Stdin + cmd.Stdout = p.Stdout + cmd.Stderr = p.Stderr + cmd.Dir = c.config.Rootfs + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + cmd.Env = append(cmd.Env, fmt.Sprintf("GOMAXPROCS=%s", os.Getenv("GOMAXPROCS"))) + cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...) + if p.ConsoleSocket != nil { + cmd.ExtraFiles = append(cmd.ExtraFiles, p.ConsoleSocket) + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_CONSOLE=%d", stdioFdCount+len(cmd.ExtraFiles)-1), + ) + } + cmd.ExtraFiles = append(cmd.ExtraFiles, childInitPipe) + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1), + fmt.Sprintf("_LIBCONTAINER_STATEDIR=%s", c.root), + ) + + cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe) + cmd.Env = append(cmd.Env, + fmt.Sprintf("_LIBCONTAINER_LOGPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1), + fmt.Sprintf("_LIBCONTAINER_LOGLEVEL=%s", p.LogLevel), + ) + + // NOTE: when running a container with no PID namespace and the parent process spawning the container is + // PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason + // even with the parent still running. 
+ if c.config.ParentDeathSignal > 0 { + cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal) + } + return cmd, nil +} + +func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) { + cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard)) + nsMaps := make(map[configs.NamespaceType]string) + for _, ns := range c.config.Namespaces { + if ns.Path != "" { + nsMaps[ns.Type] = ns.Path + } + } + _, sharePidns := nsMaps[configs.NEWPID] + data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps) + if err != nil { + return nil, err + } + init := &initProcess{ + cmd: cmd, + messageSockPair: messageSockPair, + logFilePair: logFilePair, + manager: c.cgroupManager, + intelRdtManager: c.intelRdtManager, + config: c.newInitConfig(p), + container: c, + process: p, + bootstrapData: data, + sharePidns: sharePidns, + } + c.initProcess = init + return init, nil +} + +func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) { + cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns)) + state, err := c.currentState() + if err != nil { + return nil, newSystemErrorWithCause(err, "getting container's current state") + } + // for setns process, we don't have to set cloneflags as the process namespaces + // will only be set via setns syscall + data, err := c.bootstrapData(0, state.NamespacePaths) + if err != nil { + return nil, err + } + return &setnsProcess{ + cmd: cmd, + cgroupPaths: c.cgroupManager.GetPaths(), + rootlessCgroups: c.config.RootlessCgroups, + intelRdtPath: state.IntelRdtPath, + messageSockPair: messageSockPair, + logFilePair: logFilePair, + config: c.newInitConfig(p), + process: p, + bootstrapData: data, + }, nil +} + +func (c *linuxContainer) newInitConfig(process *Process) *initConfig { + cfg := &initConfig{ + Config: c.config, + Args: process.Args, + Env: process.Env, + 
User: process.User, + AdditionalGroups: process.AdditionalGroups, + Cwd: process.Cwd, + Capabilities: process.Capabilities, + PassedFilesCount: len(process.ExtraFiles), + ContainerId: c.ID(), + NoNewPrivileges: c.config.NoNewPrivileges, + RootlessEUID: c.config.RootlessEUID, + RootlessCgroups: c.config.RootlessCgroups, + AppArmorProfile: c.config.AppArmorProfile, + ProcessLabel: c.config.ProcessLabel, + Rlimits: c.config.Rlimits, + } + if process.NoNewPrivileges != nil { + cfg.NoNewPrivileges = *process.NoNewPrivileges + } + if process.AppArmorProfile != "" { + cfg.AppArmorProfile = process.AppArmorProfile + } + if process.Label != "" { + cfg.ProcessLabel = process.Label + } + if len(process.Rlimits) > 0 { + cfg.Rlimits = process.Rlimits + } + cfg.CreateConsole = process.ConsoleSocket != nil + cfg.ConsoleWidth = process.ConsoleWidth + cfg.ConsoleHeight = process.ConsoleHeight + return cfg +} + +func (c *linuxContainer) Destroy() error { + c.m.Lock() + defer c.m.Unlock() + return c.state.destroy() +} + +func (c *linuxContainer) Pause() error { + c.m.Lock() + defer c.m.Unlock() + status, err := c.currentStatus() + if err != nil { + return err + } + switch status { + case Running, Created: + if err := c.cgroupManager.Freeze(configs.Frozen); err != nil { + return err + } + return c.state.transition(&pausedState{ + c: c, + }) + } + return newGenericError(fmt.Errorf("container not running or created: %s", status), ContainerNotRunning) +} + +func (c *linuxContainer) Resume() error { + c.m.Lock() + defer c.m.Unlock() + status, err := c.currentStatus() + if err != nil { + return err + } + if status != Paused { + return newGenericError(fmt.Errorf("container not paused"), ContainerNotPaused) + } + if err := c.cgroupManager.Freeze(configs.Thawed); err != nil { + return err + } + return c.state.transition(&runningState{ + c: c, + }) +} + +func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { + // XXX(cyphar): This requires cgroups. 
+ if c.config.RootlessCgroups { + logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups") + } + return notifyOnOOM(c.cgroupManager.GetPaths()) +} + +func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { + // XXX(cyphar): This requires cgroups. + if c.config.RootlessCgroups { + logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups") + } + return notifyMemoryPressure(c.cgroupManager.GetPaths(), level) +} + +var criuFeatures *criurpc.CriuFeatures + +func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error { + + var t criurpc.CriuReqType + t = criurpc.CriuReqType_FEATURE_CHECK + + // criu 1.8 => 10800 + if err := c.checkCriuVersion(10800); err != nil { + // Feature checking was introduced with CRIU 1.8. + // Ignore the feature check if an older CRIU version is used + // and just act as before. + // As all automated PR testing is done using CRIU 1.7 this + // code will not be tested by automated PR testing. + return nil + } + + // make sure the features we are looking for are really not from + // some previous check + criuFeatures = nil + + req := &criurpc.CriuReq{ + Type: &t, + // Theoretically this should not be necessary but CRIU + // segfaults if Opts is empty. 
// parseCriuVersion runs "<path> -V" and converts the version it prints into
// a single integer: major*10000 + minor*100 + sublevel (1.5.2 => 10502).
//
// Two output formats are understood:
//
//   - release format: no "-" anywhere in the output, parsed from
//     "Version: X.Y.Z" or "Version: X.Y";
//   - git format: the output contains a "-", and the version is taken from
//     the "GitID: vX.Y[.Z]" tag. The last parsed component is incremented by
//     one (sublevel for X.Y.Z, minor for X.Y), so a git build compares as
//     newer than the release it is based on.
//
// In git format the tag is cut at the first "-" that follows "GitID". Only
// that part of the output is searched, so a "-" that appears earlier (for
// example in "Version: 3.0-rc1") no longer produces an invalid slice range.
//
// An error is returned when the command cannot be executed or when fewer
// than two version components can be parsed.
func parseCriuVersion(path string) (int, error) {
	var x, y, z int

	out, err := exec.Command(path, "-V").Output()
	if err != nil {
		return 0, fmt.Errorf("Unable to execute CRIU command: %s", path)
	}
	text := string(out)

	if !strings.Contains(text, "-") {
		// criu release version format
		n, err := fmt.Sscanf(text, "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2
		if err != nil {
			n, err = fmt.Sscanf(text, "Version: %d.%d\n", &x, &y) // 1.6
		}
		if n < 2 || err != nil {
			return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err)
		}
		return x*10000 + y*100 + z, nil
	}

	// criu Git version format
	sp := strings.Index(text, "GitID")
	if sp <= 0 {
		return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path)
	}
	version := text[sp:]
	if ep := strings.Index(version, "-"); ep >= 0 {
		version = version[:ep]
	}

	n, err := fmt.Sscanf(version, "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
	if err != nil {
		n, err = fmt.Sscanf(version, "GitID: v%d.%d", &x, &y) // 1.6
		y++
	} else {
		z++
	}
	if n < 2 || err != nil {
		return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err)
	}

	return x*10000 + y*100 + z, nil
}
+ + // First, reset the variable for the RPC answer to nil + criuVersionRPC = nil + + var t criurpc.CriuReqType + t = criurpc.CriuReqType_VERSION + req := &criurpc.CriuReq{ + Type: &t, + } + + err := c.criuSwrk(nil, req, nil, false, nil) + if err != nil { + return fmt.Errorf("CRIU version check failed: %s", err) + } + + if criuVersionRPC != nil { + logrus.Debugf("CRIU version: %s", criuVersionRPC) + // major and minor are always set + c.criuVersion = int(*criuVersionRPC.Major) * 10000 + c.criuVersion += int(*criuVersionRPC.Minor) * 100 + if criuVersionRPC.Sublevel != nil { + c.criuVersion += int(*criuVersionRPC.Sublevel) + } + if criuVersionRPC.Gitid != nil { + // runc's convention is that a CRIU git release is + // always the same as increasing the minor by 1 + c.criuVersion -= (c.criuVersion % 100) + c.criuVersion += 100 + } + return compareCriuVersion(c.criuVersion, minVersion) + } + + // This is CRIU without the version RPC and therefore + // older than 3.0. Parsing the output is required. 
// waitForCriuLazyServer blocks until one byte can be read from r and then
// overwrites the file at path status with that byte.
//
// The status file is opened with O_TRUNC|O_WRONLY and without O_CREATE, so
// it must already exist. The file is always closed before returning, also
// when the write fails; if the write succeeds but closing fails, the close
// error is returned. r itself is not closed here.
func waitForCriuLazyServer(r *os.File, status string) error {
	data := make([]byte, 1)
	if _, err := r.Read(data); err != nil {
		return err
	}

	fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
	if err != nil {
		return err
	}

	_, err = fd.Write(data)
	// Close unconditionally so the descriptor is not leaked when Write fails.
	if closeErr := fd.Close(); err == nil {
		err = closeErr
	}
	return err
}
// Checkpoint asks CRIU to dump (or, when criuOpts.PreDump is set, pre-dump)
// the container into criuOpts.ImagesDirectory.
//
// The method holds the container lock for its whole duration and proceeds
// in this order:
//  1. require CRIU >= 1.5.2 and a non-empty images directory;
//  2. create the images and work directories if needed (the work directory
//     defaults to "criu.work" under the container root) and open both so
//     their descriptors can be passed to CRIU;
//  3. build the CRIU RPC options from criuOpts and the container config;
//  4. run the feature checks required by pre-dump and lazy pages;
//  5. for a full dump only, register external mounts, masked paths and
//     devices, and write the init process' external descriptors to
//     descriptors.json in the images directory;
//  6. send the request to CRIU through criuSwrk.
//
// Note that criuOpts.WorkDirectory is modified in place when it is empty.
//
// Any error from the steps above is returned unchanged.
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
	c.m.Lock()
	defer c.m.Unlock()

	// Checkpoint is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS().
	// (CLI prints a warning)
	// TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has
	//               support for doing unprivileged dumps, but the setup of
	//               rootless containers might make this complicated.

	// criu 1.5.2 => 10502
	if err := c.checkCriuVersion(10502); err != nil {
		return err
	}

	if criuOpts.ImagesDirectory == "" {
		return fmt.Errorf("invalid directory to save checkpoint")
	}

	// Since a container can be C/R'ed multiple times,
	// the checkpoint directory may already exist.
	if err := os.Mkdir(criuOpts.ImagesDirectory, 0700); err != nil && !os.IsExist(err) {
		return err
	}

	if criuOpts.WorkDirectory == "" {
		criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
	}

	// As above, an already existing work directory is not an error.
	if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
		return err
	}

	// Both directories are handed to CRIU as file descriptors, so they stay
	// open until this function returns.
	workDir, err := os.Open(criuOpts.WorkDirectory)
	if err != nil {
		return err
	}
	defer workDir.Close()

	imageDir, err := os.Open(criuOpts.ImagesDirectory)
	if err != nil {
		return err
	}
	defer imageDir.Close()

	rpcOpts := criurpc.CriuOpts{
		ImagesDirFd:     proto.Int32(int32(imageDir.Fd())),
		WorkDirFd:       proto.Int32(int32(workDir.Fd())),
		LogLevel:        proto.Int32(4),
		LogFile:         proto.String("dump.log"),
		Root:            proto.String(c.config.Rootfs),
		ManageCgroups:   proto.Bool(true),
		NotifyScripts:   proto.Bool(true),
		Pid:             proto.Int32(int32(c.initProcess.pid())),
		ShellJob:        proto.Bool(criuOpts.ShellJob),
		LeaveRunning:    proto.Bool(criuOpts.LeaveRunning),
		TcpEstablished:  proto.Bool(criuOpts.TcpEstablished),
		ExtUnixSk:       proto.Bool(criuOpts.ExternalUnixConnections),
		FileLocks:       proto.Bool(criuOpts.FileLocks),
		EmptyNs:         proto.Uint32(criuOpts.EmptyNs),
		OrphanPtsMaster: proto.Bool(true),
		AutoDedup:       proto.Bool(criuOpts.AutoDedup),
		LazyPages:       proto.Bool(criuOpts.LazyPages),
	}

	c.handleCriuConfigurationFile(&rpcOpts)

	// If the container is running in a network namespace and has
	// a path to the network namespace configured, we will dump
	// that network namespace as an external namespace and we
	// will expect that the namespace exists during restore.
	// This basically means that CRIU will ignore the namespace
	// and expect to be setup correctly.
	nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
	if nsPath != "" {
		// For this to work we need at least criu 3.11.0 => 31100.
		// As there was already a successful version check we will
		// not error out if it fails. runc will just behave as it used
		// to do and ignore external network namespaces.
		err := c.checkCriuVersion(31100)
		if err == nil {
			// CRIU expects the information about an external namespace
			// like this: --external net[<inode>]:<key>
			// This <key> is always 'extRootNetNS'.
			var netns syscall.Stat_t
			err = syscall.Stat(nsPath, &netns)
			if err != nil {
				return err
			}
			criuExternal := fmt.Sprintf("net[%d]:extRootNetNS", netns.Ino)
			rpcOpts.External = append(rpcOpts.External, criuExternal)
		}
	}

	// Pass the freezer cgroup path to CRIU when the cgroup manager has one.
	fcg := c.cgroupManager.GetPaths()["freezer"]
	if fcg != "" {
		rpcOpts.FreezeCgroup = proto.String(fcg)
	}

	// append optional criu opts, e.g., page-server and port
	if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 {
		rpcOpts.Ps = &criurpc.CriuPageServerInfo{
			Address: proto.String(criuOpts.PageServer.Address),
			Port:    proto.Int32(criuOpts.PageServer.Port),
		}
	}

	// pre-dump may need the parentImage param to complete iterative migration
	if criuOpts.ParentImage != "" {
		rpcOpts.ParentImg = proto.String(criuOpts.ParentImage)
		rpcOpts.TrackMem = proto.Bool(true)
	}

	// append optional manage cgroups mode
	if criuOpts.ManageCgroupsMode != 0 {
		// criu 1.7 => 10700
		if err := c.checkCriuVersion(10700); err != nil {
			return err
		}
		mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
		rpcOpts.ManageCgroupsMode = &mode
	}

	// Select the request type; a pre-dump additionally requires the
	// MemTrack feature.
	var t criurpc.CriuReqType
	if criuOpts.PreDump {
		feat := criurpc.CriuFeatures{
			MemTrack: proto.Bool(true),
		}

		if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
			return err
		}

		t = criurpc.CriuReqType_PRE_DUMP
	} else {
		t = criurpc.CriuReqType_DUMP
	}
	req := &criurpc.CriuReq{
		Type: &t,
		Opts: &rpcOpts,
	}

	if criuOpts.LazyPages {
		// lazy migration requested; check if criu supports it
		feat := criurpc.CriuFeatures{
			LazyPages: proto.Bool(true),
		}

		if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
			return err
		}

		// The write end's descriptor number is passed to CRIU as its status
		// fd; a goroutine waits on the read end and forwards the first byte
		// to the file named by criuOpts.StatusFd.
		// NOTE(review): neither end of this pipe is closed in this function,
		// and the goroutine's error result is discarded — confirm whether
		// that is intentional.
		statusRead, statusWrite, err := os.Pipe()
		if err != nil {
			return err
		}
		rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
		go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
	}

	// no need to dump this information in pre-dump
	if !criuOpts.PreDump {
		for _, m := range c.config.Mounts {
			switch m.Device {
			case "bind":
				c.addCriuDumpMount(req, m)
			case "cgroup":
				binds, err := getCgroupMounts(m)
				if err != nil {
					return err
				}
				for _, b := range binds {
					c.addCriuDumpMount(req, b)
				}
			}
		}

		if err := c.addMaskPaths(req); err != nil {
			return err
		}

		// Every configured device node is registered as an external mount
		// that maps its path onto itself.
		for _, node := range c.config.Devices {
			m := &configs.Mount{Destination: node.Path, Source: node.Path}
			c.addCriuDumpMount(req, m)
		}

		// Write the FD info to a file in the image directory
		fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
		if err != nil {
			return err
		}

		err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0600)
		if err != nil {
			return err
		}
	}

	err = c.criuSwrk(nil, req, criuOpts, false, nil)
	if err != nil {
		return err
	}
	return nil
}
// isPathInPrefixList is a small helper for CRIU restore, used to make sure
// that mountpoints located on a tmpfs are not created in the rootfs.
//
// Despite its name it reports true when path is NOT located below any of
// the directories in prefix, and false as soon as path starts with one of
// the prefixes followed by "/". A path equal to a prefix therefore yields
// true, and so does an empty prefix list.
func isPathInPrefixList(path string, prefix []string) bool {
	outside := true
	for i := 0; outside && i < len(prefix); i++ {
		outside = !strings.HasPrefix(path, prefix[i]+"/")
	}
	return outside
}
+func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error { + // First get a list of a all tmpfs mounts + tmpfs := []string{} + for _, m := range mounts { + switch m.Device { + case "tmpfs": + tmpfs = append(tmpfs, m.Destination) + } + } + // Now go through all mounts and create the mountpoints + // if the mountpoints are not on a tmpfs, as CRIU will + // restore the complete tmpfs content from its checkpoint. + for _, m := range mounts { + if isPathInPrefixList(m.Destination, tmpfs) { + if err := c.makeCriuRestoreMountpoints(m); err != nil { + return err + } + } + } + return nil +} + +func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { + c.m.Lock() + defer c.m.Unlock() + + var extraFiles []*os.File + + // Restore is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS(). + // (CLI prints a warning) + // TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have + // support for unprivileged restore at the moment. + + // criu 1.5.2 => 10502 + if err := c.checkCriuVersion(10502); err != nil { + return err + } + if criuOpts.WorkDirectory == "" { + criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") + } + // Since a container can be C/R'ed multiple times, + // the work directory may already exist. + if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) { + return err + } + workDir, err := os.Open(criuOpts.WorkDirectory) + if err != nil { + return err + } + defer workDir.Close() + if criuOpts.ImagesDirectory == "" { + return fmt.Errorf("invalid directory to restore checkpoint") + } + imageDir, err := os.Open(criuOpts.ImagesDirectory) + if err != nil { + return err + } + defer imageDir.Close() + // CRIU has a few requirements for a root directory: + // * it must be a mount point + // * its parent must not be overmounted + // c.config.Rootfs is bind-mounted to a temporary directory + // to satisfy these requirements. 
+ root := filepath.Join(c.root, "criu-root") + if err := os.Mkdir(root, 0755); err != nil { + return err + } + defer os.Remove(root) + root, err = filepath.EvalSymlinks(root) + if err != nil { + return err + } + err = unix.Mount(c.config.Rootfs, root, "", unix.MS_BIND|unix.MS_REC, "") + if err != nil { + return err + } + defer unix.Unmount(root, unix.MNT_DETACH) + t := criurpc.CriuReqType_RESTORE + req := &criurpc.CriuReq{ + Type: &t, + Opts: &criurpc.CriuOpts{ + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + WorkDirFd: proto.Int32(int32(workDir.Fd())), + EvasiveDevices: proto.Bool(true), + LogLevel: proto.Int32(4), + LogFile: proto.String("restore.log"), + RstSibling: proto.Bool(true), + Root: proto.String(root), + ManageCgroups: proto.Bool(true), + NotifyScripts: proto.Bool(true), + ShellJob: proto.Bool(criuOpts.ShellJob), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), + AutoDedup: proto.Bool(criuOpts.AutoDedup), + LazyPages: proto.Bool(criuOpts.LazyPages), + }, + } + + c.handleCriuConfigurationFile(req.Opts) + + // Same as during checkpointing. If the container has a specific network namespace + // assigned to it, this now expects that the checkpoint will be restored in a + // already created network namespace. + nsPath := c.config.Namespaces.PathOf(configs.NEWNET) + if nsPath != "" { + // For this to work we need at least criu 3.11.0 => 31100. + // As there was already a successful version check we will + // not error out if it fails. runc will just behave as it used + // to do and ignore external network namespaces. + err := c.checkCriuVersion(31100) + if err == nil { + // CRIU wants the information about an existing network namespace + // like this: --inherit-fd fd[]: + // The needs to be the same as during checkpointing. 
+ // We are always using 'extRootNetNS' as the key in this. + netns, err := os.Open(nsPath) + defer netns.Close() + if err != nil { + logrus.Errorf("If a specific network namespace is defined it must exist: %s", err) + return fmt.Errorf("Requested network namespace %v does not exist", nsPath) + } + inheritFd := new(criurpc.InheritFd) + inheritFd.Key = proto.String("extRootNetNS") + // The offset of four is necessary because 0, 1, 2 and 3 is already + // used by stdin, stdout, stderr, 'criu swrk' socket. + inheritFd.Fd = proto.Int32(int32(4 + len(extraFiles))) + req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd) + // All open FDs need to be transferred to CRIU via extraFiles + extraFiles = append(extraFiles, netns) + } + } + + // This will modify the rootfs of the container in the same way runc + // modifies the container during initial creation. + if err := c.prepareCriuRestoreMounts(c.config.Mounts); err != nil { + return err + } + + for _, m := range c.config.Mounts { + switch m.Device { + case "bind": + c.addCriuRestoreMount(req, m) + case "cgroup": + binds, err := getCgroupMounts(m) + if err != nil { + return err + } + for _, b := range binds { + c.addCriuRestoreMount(req, b) + } + } + } + + if len(c.config.MaskPaths) > 0 { + m := &configs.Mount{Destination: "/dev/null", Source: "/dev/null"} + c.addCriuRestoreMount(req, m) + } + + for _, node := range c.config.Devices { + m := &configs.Mount{Destination: node.Path, Source: node.Path} + c.addCriuRestoreMount(req, m) + } + + if criuOpts.EmptyNs&unix.CLONE_NEWNET == 0 { + c.restoreNetwork(req, criuOpts) + } + + // append optional manage cgroups mode + if criuOpts.ManageCgroupsMode != 0 { + // criu 1.7 => 10700 + if err := c.checkCriuVersion(10700); err != nil { + return err + } + mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode) + req.Opts.ManageCgroupsMode = &mode + } + + var ( + fds []string + fdJSON []byte + ) + if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, 
descriptorsFilename)); err != nil { + return err + } + + if err := json.Unmarshal(fdJSON, &fds); err != nil { + return err + } + for i := range fds { + if s := fds[i]; strings.Contains(s, "pipe:") { + inheritFd := new(criurpc.InheritFd) + inheritFd.Key = proto.String(s) + inheritFd.Fd = proto.Int32(int32(i)) + req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd) + } + } + return c.criuSwrk(process, req, criuOpts, true, extraFiles) +} + +func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { + // XXX: Do we need to deal with this case? AFAIK criu still requires root. + if err := c.cgroupManager.Apply(pid); err != nil { + return err + } + + if err := c.cgroupManager.Set(c.config); err != nil { + return newSystemError(err) + } + + path := fmt.Sprintf("/proc/%d/cgroup", pid) + cgroupsPaths, err := cgroups.ParseCgroupFile(path) + if err != nil { + return err + } + + for c, p := range cgroupsPaths { + cgroupRoot := &criurpc.CgroupRoot{ + Ctrl: proto.String(c), + Path: proto.String(p), + } + req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot) + } + + return nil +} + +func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) + if err != nil { + return err + } + + var logPath string + if opts != nil { + logPath = filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile()) + } else { + // For the VERSION RPC 'opts' is set to 'nil' and therefore + // opts.WorkDirectory does not exist. Set logPath to "". 
+ logPath = "" + } + criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") + criuClientFileCon, err := net.FileConn(criuClient) + criuClient.Close() + if err != nil { + return err + } + + criuClientCon := criuClientFileCon.(*net.UnixConn) + defer criuClientCon.Close() + + criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") + defer criuServer.Close() + + args := []string{"swrk", "3"} + if c.criuVersion != 0 { + // If the CRIU Version is still '0' then this is probably + // the initial CRIU run to detect the version. Skip it. + logrus.Debugf("Using CRIU %d at: %s", c.criuVersion, c.criuPath) + } + logrus.Debugf("Using CRIU with following args: %s", args) + cmd := exec.Command(c.criuPath, args...) + if process != nil { + cmd.Stdin = process.Stdin + cmd.Stdout = process.Stdout + cmd.Stderr = process.Stderr + } + cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer) + if extraFiles != nil { + cmd.ExtraFiles = append(cmd.ExtraFiles, extraFiles...) + } + + if err := cmd.Start(); err != nil { + return err + } + criuServer.Close() + + defer func() { + criuClientCon.Close() + _, err := cmd.Process.Wait() + if err != nil { + return + } + }() + + if applyCgroups { + err := c.criuApplyCgroups(cmd.Process.Pid, req) + if err != nil { + return err + } + } + + var extFds []string + if process != nil { + extFds, err = getPipeFds(cmd.Process.Pid) + if err != nil { + return err + } + } + + logrus.Debugf("Using CRIU in %s mode", req.GetType().String()) + // In the case of criurpc.CriuReqType_FEATURE_CHECK req.GetOpts() + // should be empty. For older CRIU versions it still will be + // available but empty. criurpc.CriuReqType_VERSION actually + // has no req.GetOpts(). 
+ if !(req.GetType() == criurpc.CriuReqType_FEATURE_CHECK || + req.GetType() == criurpc.CriuReqType_VERSION) { + + val := reflect.ValueOf(req.GetOpts()) + v := reflect.Indirect(val) + for i := 0; i < v.NumField(); i++ { + st := v.Type() + name := st.Field(i).Name + if strings.HasPrefix(name, "XXX_") { + continue + } + value := val.MethodByName("Get" + name).Call([]reflect.Value{}) + logrus.Debugf("CRIU option %s with value %v", name, value[0]) + } + } + data, err := proto.Marshal(req) + if err != nil { + return err + } + _, err = criuClientCon.Write(data) + if err != nil { + return err + } + + buf := make([]byte, 10*4096) + oob := make([]byte, 4096) + for true { + n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob) + if err != nil { + return err + } + if n == 0 { + return fmt.Errorf("unexpected EOF") + } + if n == len(buf) { + return fmt.Errorf("buffer is too small") + } + + resp := new(criurpc.CriuResp) + err = proto.Unmarshal(buf[:n], resp) + if err != nil { + return err + } + if !resp.GetSuccess() { + typeString := req.GetType().String() + if typeString == "VERSION" { + // If the VERSION RPC fails this probably means that the CRIU + // version is too old for this RPC. Just return 'nil'. 
+ return nil + } + return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath) + } + + t := resp.GetType() + switch { + case t == criurpc.CriuReqType_VERSION: + logrus.Debugf("CRIU version: %s", resp) + criuVersionRPC = resp.GetVersion() + break + case t == criurpc.CriuReqType_FEATURE_CHECK: + logrus.Debugf("Feature check says: %s", resp) + criuFeatures = resp.GetFeatures() + case t == criurpc.CriuReqType_NOTIFY: + if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil { + return err + } + t = criurpc.CriuReqType_NOTIFY + req = &criurpc.CriuReq{ + Type: &t, + NotifySuccess: proto.Bool(true), + } + data, err = proto.Marshal(req) + if err != nil { + return err + } + _, err = criuClientCon.Write(data) + if err != nil { + return err + } + continue + case t == criurpc.CriuReqType_RESTORE: + case t == criurpc.CriuReqType_DUMP: + case t == criurpc.CriuReqType_PRE_DUMP: + default: + return fmt.Errorf("unable to parse the response %s", resp.String()) + } + + break + } + + criuClientCon.CloseWrite() + // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. + // Here we want to wait only the CRIU process. + st, err := cmd.Process.Wait() + if err != nil { + return err + } + + // In pre-dump mode CRIU is in a loop and waits for + // the final DUMP command. 
+ // The current runc pre-dump approach, however, is + // start criu in PRE_DUMP once for a single pre-dump + // and not the whole series of pre-dump, pre-dump, ...m, dump + // If we got the message CriuReqType_PRE_DUMP it means + // CRIU was successful and we need to forcefully stop CRIU + if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP { + return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath) + } + return nil +} + +// block any external network activity +func lockNetwork(config *configs.Config) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + + if err := strategy.detach(config); err != nil { + return err + } + } + return nil +} + +func unlockNetwork(config *configs.Config) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + if err = strategy.attach(config); err != nil { + return err + } + } + return nil +} + +func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error { + notify := resp.GetNotify() + if notify == nil { + return fmt.Errorf("invalid response: %s", resp.String()) + } + logrus.Debugf("notify: %s\n", notify.GetScript()) + switch { + case notify.GetScript() == "post-dump": + f, err := os.Create(filepath.Join(c.root, "checkpoint")) + if err != nil { + return err + } + f.Close() + case notify.GetScript() == "network-unlock": + if err := unlockNetwork(c.config); err != nil { + return err + } + case notify.GetScript() == "network-lock": + if err := lockNetwork(c.config); err != nil { + return err + } + case notify.GetScript() == "setup-namespaces": + if c.config.Hooks != nil { + s, err := c.currentOCIState() + if err != nil { + return nil + } + s.Pid = int(notify.GetPid()) + for i, hook := range c.config.Hooks.Prestart { + if err := hook.Run(s); err != nil { + return 
newSystemErrorWithCausef(err, "running prestart hook %d", i) + } + } + } + case notify.GetScript() == "post-restore": + pid := notify.GetPid() + r, err := newRestoredProcess(int(pid), fds) + if err != nil { + return err + } + process.ops = r + if err := c.state.transition(&restoredState{ + imageDir: opts.ImagesDirectory, + c: c, + }); err != nil { + return err + } + // create a timestamp indicating when the restored checkpoint was started + c.created = time.Now().UTC() + if _, err := c.updateState(r); err != nil { + return err + } + if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil { + if !os.IsNotExist(err) { + logrus.Error(err) + } + } + case notify.GetScript() == "orphan-pts-master": + scm, err := unix.ParseSocketControlMessage(oob) + if err != nil { + return err + } + fds, err := unix.ParseUnixRights(&scm[0]) + if err != nil { + return err + } + + master := os.NewFile(uintptr(fds[0]), "orphan-pts-master") + defer master.Close() + + // While we can access console.master, using the API is a good idea. 
+ if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil { + return err + } + } + return nil +} + +func (c *linuxContainer) updateState(process parentProcess) (*State, error) { + if process != nil { + c.initProcess = process + } + state, err := c.currentState() + if err != nil { + return nil, err + } + err = c.saveState(state) + if err != nil { + return nil, err + } + return state, nil +} + +func (c *linuxContainer) saveState(s *State) error { + f, err := os.Create(filepath.Join(c.root, stateFilename)) + if err != nil { + return err + } + defer f.Close() + return utils.WriteJSON(f, s) +} + +func (c *linuxContainer) deleteState() error { + return os.Remove(filepath.Join(c.root, stateFilename)) +} + +func (c *linuxContainer) currentStatus() (Status, error) { + if err := c.refreshState(); err != nil { + return -1, err + } + return c.state.status(), nil +} + +// refreshState needs to be called to verify that the current state on the +// container is what is true. Because consumers of libcontainer can use it +// out of process we need to verify the container's status based on runtime +// information and not rely on our in process info. 
+func (c *linuxContainer) refreshState() error { + paused, err := c.isPaused() + if err != nil { + return err + } + if paused { + return c.state.transition(&pausedState{c: c}) + } + t, err := c.runType() + if err != nil { + return err + } + switch t { + case Created: + return c.state.transition(&createdState{c: c}) + case Running: + return c.state.transition(&runningState{c: c}) + } + return c.state.transition(&stoppedState{c: c}) +} + +func (c *linuxContainer) runType() (Status, error) { + if c.initProcess == nil { + return Stopped, nil + } + pid := c.initProcess.pid() + stat, err := system.Stat(pid) + if err != nil { + return Stopped, nil + } + if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead { + return Stopped, nil + } + // We'll create exec fifo and blocking on it after container is created, + // and delete it after start container. + if _, err := os.Stat(filepath.Join(c.root, execFifoFilename)); err == nil { + return Created, nil + } + return Running, nil +} + +func (c *linuxContainer) isPaused() (bool, error) { + fcg := c.cgroupManager.GetPaths()["freezer"] + if fcg == "" { + // A container doesn't have a freezer cgroup + return false, nil + } + pausedState := "FROZEN" + filename := "freezer.state" + if cgroups.IsCgroup2UnifiedMode() { + filename = "cgroup.freeze" + pausedState = "1" + } + + data, err := ioutil.ReadFile(filepath.Join(fcg, filename)) + if err != nil { + // If freezer cgroup is not mounted, the container would just be not paused. 
+ if os.IsNotExist(err) || err == syscall.ENODEV { + return false, nil + } + return false, newSystemErrorWithCause(err, "checking if container is paused") + } + return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil +} + +func (c *linuxContainer) currentState() (*State, error) { + var ( + startTime uint64 + externalDescriptors []string + pid = -1 + ) + if c.initProcess != nil { + pid = c.initProcess.pid() + startTime, _ = c.initProcess.startTime() + externalDescriptors = c.initProcess.externalDescriptors() + } + intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID()) + if err != nil { + intelRdtPath = "" + } + state := &State{ + BaseState: BaseState{ + ID: c.ID(), + Config: *c.config, + InitProcessPid: pid, + InitProcessStartTime: startTime, + Created: c.created, + }, + Rootless: c.config.RootlessEUID && c.config.RootlessCgroups, + CgroupPaths: c.cgroupManager.GetPaths(), + IntelRdtPath: intelRdtPath, + NamespacePaths: make(map[configs.NamespaceType]string), + ExternalDescriptors: externalDescriptors, + } + if pid > 0 { + for _, ns := range c.config.Namespaces { + state.NamespacePaths[ns.Type] = ns.GetPath(pid) + } + for _, nsType := range configs.NamespaceTypes() { + if !configs.IsNamespaceSupported(nsType) { + continue + } + if _, ok := state.NamespacePaths[nsType]; !ok { + ns := configs.Namespace{Type: nsType} + state.NamespacePaths[ns.Type] = ns.GetPath(pid) + } + } + } + return state, nil +} + +func (c *linuxContainer) currentOCIState() (*specs.State, error) { + bundle, annotations := utils.Annotations(c.config.Labels) + state := &specs.State{ + Version: specs.Version, + ID: c.ID(), + Bundle: bundle, + Annotations: annotations, + } + status, err := c.currentStatus() + if err != nil { + return nil, err + } + state.Status = status.String() + if status != Stopped { + if c.initProcess != nil { + state.Pid = c.initProcess.pid() + } + } + return state, nil +} + +// orderNamespacePaths sorts namespace paths into a list of paths that we +// can setns in 
order. +func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) { + paths := []string{} + for _, ns := range configs.NamespaceTypes() { + + // Remove namespaces that we don't need to join. + if !c.config.Namespaces.Contains(ns) { + continue + } + + if p, ok := namespaces[ns]; ok && p != "" { + // check if the requested namespace is supported + if !configs.IsNamespaceSupported(ns) { + return nil, newSystemError(fmt.Errorf("namespace %s is not supported", ns)) + } + // only set to join this namespace if it exists + if _, err := os.Lstat(p); err != nil { + return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p) + } + // do not allow namespace path with comma as we use it to separate + // the namespace paths + if strings.ContainsRune(p, ',') { + return nil, newSystemError(fmt.Errorf("invalid path %s", p)) + } + paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(ns), p)) + } + + } + + return paths, nil +} + +func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) { + data := bytes.NewBuffer(nil) + for _, im := range idMap { + line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size) + if _, err := data.WriteString(line); err != nil { + return nil, err + } + } + return data.Bytes(), nil +} + +// bootstrapData encodes the necessary data in netlink binary format +// as a io.Reader. +// Consumer can write the data to a bootstrap program +// such as one that uses nsenter package to bootstrap the container's +// init process correctly, i.e. with correct namespaces, uid/gid +// mapping etc. 
+func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) { + // create the netlink message + r := nl.NewNetlinkRequest(int(InitMsg), 0) + + // write cloneFlags + r.AddData(&Int32msg{ + Type: CloneFlagsAttr, + Value: uint32(cloneFlags), + }) + + // write custom namespace paths + if len(nsMaps) > 0 { + nsPaths, err := c.orderNamespacePaths(nsMaps) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: NsPathsAttr, + Value: []byte(strings.Join(nsPaths, ",")), + }) + } + + // write namespace paths only when we are not joining an existing user ns + _, joinExistingUser := nsMaps[configs.NEWUSER] + if !joinExistingUser { + // write uid mappings + if len(c.config.UidMappings) > 0 { + if c.config.RootlessEUID && c.newuidmapPath != "" { + r.AddData(&Bytemsg{ + Type: UidmapPathAttr, + Value: []byte(c.newuidmapPath), + }) + } + b, err := encodeIDMapping(c.config.UidMappings) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: UidmapAttr, + Value: b, + }) + } + + // write gid mappings + if len(c.config.GidMappings) > 0 { + b, err := encodeIDMapping(c.config.GidMappings) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: GidmapAttr, + Value: b, + }) + if c.config.RootlessEUID && c.newgidmapPath != "" { + r.AddData(&Bytemsg{ + Type: GidmapPathAttr, + Value: []byte(c.newgidmapPath), + }) + } + if requiresRootOrMappingTool(c.config) { + r.AddData(&Boolmsg{ + Type: SetgroupAttr, + Value: true, + }) + } + } + } + + if c.config.OomScoreAdj != nil { + // write oom_score_adj + r.AddData(&Bytemsg{ + Type: OomScoreAdjAttr, + Value: []byte(fmt.Sprintf("%d", *c.config.OomScoreAdj)), + }) + } + + // write rootless + r.AddData(&Boolmsg{ + Type: RootlessEUIDAttr, + Value: c.config.RootlessEUID, + }) + + return bytes.NewReader(r.Serialize()), nil +} + +// ignoreTerminateErrors returns nil if the given err matches an error known +// to indicate that the terminate occurred 
successfully or err was nil, otherwise +// err is returned unaltered. +func ignoreTerminateErrors(err error) error { + if err == nil { + return nil + } + s := err.Error() + switch { + case strings.Contains(s, "process already finished"), strings.Contains(s, "Wait was already called"): + return nil + } + return err +} + +func requiresRootOrMappingTool(c *configs.Config) bool { + gidMap := []configs.IDMap{ + {ContainerID: 0, HostID: os.Getegid(), Size: 1}, + } + return !reflect.DeepEqual(c.GidMappings, gidMap) +} diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go new file mode 100644 index 0000000..f8af05d --- /dev/null +++ b/libcontainer/container_linux_test.go @@ -0,0 +1,372 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "io/ioutil" + "os" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/libcontainer/system" +) + +type mockCgroupManager struct { + pids []int + allPids []int + stats *cgroups.Stats + paths map[string]string +} + +type mockIntelRdtManager struct { + stats *intelrdt.Stats + path string +} + +func (m *mockCgroupManager) GetPids() ([]int, error) { + return m.pids, nil +} + +func (m *mockCgroupManager) GetAllPids() ([]int, error) { + return m.allPids, nil +} + +func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) { + return m.stats, nil +} + +func (m *mockCgroupManager) Apply(pid int) error { + return nil +} + +func (m *mockCgroupManager) Set(container *configs.Config) error { + return nil +} + +func (m *mockCgroupManager) Destroy() error { + return nil +} + +func (m *mockCgroupManager) GetPaths() map[string]string { + return m.paths +} + +func (m *mockCgroupManager) GetUnifiedPath() (string, error) { + return "", fmt.Errorf("unimplemented") +} + +func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { + 
return nil +} +func (m *mockCgroupManager) GetCgroups() (*configs.Cgroup, error) { + return nil, nil +} + +func (m *mockIntelRdtManager) Apply(pid int) error { + return nil +} + +func (m *mockIntelRdtManager) GetStats() (*intelrdt.Stats, error) { + return m.stats, nil +} + +func (m *mockIntelRdtManager) Destroy() error { + return nil +} + +func (m *mockIntelRdtManager) GetPath() string { + return m.path +} + +func (m *mockIntelRdtManager) Set(container *configs.Config) error { + return nil +} + +func (m *mockIntelRdtManager) GetCgroups() (*configs.Cgroup, error) { + return nil, nil +} + +type mockProcess struct { + _pid int + started uint64 +} + +func (m *mockProcess) terminate() error { + return nil +} + +func (m *mockProcess) pid() int { + return m._pid +} + +func (m *mockProcess) startTime() (uint64, error) { + return m.started, nil +} + +func (m *mockProcess) start() error { + return nil +} + +func (m *mockProcess) wait() (*os.ProcessState, error) { + return nil, nil +} + +func (m *mockProcess) signal(_ os.Signal) error { + return nil +} + +func (m *mockProcess) externalDescriptors() []string { + return []string{} +} + +func (m *mockProcess) setExternalDescriptors(newFds []string) { +} + +func (m *mockProcess) forwardChildLogs() { +} + +func TestGetContainerPids(t *testing.T) { + container := &linuxContainer{ + id: "myid", + config: &configs.Config{}, + cgroupManager: &mockCgroupManager{allPids: []int{1, 2, 3}}, + } + pids, err := container.Processes() + if err != nil { + t.Fatal(err) + } + for i, expected := range []int{1, 2, 3} { + if pids[i] != expected { + t.Fatalf("expected pid %d but received %d", expected, pids[i]) + } + } +} + +func TestGetContainerStats(t *testing.T) { + container := &linuxContainer{ + id: "myid", + config: &configs.Config{}, + cgroupManager: &mockCgroupManager{ + pids: []int{1, 2, 3}, + stats: &cgroups.Stats{ + MemoryStats: cgroups.MemoryStats{ + Usage: cgroups.MemoryData{ + Usage: 1024, + }, + }, + }, + }, + intelRdtManager: 
&mockIntelRdtManager{ + stats: &intelrdt.Stats{ + L3CacheSchema: "L3:0=f;1=f0", + MemBwSchema: "MB:0=20;1=70", + }, + }, + } + stats, err := container.Stats() + if err != nil { + t.Fatal(err) + } + if stats.CgroupStats == nil { + t.Fatal("cgroup stats are nil") + } + if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 { + t.Fatalf("expected memory usage 1024 but received %d", stats.CgroupStats.MemoryStats.Usage.Usage) + } + if intelrdt.IsCatEnabled() { + if stats.IntelRdtStats == nil { + t.Fatal("intel rdt stats are nil") + } + if stats.IntelRdtStats.L3CacheSchema != "L3:0=f;1=f0" { + t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but received %s", stats.IntelRdtStats.L3CacheSchema) + } + } + if intelrdt.IsMbaEnabled() { + if stats.IntelRdtStats == nil { + t.Fatal("intel rdt stats are nil") + } + if stats.IntelRdtStats.MemBwSchema != "MB:0=20;1=70" { + t.Fatalf("expected MemBwSchema MB:0=20;1=70 but received %s", stats.IntelRdtStats.MemBwSchema) + } + } +} + +func TestGetContainerState(t *testing.T) { + var ( + pid = os.Getpid() + expectedMemoryPath = "/sys/fs/cgroup/memory/myid" + expectedNetworkPath = fmt.Sprintf("/proc/%d/ns/net", pid) + expectedIntelRdtPath = "/sys/fs/resctrl/myid" + ) + container := &linuxContainer{ + id: "myid", + config: &configs.Config{ + Namespaces: []configs.Namespace{ + {Type: configs.NEWPID}, + {Type: configs.NEWNS}, + {Type: configs.NEWNET, Path: expectedNetworkPath}, + {Type: configs.NEWUTS}, + // emulate host for IPC + //{Type: configs.NEWIPC}, + {Type: configs.NEWCGROUP}, + }, + }, + initProcess: &mockProcess{ + _pid: pid, + started: 10, + }, + cgroupManager: &mockCgroupManager{ + pids: []int{1, 2, 3}, + stats: &cgroups.Stats{ + MemoryStats: cgroups.MemoryStats{ + Usage: cgroups.MemoryData{ + Usage: 1024, + }, + }, + }, + paths: map[string]string{ + "memory": expectedMemoryPath, + }, + }, + intelRdtManager: &mockIntelRdtManager{ + stats: &intelrdt.Stats{ + L3CacheSchema: "L3:0=f0;1=f", + MemBwSchema: "MB:0=70;1=20", + }, + path: 
expectedIntelRdtPath, + }, + } + container.state = &createdState{c: container} + state, err := container.State() + if err != nil { + t.Fatal(err) + } + if state.InitProcessPid != pid { + t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid) + } + if state.InitProcessStartTime != 10 { + t.Fatalf("expected process start time 10 but received %d", state.InitProcessStartTime) + } + paths := state.CgroupPaths + if paths == nil { + t.Fatal("cgroup paths should not be nil") + } + if memPath := paths["memory"]; memPath != expectedMemoryPath { + t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath) + } + if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() { + intelRdtPath := state.IntelRdtPath + if intelRdtPath == "" { + t.Fatal("intel rdt path should not be empty") + } + if intelRdtPath != expectedIntelRdtPath { + t.Fatalf("expected intel rdt path %q but received %q", expectedIntelRdtPath, intelRdtPath) + } + } + for _, ns := range container.config.Namespaces { + path := state.NamespacePaths[ns.Type] + if path == "" { + t.Fatalf("expected non nil namespace path for %s", ns.Type) + } + if ns.Type == configs.NEWNET { + if path != expectedNetworkPath { + t.Fatalf("expected path %q but received %q", expectedNetworkPath, path) + } + } else { + file := "" + switch ns.Type { + case configs.NEWNET: + file = "net" + case configs.NEWNS: + file = "mnt" + case configs.NEWPID: + file = "pid" + case configs.NEWIPC: + file = "ipc" + case configs.NEWUSER: + file = "user" + case configs.NEWUTS: + file = "uts" + case configs.NEWCGROUP: + file = "cgroup" + } + expected := fmt.Sprintf("/proc/%d/ns/%s", pid, file) + if expected != path { + t.Fatalf("expected path %q but received %q", expected, path) + } + } + } +} + +func TestGetContainerStateAfterUpdate(t *testing.T) { + var ( + pid = os.Getpid() + ) + stat, err := system.Stat(pid) + if err != nil { + t.Fatal(err) + } + + rootDir, err := ioutil.TempDir("", "TestGetContainerStateAfterUpdate") + if 
err != nil { + t.Fatal(err) + } + defer os.RemoveAll(rootDir) + + container := &linuxContainer{ + root: rootDir, + id: "myid", + config: &configs.Config{ + Namespaces: []configs.Namespace{ + {Type: configs.NEWPID}, + {Type: configs.NEWNS}, + {Type: configs.NEWNET}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + }, + Cgroups: &configs.Cgroup{ + Resources: &configs.Resources{ + Memory: 1024, + }, + }, + }, + initProcess: &mockProcess{ + _pid: pid, + started: stat.StartTime, + }, + cgroupManager: &mockCgroupManager{}, + } + container.state = &createdState{c: container} + state, err := container.State() + if err != nil { + t.Fatal(err) + } + if state.InitProcessPid != pid { + t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid) + } + if state.InitProcessStartTime != stat.StartTime { + t.Fatalf("expected process start time %d but received %d", stat.StartTime, state.InitProcessStartTime) + } + if state.Config.Cgroups.Resources.Memory != 1024 { + t.Fatalf("expected Memory to be 1024 but received %q", state.Config.Cgroups.Memory) + } + + // Set initProcessStartTime so we fake to be running + container.initProcessStartTime = state.InitProcessStartTime + container.state = &runningState{c: container} + newConfig := container.Config() + newConfig.Cgroups.Resources.Memory = 2048 + if err := container.Set(newConfig); err != nil { + t.Fatal(err) + } + state, err = container.State() + if err != nil { + t.Fatal(err) + } + if state.Config.Cgroups.Resources.Memory != 2048 { + t.Fatalf("expected Memory to be 2048 but received %q", state.Config.Cgroups.Memory) + } +} diff --git a/libcontainer/criu_opts_linux.go b/libcontainer/criu_opts_linux.go new file mode 100644 index 0000000..a2e344f --- /dev/null +++ b/libcontainer/criu_opts_linux.go @@ -0,0 +1,40 @@ +package libcontainer + +// cgroup restoring strategy provided by criu +type cgMode uint32 + +const ( + CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu + 
CRIU_CG_MODE_FULL // always restore all cgroups and their properties + CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system + CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT +) + +type CriuPageServerInfo struct { + Address string // IP address of CRIU page server + Port int32 // port number of CRIU page server +} + +type VethPairName struct { + ContainerInterfaceName string + HostInterfaceName string +} + +type CriuOpts struct { + ImagesDirectory string // directory for storing image files + WorkDirectory string // directory to cd and write logs/pidfiles/stats to + ParentImage string // directory for storing parent image files in pre-dump and dump + LeaveRunning bool // leave container in running state after checkpoint + TcpEstablished bool // checkpoint/restore established TCP connections + ExternalUnixConnections bool // allow external unix connections + ShellJob bool // allow to dump and restore shell jobs + FileLocks bool // handle file locks, for safety + PreDump bool // call criu predump to perform iterative checkpoint + PageServer CriuPageServerInfo // allow to dump to criu page server + VethPairs []VethPairName // pass the veth to criu when restore + ManageCgroupsMode cgMode // dump or restore cgroup mode + EmptyNs uint32 // don't c/r properties for namespace from this mask + AutoDedup bool // auto deduplication for incremental dumps + LazyPages bool // restore memory pages lazily using userfaultfd + StatusFd string // fd for feedback when lazy server is ready +} diff --git a/libcontainer/devices/devices.go b/libcontainer/devices/devices.go new file mode 100644 index 0000000..5dabe06 --- /dev/null +++ b/libcontainer/devices/devices.go @@ -0,0 +1,110 @@ +package devices + +import ( + "errors" + "io/ioutil" + "os" + "path/filepath" + + "github.com/opencontainers/runc/libcontainer/configs" + "golang.org/x/sys/unix" +) + +var ( + // ErrNotADevice denotes that a file is not a valid linux device. 
+ ErrNotADevice = errors.New("not a device node") +) + +// Testing dependencies +var ( + unixLstat = unix.Lstat + ioutilReadDir = ioutil.ReadDir +) + +// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the +// information about a linux device and return that information as a Device struct. +func DeviceFromPath(path, permissions string) (*configs.Device, error) { + var stat unix.Stat_t + err := unixLstat(path, &stat) + if err != nil { + return nil, err + } + + var ( + devNumber = uint64(stat.Rdev) + major = unix.Major(devNumber) + minor = unix.Minor(devNumber) + ) + if major == 0 { + return nil, ErrNotADevice + } + + var ( + devType rune + mode = stat.Mode + ) + switch { + case mode&unix.S_IFBLK == unix.S_IFBLK: + devType = 'b' + case mode&unix.S_IFCHR == unix.S_IFCHR: + devType = 'c' + } + return &configs.Device{ + Type: devType, + Path: path, + Major: int64(major), + Minor: int64(minor), + Permissions: permissions, + FileMode: os.FileMode(mode), + Uid: stat.Uid, + Gid: stat.Gid, + }, nil +} + +// HostDevices returns all devices that can be found under /dev directory. +func HostDevices() ([]*configs.Device, error) { + return GetDevices("/dev") +} + +// GetDevices recursively traverses a directory specified by path +// and returns all devices found there. +func GetDevices(path string) ([]*configs.Device, error) { + files, err := ioutilReadDir(path) + if err != nil { + return nil, err + } + var out []*configs.Device + for _, f := range files { + switch { + case f.IsDir(): + switch f.Name() { + // ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825 + // ".udev" added to address https://github.com/opencontainers/runc/issues/2093 + case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev": + continue + default: + sub, err := GetDevices(filepath.Join(path, f.Name())) + if err != nil { + return nil, err + } + + out = append(out, sub...) 
+ continue + } + case f.Name() == "console": + continue + } + device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") + if err != nil { + if err == ErrNotADevice { + continue + } + if os.IsNotExist(err) { + continue + } + return nil, err + } + out = append(out, device) + } + return out, nil +} diff --git a/libcontainer/devices/devices_test.go b/libcontainer/devices/devices_test.go new file mode 100644 index 0000000..0afa9d9 --- /dev/null +++ b/libcontainer/devices/devices_test.go @@ -0,0 +1,63 @@ +package devices + +import ( + "errors" + "os" + "testing" + + "golang.org/x/sys/unix" +) + +func TestDeviceFromPathLstatFailure(t *testing.T) { + testError := errors.New("test error") + + // Override unix.Lstat to inject error. + unixLstat = func(path string, stat *unix.Stat_t) error { + return testError + } + + _, err := DeviceFromPath("", "") + if err != testError { + t.Fatalf("Unexpected error %v, expected %v", err, testError) + } +} + +func TestHostDevicesIoutilReadDirFailure(t *testing.T) { + testError := errors.New("test error") + + // Override ioutil.ReadDir to inject error. + ioutilReadDir = func(dirname string) ([]os.FileInfo, error) { + return nil, testError + } + + _, err := HostDevices() + if err != testError { + t.Fatalf("Unexpected error %v, expected %v", err, testError) + } +} + +func TestHostDevicesIoutilReadDirDeepFailure(t *testing.T) { + testError := errors.New("test error") + called := false + + // Override ioutil.ReadDir to inject error after the first call. + ioutilReadDir = func(dirname string) ([]os.FileInfo, error) { + if called { + return nil, testError + } + called = true + + // Provoke a second call. 
// ErrorCode is the API error code type.
type ErrorCode int

// API error codes.
const (
	// Factory errors
	IdInUse ErrorCode = iota
	InvalidIdFormat

	// Container errors
	ContainerNotExists
	ContainerPaused
	ContainerNotStopped
	ContainerNotRunning
	ContainerNotPaused

	// Process errors
	NoProcessOps

	// Common errors
	ConfigInvalid
	ConsoleExists
	SystemError
)

// String returns the human-readable description of the error code. Codes
// without a description are reported as "Unknown error".
func (c ErrorCode) String() string {
	descriptions := map[ErrorCode]string{
		// Factory errors
		IdInUse:         "Id already in use",
		InvalidIdFormat: "Invalid format",

		// Container errors
		ContainerNotExists:  "Container does not exist",
		ContainerPaused:     "Container paused",
		ContainerNotStopped: "Container is not stopped",
		ContainerNotRunning: "Container is not running",
		ContainerNotPaused:  "Container is not paused",

		// Process errors
		NoProcessOps: "No process operations",

		// Common errors
		ConfigInvalid: "Invalid configuration",
		ConsoleExists: "Console exists for process",
		SystemError:   "System error",
	}
	if text, known := descriptions[c]; known {
		return text
	}
	return "Unknown error"
}
// Factory creates new containers and loads existing ones by id.
type Factory interface {
	// Create creates a new container with the given id and starts the initial process inside it.
	// id must be a string containing only letters, digits and underscores and must contain
	// between 1 and 1024 characters, inclusive.
	//
	// NOTE(review): the LinuxFactory implementation in this package validates ids
	// against idRegex, which additionally accepts characters such as '+', '-' and
	// '.' and applies no length limit, and its Create returns the container in the
	// stopped state without starting a process. Confirm which description is
	// authoritative.
	//
	// The id must not already be in use by an existing container. Containers created using
	// a factory with the same path (and filesystem) must have distinct ids.
	//
	// Returns the new container with a running process.
	//
	// errors:
	// IdInUse - id is already in use by a container
	// InvalidIdFormat - id has incorrect format
	// ConfigInvalid - config is invalid
	// SystemError - System error
	//
	// On error, any partially created container parts are cleaned up (the operation is atomic).
	Create(id string, config *configs.Config) (Container, error)

	// Load takes an ID for an existing container and returns the container information
	// from the state. This presents a read only view of the container.
	//
	// errors:
	// Path does not exist
	// System error
	Load(id string) (Container, error)

	// StartInitialization is an internal API to libcontainer used during the reexec of the
	// container.
	//
	// Errors:
	// Pipe connection error
	// System error
	StartInitialization() error

	// Type returns info string about factory type (e.g. lxc, libcontainer...)
	Type() string
}
+func InitArgs(args ...string) func(*LinuxFactory) error { + return func(l *LinuxFactory) (err error) { + if len(args) > 0 { + // Resolve relative paths to ensure that its available + // after directory changes. + if args[0], err = filepath.Abs(args[0]); err != nil { + return newGenericError(err, ConfigInvalid) + } + } + + l.InitArgs = args + return nil + } +} + +// SystemdCgroups is an options func to configure a LinuxFactory to return +// containers that use systemd to create and manage cgroups. +func SystemdCgroups(l *LinuxFactory) error { + systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager() + if err != nil { + return err + } + l.NewCgroupsManager = systemdCgroupsManager + return nil +} + +func getUnifiedPath(paths map[string]string) string { + unifiedPath := "" + for k, v := range paths { + if unifiedPath == "" { + unifiedPath = v + } else if v != unifiedPath { + panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v)) + } + } + // can be empty + return unifiedPath +} + +func cgroupfs2(l *LinuxFactory, rootless bool) error { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless) + if err != nil { + panic(err) + } + return m + } + return nil +} + +// Cgroupfs is an options func to configure a LinuxFactory to return containers +// that use the native cgroups filesystem implementation to create and manage +// cgroups. +func Cgroupfs(l *LinuxFactory) error { + if cgroups.IsCgroup2UnifiedMode() { + return cgroupfs2(l, false) + } + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &fs.Manager{ + Cgroups: config, + Paths: paths, + } + } + return nil +} + +// RootlessCgroupfs is an options func to configure a LinuxFactory to return +// containers that use the native cgroups filesystem implementation to create +// and manage cgroups. 
The difference between RootlessCgroupfs and Cgroupfs is +// that RootlessCgroupfs can transparently handle permission errors that occur +// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if +// they've been set up properly). +func RootlessCgroupfs(l *LinuxFactory) error { + if cgroups.IsCgroup2UnifiedMode() { + return cgroupfs2(l, true) + } + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &fs.Manager{ + Cgroups: config, + Rootless: true, + Paths: paths, + } + } + return nil +} + +// IntelRdtfs is an options func to configure a LinuxFactory to return +// containers that use the Intel RDT "resource control" filesystem to +// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth). +func IntelRdtFs(l *LinuxFactory) error { + l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager { + return &intelrdt.IntelRdtManager{ + Config: config, + Id: id, + Path: path, + } + } + return nil +} + +// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. +func TmpfsRoot(l *LinuxFactory) error { + mounted, err := mount.Mounted(l.Root) + if err != nil { + return err + } + if !mounted { + if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil { + return err + } + } + return nil +} + +// CriuPath returns an option func to configure a LinuxFactory with the +// provided criupath +func CriuPath(criupath string) func(*LinuxFactory) error { + return func(l *LinuxFactory) error { + l.CriuPath = criupath + return nil + } +} + +// New returns a linux based container factory based in the root directory and +// configures the factory with the provided option funcs. 
+func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { + if root != "" { + if err := os.MkdirAll(root, 0700); err != nil { + return nil, newGenericError(err, SystemError) + } + } + l := &LinuxFactory{ + Root: root, + InitPath: "/proc/self/exe", + InitArgs: []string{os.Args[0], "init"}, + Validator: validate.New(), + CriuPath: "criu", + } + Cgroupfs(l) + for _, opt := range options { + if opt == nil { + continue + } + if err := opt(l); err != nil { + return nil, err + } + } + return l, nil +} + +// LinuxFactory implements the default factory interface for linux based systems. +type LinuxFactory struct { + // Root directory for the factory to store state. + Root string + + // InitPath is the path for calling the init responsibilities for spawning + // a container. + InitPath string + + // InitArgs are arguments for calling the init responsibilities for spawning + // a container. + InitArgs []string + + // CriuPath is the path to the criu binary used for checkpoint and restore of + // containers. + CriuPath string + + // New{u,g}uidmapPath is the path to the binaries used for mapping with + // rootless containers. + NewuidmapPath string + NewgidmapPath string + + // Validator provides validation to container configurations. + Validator validate.Validator + + // NewCgroupsManager returns an initialized cgroups manager for a single container. + NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager + + // NewIntelRdtManager returns an initialized Intel RDT manager for a single container. 
+ NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager +} + +func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { + if l.Root == "" { + return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) + } + if err := l.validateID(id); err != nil { + return nil, err + } + if err := l.Validator.Validate(config); err != nil { + return nil, newGenericError(err, ConfigInvalid) + } + containerRoot, err := securejoin.SecureJoin(l.Root, id) + if err != nil { + return nil, err + } + if _, err := os.Stat(containerRoot); err == nil { + return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse) + } else if !os.IsNotExist(err) { + return nil, newGenericError(err, SystemError) + } + if err := os.MkdirAll(containerRoot, 0711); err != nil { + return nil, newGenericError(err, SystemError) + } + if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil { + return nil, newGenericError(err, SystemError) + } + c := &linuxContainer{ + id: id, + root: containerRoot, + config: config, + initPath: l.InitPath, + initArgs: l.InitArgs, + criuPath: l.CriuPath, + newuidmapPath: l.NewuidmapPath, + newgidmapPath: l.NewgidmapPath, + cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), + } + if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() { + c.intelRdtManager = l.NewIntelRdtManager(config, id, "") + } + c.state = &stoppedState{c: c} + return c, nil +} + +func (l *LinuxFactory) Load(id string) (Container, error) { + if l.Root == "" { + return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) + } + //when load, we need to check id is valid or not. 
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
//
// The file descriptors are taken from the _LIBCONTAINER_INITPIPE,
// _LIBCONTAINER_FIFOFD and _LIBCONTAINER_CONSOLE environment variables and the
// init type from _LIBCONTAINER_INITTYPE. Failures to parse those variables are
// returned directly. Once the environment has been cleared, any error
// (including a recovered panic) is also written back to the parent over the
// init pipe.
func (l *LinuxFactory) StartInitialization() (err error) {
	var (
		pipefd, fifofd int
		consoleSocket  *os.File
		envInitPipe    = os.Getenv("_LIBCONTAINER_INITPIPE")
		envFifoFd      = os.Getenv("_LIBCONTAINER_FIFOFD")
		envConsole     = os.Getenv("_LIBCONTAINER_CONSOLE")
	)

	// Get the INITPIPE.
	pipefd, err = strconv.Atoi(envInitPipe)
	if err != nil {
		return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
	}

	var (
		pipe = os.NewFile(uintptr(pipefd), "pipe")
		it   = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
	)
	defer pipe.Close()

	// Only init processes have FIFOFD.
	fifofd = -1
	if it == initStandard {
		if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
			return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
		}
	}

	// The console socket is optional; it is only wrapped when the variable is set.
	if envConsole != "" {
		console, err := strconv.Atoi(envConsole)
		if err != nil {
			return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
		}
		consoleSocket = os.NewFile(uintptr(console), "console-socket")
		defer consoleSocket.Close()
	}

	// clear the current process's environment to clean any libcontainer
	// specific env vars.
	os.Clearenv()

	// Deferred calls run last-in-first-out: the recover handler registered
	// below runs before this one, so a panic has already been converted into
	// err by the time it is reported here.
	defer func() {
		// We have an error during the initialization of the container's init,
		// send it back to the parent process in the form of an initError.
		// NOTE(review): when the write itself fails, the init error (err) is
		// printed and werr is dropped — confirm this is intentional.
		if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
			fmt.Fprintln(os.Stderr, err)
			return
		}
		if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
			fmt.Fprintln(os.Stderr, err)
			return
		}
	}()
	// Turn a panic raised during initialization into the returned error; this
	// relies on err being the named result.
	defer func() {
		if e := recover(); e != nil {
			err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
		}
	}()

	i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
	if err != nil {
		return err
	}

	// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
	return i.Init()
}
+func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error { + return func(l *LinuxFactory) error { + l.NewgidmapPath = newgidmapPath + return nil + } +} diff --git a/libcontainer/factory_linux_test.go b/libcontainer/factory_linux_test.go new file mode 100644 index 0000000..8d0ca8a --- /dev/null +++ b/libcontainer/factory_linux_test.go @@ -0,0 +1,235 @@ +// +build linux + +package libcontainer + +import ( + "io/ioutil" + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/mount" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/runtime-spec/specs-go" + + "golang.org/x/sys/unix" +) + +func newTestRoot() (string, error) { + dir, err := ioutil.TempDir("", "libcontainer") + if err != nil { + return "", err + } + return dir, nil +} + +func TestFactoryNew(t *testing.T) { + root, rerr := newTestRoot() + if rerr != nil { + t.Fatal(rerr) + } + defer os.RemoveAll(root) + factory, err := New(root, Cgroupfs) + if err != nil { + t.Fatal(err) + } + if factory == nil { + t.Fatal("factory should not be nil") + } + lfactory, ok := factory.(*LinuxFactory) + if !ok { + t.Fatal("expected linux factory returned on linux based systems") + } + if lfactory.Root != root { + t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) + } + + if factory.Type() != "libcontainer" { + t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") + } +} + +func TestFactoryNewIntelRdt(t *testing.T) { + root, rerr := newTestRoot() + if rerr != nil { + t.Fatal(rerr) + } + defer os.RemoveAll(root) + factory, err := New(root, Cgroupfs, IntelRdtFs) + if err != nil { + t.Fatal(err) + } + if factory == nil { + t.Fatal("factory should not be nil") + } + lfactory, ok := factory.(*LinuxFactory) + if !ok { + t.Fatal("expected linux factory returned on linux based systems") + } + if lfactory.Root != root { + t.Fatalf("expected 
factory root to be %q but received %q", root, lfactory.Root) + } + + if factory.Type() != "libcontainer" { + t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") + } +} + +func TestFactoryNewTmpfs(t *testing.T) { + root, rerr := newTestRoot() + if rerr != nil { + t.Fatal(rerr) + } + defer os.RemoveAll(root) + factory, err := New(root, Cgroupfs, TmpfsRoot) + if err != nil { + t.Fatal(err) + } + if factory == nil { + t.Fatal("factory should not be nil") + } + lfactory, ok := factory.(*LinuxFactory) + if !ok { + t.Fatal("expected linux factory returned on linux based systems") + } + if lfactory.Root != root { + t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root) + } + + if factory.Type() != "libcontainer" { + t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer") + } + mounted, err := mount.Mounted(lfactory.Root) + if err != nil { + t.Fatal(err) + } + if !mounted { + t.Fatalf("Factory Root is not mounted") + } + mounts, err := mount.GetMounts() + if err != nil { + t.Fatal(err) + } + var found bool + for _, m := range mounts { + if m.Mountpoint == lfactory.Root { + if m.Fstype != "tmpfs" { + t.Fatalf("Fstype of root: %s, expected %s", m.Fstype, "tmpfs") + } + if m.Source != "tmpfs" { + t.Fatalf("Source of root: %s, expected %s", m.Source, "tmpfs") + } + found = true + } + } + if !found { + t.Fatalf("Factory Root is not listed in mounts list") + } + defer unix.Unmount(root, unix.MNT_DETACH) +} + +func TestFactoryLoadNotExists(t *testing.T) { + root, rerr := newTestRoot() + if rerr != nil { + t.Fatal(rerr) + } + defer os.RemoveAll(root) + factory, err := New(root, Cgroupfs) + if err != nil { + t.Fatal(err) + } + _, err = factory.Load("nocontainer") + if err == nil { + t.Fatal("expected nil error loading non-existing container") + } + lerr, ok := err.(Error) + if !ok { + t.Fatal("expected libcontainer error type") + } + if lerr.Code() != ContainerNotExists { + t.Fatalf("expected 
error code %s but received %s", ContainerNotExists, lerr.Code()) + } +} + +func TestFactoryLoadContainer(t *testing.T) { + root, err := newTestRoot() + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(root) + // setup default container config and state for mocking + var ( + id = "1" + expectedHooks = &configs.Hooks{ + Prestart: []configs.Hook{ + configs.CommandHook{Command: configs.Command{Path: "prestart-hook"}}, + }, + Poststart: []configs.Hook{ + configs.CommandHook{Command: configs.Command{Path: "poststart-hook"}}, + }, + Poststop: []configs.Hook{ + unserializableHook{}, + configs.CommandHook{Command: configs.Command{Path: "poststop-hook"}}, + }, + } + expectedConfig = &configs.Config{ + Rootfs: "/mycontainer/root", + Hooks: expectedHooks, + } + expectedState = &State{ + BaseState: BaseState{ + InitProcessPid: 1024, + Config: *expectedConfig, + }, + } + ) + if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil { + t.Fatal(err) + } + if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil { + t.Fatal(err) + } + factory, err := New(root, Cgroupfs, IntelRdtFs) + if err != nil { + t.Fatal(err) + } + container, err := factory.Load(id) + if err != nil { + t.Fatal(err) + } + if container.ID() != id { + t.Fatalf("expected container id %q but received %q", id, container.ID()) + } + config := container.Config() + if config.Rootfs != expectedConfig.Rootfs { + t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs) + } + expectedHooks.Poststop = expectedHooks.Poststop[1:] // expect unserializable hook to be skipped + if !reflect.DeepEqual(config.Hooks, expectedHooks) { + t.Fatalf("expects hooks %q but received %q", expectedHooks, config.Hooks) + } + lcontainer, ok := container.(*linuxContainer) + if !ok { + t.Fatal("expected linux container on linux based systems") + } + if lcontainer.initProcess.pid() != expectedState.InitProcessPid { + t.Fatalf("expected init pid %d but received %d", 
expectedState.InitProcessPid, lcontainer.initProcess.pid()) + } +} + +func marshal(path string, v interface{}) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + return utils.WriteJSON(f, v) +} + +type unserializableHook struct{} + +func (unserializableHook) Run(*specs.State) error { + return nil +} diff --git a/libcontainer/generic_error.go b/libcontainer/generic_error.go new file mode 100644 index 0000000..6e7de2f --- /dev/null +++ b/libcontainer/generic_error.go @@ -0,0 +1,92 @@ +package libcontainer + +import ( + "fmt" + "io" + "text/template" + "time" + + "github.com/opencontainers/runc/libcontainer/stacktrace" +) + +var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} +Code: {{.ECode}} +{{if .Message }} +Message: {{.Message}} +{{end}} +Frames:{{range $i, $frame := .Stack.Frames}} +--- +{{$i}}: {{$frame.Function}} +Package: {{$frame.Package}} +File: {{$frame.File}}@{{$frame.Line}}{{end}} +`)) + +func newGenericError(err error, c ErrorCode) Error { + if le, ok := err.(Error); ok { + return le + } + gerr := &genericError{ + Timestamp: time.Now(), + Err: err, + ECode: c, + Stack: stacktrace.Capture(1), + } + if err != nil { + gerr.Message = err.Error() + } + return gerr +} + +func newSystemError(err error) Error { + return createSystemError(err, "") +} + +func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error { + return createSystemError(err, fmt.Sprintf(cause, v...)) +} + +func newSystemErrorWithCause(err error, cause string) Error { + return createSystemError(err, cause) +} + +// createSystemError creates the specified error with the correct number of +// stack frames skipped. This is only to be called by the other functions for +// formatting the error. 
+func createSystemError(err error, cause string) Error { + gerr := &genericError{ + Timestamp: time.Now(), + Err: err, + ECode: SystemError, + Cause: cause, + Stack: stacktrace.Capture(2), + } + if err != nil { + gerr.Message = err.Error() + } + return gerr +} + +type genericError struct { + Timestamp time.Time + ECode ErrorCode + Err error `json:"-"` + Cause string + Message string + Stack stacktrace.Stacktrace +} + +func (e *genericError) Error() string { + if e.Cause == "" { + return e.Message + } + frame := e.Stack.Frames[0] + return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message) +} + +func (e *genericError) Code() ErrorCode { + return e.ECode +} + +func (e *genericError) Detail(w io.Writer) error { + return errorTemplate.Execute(w, e) +} diff --git a/libcontainer/generic_error_test.go b/libcontainer/generic_error_test.go new file mode 100644 index 0000000..8fbdd4d --- /dev/null +++ b/libcontainer/generic_error_test.go @@ -0,0 +1,49 @@ +package libcontainer + +import ( + "fmt" + "io/ioutil" + "testing" +) + +func TestErrorDetail(t *testing.T) { + err := newGenericError(fmt.Errorf("test error"), SystemError) + if derr := err.Detail(ioutil.Discard); derr != nil { + t.Fatal(derr) + } +} + +func TestErrorWithCode(t *testing.T) { + err := newGenericError(fmt.Errorf("test error"), SystemError) + if code := err.Code(); code != SystemError { + t.Fatalf("expected err code %q but %q", SystemError, code) + } +} + +func TestErrorWithError(t *testing.T) { + cc := []struct { + errmsg string + cause string + }{ + { + errmsg: "test error", + }, + { + errmsg: "test error", + cause: "test", + }, + } + + for _, v := range cc { + err := newSystemErrorWithCause(fmt.Errorf(v.errmsg), v.cause) + + msg := err.Error() + if v.cause == "" && msg != v.errmsg { + t.Fatalf("expected err(%q) equal errmsg(%q)", msg, v.errmsg) + } + if v.cause != "" && msg == v.errmsg { + t.Fatalf("unexpected err(%q) equal errmsg(%q)", msg, v.errmsg) + } + + } +} diff --git 
a/libcontainer/init_linux.go b/libcontainer/init_linux.go new file mode 100644 index 0000000..c1b1560 --- /dev/null +++ b/libcontainer/init_linux.go @@ -0,0 +1,537 @@ +// +build linux + +package libcontainer + +import ( + "encoding/json" + "fmt" + "io" + "io/ioutil" + "net" + "os" + "strings" + "syscall" // only for Errno + "unsafe" + + "golang.org/x/sys/unix" + + "github.com/containerd/console" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/user" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/vishvananda/netlink" +) + +type initType string + +const ( + initSetns initType = "setns" + initStandard initType = "standard" +) + +type pid struct { + Pid int `json:"pid"` + PidFirstChild int `json:"pid_first"` +} + +// network is an internal struct used to setup container networks. +type network struct { + configs.Network + + // TempVethPeerName is a unique temporary veth peer name that was placed into + // the container's namespace. 
+ TempVethPeerName string `json:"temp_veth_peer_name"` +} + +// initConfig is used for transferring parameters from Exec() to Init() +type initConfig struct { + Args []string `json:"args"` + Env []string `json:"env"` + Cwd string `json:"cwd"` + Capabilities *configs.Capabilities `json:"capabilities"` + ProcessLabel string `json:"process_label"` + AppArmorProfile string `json:"apparmor_profile"` + NoNewPrivileges bool `json:"no_new_privileges"` + User string `json:"user"` + AdditionalGroups []string `json:"additional_groups"` + Config *configs.Config `json:"config"` + Networks []*network `json:"network"` + PassedFilesCount int `json:"passed_files_count"` + ContainerId string `json:"containerid"` + Rlimits []configs.Rlimit `json:"rlimits"` + CreateConsole bool `json:"create_console"` + ConsoleWidth uint16 `json:"console_width"` + ConsoleHeight uint16 `json:"console_height"` + RootlessEUID bool `json:"rootless_euid,omitempty"` + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` +} + +type initer interface { + Init() error +} + +func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) { + var config *initConfig + if err := json.NewDecoder(pipe).Decode(&config); err != nil { + return nil, err + } + if err := populateProcessEnvironment(config.Env); err != nil { + return nil, err + } + switch t { + case initSetns: + return &linuxSetnsInit{ + pipe: pipe, + consoleSocket: consoleSocket, + config: config, + }, nil + case initStandard: + return &linuxStandardInit{ + pipe: pipe, + consoleSocket: consoleSocket, + parentPid: unix.Getppid(), + config: config, + fifoFd: fifoFd, + }, nil + } + return nil, fmt.Errorf("unknown init type %q", t) +} + +// populateProcessEnvironment loads the provided environment variables into the +// current processes's environment. 
// populateProcessEnvironment exports every "KEY=value" entry of env into the
// environment of the current process. An entry without an "=" separator is
// rejected, and the first failure reported by os.Setenv stops the loop and is
// returned as is.
func populateProcessEnvironment(env []string) error {
	for _, entry := range env {
		// Only the first "=" separates the name from the value, so values
		// may themselves contain "=".
		sep := strings.IndexByte(entry, '=')
		if sep < 0 {
			return fmt.Errorf("invalid environment '%v'", entry)
		}
		name, value := entry[:sep], entry[sep+1:]
		if err := os.Setenv(name, value); err != nil {
			return err
		}
	}
	return nil
}
This is done to ensure that +// consoles are scoped to a container properly (see runc#814 and the many +// issues related to that). This has to be run *after* we've pivoted to the new +// rootfs (and the users' configuration is entirely set up). +func setupConsole(socket *os.File, config *initConfig, mount bool) error { + defer socket.Close() + // At this point, /dev/ptmx points to something that we would expect. We + // used to change the owner of the slave path, but since the /dev/pts mount + // can have gid=X set (at the users' option). So touching the owner of the + // slave PTY is not necessary, as the kernel will handle that for us. Note + // however, that setupUser (specifically fixStdioPermissions) *will* change + // the UID owner of the console to be the user the process will run as (so + // they can actually control their console). + + pty, slavePath, err := console.NewPty() + if err != nil { + return err + } + + if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 { + err = pty.Resize(console.WinSize{ + Height: config.ConsoleHeight, + Width: config.ConsoleWidth, + }) + + if err != nil { + return err + } + } + + // After we return from here, we don't need the console anymore. + defer pty.Close() + + // Mount the console inside our rootfs. + if mount { + if err := mountConsole(slavePath); err != nil { + return err + } + } + // While we can access console.master, using the API is a good idea. + if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil { + return err + } + // Now, dup over all the things. + return dupStdio(slavePath) +} + +// syncParentReady sends to the given pipe a JSON payload which indicates that +// the init is ready to Exec the child process. It then waits for the parent to +// indicate that it is cleared to Exec. +func syncParentReady(pipe io.ReadWriter) error { + // Tell parent. + if err := writeSync(pipe, procReady); err != nil { + return err + } + + // Wait for parent to give the all-clear. 
// setupUser changes the groups, gid, and uid for the user inside the container.
//
// config.User is resolved with user.GetExecUserPath against the passwd and
// group files reported by user.GetPasswdPath and user.GetGroupPath, with
// uid 0, gid 0 and home "/" passed as the defaults. config.AdditionalGroups,
// when non-empty, is resolved against the same group file.
//
// The function then, in this order: verifies that the resolved uid and gid
// are mapped (config.Config.HostUID / HostGID), fixes the ownership of the
// stdio descriptors, sets the supplementary groups when permitted, calls
// Setgid, calls Setuid, and finally exports HOME if it is not already set.
// The first error encountered is returned.
func setupUser(config *initConfig) error {
	// Set up defaults.
	defaultExecUser := user.ExecUser{
		Uid:  0,
		Gid:  0,
		Home: "/",
	}

	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return err
	}

	groupPath, err := user.GetGroupPath()
	if err != nil {
		return err
	}

	execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
	if err != nil {
		return err
	}

	// Only consult the group file when additional groups were requested.
	var addGroups []int
	if len(config.AdditionalGroups) > 0 {
		addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
		if err != nil {
			return err
		}
	}

	// Rather than just erroring out later in setuid(2) and setgid(2), check
	// that the user is mapped here. Note that the underlying lookup error is
	// replaced by a fixed message.
	if _, err := config.Config.HostUID(execUser.Uid); err != nil {
		return fmt.Errorf("cannot set uid to unmapped user in user namespace")
	}
	if _, err := config.Config.HostGID(execUser.Gid); err != nil {
		return fmt.Errorf("cannot set gid to unmapped user in user namespace")
	}

	if config.RootlessEUID {
		// We cannot set any additional groups in a rootless container and thus
		// we bail if the user asked us to do so. TODO: We currently can't do
		// this check earlier, but if libcontainer.Process.User was typesafe
		// this might work.
		if len(addGroups) > 0 {
			return fmt.Errorf("cannot set any additional groups in a rootless container")
		}
	}

	// Before we change to the container's user make sure that the process's
	// STDIO is correctly owned by the user that we are switching to.
	if err := fixStdioPermissions(config, execUser); err != nil {
		return err
	}

	// A missing /proc/self/setgroups is tolerated: setgroups then stays empty
	// and the "deny" comparison below does not match.
	setgroups, err := ioutil.ReadFile("/proc/self/setgroups")
	if err != nil && !os.IsNotExist(err) {
		return err
	}

	// This isn't allowed in an unprivileged user namespace since Linux 3.19.
	// There's nothing we can do about /etc/group entries, so we silently
	// ignore setting groups here (since the user didn't explicitly ask us to
	// set the group).
	allowSupGroups := !config.RootlessEUID && strings.TrimSpace(string(setgroups)) != "deny"

	if allowSupGroups {
		// The groups resolved for the user come first, followed by the
		// explicitly requested additional groups.
		suppGroups := append(execUser.Sgids, addGroups...)
		if err := unix.Setgroups(suppGroups); err != nil {
			return err
		}
	}

	// The gid is changed before the uid.
	if err := system.Setgid(execUser.Gid); err != nil {
		return err
	}
	if err := system.Setuid(execUser.Uid); err != nil {
		return err
	}

	// If we didn't get HOME already, set it based on the user's HOME.
	if envHome := os.Getenv("HOME"); envHome == "" {
		if err := os.Setenv("HOME", execUser.Home); err != nil {
			return err
		}
	}
	return nil
}
+ if s.Rdev == null.Rdev { + continue + } + + // We only change the uid owner (as it is possible for the mount to + // prefer a different gid, and there's no reason for us to change it). + // The reason why we don't just leave the default uid=X mount setup is + // that users expect to be able to actually use their console. Without + // this code, you couldn't effectively run as a non-root user inside a + // container and also have a console set up. + if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil { + // If we've hit an EINVAL then s.Gid isn't mapped in the user + // namespace. If we've hit an EPERM then the inode's current owner + // is not mapped in our user namespace (in particular, + // privileged_wrt_inode_uidgid() has failed). In either case, we + // are in a configuration where it's better for us to just not + // touch the stdio rather than bail at this point. + if err == unix.EINVAL || err == unix.EPERM { + continue + } + return err + } + } + return nil +} + +// setupNetwork sets up and initializes any network interface inside the container. 
+func setupNetwork(config *initConfig) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + if err := strategy.initialize(config); err != nil { + return err + } + } + return nil +} + +func setupRoute(config *configs.Config) error { + for _, config := range config.Routes { + _, dst, err := net.ParseCIDR(config.Destination) + if err != nil { + return err + } + src := net.ParseIP(config.Source) + if src == nil { + return fmt.Errorf("Invalid source for route: %s", config.Source) + } + gw := net.ParseIP(config.Gateway) + if gw == nil { + return fmt.Errorf("Invalid gateway for route: %s", config.Gateway) + } + l, err := netlink.LinkByName(config.InterfaceName) + if err != nil { + return err + } + route := &netlink.Route{ + Scope: netlink.SCOPE_UNIVERSE, + Dst: dst, + Src: src, + Gw: gw, + LinkIndex: l.Attrs().Index, + } + if err := netlink.RouteAdd(route); err != nil { + return err + } + } + return nil +} + +func setupRlimits(limits []configs.Rlimit, pid int) error { + for _, rlimit := range limits { + if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil { + return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) + } + } + return nil +} + +const _P_PID = 1 + +type siginfo struct { + si_signo int32 + si_errno int32 + si_code int32 + // below here is a union; si_pid is the only field we use + si_pid int32 + // Pad to 128 bytes as detailed in blockUntilWaitable + pad [96]byte +} + +// isWaitable returns true if the process has exited false otherwise. 
+// Its based off blockUntilWaitable in src/os/wait_waitid.go +func isWaitable(pid int) (bool, error) { + si := &siginfo{} + _, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0) + if e != 0 { + return false, os.NewSyscallError("waitid", e) + } + + return si.si_pid != 0, nil +} + +// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise +func isNoChildren(err error) bool { + switch err := err.(type) { + case syscall.Errno: + if err == unix.ECHILD { + return true + } + case *os.SyscallError: + if err.Err == unix.ECHILD { + return true + } + } + return false +} + +// signalAllProcesses freezes then iterates over all the processes inside the +// manager's cgroups sending the signal s to them. +// If s is SIGKILL then it will wait for each process to exit. +// For all other signals it will check if the process is ready to report its +// exit status and only if it is will a wait be performed. +func signalAllProcesses(m cgroups.Manager, s os.Signal) error { + var procs []*os.Process + if err := m.Freeze(configs.Frozen); err != nil { + logrus.Warn(err) + } + pids, err := m.GetAllPids() + if err != nil { + m.Freeze(configs.Thawed) + return err + } + for _, pid := range pids { + p, err := os.FindProcess(pid) + if err != nil { + logrus.Warn(err) + continue + } + procs = append(procs, p) + if err := p.Signal(s); err != nil { + logrus.Warn(err) + } + } + if err := m.Freeze(configs.Thawed); err != nil { + logrus.Warn(err) + } + + subreaper, err := system.GetSubreaper() + if err != nil { + // The error here means that PR_GET_CHILD_SUBREAPER is not + // supported because this code might run on a kernel older + // than 3.4. We don't want to throw an error in that case, + // and we simplify things, considering there is no subreaper + // set. 
+ subreaper = 0 + } + + for _, p := range procs { + if s != unix.SIGKILL { + if ok, err := isWaitable(p.Pid); err != nil { + if !isNoChildren(err) { + logrus.Warn("signalAllProcesses: ", p.Pid, err) + } + continue + } else if !ok { + // Not ready to report so don't wait + continue + } + } + + // In case a subreaper has been setup, this code must not + // wait for the process. Otherwise, we cannot be sure the + // current process will be reaped by the subreaper, while + // the subreaper might be waiting for this process in order + // to retrieve its exit code. + if subreaper == 0 { + if _, err := p.Wait(); err != nil { + if !isNoChildren(err) { + logrus.Warn("wait: ", err) + } + } + } + } + return nil +} diff --git a/libcontainer/integration/checkpoint_test.go b/libcontainer/integration/checkpoint_test.go new file mode 100644 index 0000000..cdb6810 --- /dev/null +++ b/libcontainer/integration/checkpoint_test.go @@ -0,0 +1,264 @@ +package integration + +import ( + "bufio" + "bytes" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + + "golang.org/x/sys/unix" +) + +func showFile(t *testing.T, fname string) error { + t.Logf("=== %s ===\n", fname) + + f, err := os.Open(fname) + if err != nil { + t.Log(err) + return err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + t.Log(scanner.Text()) + } + + if err := scanner.Err(); err != nil { + return err + } + + t.Logf("=== END ===\n") + + return nil +} + +func TestUsernsCheckpoint(t *testing.T) { + t.Skip("Ubuntu kernel is broken to run criu (#2196, #2198)") + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + cmd := exec.Command("criu", "check", "--feature", "userns") + if err := cmd.Run(); err != nil { + t.Skip("Unable to c/r a container with userns") + } 
+ testCheckpoint(t, true) +} + +func TestCheckpoint(t *testing.T) { + t.Skip("Ubuntu kernel is broken to run criu (#2196, #2198)") + testCheckpoint(t, false) +} + +func testCheckpoint(t *testing.T, userns bool) { + if testing.Short() { + return + } + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + + root, err := newTestRoot() + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(root) + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + config.Mounts = append(config.Mounts, &configs.Mount{ + Destination: "/sys/fs/cgroup", + Device: "cgroup", + Flags: defaultMountFlags | unix.MS_RDONLY, + }) + + if userns { + config.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + + factory, err := libcontainer.New(root, libcontainer.Cgroupfs) + + if err != nil { + t.Fatal(err) + } + + container, err := factory.Create("test", config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + + var stdout bytes.Buffer + + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Stdout: &stdout, + Init: true, + } + + err = container.Run(&pconfig) + stdinR.Close() + defer stdinW.Close() + if err != nil { + t.Fatal(err) + } + + pid, err := pconfig.Pid() + if err != nil { + t.Fatal(err) + } + + process, err := os.FindProcess(pid) + if err != nil { + t.Fatal(err) + } + + parentDir, err := ioutil.TempDir("", "criu-parent") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(parentDir) + + preDumpOpts := &libcontainer.CriuOpts{ + ImagesDirectory: parentDir, + WorkDirectory: parentDir, + PreDump: true, + } + preDumpLog := 
filepath.Join(preDumpOpts.WorkDirectory, "dump.log") + + if err := container.Checkpoint(preDumpOpts); err != nil { + showFile(t, preDumpLog) + t.Fatal(err) + } + + state, err := container.Status() + if err != nil { + t.Fatal(err) + } + + if state != libcontainer.Running { + t.Fatal("Unexpected preDump state: ", state) + } + + imagesDir, err := ioutil.TempDir("", "criu") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(imagesDir) + + checkpointOpts := &libcontainer.CriuOpts{ + ImagesDirectory: imagesDir, + WorkDirectory: imagesDir, + ParentImage: "../criu-parent", + } + dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log") + restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log") + + if err := container.Checkpoint(checkpointOpts); err != nil { + showFile(t, dumpLog) + t.Fatal(err) + } + + state, err = container.Status() + if err != nil { + t.Fatal(err) + } + + if state != libcontainer.Stopped { + t.Fatal("Unexpected state checkpoint: ", state) + } + + stdinW.Close() + _, err = process.Wait() + if err != nil { + t.Fatal(err) + } + + // reload the container + container, err = factory.Load("test") + if err != nil { + t.Fatal(err) + } + + restoreStdinR, restoreStdinW, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + + restoreProcessConfig := &libcontainer.Process{ + Cwd: "/", + Stdin: restoreStdinR, + Stdout: &stdout, + Init: true, + } + + err = container.Restore(restoreProcessConfig, checkpointOpts) + restoreStdinR.Close() + defer restoreStdinW.Close() + if err != nil { + showFile(t, restoreLog) + t.Fatal(err) + } + + state, err = container.Status() + if err != nil { + t.Fatal(err) + } + if state != libcontainer.Running { + t.Fatal("Unexpected restore state: ", state) + } + + pid, err = restoreProcessConfig.Pid() + if err != nil { + t.Fatal(err) + } + + process, err = os.FindProcess(pid) + if err != nil { + t.Fatal(err) + } + + _, err = restoreStdinW.WriteString("Hello!") + if err != nil { + t.Fatal(err) + } + + 
restoreStdinW.Close() + s, err := process.Wait() + if err != nil { + t.Fatal(err) + } + + if !s.Success() { + t.Fatal(s.String(), pid) + } + + output := string(stdout.Bytes()) + if !strings.Contains(output, "Hello!") { + t.Fatal("Did not restore the pipe correctly:", output) + } +} diff --git a/libcontainer/integration/doc.go b/libcontainer/integration/doc.go new file mode 100644 index 0000000..87545bc --- /dev/null +++ b/libcontainer/integration/doc.go @@ -0,0 +1,2 @@ +// integration is used for integration testing of libcontainer +package integration diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go new file mode 100644 index 0000000..7822fa8 --- /dev/null +++ b/libcontainer/integration/exec_test.go @@ -0,0 +1,1793 @@ +package integration + +import ( + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "reflect" + "strconv" + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" + + "golang.org/x/sys/unix" +) + +func TestExecPS(t *testing.T) { + testExecPS(t, false) +} + +func TestUsernsExecPS(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + testExecPS(t, true) +} + +func testExecPS(t *testing.T, userns bool) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + if userns { + config.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + + buffers, exitCode, err := runContainer(config, "", "ps", 
"-o", "pid,user,comm") + if err != nil { + t.Fatalf("%s: %s", buffers, err) + } + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + lines := strings.Split(buffers.Stdout.String(), "\n") + if len(lines) < 2 { + t.Fatalf("more than one process running for output %q", buffers.Stdout.String()) + } + expected := `1 root ps` + actual := strings.Trim(lines[1], "\n ") + if actual != expected { + t.Fatalf("expected output %q but received %q", expected, actual) + } +} + +func TestIPCPrivate(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/ipc") + ok(t, err) + + config := newTemplateConfig(rootfs) + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") + ok(t, err) + + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l { + t.Fatalf("ipc link should be private to the container but equals host %q %q", actual, l) + } +} + +func TestIPCHost(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/ipc") + ok(t, err) + + config := newTemplateConfig(rootfs) + config.Namespaces.Remove(configs.NEWIPC) + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") + ok(t, err) + + if exitCode != 0 { + t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { + t.Fatalf("ipc link not equal to host link %q %q", actual, l) + } +} + +func TestIPCJoinPath(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/ipc") + ok(t, err) + + config := newTemplateConfig(rootfs) + config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipc") + + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") + ok(t, err) + + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { + t.Fatalf("ipc link not equal to host link %q %q", actual, l) + } +} + +func TestIPCBadPath(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Namespaces.Add(configs.NEWIPC, "/proc/1/ns/ipcc") + + _, _, err = runContainer(config, "", "true") + if err == nil { + t.Fatal("container succeeded with bad ipc path") + } +} + +func TestRlimit(t *testing.T) { + testRlimit(t, false) +} + +func TestUsernsRlimit(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + + testRlimit(t, true) +} + +func testRlimit(t *testing.T, userns bool) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + if userns { + config.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + + // ensure limit is lower than what the config requests to test that in a user namespace + // 
the Setrlimit call happens early enough that we still have permissions to raise the limit. + ok(t, unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{ + Max: 1024, + Cur: 1024, + })) + + out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n") + ok(t, err) + if limit := strings.TrimSpace(out.Stdout.String()); limit != "1025" { + t.Fatalf("expected rlimit to be 1025, got %s", limit) + } +} + +func TestEnter(t *testing.T) { + if testing.Short() { + return + } + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + ok(t, err) + + var stdout, stdout2 bytes.Buffer + + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"}, + Env: standardEnvironment, + Stdin: stdinR, + Stdout: &stdout, + Init: true, + } + err = container.Run(&pconfig) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + pid, err := pconfig.Pid() + ok(t, err) + + // Execute another process in the container + stdinR2, stdinW2, err := os.Pipe() + ok(t, err) + pconfig2 := libcontainer.Process{ + Cwd: "/", + Env: standardEnvironment, + } + pconfig2.Args = []string{"sh", "-c", "cat && readlink /proc/self/ns/pid"} + pconfig2.Stdin = stdinR2 + pconfig2.Stdout = &stdout2 + + err = container.Run(&pconfig2) + stdinR2.Close() + defer stdinW2.Close() + ok(t, err) + + pid2, err := pconfig2.Pid() + ok(t, err) + + processes, err := container.Processes() + ok(t, err) + + n := 0 + for i := range processes { + if processes[i] == pid || processes[i] == pid2 { + n++ + } + } + if n != 2 { + t.Fatal("unexpected number of processes", processes, pid, pid2) + } + + // Wait processes + stdinW2.Close() + waitProcess(&pconfig2, t) + + stdinW.Close() + 
waitProcess(&pconfig, t) + + // Check that both processes live in the same pidns + pidns := string(stdout.Bytes()) + ok(t, err) + + pidns2 := string(stdout2.Bytes()) + ok(t, err) + + if pidns != pidns2 { + t.Fatal("The second process isn't in the required pid namespace", pidns, pidns2) + } +} + +func TestProcessEnv(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "env"}, + Env: []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOSTNAME=integration", + "TERM=xterm", + "FOO=BAR", + }, + Stdin: nil, + Stdout: &stdout, + Init: true, + } + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) + + outputEnv := string(stdout.Bytes()) + + // Check that the environment has the key/value pair we added + if !strings.Contains(outputEnv, "FOO=BAR") { + t.Fatal("Environment doesn't have the expected FOO=BAR key/value pair: ", outputEnv) + } + + // Make sure that HOME is set + if !strings.Contains(outputEnv, "HOME=/root") { + t.Fatal("Environment doesn't have HOME set: ", outputEnv) + } +} + +func TestProcessEmptyCaps(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Capabilities = nil + + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "cat /proc/self/status"}, + Env: standardEnvironment, + Stdin: nil, + Stdout: &stdout, + Init: true, + } + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) 
+ + outputStatus := string(stdout.Bytes()) + + lines := strings.Split(outputStatus, "\n") + + effectiveCapsLine := "" + for _, l := range lines { + line := strings.TrimSpace(l) + if strings.Contains(line, "CapEff:") { + effectiveCapsLine = line + break + } + } + + if effectiveCapsLine == "" { + t.Fatal("Couldn't find effective caps: ", outputStatus) + } +} + +func TestProcessCaps(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "cat /proc/self/status"}, + Env: standardEnvironment, + Stdin: nil, + Stdout: &stdout, + Capabilities: &configs.Capabilities{}, + Init: true, + } + pconfig.Capabilities.Bounding = append(config.Capabilities.Bounding, "CAP_NET_ADMIN") + pconfig.Capabilities.Permitted = append(config.Capabilities.Permitted, "CAP_NET_ADMIN") + pconfig.Capabilities.Effective = append(config.Capabilities.Effective, "CAP_NET_ADMIN") + pconfig.Capabilities.Inheritable = append(config.Capabilities.Inheritable, "CAP_NET_ADMIN") + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) + + outputStatus := string(stdout.Bytes()) + + lines := strings.Split(outputStatus, "\n") + + effectiveCapsLine := "" + for _, l := range lines { + line := strings.TrimSpace(l) + if strings.Contains(line, "CapEff:") { + effectiveCapsLine = line + break + } + } + + if effectiveCapsLine == "" { + t.Fatal("Couldn't find effective caps: ", outputStatus) + } + + parts := strings.Split(effectiveCapsLine, ":") + effectiveCapsStr := strings.TrimSpace(parts[1]) + + effectiveCaps, err := strconv.ParseUint(effectiveCapsStr, 16, 64) + if err != nil { + t.Fatal("Could not parse effective caps", err) + } + + var netAdminMask uint64 + var netAdminBit uint 
+ netAdminBit = 12 // from capability.h + netAdminMask = 1 << netAdminBit + if effectiveCaps&netAdminMask != netAdminMask { + t.Fatal("CAP_NET_ADMIN is not set as expected") + } +} + +func TestAdditionalGroups(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "id", "-Gn"}, + Env: standardEnvironment, + Stdin: nil, + Stdout: &stdout, + AdditionalGroups: []string{"plugdev", "audio"}, + Init: true, + } + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) + + outputGroups := string(stdout.Bytes()) + + // Check that the groups output has the groups that we specified + if !strings.Contains(outputGroups, "audio") { + t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups) + } + + if !strings.Contains(outputGroups, "plugdev") { + t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups) + } +} + +func TestFreeze(t *testing.T) { + testFreeze(t, false) +} + +func TestSystemdFreeze(t *testing.T) { + if !systemd.UseSystemd() { + t.Skip("Systemd is unsupported") + } + testFreeze(t, true) +} + +func testFreeze(t *testing.T, systemd bool) { + if testing.Short() { + return + } + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + ok(t, err) + + pconfig := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(pconfig) 
+ stdinR.Close() + defer stdinW.Close() + ok(t, err) + + err = container.Pause() + ok(t, err) + state, err := container.Status() + ok(t, err) + err = container.Resume() + ok(t, err) + if state != libcontainer.Paused { + t.Fatal("Unexpected state: ", state) + } + + stdinW.Close() + waitProcess(pconfig, t) +} + +func TestCpuShares(t *testing.T) { + testCpuShares(t, false) +} + +func TestCpuSharesSystemd(t *testing.T) { + if !systemd.UseSystemd() { + t.Skip("Systemd is unsupported") + } + testCpuShares(t, true) +} + +func testCpuShares(t *testing.T, systemd bool) { + if testing.Short() { + return + } + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + if systemd { + config.Cgroups.Parent = "system.slice" + } + config.Cgroups.Resources.CpuShares = 1 + + _, _, err = runContainer(config, "", "ps") + if err == nil { + t.Fatalf("runContainer should failed with invalid CpuShares") + } +} + +func TestPids(t *testing.T) { + testPids(t, false) +} + +func TestPidsSystemd(t *testing.T) { + if !systemd.UseSystemd() { + t.Skip("Systemd is unsupported") + } + testPids(t, true) +} + +func testPids(t *testing.T, systemd bool) { + if testing.Short() { + return + } + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + if systemd { + config.Cgroups.Parent = "system.slice" + } + config.Cgroups.Resources.PidsLimit = -1 + + // Running multiple processes. + _, ret, err := runContainer(config, "", "/bin/sh", "-c", "/bin/true | /bin/true | /bin/true | /bin/true") + if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") { + t.Skip("PIDs cgroup is unsupported") + } + ok(t, err) + + if ret != 0 { + t.Fatalf("expected fork() to succeed with no pids limit") + } + + // Enforce a permissive limit. 
This needs to be fairly hand-wavey due to the + // issues with running Go binaries with pids restrictions (see below). + config.Cgroups.Resources.PidsLimit = 64 + _, ret, err = runContainer(config, "", "/bin/sh", "-c", ` + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true`) + if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") { + t.Skip("PIDs cgroup is unsupported") + } + ok(t, err) + + if ret != 0 { + t.Fatalf("expected fork() to succeed with permissive pids limit") + } + + // Enforce a restrictive limit. 64 * /bin/true + 1 * shell should cause this + // to fail reliability. + config.Cgroups.Resources.PidsLimit = 64 + out, _, err := runContainer(config, "", "/bin/sh", "-c", ` + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true | + /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | /bin/true | bin/true | /bin/true`) + if err != nil && strings.Contains(err.Error(), "no such directory for pids.max") { + t.Skip("PIDs cgroup is unsupported") + } + if err != nil && 
!strings.Contains(out.String(), "sh: can't fork") { + ok(t, err) + } + + if err == nil { + t.Fatalf("expected fork() to fail with restrictive pids limit") + } + + // Minimal restrictions are not really supported, due to quirks in using Go + // due to the fact that it spawns random processes. While we do our best with + // late setting cgroup values, it's just too unreliable with very small pids.max. + // As such, we don't test that case. YMMV. +} + +func TestRunWithKernelMemory(t *testing.T) { + testRunWithKernelMemory(t, false) +} + +func TestRunWithKernelMemorySystemd(t *testing.T) { + if !systemd.UseSystemd() { + t.Skip("Systemd is unsupported") + } + testRunWithKernelMemory(t, true) +} + +func testRunWithKernelMemory(t *testing.T, systemd bool) { + if testing.Short() { + return + } + if cgroups.IsCgroup2UnifiedMode() { + t.Skip("cgroup v1 is not supported") + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + if systemd { + config.Cgroups.Parent = "system.slice" + } + config.Cgroups.Resources.KernelMemory = 52428800 + + _, _, err = runContainer(config, "", "ps") + if err != nil { + t.Fatalf("runContainer failed with kernel memory limit: %v", err) + } +} + +func TestContainerState(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/ipc") + if err != nil { + t.Fatal(err) + } + + config := newTemplateConfig(rootfs) + config.Namespaces = configs.Namespaces([]configs.Namespace{ + {Type: configs.NEWNS}, + {Type: configs.NEWUTS}, + // host for IPC + //{Type: configs.NEWIPC}, + {Type: configs.NEWPID}, + {Type: configs.NEWNET}, + }) + + container, err := newContainerWithName("test", config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + p := &libcontainer.Process{ + Cwd: "/", + Args: 
[]string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(p) + if err != nil { + t.Fatal(err) + } + stdinR.Close() + defer stdinW.Close() + + st, err := container.State() + if err != nil { + t.Fatal(err) + } + + l1, err := os.Readlink(st.NamespacePaths[configs.NEWIPC]) + if err != nil { + t.Fatal(err) + } + if l1 != l { + t.Fatal("Container using non-host ipc namespace") + } + stdinW.Close() + waitProcess(p, t) +} + +func TestPassExtraFiles(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + container, err := newContainerWithName("test", config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + var stdout bytes.Buffer + pipeout1, pipein1, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + pipeout2, pipein2, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + process := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "cd /proc/$$/fd; echo -n *; echo -n 1 >3; echo -n 2 >4"}, + Env: []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}, + ExtraFiles: []*os.File{pipein1, pipein2}, + Stdin: nil, + Stdout: &stdout, + Init: true, + } + err = container.Run(&process) + if err != nil { + t.Fatal(err) + } + + waitProcess(&process, t) + + out := string(stdout.Bytes()) + // fd 5 is the directory handle for /proc/$$/fd + if out != "0 1 2 3 4 5" { + t.Fatalf("expected to have the file descriptors '0 1 2 3 4 5' passed to init, got '%s'", out) + } + var buf = []byte{0} + _, err = pipeout1.Read(buf) + if err != nil { + t.Fatal(err) + } + out1 := string(buf) + if out1 != "1" { + t.Fatalf("expected first pipe to receive '1', got '%s'", out1) + } + + _, err = pipeout2.Read(buf) + if err != nil { + t.Fatal(err) + } + out2 := string(buf) + if out2 != "2" { + t.Fatalf("expected second pipe to receive '2', got '%s'", out2) + } +} + +func 
TestMountCmds(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + tmpDir, err := ioutil.TempDir("", "tmpdir") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + + config := newTemplateConfig(rootfs) + config.Mounts = append(config.Mounts, &configs.Mount{ + Source: tmpDir, + Destination: "/tmp", + Device: "bind", + Flags: unix.MS_BIND | unix.MS_REC, + PremountCmds: []configs.Command{ + {Path: "touch", Args: []string{filepath.Join(tmpDir, "hello")}}, + {Path: "touch", Args: []string{filepath.Join(tmpDir, "world")}}, + }, + PostmountCmds: []configs.Command{ + {Path: "cp", Args: []string{filepath.Join(rootfs, "tmp", "hello"), filepath.Join(rootfs, "tmp", "hello-backup")}}, + {Path: "cp", Args: []string{filepath.Join(rootfs, "tmp", "world"), filepath.Join(rootfs, "tmp", "world-backup")}}, + }, + }) + + container, err := newContainerWithName("test", config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "env"}, + Env: standardEnvironment, + Init: true, + } + err = container.Run(&pconfig) + if err != nil { + t.Fatal(err) + } + + // Wait for process + waitProcess(&pconfig, t) + + entries, err := ioutil.ReadDir(tmpDir) + if err != nil { + t.Fatal(err) + } + expected := []string{"hello", "hello-backup", "world", "world-backup"} + for i, e := range entries { + if e.Name() != expected[i] { + t.Errorf("Got(%s), expect %s", e.Name(), expected[i]) + } + } +} + +func TestSysctl(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Sysctl = map[string]string{ + "kernel.shmmni": "8192", + } + + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Cwd: "/", + 
Args: []string{"sh", "-c", "cat /proc/sys/kernel/shmmni"}, + Env: standardEnvironment, + Stdin: nil, + Stdout: &stdout, + Init: true, + } + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) + + shmmniOutput := strings.TrimSpace(string(stdout.Bytes())) + if shmmniOutput != "8192" { + t.Fatalf("kernel.shmmni property expected to be 8192, but is %s", shmmniOutput) + } +} + +func TestMountCgroupRO(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + + config.Mounts = append(config.Mounts, &configs.Mount{ + Destination: "/sys/fs/cgroup", + Device: "cgroup", + Flags: defaultMountFlags | unix.MS_RDONLY, + }) + + buffers, exitCode, err := runContainer(config, "", "mount") + if err != nil { + t.Fatalf("%s: %s", buffers, err) + } + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + mountInfo := buffers.Stdout.String() + lines := strings.Split(mountInfo, "\n") + for _, l := range lines { + if strings.HasPrefix(l, "tmpfs on /sys/fs/cgroup") { + if !strings.Contains(l, "ro") || + !strings.Contains(l, "nosuid") || + !strings.Contains(l, "nodev") || + !strings.Contains(l, "noexec") { + t.Fatalf("Mode expected to contain 'ro,nosuid,nodev,noexec': %s", l) + } + if !strings.Contains(l, "mode=755") { + t.Fatalf("Mode expected to contain 'mode=755': %s", l) + } + continue + } + if !strings.HasPrefix(l, "cgroup") { + continue + } + if !strings.Contains(l, "ro") || + !strings.Contains(l, "nosuid") || + !strings.Contains(l, "nodev") || + !strings.Contains(l, "noexec") { + t.Fatalf("Mode expected to contain 'ro,nosuid,nodev,noexec': %s", l) + } + } +} + +func TestMountCgroupRW(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + + config.Mounts = append(config.Mounts, &configs.Mount{ + 
Destination: "/sys/fs/cgroup", + Device: "cgroup", + Flags: defaultMountFlags, + }) + + buffers, exitCode, err := runContainer(config, "", "mount") + if err != nil { + t.Fatalf("%s: %s", buffers, err) + } + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + mountInfo := buffers.Stdout.String() + lines := strings.Split(mountInfo, "\n") + for _, l := range lines { + if strings.HasPrefix(l, "tmpfs on /sys/fs/cgroup") { + if !strings.Contains(l, "rw") || + !strings.Contains(l, "nosuid") || + !strings.Contains(l, "nodev") || + !strings.Contains(l, "noexec") { + t.Fatalf("Mode expected to contain 'rw,nosuid,nodev,noexec': %s", l) + } + if !strings.Contains(l, "mode=755") { + t.Fatalf("Mode expected to contain 'mode=755': %s", l) + } + continue + } + if !strings.HasPrefix(l, "cgroup") { + continue + } + if !strings.Contains(l, "rw") || + !strings.Contains(l, "nosuid") || + !strings.Contains(l, "nodev") || + !strings.Contains(l, "noexec") { + t.Fatalf("Mode expected to contain 'rw,nosuid,nodev,noexec': %s", l) + } + } +} + +func TestOomScoreAdj(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.OomScoreAdj = ptrInt(200) + + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "cat /proc/self/oom_score_adj"}, + Env: standardEnvironment, + Stdin: nil, + Stdout: &stdout, + Init: true, + } + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) + outputOomScoreAdj := strings.TrimSpace(string(stdout.Bytes())) + + // Check that the oom_score_adj matches the value that was set as part of config. 
+ if outputOomScoreAdj != strconv.Itoa(*config.OomScoreAdj) { + t.Fatalf("Expected oom_score_adj %d; got %q", *config.OomScoreAdj, outputOomScoreAdj) + } +} + +func TestHook(t *testing.T) { + if testing.Short() { + return + } + + bundle, err := newTestBundle() + ok(t, err) + defer remove(bundle) + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + expectedBundle := bundle + config.Labels = append(config.Labels, fmt.Sprintf("bundle=%s", expectedBundle)) + + getRootfsFromBundle := func(bundle string) (string, error) { + f, err := os.Open(filepath.Join(bundle, "config.json")) + if err != nil { + return "", err + } + + var config configs.Config + if err = json.NewDecoder(f).Decode(&config); err != nil { + return "", err + } + return config.Rootfs, nil + } + + config.Hooks = &configs.Hooks{ + Prestart: []configs.Hook{ + configs.NewFunctionHook(func(s *specs.State) error { + if s.Bundle != expectedBundle { + t.Fatalf("Expected prestart hook bundlePath '%s'; got '%s'", expectedBundle, s.Bundle) + } + + root, err := getRootfsFromBundle(s.Bundle) + if err != nil { + return err + } + f, err := os.Create(filepath.Join(root, "test")) + if err != nil { + return err + } + return f.Close() + }), + }, + Poststart: []configs.Hook{ + configs.NewFunctionHook(func(s *specs.State) error { + if s.Bundle != expectedBundle { + t.Fatalf("Expected poststart hook bundlePath '%s'; got '%s'", expectedBundle, s.Bundle) + } + + root, err := getRootfsFromBundle(s.Bundle) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(root, "test"), []byte("hello world"), 0755) + }), + }, + Poststop: []configs.Hook{ + configs.NewFunctionHook(func(s *specs.State) error { + if s.Bundle != expectedBundle { + t.Fatalf("Expected poststop hook bundlePath '%s'; got '%s'", expectedBundle, s.Bundle) + } + + root, err := getRootfsFromBundle(s.Bundle) + if err != nil { + return err + } + return os.RemoveAll(filepath.Join(root, "test")) + }), 
+ }, + } + + // write config of json format into config.json under bundle + f, err := os.OpenFile(filepath.Join(bundle, "config.json"), os.O_CREATE|os.O_RDWR, 0644) + ok(t, err) + ok(t, json.NewEncoder(f).Encode(config)) + + container, err := newContainerWithName("test", config) + ok(t, err) + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "ls /test"}, + Env: standardEnvironment, + Stdin: nil, + Stdout: &stdout, + Init: true, + } + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) + + outputLs := string(stdout.Bytes()) + + // Check that the ls output has the expected file touched by the prestart hook + if !strings.Contains(outputLs, "/test") { + container.Destroy() + t.Fatalf("ls output doesn't have the expected file: %s", outputLs) + } + + // Check that the file is written by the poststart hook + testFilePath := filepath.Join(rootfs, "test") + contents, err := ioutil.ReadFile(testFilePath) + if err != nil { + t.Fatalf("cannot read file '%s': %s", testFilePath, err) + } + if string(contents) != "hello world" { + t.Fatalf("Expected test file to contain 'hello world'; got '%s'", string(contents)) + } + + if err := container.Destroy(); err != nil { + t.Fatalf("container destroy %s", err) + } + fi, err := os.Stat(filepath.Join(rootfs, "test")) + if err == nil || !os.IsNotExist(err) { + t.Fatalf("expected file to not exist, got %s", fi.Name()) + } +} + +func TestSTDIOPermissions(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + buffers, exitCode, err := runContainer(config, "", "sh", "-c", "echo hi > /dev/stderr") + ok(t, err) + if exitCode != 0 { + t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stderr.String(), "\n"); actual != "hi" { + t.Fatalf("stderr should equal be equal %q %q", actual, "hi") + } +} + +func unmountOp(path string) error { + return unix.Unmount(path, unix.MNT_DETACH) +} + +// Launch container with rootfsPropagation in rslave mode. Also +// bind mount a volume /mnt1host at /mnt1cont at the time of launch. Now do +// another mount on host (/mnt1host/mnt2host) and this new mount should +// propagate to container (/mnt1cont/mnt2host) +func TestRootfsPropagationSlaveMount(t *testing.T) { + var mountPropagated bool + var dir1cont string + var dir2cont string + + dir1cont = "/root/mnt1cont" + + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + + config.RootPropagation = unix.MS_SLAVE | unix.MS_REC + + // Bind mount a volume + dir1host, err := ioutil.TempDir("", "mnt1host") + ok(t, err) + defer os.RemoveAll(dir1host) + + // Make this dir a "shared" mount point. This will make sure a + // slave relationship can be established in container. + err = unix.Mount(dir1host, dir1host, "bind", unix.MS_BIND|unix.MS_REC, "") + ok(t, err) + err = unix.Mount("", dir1host, "", unix.MS_SHARED|unix.MS_REC, "") + ok(t, err) + defer unmountOp(dir1host) + + config.Mounts = append(config.Mounts, &configs.Mount{ + Source: dir1host, + Destination: dir1cont, + Device: "bind", + Flags: unix.MS_BIND | unix.MS_REC}) + + container, err := newContainerWithName("testSlaveMount", config) + ok(t, err) + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + ok(t, err) + + pconfig := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + + err = container.Run(pconfig) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + // Create mnt1host/mnt2host and bind mount itself on top of it. 
This + // should be visible in container. + dir2host, err := ioutil.TempDir(dir1host, "mnt2host") + ok(t, err) + defer os.RemoveAll(dir2host) + + err = unix.Mount(dir2host, dir2host, "bind", unix.MS_BIND, "") + defer unmountOp(dir2host) + ok(t, err) + + // Run "cat /proc/self/mountinfo" in container and look at mount points. + var stdout2 bytes.Buffer + + stdinR2, stdinW2, err := os.Pipe() + ok(t, err) + + pconfig2 := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat", "/proc/self/mountinfo"}, + Env: standardEnvironment, + Stdin: stdinR2, + Stdout: &stdout2, + } + + err = container.Run(pconfig2) + stdinR2.Close() + defer stdinW2.Close() + ok(t, err) + + stdinW2.Close() + waitProcess(pconfig2, t) + stdinW.Close() + waitProcess(pconfig, t) + + mountPropagated = false + dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host)) + + propagationInfo := string(stdout2.Bytes()) + lines := strings.Split(propagationInfo, "\n") + for _, l := range lines { + linefields := strings.Split(l, " ") + if len(linefields) < 5 { + continue + } + + if linefields[4] == dir2cont { + mountPropagated = true + break + } + } + + if mountPropagated != true { + t.Fatalf("Mount on host %s did not propagate in container at %s\n", dir2host, dir2cont) + } +} + +// Launch container with rootfsPropagation 0 so no propagation flags are +// applied. Also bind mount a volume /mnt1host at /mnt1cont at the time of +// launch. 
Now do a mount in container (/mnt1cont/mnt2cont) and this new +// mount should propagate to host (/mnt1host/mnt2cont) + +func TestRootfsPropagationSharedMount(t *testing.T) { + var dir1cont string + var dir2cont string + + dir1cont = "/root/mnt1cont" + + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + config.RootPropagation = unix.MS_PRIVATE + + // Bind mount a volume + dir1host, err := ioutil.TempDir("", "mnt1host") + ok(t, err) + defer os.RemoveAll(dir1host) + + // Make this dir a "shared" mount point. This will make sure a + // shared relationship can be established in container. + err = unix.Mount(dir1host, dir1host, "bind", unix.MS_BIND|unix.MS_REC, "") + ok(t, err) + err = unix.Mount("", dir1host, "", unix.MS_SHARED|unix.MS_REC, "") + ok(t, err) + defer unmountOp(dir1host) + + config.Mounts = append(config.Mounts, &configs.Mount{ + Source: dir1host, + Destination: dir1cont, + Device: "bind", + Flags: unix.MS_BIND | unix.MS_REC}) + + container, err := newContainerWithName("testSharedMount", config) + ok(t, err) + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + ok(t, err) + + pconfig := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + + err = container.Run(pconfig) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + // Create mnt1host/mnt2cont. This will become visible inside container + // at mnt1cont/mnt2cont. Bind mount itself on top of it. This + // should be visible on host now. + dir2host, err := ioutil.TempDir(dir1host, "mnt2cont") + ok(t, err) + defer os.RemoveAll(dir2host) + + dir2cont = filepath.Join(dir1cont, filepath.Base(dir2host)) + + // Mount something in container and see if it is visible on host. 
+ var stdout2 bytes.Buffer + + stdinR2, stdinW2, err := os.Pipe() + ok(t, err) + + pconfig2 := &libcontainer.Process{ + Cwd: "/", + Args: []string{"mount", "--bind", dir2cont, dir2cont}, + Env: standardEnvironment, + Stdin: stdinR2, + Stdout: &stdout2, + Capabilities: &configs.Capabilities{}, + } + + // Provide CAP_SYS_ADMIN + pconfig2.Capabilities.Bounding = append(config.Capabilities.Bounding, "CAP_SYS_ADMIN") + pconfig2.Capabilities.Permitted = append(config.Capabilities.Permitted, "CAP_SYS_ADMIN") + pconfig2.Capabilities.Effective = append(config.Capabilities.Effective, "CAP_SYS_ADMIN") + pconfig2.Capabilities.Inheritable = append(config.Capabilities.Inheritable, "CAP_SYS_ADMIN") + + err = container.Run(pconfig2) + stdinR2.Close() + defer stdinW2.Close() + ok(t, err) + + // Wait for process + stdinW2.Close() + waitProcess(pconfig2, t) + stdinW.Close() + waitProcess(pconfig, t) + + defer unmountOp(dir2host) + + // Check if mount is visible on host or not. + out, err := exec.Command("findmnt", "-n", "-f", "-oTARGET", dir2host).CombinedOutput() + outtrim := strings.TrimSpace(string(out)) + if err != nil { + t.Logf("findmnt error %q: %q", err, outtrim) + } + + if string(outtrim) != dir2host { + t.Fatalf("Mount in container on %s did not propagate to host on %s. finmnt output=%s", dir2cont, dir2host, outtrim) + } +} + +func TestPIDHost(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/pid") + ok(t, err) + + config := newTemplateConfig(rootfs) + config.Namespaces.Remove(configs.NEWPID) + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/pid") + ok(t, err) + + if exitCode != 0 { + t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { + t.Fatalf("ipc link not equal to host link %q %q", actual, l) + } +} + +func TestInitJoinPID(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + // Execute a long-running container + container1, err := newContainer(newTemplateConfig(rootfs)) + ok(t, err) + defer container1.Destroy() + + stdinR1, stdinW1, err := os.Pipe() + ok(t, err) + init1 := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR1, + Init: true, + } + err = container1.Run(init1) + stdinR1.Close() + defer stdinW1.Close() + ok(t, err) + + // get the state of the first container + state1, err := container1.State() + ok(t, err) + pidns1 := state1.NamespacePaths[configs.NEWPID] + + // Run a container inside the existing pidns but with different cgroups + config2 := newTemplateConfig(rootfs) + config2.Namespaces.Add(configs.NEWPID, pidns1) + config2.Cgroups.Path = "integration/test2" + container2, err := newContainerWithName("testCT2", config2) + ok(t, err) + defer container2.Destroy() + + stdinR2, stdinW2, err := os.Pipe() + ok(t, err) + init2 := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR2, + Init: true, + } + err = container2.Run(init2) + stdinR2.Close() + defer stdinW2.Close() + ok(t, err) + // get the state of the second container + state2, err := container2.State() + ok(t, err) + + ns1, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", state1.InitProcessPid)) + ok(t, err) + ns2, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", state2.InitProcessPid)) + ok(t, err) + if ns1 != ns2 { + t.Errorf("pidns(%s), wanted %s", ns2, ns1) + } + + // check that namespaces are not the same + if reflect.DeepEqual(state2.NamespacePaths, state1.NamespacePaths) { + t.Errorf("Namespaces(%v), original %v", 
state2.NamespacePaths, + state1.NamespacePaths) + } + // check that pidns is joined correctly. The initial container process list + // should contain the second container's init process + buffers := newStdBuffers() + ps := &libcontainer.Process{ + Cwd: "/", + Args: []string{"ps"}, + Env: standardEnvironment, + Stdout: buffers.Stdout, + } + err = container1.Run(ps) + ok(t, err) + waitProcess(ps, t) + + // Stop init processes one by one. Stop the second container should + // not stop the first. + stdinW2.Close() + waitProcess(init2, t) + stdinW1.Close() + waitProcess(init1, t) + + out := strings.TrimSpace(buffers.Stdout.String()) + // output of ps inside the initial PID namespace should have + // 1 line of header, + // 2 lines of init processes, + // 1 line of ps process + if len(strings.Split(out, "\n")) != 4 { + t.Errorf("unexpected running process, output %q", out) + } +} + +func TestInitJoinNetworkAndUser(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + // Execute a long-running container + config1 := newTemplateConfig(rootfs) + config1.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config1.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config1.Namespaces = append(config1.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + container1, err := newContainer(config1) + ok(t, err) + defer container1.Destroy() + + stdinR1, stdinW1, err := os.Pipe() + ok(t, err) + init1 := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR1, + Init: true, + } + err = container1.Run(init1) + stdinR1.Close() + defer stdinW1.Close() + ok(t, err) + + // get the state of the first container + state1, err := container1.State() + ok(t, err) + netns1 := state1.NamespacePaths[configs.NEWNET] + userns1 := 
state1.NamespacePaths[configs.NEWUSER] + + // Run a container inside the existing pidns but with different cgroups + rootfs2, err := newRootfs() + ok(t, err) + defer remove(rootfs2) + + config2 := newTemplateConfig(rootfs2) + config2.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config2.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config2.Namespaces.Add(configs.NEWNET, netns1) + config2.Namespaces.Add(configs.NEWUSER, userns1) + config2.Cgroups.Path = "integration/test2" + container2, err := newContainerWithName("testCT2", config2) + ok(t, err) + defer container2.Destroy() + + stdinR2, stdinW2, err := os.Pipe() + ok(t, err) + init2 := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR2, + Init: true, + } + err = container2.Run(init2) + stdinR2.Close() + defer stdinW2.Close() + ok(t, err) + + // get the state of the second container + state2, err := container2.State() + ok(t, err) + + for _, ns := range []string{"net", "user"} { + ns1, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/%s", state1.InitProcessPid, ns)) + ok(t, err) + ns2, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/%s", state2.InitProcessPid, ns)) + ok(t, err) + if ns1 != ns2 { + t.Errorf("%s(%s), wanted %s", ns, ns2, ns1) + } + } + + // check that namespaces are not the same + if reflect.DeepEqual(state2.NamespacePaths, state1.NamespacePaths) { + t.Errorf("Namespaces(%v), original %v", state2.NamespacePaths, + state1.NamespacePaths) + } + // Stop init processes one by one. Stop the second container should + // not stop the first. 
+ stdinW2.Close() + waitProcess(init2, t) + stdinW1.Close() + waitProcess(init1, t) +} + +func TestTmpfsCopyUp(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + + config.Mounts = append(config.Mounts, &configs.Mount{ + Source: "tmpfs", + Destination: "/etc", + Device: "tmpfs", + Extensions: configs.EXT_COPYUP, + }) + + container, err := newContainerWithName("test", config) + ok(t, err) + defer container.Destroy() + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Args: []string{"ls", "/etc/passwd"}, + Env: standardEnvironment, + Stdin: nil, + Stdout: &stdout, + Init: true, + } + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) + + outputLs := string(stdout.Bytes()) + + // Check that the ls output has /etc/passwd + if !strings.Contains(outputLs, "/etc/passwd") { + t.Fatalf("/etc/passwd not copied up as expected: %v", outputLs) + } +} + +func TestCGROUPPrivate(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { + t.Skip("cgroupns is unsupported") + } + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/cgroup") + ok(t, err) + + config := newTemplateConfig(rootfs) + config.Namespaces.Add(configs.NEWCGROUP, "") + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/cgroup") + ok(t, err) + + if exitCode != 0 { + t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l { + t.Fatalf("cgroup link should be private to the container but equals host %q %q", actual, l) + } +} + +func TestCGROUPHost(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { + t.Skip("cgroupns is unsupported") + } + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/cgroup") + ok(t, err) + + config := newTemplateConfig(rootfs) + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/cgroup") + ok(t, err) + + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { + t.Fatalf("cgroup link not equal to host link %q %q", actual, l) + } +} diff --git a/libcontainer/integration/execin_test.go b/libcontainer/integration/execin_test.go new file mode 100644 index 0000000..14f8a59 --- /dev/null +++ b/libcontainer/integration/execin_test.go @@ -0,0 +1,608 @@ +package integration + +import ( + "bytes" + "fmt" + "io" + "os" + "strconv" + "strings" + "testing" + "time" + + "github.com/containerd/console" + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/utils" + + "golang.org/x/sys/unix" +) + +func TestExecIn(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + container, err := newContainer(config) + ok(t, err) + defer container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + ok(t, err) + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = 
container.Run(process) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + buffers := newStdBuffers() + ps := &libcontainer.Process{ + Cwd: "/", + Args: []string{"ps"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + } + + err = container.Run(ps) + ok(t, err) + waitProcess(ps, t) + stdinW.Close() + waitProcess(process, t) + + out := buffers.Stdout.String() + if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") { + t.Fatalf("unexpected running process, output %q", out) + } + if strings.Contains(out, "\r") { + t.Fatalf("unexpected carriage-return in output") + } +} + +func TestExecInUsernsRlimit(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + + testExecInRlimit(t, true) +} + +func TestExecInRlimit(t *testing.T) { + testExecInRlimit(t, false) +} + +func testExecInRlimit(t *testing.T, userns bool) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + if userns { + config.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + } + + container, err := newContainer(config) + ok(t, err) + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + ok(t, err) + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(process) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + buffers := newStdBuffers() + ps := &libcontainer.Process{ + Cwd: "/", + Args: []string{"/bin/sh", "-c", "ulimit -n"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + Rlimits: []configs.Rlimit{ + // increase process 
rlimit higher than container rlimit to test per-process limit + {Type: unix.RLIMIT_NOFILE, Hard: 1026, Soft: 1026}, + }, + Init: true, + } + err = container.Run(ps) + ok(t, err) + waitProcess(ps, t) + + stdinW.Close() + waitProcess(process, t) + + out := buffers.Stdout.String() + if limit := strings.TrimSpace(out); limit != "1026" { + t.Fatalf("expected rlimit to be 1026, got %s", limit) + } +} + +func TestExecInAdditionalGroups(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + container, err := newContainer(config) + ok(t, err) + defer container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + ok(t, err) + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(process) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + var stdout bytes.Buffer + pconfig := libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "id", "-Gn"}, + Env: standardEnvironment, + Stdin: nil, + Stdout: &stdout, + AdditionalGroups: []string{"plugdev", "audio"}, + } + err = container.Run(&pconfig) + ok(t, err) + + // Wait for process + waitProcess(&pconfig, t) + + stdinW.Close() + waitProcess(process, t) + + outputGroups := string(stdout.Bytes()) + + // Check that the groups output has the groups that we specified + if !strings.Contains(outputGroups, "audio") { + t.Fatalf("Listed groups do not contain the audio group as expected: %v", outputGroups) + } + + if !strings.Contains(outputGroups, "plugdev") { + t.Fatalf("Listed groups do not contain the plugdev group as expected: %v", outputGroups) + } +} + +func TestExecInError(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + container, err := newContainer(config) + ok(t, err) 
+ defer container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + ok(t, err) + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(process) + stdinR.Close() + defer func() { + stdinW.Close() + if _, err := process.Wait(); err != nil { + t.Log(err) + } + }() + ok(t, err) + + for i := 0; i < 42; i++ { + var out bytes.Buffer + unexistent := &libcontainer.Process{ + Cwd: "/", + Args: []string{"unexistent"}, + Env: standardEnvironment, + Stderr: &out, + } + err = container.Run(unexistent) + if err == nil { + t.Fatal("Should be an error") + } + if !strings.Contains(err.Error(), "executable file not found") { + t.Fatalf("Should be error about not found executable, got %s", err) + } + if !bytes.Contains(out.Bytes(), []byte("executable file not found")) { + t.Fatalf("executable file not found error not delivered to stdio:\n%s", out.String()) + } + } +} + +func TestExecInTTY(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + container, err := newContainer(config) + ok(t, err) + defer container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + ok(t, err) + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(process) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + var stdout bytes.Buffer + ps := &libcontainer.Process{ + Cwd: "/", + Args: []string{"ps"}, + Env: standardEnvironment, + } + parent, child, err := utils.NewSockPair("console") + if err != nil { + ok(t, err) + } + defer parent.Close() + defer child.Close() + ps.ConsoleSocket = child + type cdata struct { + c console.Console + err error + } + dc := make(chan *cdata, 1) + go func() { + f, err := 
utils.RecvFd(parent) + if err != nil { + dc <- &cdata{ + err: err, + } + return + } + c, err := console.ConsoleFromFile(f) + if err != nil { + dc <- &cdata{ + err: err, + } + return + } + console.ClearONLCR(c.Fd()) + dc <- &cdata{ + c: c, + } + }() + err = container.Run(ps) + ok(t, err) + data := <-dc + if data.err != nil { + ok(t, data.err) + } + console := data.c + copy := make(chan struct{}) + go func() { + io.Copy(&stdout, console) + close(copy) + }() + ok(t, err) + select { + case <-time.After(5 * time.Second): + t.Fatal("Waiting for copy timed out") + case <-copy: + } + waitProcess(ps, t) + + stdinW.Close() + waitProcess(process, t) + + out := stdout.String() + if !strings.Contains(out, "cat") || !strings.Contains(out, "ps") { + t.Fatalf("unexpected running process, output %q", out) + } + if strings.Contains(out, "\r") { + t.Fatalf("unexpected carriage-return in output") + } +} + +func TestExecInEnvironment(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + container, err := newContainer(config) + ok(t, err) + defer container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + ok(t, err) + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(process) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + buffers := newStdBuffers() + process2 := &libcontainer.Process{ + Cwd: "/", + Args: []string{"env"}, + Env: []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "DEBUG=true", + "DEBUG=false", + "ENV=test", + }, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + Init: true, + } + err = container.Run(process2) + ok(t, err) + waitProcess(process2, t) + + stdinW.Close() + waitProcess(process, t) + + out := buffers.Stdout.String() + // check execin's process 
environment + if !strings.Contains(out, "DEBUG=false") || + !strings.Contains(out, "ENV=test") || + !strings.Contains(out, "HOME=/root") || + !strings.Contains(out, "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin") || + strings.Contains(out, "DEBUG=true") { + t.Fatalf("unexpected running process, output %q", out) + } +} + +func TestExecinPassExtraFiles(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + config := newTemplateConfig(rootfs) + container, err := newContainer(config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(process) + stdinR.Close() + defer stdinW.Close() + if err != nil { + t.Fatal(err) + } + + var stdout bytes.Buffer + pipeout1, pipein1, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + pipeout2, pipein2, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + inprocess := &libcontainer.Process{ + Cwd: "/", + Args: []string{"sh", "-c", "cd /proc/$$/fd; echo -n *; echo -n 1 >3; echo -n 2 >4"}, + Env: []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}, + ExtraFiles: []*os.File{pipein1, pipein2}, + Stdin: nil, + Stdout: &stdout, + } + err = container.Run(inprocess) + if err != nil { + t.Fatal(err) + } + + waitProcess(inprocess, t) + stdinW.Close() + waitProcess(process, t) + + out := string(stdout.Bytes()) + // fd 5 is the directory handle for /proc/$$/fd + if out != "0 1 2 3 4 5" { + t.Fatalf("expected to have the file descriptors '0 1 2 3 4 5' passed to exec, got '%s'", out) + } + var buf = []byte{0} + _, err = pipeout1.Read(buf) + if err != nil { + t.Fatal(err) + } + out1 := string(buf) + if out1 != "1" { + 
t.Fatalf("expected first pipe to receive '1', got '%s'", out1) + } + + _, err = pipeout2.Read(buf) + if err != nil { + t.Fatal(err) + } + out2 := string(buf) + if out2 != "2" { + t.Fatalf("expected second pipe to receive '2', got '%s'", out2) + } +} + +func TestExecInOomScoreAdj(t *testing.T) { + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + config.OomScoreAdj = ptrInt(200) + container, err := newContainer(config) + ok(t, err) + defer container.Destroy() + + stdinR, stdinW, err := os.Pipe() + ok(t, err) + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(process) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + buffers := newStdBuffers() + ps := &libcontainer.Process{ + Cwd: "/", + Args: []string{"/bin/sh", "-c", "cat /proc/self/oom_score_adj"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + } + err = container.Run(ps) + ok(t, err) + waitProcess(ps, t) + + stdinW.Close() + waitProcess(process, t) + + out := buffers.Stdout.String() + if oomScoreAdj := strings.TrimSpace(out); oomScoreAdj != strconv.Itoa(*config.OomScoreAdj) { + t.Fatalf("expected oomScoreAdj to be %d, got %s", *config.OomScoreAdj, oomScoreAdj) + } +} + +func TestExecInUserns(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + if testing.Short() { + return + } + rootfs, err := newRootfs() + ok(t, err) + defer remove(rootfs) + config := newTemplateConfig(rootfs) + config.UidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.GidMappings = []configs.IDMap{{HostID: 0, ContainerID: 0, Size: 1000}} + config.Namespaces = append(config.Namespaces, configs.Namespace{Type: configs.NEWUSER}) + container, err := newContainer(config) + ok(t, err) + defer 
container.Destroy() + + // Execute a first process in the container + stdinR, stdinW, err := os.Pipe() + ok(t, err) + + process := &libcontainer.Process{ + Cwd: "/", + Args: []string{"cat"}, + Env: standardEnvironment, + Stdin: stdinR, + Init: true, + } + err = container.Run(process) + stdinR.Close() + defer stdinW.Close() + ok(t, err) + + initPID, err := process.Pid() + ok(t, err) + initUserns, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/user", initPID)) + ok(t, err) + + buffers := newStdBuffers() + process2 := &libcontainer.Process{ + Cwd: "/", + Args: []string{"readlink", "/proc/self/ns/user"}, + Env: []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + }, + Stdout: buffers.Stdout, + Stderr: os.Stderr, + } + err = container.Run(process2) + ok(t, err) + waitProcess(process2, t) + stdinW.Close() + waitProcess(process, t) + + if out := strings.TrimSpace(buffers.Stdout.String()); out != initUserns { + t.Errorf("execin userns(%s), wanted %s", out, initUserns) + } +} diff --git a/libcontainer/integration/init_test.go b/libcontainer/integration/init_test.go new file mode 100644 index 0000000..f5180ea --- /dev/null +++ b/libcontainer/integration/init_test.go @@ -0,0 +1,46 @@ +package integration + +import ( + "os" + "runtime" + "testing" + + "github.com/opencontainers/runc/libcontainer" + _ "github.com/opencontainers/runc/libcontainer/nsenter" + + "github.com/sirupsen/logrus" +) + +// init runs the libcontainer initialization code because of the busybox style needs +// to work around the go runtime and the issues with forking +func init() { + if len(os.Args) < 2 || os.Args[1] != "init" { + return + } + runtime.GOMAXPROCS(1) + runtime.LockOSThread() + factory, err := libcontainer.New("") + if err != nil { + logrus.Fatalf("unable to initialize for container: %s", err) + } + if err := factory.StartInitialization(); err != nil { + logrus.Fatal(err) + } +} + +var testRoots []string + +func TestMain(m *testing.M) { + logrus.SetOutput(os.Stderr) + 
logrus.SetLevel(logrus.InfoLevel) + + // Clean up roots after running everything. + defer func() { + for _, root := range testRoots { + os.RemoveAll(root) + } + }() + + ret := m.Run() + os.Exit(ret) +} diff --git a/libcontainer/integration/seccomp_test.go b/libcontainer/integration/seccomp_test.go new file mode 100644 index 0000000..77f1a8d --- /dev/null +++ b/libcontainer/integration/seccomp_test.go @@ -0,0 +1,422 @@ +// +build linux,cgo,seccomp + +package integration + +import ( + "strings" + "syscall" + "testing" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/configs" + libseccomp "github.com/seccomp/libseccomp-golang" +) + +func TestSeccompDenyGetcwd(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "getcwd", + Action: configs.Errno, + }, + }, + } + + container, err := newContainer(config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + buffers := newStdBuffers() + pwd := &libcontainer.Process{ + Cwd: "/", + Args: []string{"pwd"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + Init: true, + } + + err = container.Run(pwd) + if err != nil { + t.Fatal(err) + } + ps, err := pwd.Wait() + if err == nil { + t.Fatal("Expecting error (negative return code); instead exited cleanly!") + } + + var exitCode int + status := ps.Sys().(syscall.WaitStatus) + if status.Exited() { + exitCode = status.ExitStatus() + } else if status.Signaled() { + exitCode = -int(status.Signal()) + } else { + t.Fatalf("Unrecognized exit reason!") + } + + if exitCode == 0 { + t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode) + } + + expected := "pwd: getcwd: Operation not permitted" + actual := 
strings.Trim(buffers.Stderr.String(), "\n") + if actual != expected { + t.Fatalf("Expected output %s but got %s\n", expected, actual) + } +} + +func TestSeccompPermitWriteConditional(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "write", + Action: configs.Errno, + Args: []*configs.Arg{ + { + Index: 0, + Value: 2, + Op: configs.EqualTo, + }, + }, + }, + }, + } + + container, err := newContainer(config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + buffers := newStdBuffers() + dmesg := &libcontainer.Process{ + Cwd: "/", + Args: []string{"busybox", "ls", "/"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + Init: true, + } + + err = container.Run(dmesg) + if err != nil { + t.Fatal(err) + } + if _, err := dmesg.Wait(); err != nil { + t.Fatalf("%s: %s", err, buffers.Stderr) + } +} + +func TestSeccompDenyWriteConditional(t *testing.T) { + if testing.Short() { + return + } + + // Only test if library version is v2.2.1 or higher + // Conditional filtering will always error in v2.2.0 and lower + major, minor, micro := libseccomp.GetLibraryVersion() + if (major == 2 && minor < 2) || (major == 2 && minor == 2 && micro < 1) { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "write", + Action: configs.Errno, + Args: []*configs.Arg{ + { + Index: 0, + Value: 2, + Op: configs.EqualTo, + }, + }, + }, + }, + } + + container, err := newContainer(config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + buffers := newStdBuffers() + dmesg 
:= &libcontainer.Process{ + Cwd: "/", + Args: []string{"busybox", "ls", "does_not_exist"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + Init: true, + } + + err = container.Run(dmesg) + if err != nil { + t.Fatal(err) + } + + ps, err := dmesg.Wait() + if err == nil { + t.Fatal("Expecting negative return, instead got 0!") + } + + var exitCode int + status := ps.Sys().(syscall.WaitStatus) + if status.Exited() { + exitCode = status.ExitStatus() + } else if status.Signaled() { + exitCode = -int(status.Signal()) + } else { + t.Fatalf("Unrecognized exit reason!") + } + + if exitCode == 0 { + t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode) + } + + // We're denying write to stderr, so we expect an empty buffer + expected := "" + actual := strings.Trim(buffers.Stderr.String(), "\n") + if actual != expected { + t.Fatalf("Expected output %s but got %s\n", expected, actual) + } +} + +func TestSeccompPermitWriteMultipleConditions(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "write", + Action: configs.Errno, + Args: []*configs.Arg{ + { + Index: 0, + Value: 2, + Op: configs.EqualTo, + }, + { + Index: 2, + Value: 0, + Op: configs.NotEqualTo, + }, + }, + }, + }, + } + + buffers, exitCode, err := runContainer(config, "", "ls", "/") + if err != nil { + t.Fatalf("%s: %s", buffers, err) + } + if exitCode != 0 { + t.Fatalf("exit code not 0. 
code %d buffers %s", exitCode, buffers) + } + // We don't need to verify the actual thing printed + // Just that something was written to stdout + if len(buffers.Stdout.String()) == 0 { + t.Fatalf("Nothing was written to stdout, write call failed!\n") + } +} + +func TestSeccompDenyWriteMultipleConditions(t *testing.T) { + if testing.Short() { + return + } + + // Only test if library version is v2.2.1 or higher + // Conditional filtering will always error in v2.2.0 and lower + major, minor, micro := libseccomp.GetLibraryVersion() + if (major == 2 && minor < 2) || (major == 2 && minor == 2 && micro < 1) { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "write", + Action: configs.Errno, + Args: []*configs.Arg{ + { + Index: 0, + Value: 2, + Op: configs.EqualTo, + }, + { + Index: 2, + Value: 0, + Op: configs.NotEqualTo, + }, + }, + }, + }, + } + + buffers, exitCode, err := runContainer(config, "", "ls", "/does_not_exist") + if err == nil { + t.Fatalf("Expecting error return, instead got 0") + } + if exitCode == 0 { + t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode) + } + + expected := "" + actual := strings.Trim(buffers.Stderr.String(), "\n") + if actual != expected { + t.Fatalf("Expected output %s but got %s\n", expected, actual) + } +} + +func TestSeccompMultipleConditionSameArgDeniesStdout(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + // Prevent writing to both stdout and stderr + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "write", + Action: configs.Errno, + Args: []*configs.Arg{ + { + Index: 0, + Value: 1, + Op: 
configs.EqualTo, + }, + { + Index: 0, + Value: 2, + Op: configs.EqualTo, + }, + }, + }, + }, + } + + buffers, exitCode, err := runContainer(config, "", "ls", "/") + if err != nil { + t.Fatalf("%s: %s", buffers, err) + } + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d buffers %s", exitCode, buffers) + } + // Verify that nothing was printed + if len(buffers.Stdout.String()) != 0 { + t.Fatalf("Something was written to stdout, write call succeeded!\n") + } +} + +func TestSeccompMultipleConditionSameArgDeniesStderr(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + // Prevent writing to both stdout and stderr + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "write", + Action: configs.Errno, + Args: []*configs.Arg{ + { + Index: 0, + Value: 1, + Op: configs.EqualTo, + }, + { + Index: 0, + Value: 2, + Op: configs.EqualTo, + }, + }, + }, + }, + } + + buffers, exitCode, err := runContainer(config, "", "ls", "/does_not_exist") + if err == nil { + t.Fatalf("Expecting error return, instead got 0") + } + if exitCode == 0 { + t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode) + } + // Verify nothing was printed + if len(buffers.Stderr.String()) != 0 { + t.Fatalf("Something was written to stderr, write call succeeded!\n") + } +} diff --git a/libcontainer/integration/template_test.go b/libcontainer/integration/template_test.go new file mode 100644 index 0000000..5f7cab5 --- /dev/null +++ b/libcontainer/integration/template_test.go @@ -0,0 +1,191 @@ +package integration + +import ( + "github.com/opencontainers/runc/libcontainer/configs" + + "golang.org/x/sys/unix" +) + +var standardEnvironment = []string{ + "HOME=/root", + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOSTNAME=integration", + "TERM=xterm", +} + +const 
defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV + +// newTemplateConfig returns a base template for running a container +// +// it uses a network strategy of just setting a loopback interface +// and the default setup for devices +func newTemplateConfig(rootfs string) *configs.Config { + allowAllDevices := false + return &configs.Config{ + Rootfs: rootfs, + Capabilities: &configs.Capabilities{ + Bounding: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Permitted: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Inheritable: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Ambient: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Effective: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + }, + Namespaces: configs.Namespaces([]configs.Namespace{ + {Type: configs.NEWNS}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + {Type: configs.NEWPID}, + {Type: configs.NEWNET}, + }), + 
Cgroups: &configs.Cgroup{ + Path: "integration/test", + Resources: &configs.Resources{ + MemorySwappiness: nil, + AllowAllDevices: &allowAllDevices, + AllowedDevices: configs.DefaultAllowedDevices, + }, + }, + MaskPaths: []string{ + "/proc/kcore", + "/sys/firmware", + }, + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, + Devices: configs.DefaultAutoCreatedDevices, + Hostname: "integration", + Mounts: []*configs.Mount{ + { + Source: "proc", + Destination: "/proc", + Device: "proc", + Flags: defaultMountFlags, + }, + { + Source: "tmpfs", + Destination: "/dev", + Device: "tmpfs", + Flags: unix.MS_NOSUID | unix.MS_STRICTATIME, + Data: "mode=755", + }, + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + Flags: unix.MS_NOSUID | unix.MS_NOEXEC, + Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", + }, + { + Device: "tmpfs", + Source: "shm", + Destination: "/dev/shm", + Data: "mode=1777,size=65536k", + Flags: defaultMountFlags, + }, + /* + CI is broken on the debian based kernels with this + { + Source: "mqueue", + Destination: "/dev/mqueue", + Device: "mqueue", + Flags: defaultMountFlags, + }, + */ + { + Source: "sysfs", + Destination: "/sys", + Device: "sysfs", + Flags: defaultMountFlags | unix.MS_RDONLY, + }, + }, + Networks: []*configs.Network{ + { + Type: "loopback", + Address: "127.0.0.1/0", + Gateway: "localhost", + }, + }, + Rlimits: []configs.Rlimit{ + { + Type: unix.RLIMIT_NOFILE, + Hard: uint64(1025), + Soft: uint64(1025), + }, + }, + } +} diff --git a/libcontainer/integration/utils_test.go b/libcontainer/integration/utils_test.go new file mode 100644 index 0000000..8b2d714 --- /dev/null +++ b/libcontainer/integration/utils_test.go @@ -0,0 +1,187 @@ +package integration + +import ( + "bytes" + "crypto/md5" + "encoding/hex" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "syscall" + "testing" + "time" + + "github.com/opencontainers/runc/libcontainer" + 
"github.com/opencontainers/runc/libcontainer/configs" +) + +func ptrInt(v int) *int { + return &v +} + +func newStdBuffers() *stdBuffers { + return &stdBuffers{ + Stdin: bytes.NewBuffer(nil), + Stdout: bytes.NewBuffer(nil), + Stderr: bytes.NewBuffer(nil), + } +} + +type stdBuffers struct { + Stdin *bytes.Buffer + Stdout *bytes.Buffer + Stderr *bytes.Buffer +} + +func (b *stdBuffers) String() string { + s := []string{} + if b.Stderr != nil { + s = append(s, b.Stderr.String()) + } + if b.Stdout != nil { + s = append(s, b.Stdout.String()) + } + return strings.Join(s, "|") +} + +// ok fails the test if an err is not nil. +func ok(t testing.TB, err error) { + if err != nil { + _, file, line, _ := runtime.Caller(1) + t.Fatalf("%s:%d: unexpected error: %s\n\n", filepath.Base(file), line, err.Error()) + } +} + +func waitProcess(p *libcontainer.Process, t *testing.T) { + _, file, line, _ := runtime.Caller(1) + status, err := p.Wait() + + if err != nil { + t.Fatalf("%s:%d: unexpected error: %s\n\n", filepath.Base(file), line, err.Error()) + } + + if !status.Success() { + t.Fatalf("%s:%d: unexpected status: %s\n\n", filepath.Base(file), line, status.String()) + } +} + +func newTestRoot() (string, error) { + dir, err := ioutil.TempDir("", "libcontainer") + if err != nil { + return "", err + } + if err := os.MkdirAll(dir, 0700); err != nil { + return "", err + } + testRoots = append(testRoots, dir) + return dir, nil +} + +func newTestBundle() (string, error) { + dir, err := ioutil.TempDir("", "bundle") + if err != nil { + return "", err + } + if err := os.MkdirAll(dir, 0700); err != nil { + return "", err + } + return dir, nil +} + +// newRootfs creates a new tmp directory and copies the busybox root filesystem +func newRootfs() (string, error) { + dir, err := ioutil.TempDir("", "") + if err != nil { + return "", err + } + if err := os.MkdirAll(dir, 0700); err != nil { + return "", err + } + if err := copyBusybox(dir); err != nil { + return "", err + } + return dir, nil +} + 
+func remove(dir string) { + os.RemoveAll(dir) +} + +// copyBusybox copies the rootfs for a busybox container created for the test image +// into the new directory for the specific test +func copyBusybox(dest string) error { + out, err := exec.Command("sh", "-c", fmt.Sprintf("cp -a /busybox/* %s/", dest)).CombinedOutput() + if err != nil { + return fmt.Errorf("copy error %q: %q", err, out) + } + return nil +} + +func newContainer(config *configs.Config) (libcontainer.Container, error) { + h := md5.New() + h.Write([]byte(time.Now().String())) + return newContainerWithName(hex.EncodeToString(h.Sum(nil)), config) +} + +func newContainerWithName(name string, config *configs.Config) (libcontainer.Container, error) { + root, err := newTestRoot() + if err != nil { + return nil, err + } + + f, err := libcontainer.New(root, libcontainer.Cgroupfs) + if err != nil { + return nil, err + } + if config.Cgroups != nil && config.Cgroups.Parent == "system.slice" { + f, err = libcontainer.New(root, libcontainer.SystemdCgroups) + if err != nil { + return nil, err + } + } + return f.Create(name, config) +} + +// runContainer runs the container with the specific config and arguments +// +// buffers are returned containing the STDOUT and STDERR output for the run +// along with the exit code and any go error +func runContainer(config *configs.Config, console string, args ...string) (buffers *stdBuffers, exitCode int, err error) { + container, err := newContainer(config) + if err != nil { + return nil, -1, err + } + defer container.Destroy() + buffers = newStdBuffers() + process := &libcontainer.Process{ + Cwd: "/", + Args: args, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + Init: true, + } + + err = container.Run(process) + if err != nil { + return buffers, -1, err + } + ps, err := process.Wait() + if err != nil { + return buffers, -1, err + } + status := ps.Sys().(syscall.WaitStatus) + if status.Exited() { + exitCode = 
status.ExitStatus() + } else if status.Signaled() { + exitCode = -int(status.Signal()) + } else { + return buffers, -1, err + } + return +} diff --git a/libcontainer/intelrdt/intelrdt.go b/libcontainer/intelrdt/intelrdt.go new file mode 100644 index 0000000..0071ce7 --- /dev/null +++ b/libcontainer/intelrdt/intelrdt.go @@ -0,0 +1,773 @@ +// +build linux + +package intelrdt + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +/* + * About Intel RDT features: + * Intel platforms with new Xeon CPU support Resource Director Technology (RDT). + * Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are + * two sub-features of RDT. + * + * Cache Allocation Technology (CAT) provides a way for the software to restrict + * cache allocation to a defined 'subset' of L3 cache which may be overlapping + * with other 'subsets'. The different subsets are identified by class of + * service (CLOS) and each CLOS has a capacity bitmask (CBM). + * + * Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle + * over memory bandwidth for the software. A user controls the resource by + * indicating the percentage of maximum memory bandwidth or memory bandwidth + * limit in MBps unit if MBA Software Controller is enabled. + * + * More details about Intel RDT CAT and MBA can be found in the section 17.18 + * of Intel Software Developer Manual: + * https://software.intel.com/en-us/articles/intel-sdm + * + * About Intel RDT kernel interface: + * In Linux 4.10 kernel or newer, the interface is defined and exposed via + * "resource control" filesystem, which is a "cgroup-like" interface. + * + * Comparing with cgroups, it has similar process management lifecycle and + * interfaces in a container. But unlike cgroups' hierarchy, it has single level + * filesystem layout. 
+ * + * CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via + * "resource control" filesystem. + * + * Intel RDT "resource control" filesystem hierarchy: + * mount -t resctrl resctrl /sys/fs/resctrl + * tree /sys/fs/resctrl + * /sys/fs/resctrl/ + * |-- info + * | |-- L3 + * | | |-- cbm_mask + * | | |-- min_cbm_bits + * | | |-- num_closids + * | |-- MB + * | |-- bandwidth_gran + * | |-- delay_linear + * | |-- min_bandwidth + * | |-- num_closids + * |-- ... + * |-- schemata + * |-- tasks + * |-- <container_id> + * |-- ... + * |-- schemata + * |-- tasks + * + * For runc, we can make use of `tasks` and `schemata` configuration for L3 + * cache and memory bandwidth resource constraints. + * + * The file `tasks` has a list of tasks that belong to this group (e.g., + * "<container_id>" group). Tasks can be added to a group by writing the task ID + * to the "tasks" file (which will automatically remove them from the previous + * group to which they belonged). New tasks created by fork(2) and clone(2) are + * added to the same group as their parent. + * + * The file `schemata` has a list of all the resources available to this group. + * Each resource (L3 cache, memory bandwidth) has its own line and format. + * + * L3 cache schema: + * It has allocation bitmasks/values for L3 cache on each socket, which + * contains L3 cache id and capacity bitmask (CBM). + * Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." + * For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" + * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + * + * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can + * be set is less than the max bit. The max bits in the CBM varies among + * supported Intel CPU models. Kernel will check if it is valid when writing. + * e.g., default value 0xfffff in root indicates the max bits of CBM is 20 + * bits, which maps to the entire L3 cache capacity. Some valid CBM values to + * set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
+ * + * Memory bandwidth schema: + * It has allocation values for memory bandwidth on each socket, which contains + * L3 cache id and memory bandwidth. + * Format: "MB:=bandwidth0;=bandwidth1;..." + * For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" + * + * The minimum bandwidth percentage value for each CPU model is predefined and + * can be looked up through "info/MB/min_bandwidth". The bandwidth granularity + * that is allocated is also dependent on the CPU model and can be looked up at + * "info/MB/bandwidth_gran". The available bandwidth control steps are: + * min_bw + N * bw_gran. Intermediate values are rounded to the next control + * step available on the hardware. + * + * If MBA Software Controller is enabled through mount option "-o mba_MBps": + * mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl + * We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit + * instead of "percentages". The kernel underneath would use a software feedback + * mechanism or a "Software Controller" which reads the actual bandwidth using + * MBM counters and adjust the memory bandwidth percentages to ensure: + * "actual memory bandwidth < user specified memory bandwidth". + * + * For example, on a two-socket machine, the schema line could be + * "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 + * and 7000 MBps memory bandwidth limit on socket 1. + * + * For more information about Intel RDT kernel interface: + * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt + * + * An example for runc: + * Consider a two-socket machine with two L3 caches where the default CBM is + * 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% + * with a memory bandwidth granularity of 10%. 
+ * + * Tasks inside the container only have access to the "upper" 7/11 of L3 cache + * on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a + * maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. + * + * "linux": { + * "intelRdt": { + * "l3CacheSchema": "L3:0=7f0;1=1f", + * "memBwSchema": "MB:0=20;1=70" + * } + * } + */ + +type Manager interface { + // Applies Intel RDT configuration to the process with the specified pid + Apply(pid int) error + + // Returns statistics for Intel RDT + GetStats() (*Stats, error) + + // Destroys the Intel RDT 'container_id' group + Destroy() error + + // Returns Intel RDT path to save in a state file and to be able to + // restore the object later + GetPath() string + + // Set Intel RDT "resource control" filesystem as configured. + Set(container *configs.Config) error +} + +// This implements interface Manager +type IntelRdtManager struct { + mu sync.Mutex + Config *configs.Config + Id string + Path string +} + +const ( + IntelRdtTasks = "tasks" +) + +var ( + // The absolute root path of the Intel RDT "resource control" filesystem + intelRdtRoot string + intelRdtRootLock sync.Mutex + + // The flag to indicate if Intel RDT/CAT is enabled + isCatEnabled bool + // The flag to indicate if Intel RDT/MBA is enabled + isMbaEnabled bool + // The flag to indicate if Intel RDT/MBA Software Controller is enabled + isMbaScEnabled bool +) + +type intelRdtData struct { + root string + config *configs.Config + pid int +} + +// Check if Intel RDT sub-features are enabled in init() +func init() { + // 1. Check if hardware and kernel support Intel RDT sub-features + // "cat_l3" flag for CAT and "mba" flag for MBA + isCatFlagSet, isMbaFlagSet, err := parseCpuInfoFile("/proc/cpuinfo") + if err != nil { + return + } + + // 2. Check if Intel RDT "resource control" filesystem is mounted + // The user guarantees to mount the filesystem + if !isIntelRdtMounted() { + return + } + + // 3. 
Double check if Intel RDT sub-features are available in + // "resource control" filesystem. Intel RDT sub-features can be + // selectively disabled or enabled by kernel command line + // (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel + if isCatFlagSet { + if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3")); err == nil { + isCatEnabled = true + } + } + if isMbaScEnabled { + // We confirm MBA Software Controller is enabled in step 2, + // MBA should be enabled because MBA Software Controller + // depends on MBA + isMbaEnabled = true + } else if isMbaFlagSet { + if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil { + isMbaEnabled = true + } + } +} + +// Return the mount point path of Intel RDT "resource control" filesysem +func findIntelRdtMountpointDir() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for "Intel RDT" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "resctrl" { + // Check that the mount is properly formatted. 
+ if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + // Check if MBA Software Controller is enabled through mount option "-o mba_MBps" + if strings.Contains(postSeparatorFields[2], "mba_MBps") { + isMbaScEnabled = true + } + + return fields[4], nil + } + } + if err := s.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError("Intel RDT") +} + +// Gets the root path of Intel RDT "resource control" filesystem +func getIntelRdtRoot() (string, error) { + intelRdtRootLock.Lock() + defer intelRdtRootLock.Unlock() + + if intelRdtRoot != "" { + return intelRdtRoot, nil + } + + root, err := findIntelRdtMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + intelRdtRoot = root + return intelRdtRoot, nil +} + +func isIntelRdtMounted() bool { + _, err := getIntelRdtRoot() + if err != nil { + return false + } + + return true +} + +func parseCpuInfoFile(path string) (bool, bool, error) { + isCatFlagSet := false + isMbaFlagSet := false + + f, err := os.Open(path) + if err != nil { + return false, false, err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return false, false, err + } + + line := s.Text() + + // Search "cat_l3" and "mba" flags in first "flags" line + if strings.Contains(line, "flags") { + flags := strings.Split(line, " ") + // "cat_l3" flag for CAT and "mba" flag for MBA + for _, flag := range flags { + switch flag { + case "cat_l3": + isCatFlagSet = true + case "mba": + isMbaFlagSet = true + } + } + return isCatFlagSet, isMbaFlagSet, nil + } + } + return isCatFlagSet, isMbaFlagSet, nil +} + +func parseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. 
Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// Gets a single uint64 value from the specified file. +func getIntelRdtParamUint(path, file string) (uint64, error) { + fileName := filepath.Join(path, file) + contents, err := ioutil.ReadFile(fileName) + if err != nil { + return 0, err + } + + res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) + if err != nil { + return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName) + } + return res, nil +} + +// Gets a string value from the specified file +func getIntelRdtParamString(path, file string) (string, error) { + contents, err := ioutil.ReadFile(filepath.Join(path, file)) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(contents)), nil +} + +func writeFile(dir, file, data string) error { + if dir == "" { + return fmt.Errorf("no such directory for %s", file) + } + if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", data, file, err) + } + return nil +} + +func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) { + rootPath, err := getIntelRdtRoot() + if err != nil { + return nil, err + } + return &intelRdtData{ + root: rootPath, + config: c, + pid: pid, + }, nil +} + +// Get the read-only L3 cache information +func getL3CacheInfo() (*L3CacheInfo, error) { + l3CacheInfo := &L3CacheInfo{} + + rootPath, err := getIntelRdtRoot() + if err != nil { + return l3CacheInfo, err + } + + path := filepath.Join(rootPath, "info", "L3") + cbmMask, err := getIntelRdtParamString(path, "cbm_mask") + if err != nil { + return l3CacheInfo, err + } + minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits") + if err != nil { + return 
l3CacheInfo, err + } + numClosids, err := getIntelRdtParamUint(path, "num_closids") + if err != nil { + return l3CacheInfo, err + } + + l3CacheInfo.CbmMask = cbmMask + l3CacheInfo.MinCbmBits = minCbmBits + l3CacheInfo.NumClosids = numClosids + + return l3CacheInfo, nil +} + +// Get the read-only memory bandwidth information +func getMemBwInfo() (*MemBwInfo, error) { + memBwInfo := &MemBwInfo{} + + rootPath, err := getIntelRdtRoot() + if err != nil { + return memBwInfo, err + } + + path := filepath.Join(rootPath, "info", "MB") + bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran") + if err != nil { + return memBwInfo, err + } + delayLinear, err := getIntelRdtParamUint(path, "delay_linear") + if err != nil { + return memBwInfo, err + } + minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth") + if err != nil { + return memBwInfo, err + } + numClosids, err := getIntelRdtParamUint(path, "num_closids") + if err != nil { + return memBwInfo, err + } + + memBwInfo.BandwidthGran = bandwidthGran + memBwInfo.DelayLinear = delayLinear + memBwInfo.MinBandwidth = minBandwidth + memBwInfo.NumClosids = numClosids + + return memBwInfo, nil +} + +// Get diagnostics for last filesystem operation error from file info/last_cmd_status +func getLastCmdStatus() (string, error) { + rootPath, err := getIntelRdtRoot() + if err != nil { + return "", err + } + + path := filepath.Join(rootPath, "info") + lastCmdStatus, err := getIntelRdtParamString(path, "last_cmd_status") + if err != nil { + return "", err + } + + return lastCmdStatus, nil +} + +// WriteIntelRdtTasks writes the specified pid into the "tasks" file +func WriteIntelRdtTasks(dir string, pid int) error { + if dir == "" { + return fmt.Errorf("no such directory for %s", IntelRdtTasks) + } + + // Don't attach any pid if -1 is specified as a pid + if pid != -1 { + if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil { + return fmt.Errorf("failed to write %v to 
%v: %v", pid, IntelRdtTasks, err) + } + } + return nil +} + +// Check if Intel RDT/CAT is enabled +func IsCatEnabled() bool { + return isCatEnabled +} + +// Check if Intel RDT/MBA is enabled +func IsMbaEnabled() bool { + return isMbaEnabled +} + +// Check if Intel RDT/MBA Software Controller is enabled +func IsMbaScEnabled() bool { + return isMbaScEnabled +} + +// Get the 'container_id' path in Intel RDT "resource control" filesystem +func GetIntelRdtPath(id string) (string, error) { + rootPath, err := getIntelRdtRoot() + if err != nil { + return "", err + } + + path := filepath.Join(rootPath, id) + return path, nil +} + +// Applies Intel RDT configuration to the process with the specified pid +func (m *IntelRdtManager) Apply(pid int) (err error) { + // If intelRdt is not specified in config, we do nothing + if m.Config.IntelRdt == nil { + return nil + } + d, err := getIntelRdtData(m.Config, pid) + if err != nil && !IsNotFound(err) { + return err + } + + m.mu.Lock() + defer m.mu.Unlock() + path, err := d.join(m.Id) + if err != nil { + return err + } + + m.Path = path + return nil +} + +// Destroys the Intel RDT 'container_id' group +func (m *IntelRdtManager) Destroy() error { + m.mu.Lock() + defer m.mu.Unlock() + if err := os.RemoveAll(m.GetPath()); err != nil { + return err + } + m.Path = "" + return nil +} + +// Returns Intel RDT path to save in a state file and to be able to +// restore the object later +func (m *IntelRdtManager) GetPath() string { + if m.Path == "" { + m.Path, _ = GetIntelRdtPath(m.Id) + } + return m.Path +} + +// Returns statistics for Intel RDT +func (m *IntelRdtManager) GetStats() (*Stats, error) { + // If intelRdt is not specified in config + if m.Config.IntelRdt == nil { + return nil, nil + } + + m.mu.Lock() + defer m.mu.Unlock() + stats := NewStats() + + rootPath, err := getIntelRdtRoot() + if err != nil { + return nil, err + } + // The read-only L3 cache and memory bandwidth schemata in root + tmpRootStrings, err := 
getIntelRdtParamString(rootPath, "schemata") + if err != nil { + return nil, err + } + schemaRootStrings := strings.Split(tmpRootStrings, "\n") + + // The L3 cache and memory bandwidth schemata in 'container_id' group + tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata") + if err != nil { + return nil, err + } + schemaStrings := strings.Split(tmpStrings, "\n") + + if IsCatEnabled() { + // The read-only L3 cache information + l3CacheInfo, err := getL3CacheInfo() + if err != nil { + return nil, err + } + stats.L3CacheInfo = l3CacheInfo + + // The read-only L3 cache schema in root + for _, schemaRoot := range schemaRootStrings { + if strings.Contains(schemaRoot, "L3") { + stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot) + } + } + + // The L3 cache schema in 'container_id' group + for _, schema := range schemaStrings { + if strings.Contains(schema, "L3") { + stats.L3CacheSchema = strings.TrimSpace(schema) + } + } + } + + if IsMbaEnabled() { + // The read-only memory bandwidth information + memBwInfo, err := getMemBwInfo() + if err != nil { + return nil, err + } + stats.MemBwInfo = memBwInfo + + // The read-only memory bandwidth information + for _, schemaRoot := range schemaRootStrings { + if strings.Contains(schemaRoot, "MB") { + stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot) + } + } + + // The memory bandwidth schema in 'container_id' group + for _, schema := range schemaStrings { + if strings.Contains(schema, "MB") { + stats.MemBwSchema = strings.TrimSpace(schema) + } + } + } + + return stats, nil +} + +// Set Intel RDT "resource control" filesystem as configured. +func (m *IntelRdtManager) Set(container *configs.Config) error { + // About L3 cache schema: + // It has allocation bitmasks/values for L3 cache on each socket, + // which contains L3 cache id and capacity bitmask (CBM). + // Format: "L3:=;=;..." 
+ // For example, on a two-socket machine, the schema line could be: + // L3:0=ff;1=c0 + // which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM + // is 0xc0. + // + // The valid L3 cache CBM is a *contiguous bits set* and number of + // bits that can be set is less than the max bit. The max bits in the + // CBM is varied among supported Intel CPU models. Kernel will check + // if it is valid when writing. e.g., default value 0xfffff in root + // indicates the max bits of CBM is 20 bits, which mapping to entire + // L3 cache capacity. Some valid CBM values to set in a group: + // 0xf, 0xf0, 0x3ff, 0x1f00 and etc. + // + // + // About memory bandwidth schema: + // It has allocation values for memory bandwidth on each socket, which + // contains L3 cache id and memory bandwidth. + // Format: "MB:=bandwidth0;=bandwidth1;..." + // For example, on a two-socket machine, the schema line could be: + // "MB:0=20;1=70" + // + // The minimum bandwidth percentage value for each CPU model is + // predefined and can be looked up through "info/MB/min_bandwidth". + // The bandwidth granularity that is allocated is also dependent on + // the CPU model and can be looked up at "info/MB/bandwidth_gran". + // The available bandwidth control steps are: min_bw + N * bw_gran. + // Intermediate values are rounded to the next control step available + // on the hardware. + // + // If MBA Software Controller is enabled through mount option + // "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl + // We could specify memory bandwidth in "MBps" (Mega Bytes per second) + // unit instead of "percentages". The kernel underneath would use a + // software feedback mechanism or a "Software Controller" which reads + // the actual bandwidth using MBM counters and adjust the memory + // bandwidth percentages to ensure: + // "actual memory bandwidth < user specified memory bandwidth". 
+ // + // For example, on a two-socket machine, the schema line could be + // "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on + // socket 0 and 7000 MBps memory bandwidth limit on socket 1. + if container.IntelRdt != nil { + path := m.GetPath() + l3CacheSchema := container.IntelRdt.L3CacheSchema + memBwSchema := container.IntelRdt.MemBwSchema + + // Write a single joint schema string to schemata file + if l3CacheSchema != "" && memBwSchema != "" { + if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil { + return NewLastCmdError(err) + } + } + + // Write only L3 cache schema string to schemata file + if l3CacheSchema != "" && memBwSchema == "" { + if err := writeFile(path, "schemata", l3CacheSchema); err != nil { + return NewLastCmdError(err) + } + } + + // Write only memory bandwidth schema string to schemata file + if l3CacheSchema == "" && memBwSchema != "" { + if err := writeFile(path, "schemata", memBwSchema); err != nil { + return NewLastCmdError(err) + } + } + } + + return nil +} + +func (raw *intelRdtData) join(id string) (string, error) { + path := filepath.Join(raw.root, id) + if err := os.MkdirAll(path, 0755); err != nil { + return "", NewLastCmdError(err) + } + + if err := WriteIntelRdtTasks(path, raw.pid); err != nil { + return "", NewLastCmdError(err) + } + return path, nil +} + +type NotFoundError struct { + ResourceControl string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl) +} + +func NewNotFoundError(res string) error { + return &NotFoundError{ + ResourceControl: res, + } +} + +func IsNotFound(err error) bool { + if err == nil { + return false + } + _, ok := err.(*NotFoundError) + return ok +} + +type LastCmdError struct { + LastCmdStatus string + Err error +} + +func (e *LastCmdError) Error() string { + return fmt.Sprintf(e.Err.Error() + ", last_cmd_status: " + e.LastCmdStatus) +} + +func NewLastCmdError(err error) error { + 
lastCmdStatus, err1 := getLastCmdStatus() + if err1 == nil { + return &LastCmdError{ + LastCmdStatus: lastCmdStatus, + Err: err, + } + } + return err +} diff --git a/libcontainer/intelrdt/intelrdt_test.go b/libcontainer/intelrdt/intelrdt_test.go new file mode 100644 index 0000000..a19b961 --- /dev/null +++ b/libcontainer/intelrdt/intelrdt_test.go @@ -0,0 +1,122 @@ +// +build linux + +package intelrdt + +import ( + "strings" + "testing" +) + +func TestIntelRdtSetL3CacheSchema(t *testing.T) { + if !IsCatEnabled() { + return + } + + helper := NewIntelRdtTestUtil(t) + defer helper.cleanup() + + const ( + l3CacheSchemaBefore = "L3:0=f;1=f0" + l3CacheSchemeAfter = "L3:0=f0;1=f" + ) + + helper.writeFileContents(map[string]string{ + "schemata": l3CacheSchemaBefore + "\n", + }) + + helper.IntelRdtData.config.IntelRdt.L3CacheSchema = l3CacheSchemeAfter + intelrdt := &IntelRdtManager{ + Config: helper.IntelRdtData.config, + Path: helper.IntelRdtPath, + } + if err := intelrdt.Set(helper.IntelRdtData.config); err != nil { + t.Fatal(err) + } + + tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata") + if err != nil { + t.Fatalf("Failed to parse file 'schemata' - %s", err) + } + values := strings.Split(tmpStrings, "\n") + value := values[0] + + if value != l3CacheSchemeAfter { + t.Fatal("Got the wrong value, set 'schemata' failed.") + } +} + +func TestIntelRdtSetMemBwSchema(t *testing.T) { + if !IsMbaEnabled() { + return + } + + helper := NewIntelRdtTestUtil(t) + defer helper.cleanup() + + const ( + memBwSchemaBefore = "MB:0=20;1=70" + memBwSchemeAfter = "MB:0=70;1=20" + ) + + helper.writeFileContents(map[string]string{ + "schemata": memBwSchemaBefore + "\n", + }) + + helper.IntelRdtData.config.IntelRdt.MemBwSchema = memBwSchemeAfter + intelrdt := &IntelRdtManager{ + Config: helper.IntelRdtData.config, + Path: helper.IntelRdtPath, + } + if err := intelrdt.Set(helper.IntelRdtData.config); err != nil { + t.Fatal(err) + } + + tmpStrings, err := 
getIntelRdtParamString(helper.IntelRdtPath, "schemata") + if err != nil { + t.Fatalf("Failed to parse file 'schemata' - %s", err) + } + values := strings.Split(tmpStrings, "\n") + value := values[0] + + if value != memBwSchemeAfter { + t.Fatal("Got the wrong value, set 'schemata' failed.") + } +} + +func TestIntelRdtSetMemBwScSchema(t *testing.T) { + if !IsMbaScEnabled() { + return + } + + helper := NewIntelRdtTestUtil(t) + defer helper.cleanup() + + const ( + memBwScSchemaBefore = "MB:0=5000;1=7000" + memBwScSchemeAfter = "MB:0=9000;1=4000" + ) + + helper.writeFileContents(map[string]string{ + "schemata": memBwScSchemaBefore + "\n", + }) + + helper.IntelRdtData.config.IntelRdt.MemBwSchema = memBwScSchemeAfter + intelrdt := &IntelRdtManager{ + Config: helper.IntelRdtData.config, + Path: helper.IntelRdtPath, + } + if err := intelrdt.Set(helper.IntelRdtData.config); err != nil { + t.Fatal(err) + } + + tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata") + if err != nil { + t.Fatalf("Failed to parse file 'schemata' - %s", err) + } + values := strings.Split(tmpStrings, "\n") + value := values[0] + + if value != memBwScSchemeAfter { + t.Fatal("Got the wrong value, set 'schemata' failed.") + } +} diff --git a/libcontainer/intelrdt/stats.go b/libcontainer/intelrdt/stats.go new file mode 100644 index 0000000..df5686f --- /dev/null +++ b/libcontainer/intelrdt/stats.go @@ -0,0 +1,40 @@ +// +build linux + +package intelrdt + +type L3CacheInfo struct { + CbmMask string `json:"cbm_mask,omitempty"` + MinCbmBits uint64 `json:"min_cbm_bits,omitempty"` + NumClosids uint64 `json:"num_closids,omitempty"` +} + +type MemBwInfo struct { + BandwidthGran uint64 `json:"bandwidth_gran,omitempty"` + DelayLinear uint64 `json:"delay_linear,omitempty"` + MinBandwidth uint64 `json:"min_bandwidth,omitempty"` + NumClosids uint64 `json:"num_closids,omitempty"` +} + +type Stats struct { + // The read-only L3 cache information + L3CacheInfo *L3CacheInfo 
`json:"l3_cache_info,omitempty"` + + // The read-only L3 cache schema in root + L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"` + + // The L3 cache schema in 'container_id' group + L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The read-only memory bandwidth information + MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"` + + // The read-only memory bandwidth schema in root + MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"` + + // The memory bandwidth schema in 'container_id' group + MemBwSchema string `json:"mem_bw_schema,omitempty"` +} + +func NewStats() *Stats { + return &Stats{} +} diff --git a/libcontainer/intelrdt/util_test.go b/libcontainer/intelrdt/util_test.go new file mode 100644 index 0000000..970b6ce --- /dev/null +++ b/libcontainer/intelrdt/util_test.go @@ -0,0 +1,67 @@ +// +build linux + +/* + * Utility for testing Intel RDT operations. + * Creates a mock of the Intel RDT "resource control" filesystem for the duration of the test. 
+ */ +package intelrdt + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type intelRdtTestUtil struct { + // intelRdt data to use in tests + IntelRdtData *intelRdtData + + // Path to the mock Intel RDT "resource control" filesystem directory + IntelRdtPath string + + // Temporary directory to store mock Intel RDT "resource control" filesystem + tempDir string + t *testing.T +} + +// Creates a new test util +func NewIntelRdtTestUtil(t *testing.T) *intelRdtTestUtil { + d := &intelRdtData{ + config: &configs.Config{ + IntelRdt: &configs.IntelRdt{}, + }, + } + tempDir, err := ioutil.TempDir("", "intelrdt_test") + if err != nil { + t.Fatal(err) + } + d.root = tempDir + testIntelRdtPath := filepath.Join(d.root, "resctrl") + if err != nil { + t.Fatal(err) + } + + // Ensure the full mock Intel RDT "resource control" filesystem path exists + err = os.MkdirAll(testIntelRdtPath, 0755) + if err != nil { + t.Fatal(err) + } + return &intelRdtTestUtil{IntelRdtData: d, IntelRdtPath: testIntelRdtPath, tempDir: tempDir, t: t} +} + +func (c *intelRdtTestUtil) cleanup() { + os.RemoveAll(c.tempDir) +} + +// Write the specified contents on the mock of the specified Intel RDT "resource control" files +func (c *intelRdtTestUtil) writeFileContents(fileContents map[string]string) { + for file, contents := range fileContents { + err := writeFile(c.IntelRdtPath, file, contents) + if err != nil { + c.t.Fatal(err) + } + } +} diff --git a/libcontainer/keys/keyctl.go b/libcontainer/keys/keyctl.go new file mode 100644 index 0000000..74dedd5 --- /dev/null +++ b/libcontainer/keys/keyctl.go @@ -0,0 +1,48 @@ +// +build linux + +package keys + +import ( + "fmt" + "strconv" + "strings" + + "github.com/pkg/errors" + + "golang.org/x/sys/unix" +) + +type KeySerial uint32 + +func JoinSessionKeyring(name string) (KeySerial, error) { + sessKeyId, err := unix.KeyctlJoinSessionKeyring(name) + if err != nil { + return 0, 
errors.Wrap(err, "create session key") + } + return KeySerial(sessKeyId), nil +} + +// ModKeyringPerm modifies permissions on a keyring by reading the current permissions, +// anding the bits with the given mask (clearing permissions) and setting +// additional permission bits +func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error { + dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringId)) + if err != nil { + return err + } + + res := strings.Split(dest, ";") + if len(res) < 5 { + return fmt.Errorf("Destination buffer for key description is too small") + } + + // parse permissions + perm64, err := strconv.ParseUint(res[3], 16, 32) + if err != nil { + return err + } + + perm := (uint32(perm64) & mask) | setbits + + return unix.KeyctlSetperm(int(ringId), perm) +} diff --git a/libcontainer/logs/logs.go b/libcontainer/logs/logs.go new file mode 100644 index 0000000..1077e7b --- /dev/null +++ b/libcontainer/logs/logs.go @@ -0,0 +1,102 @@ +package logs + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "os" + "strconv" + "sync" + + "github.com/sirupsen/logrus" +) + +var ( + configureMutex = sync.Mutex{} + // loggingConfigured will be set once logging has been configured via invoking `ConfigureLogging`. 
+ // Subsequent invocations of `ConfigureLogging` would be no-op + loggingConfigured = false +) + +type Config struct { + LogLevel logrus.Level + LogFormat string + LogFilePath string + LogPipeFd string +} + +func ForwardLogs(logPipe io.Reader) { + lineReader := bufio.NewReader(logPipe) + for { + line, err := lineReader.ReadBytes('\n') + if len(line) > 0 { + processEntry(line) + } + if err == io.EOF { + logrus.Debugf("log pipe has been closed: %+v", err) + return + } + if err != nil { + logrus.Errorf("log pipe read error: %+v", err) + } + } +} + +func processEntry(text []byte) { + type jsonLog struct { + Level string `json:"level"` + Msg string `json:"msg"` + } + + var jl jsonLog + if err := json.Unmarshal(text, &jl); err != nil { + logrus.Errorf("failed to decode %q to json: %+v", text, err) + return + } + + lvl, err := logrus.ParseLevel(jl.Level) + if err != nil { + logrus.Errorf("failed to parse log level %q: %v\n", jl.Level, err) + return + } + logrus.StandardLogger().Logf(lvl, jl.Msg) +} + +func ConfigureLogging(config Config) error { + configureMutex.Lock() + defer configureMutex.Unlock() + + if loggingConfigured { + logrus.Debug("logging has already been configured") + return nil + } + + logrus.SetLevel(config.LogLevel) + + if config.LogPipeFd != "" { + logPipeFdInt, err := strconv.Atoi(config.LogPipeFd) + if err != nil { + return fmt.Errorf("failed to convert _LIBCONTAINER_LOGPIPE environment variable value %q to int: %v", config.LogPipeFd, err) + } + logrus.SetOutput(os.NewFile(uintptr(logPipeFdInt), "logpipe")) + } else if config.LogFilePath != "" { + f, err := os.OpenFile(config.LogFilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0644) + if err != nil { + return err + } + logrus.SetOutput(f) + } + + switch config.LogFormat { + case "text": + // retain logrus's default. 
+ case "json": + logrus.SetFormatter(new(logrus.JSONFormatter)) + default: + return fmt.Errorf("unknown log-format %q", config.LogFormat) + } + + loggingConfigured = true + return nil +} diff --git a/libcontainer/logs/logs_linux_test.go b/libcontainer/logs/logs_linux_test.go new file mode 100644 index 0000000..83166fa --- /dev/null +++ b/libcontainer/logs/logs_linux_test.go @@ -0,0 +1,160 @@ +package logs + +import ( + "errors" + "io/ioutil" + "os" + "strings" + "testing" + "time" + + "github.com/sirupsen/logrus" +) + +func TestLoggingToFile(t *testing.T) { + logW, logFile, _ := runLogForwarding(t) + defer os.Remove(logFile) + defer logW.Close() + + logToLogWriter(t, logW, `{"level": "info","msg":"kitten"}`) + + logFileContent := waitForLogContent(t, logFile) + if !strings.Contains(string(logFileContent), "kitten") { + t.Fatalf("%s does not contain kitten", string(logFileContent)) + } +} + +func TestLogForwardingDoesNotStopOnJsonDecodeErr(t *testing.T) { + logW, logFile, _ := runLogForwarding(t) + defer os.Remove(logFile) + defer logW.Close() + + logToLogWriter(t, logW, "invalid-json-with-kitten") + + logFileContent := waitForLogContent(t, logFile) + if !strings.Contains(string(logFileContent), "failed to decode") { + t.Fatalf("%q does not contain decoding error", string(logFileContent)) + } + + truncateLogFile(t, logFile) + + logToLogWriter(t, logW, `{"level": "info","msg":"puppy"}`) + + logFileContent = waitForLogContent(t, logFile) + if !strings.Contains(string(logFileContent), "puppy") { + t.Fatalf("%s does not contain puppy", string(logFileContent)) + } +} + +func TestLogForwardingDoesNotStopOnLogLevelParsingErr(t *testing.T) { + logW, logFile, _ := runLogForwarding(t) + defer os.Remove(logFile) + defer logW.Close() + + logToLogWriter(t, logW, `{"level": "alert","msg":"puppy"}`) + + logFileContent := waitForLogContent(t, logFile) + if !strings.Contains(string(logFileContent), "failed to parse log level") { + t.Fatalf("%q does not contain log level parsing 
error", string(logFileContent)) + } + + truncateLogFile(t, logFile) + + logToLogWriter(t, logW, `{"level": "info","msg":"puppy"}`) + + logFileContent = waitForLogContent(t, logFile) + if !strings.Contains(string(logFileContent), "puppy") { + t.Fatalf("%s does not contain puppy", string(logFileContent)) + } +} + +func TestLogForwardingStopsAfterClosingTheWriter(t *testing.T) { + logW, logFile, doneForwarding := runLogForwarding(t) + defer os.Remove(logFile) + + logToLogWriter(t, logW, `{"level": "info","msg":"sync"}`) + + logFileContent := waitForLogContent(t, logFile) + if !strings.Contains(string(logFileContent), "sync") { + t.Fatalf("%q does not contain sync message", string(logFileContent)) + } + + logW.Close() + select { + case <-doneForwarding: + case <-time.After(10 * time.Second): + t.Fatal("log forwarding did not stop after closing the pipe") + } +} + +func logToLogWriter(t *testing.T, logW *os.File, message string) { + _, err := logW.Write([]byte(message + "\n")) + if err != nil { + t.Fatalf("failed to write %q to log writer: %v", message, err) + } +} + +func runLogForwarding(t *testing.T) (*os.File, string, chan struct{}) { + logR, logW, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + + tempFile, err := ioutil.TempFile("", "") + if err != nil { + t.Fatal(err) + } + logFile := tempFile.Name() + + logConfig := Config{LogLevel: logrus.InfoLevel, LogFormat: "json", LogFilePath: logFile} + return logW, logFile, startLogForwarding(t, logConfig, logR) +} + +func startLogForwarding(t *testing.T, logConfig Config, logR *os.File) chan struct{} { + loggingConfigured = false + if err := ConfigureLogging(logConfig); err != nil { + t.Fatal(err) + } + doneForwarding := make(chan struct{}) + go func() { + ForwardLogs(logR) + close(doneForwarding) + }() + return doneForwarding +} + +func waitForLogContent(t *testing.T, logFile string) string { + startTime := time.Now() + + for { + if time.Now().After(startTime.Add(10 * time.Second)) { + t.Fatal(errors.New("No 
content in log file after 10 seconds")) + break + } + + fileContent, err := ioutil.ReadFile(logFile) + if err != nil { + t.Fatal(err) + } + if len(fileContent) == 0 { + continue + } + return string(fileContent) + } + + return "" +} + +func truncateLogFile(t *testing.T, logFile string) { + file, err := os.OpenFile(logFile, os.O_RDWR, 0666) + if err != nil { + t.Fatalf("failed to open log file: %v", err) + return + } + defer file.Close() + + err = file.Truncate(0) + if err != nil { + t.Fatalf("failed to truncate log file: %v", err) + } +} diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go new file mode 100644 index 0000000..1d4f503 --- /dev/null +++ b/libcontainer/message_linux.go @@ -0,0 +1,89 @@ +// +build linux + +package libcontainer + +import ( + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// list of known message types we want to send to bootstrap program +// The number is randomly chosen to not conflict with known netlink types +const ( + InitMsg uint16 = 62000 + CloneFlagsAttr uint16 = 27281 + NsPathsAttr uint16 = 27282 + UidmapAttr uint16 = 27283 + GidmapAttr uint16 = 27284 + SetgroupAttr uint16 = 27285 + OomScoreAdjAttr uint16 = 27286 + RootlessEUIDAttr uint16 = 27287 + UidmapPathAttr uint16 = 27288 + GidmapPathAttr uint16 = 27289 +) + +type Int32msg struct { + Type uint16 + Value uint32 +} + +// Serialize serializes the message. 
+// Int32msg has the following representation +// | nlattr len | nlattr type | +// | uint32 value | +func (msg *Int32msg) Serialize() []byte { + buf := make([]byte, msg.Len()) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(msg.Len())) + native.PutUint16(buf[2:4], msg.Type) + native.PutUint32(buf[4:8], msg.Value) + return buf +} + +func (msg *Int32msg) Len() int { + return unix.NLA_HDRLEN + 4 +} + +// Bytemsg has the following representation +// | nlattr len | nlattr type | +// | value | pad | +type Bytemsg struct { + Type uint16 + Value []byte +} + +func (msg *Bytemsg) Serialize() []byte { + l := msg.Len() + buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1)) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(l)) + native.PutUint16(buf[2:4], msg.Type) + copy(buf[4:], msg.Value) + return buf +} + +func (msg *Bytemsg) Len() int { + return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated +} + +type Boolmsg struct { + Type uint16 + Value bool +} + +func (msg *Boolmsg) Serialize() []byte { + buf := make([]byte, msg.Len()) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(msg.Len())) + native.PutUint16(buf[2:4], msg.Type) + if msg.Value { + native.PutUint32(buf[4:8], uint32(1)) + } else { + native.PutUint32(buf[4:8], uint32(0)) + } + return buf +} + +func (msg *Boolmsg) Len() int { + return unix.NLA_HDRLEN + 4 // alignment +} diff --git a/libcontainer/mount/mount.go b/libcontainer/mount/mount.go new file mode 100644 index 0000000..e8965e0 --- /dev/null +++ b/libcontainer/mount/mount.go @@ -0,0 +1,23 @@ +package mount + +// GetMounts retrieves a list of mounts for the current running process. 
+func GetMounts() ([]*Info, error) { + return parseMountTable() +} + +// Mounted looks at /proc/self/mountinfo to determine if the specified +// mountpoint has been mounted +func Mounted(mountpoint string) (bool, error) { + entries, err := parseMountTable() + if err != nil { + return false, err + } + + // Search the table for the mountpoint + for _, e := range entries { + if e.Mountpoint == mountpoint { + return true, nil + } + } + return false, nil +} diff --git a/libcontainer/mount/mount_linux.go b/libcontainer/mount/mount_linux.go new file mode 100644 index 0000000..1e51919 --- /dev/null +++ b/libcontainer/mount/mount_linux.go @@ -0,0 +1,82 @@ +// +build linux + +package mount + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" +) + +const ( + /* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) + + (1) mount ID: unique identifier of the mount (may be reused after umount) + (2) parent ID: ID of parent (or of self for the top of the mount tree) + (3) major:minor: value of st_dev for files on filesystem + (4) root: root of the mount within the filesystem + (5) mount point: mount point relative to the process's root + (6) mount options: per mount options + (7) optional fields: zero or more fields of the form "tag[:value]" + (8) separator: marks the end of the optional fields + (9) filesystem type: name of filesystem of the form "type[.subtype]" + (10) mount source: filesystem specific information or "none" + (11) super options: per super block options*/ + mountinfoFormat = "%d %d %d:%d %s %s %s %s" +) + +// Parse /proc/self/mountinfo because comparing Dev and ino does not work from +// bind mounts +func parseMountTable() ([]*Info, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer f.Close() + + return parseInfoFile(f) +} + +func parseInfoFile(r io.Reader) ([]*Info, error) { + var ( + s = bufio.NewScanner(r) + out = []*Info{} + ) + + for 
s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + var ( + p = &Info{} + text = s.Text() + optionalFields string + ) + + if _, err := fmt.Sscanf(text, mountinfoFormat, + &p.ID, &p.Parent, &p.Major, &p.Minor, + &p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil { + return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err) + } + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + if len(postSeparatorFields) < 3 { + return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + if optionalFields != "-" { + p.Optional = optionalFields + } + + p.Fstype = postSeparatorFields[0] + p.Source = postSeparatorFields[1] + p.VfsOpts = strings.Join(postSeparatorFields[2:], " ") + out = append(out, p) + } + return out, nil +} diff --git a/libcontainer/mount/mountinfo.go b/libcontainer/mount/mountinfo.go new file mode 100644 index 0000000..e3fc353 --- /dev/null +++ b/libcontainer/mount/mountinfo.go @@ -0,0 +1,40 @@ +package mount + +// Info reveals information about a particular mounted filesystem. This +// struct is populated from the content in the /proc/<pid>/mountinfo file. +type Info struct { + // ID is a unique identifier of the mount (may be reused after umount). + ID int + + // Parent indicates the ID of the mount parent (or of self for the top of the + // mount tree). + Parent int + + // Major indicates one half of the device ID which identifies the device class. + Major int + + // Minor indicates one half of the device ID which identifies a specific + // instance of device. + Minor int + + // Root of the mount within the filesystem. + Root string + + // Mountpoint indicates the mount point relative to the process's root. + Mountpoint string + + // Opts represents mount-specific options. + Opts string + + // Optional represents optional fields. 
+ Optional string + + // Fstype indicates the type of filesystem, such as EXT3. + Fstype string + + // Source indicates filesystem specific information or "none". + Source string + + // VfsOpts represents per super block options. + VfsOpts string +} diff --git a/libcontainer/network_linux.go b/libcontainer/network_linux.go new file mode 100644 index 0000000..938d8ce --- /dev/null +++ b/libcontainer/network_linux.go @@ -0,0 +1,103 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/types" + "github.com/vishvananda/netlink" +) + +var strategies = map[string]networkStrategy{ + "loopback": &loopback{}, +} + +// networkStrategy represents a specific network configuration for +// a container's networking stack +type networkStrategy interface { + create(*network, int) error + initialize(*network) error + detach(*configs.Network) error + attach(*configs.Network) error +} + +// getStrategy returns the specific network strategy for the +// provided type. +func getStrategy(tpe string) (networkStrategy, error) { + s, exists := strategies[tpe] + if !exists { + return nil, fmt.Errorf("unknown strategy type %q", tpe) + } + return s, nil +} + +// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. +func getNetworkInterfaceStats(interfaceName string) (*types.NetworkInterface, error) { + out := &types.NetworkInterface{Name: interfaceName} + // This can happen if the network runtime information is missing - possible if the + // container was created by an old version of libcontainer. + if interfaceName == "" { + return out, nil + } + type netStatsPair struct { + // Where to write the output. + Out *uint64 + // The network stats file to read. + File string + } + // Ingress for host veth is from the container. 
Hence tx_bytes stat on the host veth is actually number of bytes received by the container. + netStats := []netStatsPair{ + {Out: &out.RxBytes, File: "tx_bytes"}, + {Out: &out.RxPackets, File: "tx_packets"}, + {Out: &out.RxErrors, File: "tx_errors"}, + {Out: &out.RxDropped, File: "tx_dropped"}, + + {Out: &out.TxBytes, File: "rx_bytes"}, + {Out: &out.TxPackets, File: "rx_packets"}, + {Out: &out.TxErrors, File: "rx_errors"}, + {Out: &out.TxDropped, File: "rx_dropped"}, + } + for _, netStat := range netStats { + data, err := readSysfsNetworkStats(interfaceName, netStat.File) + if err != nil { + return nil, err + } + *(netStat.Out) = data + } + return out, nil +} + +// Reads the specified statistics available under /sys/class/net/<ethInterface>/statistics +func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { + data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)) + if err != nil { + return 0, err + } + return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64) +} + +// loopback is a network strategy that provides a basic loopback device +type loopback struct { +} + +func (l *loopback) create(n *network, nspid int) error { + return nil +} + +func (l *loopback) initialize(config *network) error { + return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}}) +} + +func (l *loopback) attach(n *configs.Network) (err error) { + return nil +} + +func (l *loopback) detach(n *configs.Network) (err error) { + return nil +} diff --git a/libcontainer/notify_linux.go b/libcontainer/notify_linux.go new file mode 100644 index 0000000..47a0678 --- /dev/null +++ b/libcontainer/notify_linux.go @@ -0,0 +1,90 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "golang.org/x/sys/unix" +) + +const oomCgroupName = "memory" + +type PressureLevel uint + +const ( + LowPressure PressureLevel = iota + MediumPressure + CriticalPressure +) + +func 
registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) { + evFile, err := os.Open(filepath.Join(cgDir, evName)) + if err != nil { + return nil, err + } + fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC) + if err != nil { + evFile.Close() + return nil, err + } + + eventfd := os.NewFile(uintptr(fd), "eventfd") + + eventControlPath := filepath.Join(cgDir, "cgroup.event_control") + data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg) + if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil { + eventfd.Close() + evFile.Close() + return nil, err + } + ch := make(chan struct{}) + go func() { + defer func() { + eventfd.Close() + evFile.Close() + close(ch) + }() + buf := make([]byte, 8) + for { + if _, err := eventfd.Read(buf); err != nil { + return + } + // When a cgroup is destroyed, an event is sent to eventfd. + // So if the control path is gone, return instead of notifying. + if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { + return + } + ch <- struct{}{} + } + }() + return ch, nil +} + +// notifyOnOOM returns channel on which you can expect event about OOM, +// if process died without OOM this channel will be closed. 
+func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) { + dir := paths[oomCgroupName] + if dir == "" { + return nil, fmt.Errorf("path %q missing", oomCgroupName) + } + + return registerMemoryEvent(dir, "memory.oom_control", "") +} + +func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) { + dir := paths[oomCgroupName] + if dir == "" { + return nil, fmt.Errorf("path %q missing", oomCgroupName) + } + + if level > CriticalPressure { + return nil, fmt.Errorf("invalid pressure level %d", level) + } + + levelStr := []string{"low", "medium", "critical"}[level] + return registerMemoryEvent(dir, "memory.pressure_level", levelStr) +} diff --git a/libcontainer/notify_linux_test.go b/libcontainer/notify_linux_test.go new file mode 100644 index 0000000..1e15ae2 --- /dev/null +++ b/libcontainer/notify_linux_test.go @@ -0,0 +1,126 @@ +// +build linux + +package libcontainer + +import ( + "encoding/binary" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "testing" + "time" + + "golang.org/x/sys/unix" +) + +type notifyFunc func(paths map[string]string) (<-chan struct{}, error) + +func testMemoryNotification(t *testing.T, evName string, notify notifyFunc, targ string) { + memoryPath, err := ioutil.TempDir("", "testmemnotification-"+evName) + if err != nil { + t.Fatal(err) + } + evFile := filepath.Join(memoryPath, evName) + eventPath := filepath.Join(memoryPath, "cgroup.event_control") + if err := ioutil.WriteFile(evFile, []byte{}, 0700); err != nil { + t.Fatal(err) + } + if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil { + t.Fatal(err) + } + paths := map[string]string{ + "memory": memoryPath, + } + ch, err := notify(paths) + if err != nil { + t.Fatal("expected no error, got:", err) + } + + data, err := ioutil.ReadFile(eventPath) + if err != nil { + t.Fatal("couldn't read event control file:", err) + } + + var eventFd, evFd int + var arg string + if targ != "" { + _, err = fmt.Sscanf(string(data), "%d %d %s", 
&eventFd, &evFd, &arg) + } else { + _, err = fmt.Sscanf(string(data), "%d %d", &eventFd, &evFd) + } + if err != nil || arg != targ { + t.Fatalf("invalid control data %q: %s", data, err) + } + + // dup the eventfd + efd, err := unix.Dup(eventFd) + if err != nil { + t.Fatal("unable to dup eventfd:", err) + } + defer unix.Close(efd) + + buf := make([]byte, 8) + binary.LittleEndian.PutUint64(buf, 1) + + if _, err := unix.Write(efd, buf); err != nil { + t.Fatal("unable to write to eventfd:", err) + } + + select { + case <-ch: + case <-time.After(100 * time.Millisecond): + t.Fatal("no notification on channel after 100ms") + } + + // simulate what happens when a cgroup is destroyed by cleaning up and then + // writing to the eventfd. + if err := os.RemoveAll(memoryPath); err != nil { + t.Fatal(err) + } + if _, err := unix.Write(efd, buf); err != nil { + t.Fatal("unable to write to eventfd:", err) + } + + // give things a moment to shut down + select { + case _, ok := <-ch: + if ok { + t.Fatal("expected no notification to be triggered") + } + case <-time.After(100 * time.Millisecond): + t.Fatal("channel not closed after 100ms") + } + + if _, _, err := unix.Syscall(unix.SYS_FCNTL, uintptr(evFd), unix.F_GETFD, 0); err != unix.EBADF { + t.Errorf("expected event control to be closed, but received error %s", err.Error()) + } + + if _, _, err := unix.Syscall(unix.SYS_FCNTL, uintptr(eventFd), unix.F_GETFD, 0); err != unix.EBADF { + t.Errorf("expected event fd to be closed, but received error %s", err.Error()) + } +} + +func TestNotifyOnOOM(t *testing.T) { + f := func(paths map[string]string) (<-chan struct{}, error) { + return notifyOnOOM(paths) + } + + testMemoryNotification(t, "memory.oom_control", f, "") +} + +func TestNotifyMemoryPressure(t *testing.T) { + tests := map[PressureLevel]string{ + LowPressure: "low", + MediumPressure: "medium", + CriticalPressure: "critical", + } + + for level, arg := range tests { + f := func(paths map[string]string) (<-chan struct{}, error) { + 
return notifyMemoryPressure(paths, level) + } + + testMemoryNotification(t, "memory.pressure_level", f, arg) + } +} diff --git a/libcontainer/nsenter/README.md b/libcontainer/nsenter/README.md new file mode 100644 index 0000000..9ec6c39 --- /dev/null +++ b/libcontainer/nsenter/README.md @@ -0,0 +1,44 @@ +## nsenter + +The `nsenter` package registers a special init constructor that is called before +the Go runtime has a chance to boot. This provides us the ability to `setns` on +existing namespaces and avoid the issues that the Go runtime has with multiple +threads. This constructor will be called if this package is registered, +imported, in your go application. + +The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd/cgo/) +package. In cgo, if the import of "C" is immediately preceded by a comment, that comment, +called the preamble, is used as a header when compiling the C parts of the package. +So every time we import package `nsenter`, the C code function `nsexec()` would be +called. And package `nsenter` is only imported in `init.go`, so every time the runc +`init` command is invoked, that C code is run. + +Because `nsexec()` must be run before the Go runtime in order to use the +Linux kernel namespace, you must `import` this library into a package if +you plan to use `libcontainer` directly. Otherwise Go will not execute +the `nsexec()` constructor, which means that the re-exec will not cause +the namespaces to be joined. You can import it like this: + +```go +import _ "github.com/opencontainers/runc/libcontainer/nsenter" +``` + +`nsexec()` will first get the file descriptor number for the init pipe +from the environment variable `_LIBCONTAINER_INITPIPE` (which was opened +by the parent and kept open across the fork-exec of the `nsexec()` init +process). The init pipe is used to read bootstrap data (namespace paths, +clone flags, uid and gid mappings, and the console path) from the parent +process. 
`nsexec()` will then call `setns(2)` to join the namespaces +provided in the bootstrap data (if available), `clone(2)` a child process +with the provided clone flags, update the user and group ID mappings, do +some further miscellaneous setup steps, and then send the PID of the +child process to the parent of the `nsexec()` "caller". Finally, +the parent `nsexec()` will exit and the child `nsexec()` process will +return to allow the Go runtime to take over. + +NOTE: We do both `setns(2)` and `clone(2)` even if we don't have any +`CLONE_NEW*` clone flags because we must fork a new process in order to +enter the PID namespace. + + + diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c new file mode 100644 index 0000000..ad10f14 --- /dev/null +++ b/libcontainer/nsenter/cloned_binary.c @@ -0,0 +1,516 @@ +/* + * Copyright (C) 2019 Aleksa Sarai + * Copyright (C) 2019 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Use our own wrapper for memfd_create. */ +#if !defined(SYS_memfd_create) && defined(__NR_memfd_create) +# define SYS_memfd_create __NR_memfd_create +#endif +/* memfd_create(2) flags -- copied from . 
*/ +#ifndef MFD_CLOEXEC +# define MFD_CLOEXEC 0x0001U +# define MFD_ALLOW_SEALING 0x0002U +#endif +int memfd_create(const char *name, unsigned int flags) +{ +#ifdef SYS_memfd_create + return syscall(SYS_memfd_create, name, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + + +/* This comes directly from . */ +#ifndef F_LINUX_SPECIFIC_BASE +# define F_LINUX_SPECIFIC_BASE 1024 +#endif +#ifndef F_ADD_SEALS +# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif +#ifndef F_SEAL_SEAL +# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +# define F_SEAL_GROW 0x0004 /* prevent file from growing */ +# define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +#define CLONED_BINARY_ENV "_LIBCONTAINER_CLONED_BINARY" +#define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe" +#define RUNC_MEMFD_SEALS \ + (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE) + +static void *must_realloc(void *ptr, size_t size) +{ + void *old = ptr; + do { + ptr = realloc(old, size); + } while(!ptr); + return ptr; +} + +/* + * Verify whether we are currently in a self-cloned program (namely, is + * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather + * for shmem files), and we want to be sure it's actually sealed. + */ +static int is_self_cloned(void) +{ + int fd, ret, is_cloned = 0; + struct stat statbuf = {}; + struct statfs fsbuf = {}; + + fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -ENOTRECOVERABLE; + + /* + * Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for + * this, because you cannot write to a sealed memfd no matter what (so + * sharing it isn't a bad thing -- and an admin could bind-mount a sealed + * memfd to /usr/bin/runc to allow re-use). 
+ */ + ret = fcntl(fd, F_GET_SEALS); + if (ret >= 0) { + is_cloned = (ret == RUNC_MEMFD_SEALS); + goto out; + } + + /* + * All other forms require CLONED_BINARY_ENV, since they are potentially + * writeable (or we can't tell if they're fully safe) and thus we must + * check the environment as an extra layer of defence. + */ + if (!getenv(CLONED_BINARY_ENV)) { + is_cloned = false; + goto out; + } + + /* + * Is the binary on a read-only filesystem? We can't detect bind-mounts in + * particular (in-kernel they are identical to regular mounts) but we can + * at least be sure that it's read-only. In addition, to make sure that + * it's *our* bind-mount we check CLONED_BINARY_ENV. + */ + if (fstatfs(fd, &fsbuf) >= 0) + is_cloned |= (fsbuf.f_flags & MS_RDONLY); + + /* + * Okay, we're a tmpfile -- or we're currently running on RHEL <=7.6 + * which appears to have a borked backport of F_GET_SEALS. Either way, + * having a file which has no hardlinks indicates that we aren't using + * a host-side "runc" binary and this is something that a container + * cannot fake (because unlinking requires being able to resolve the + * path that you want to unlink). + */ + if (fstat(fd, &statbuf) >= 0) + is_cloned |= (statbuf.st_nlink == 0); + +out: + close(fd); + return is_cloned; +} + +/* Read a given file into a new buffer, and providing the length. */ +static char *read_file(char *path, size_t *length) +{ + int fd; + char buf[4096], *copy = NULL; + + if (!length) + return NULL; + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return NULL; + + *length = 0; + for (;;) { + ssize_t n; + + n = read(fd, buf, sizeof(buf)); + if (n < 0) + goto error; + if (!n) + break; + + copy = must_realloc(copy, (*length + n) * sizeof(*copy)); + memcpy(copy + *length, buf, n); + *length += n; + } + close(fd); + return copy; + +error: + close(fd); + free(copy); + return NULL; +} + +/* + * A poor-man's version of "xargs -0". 
Basically parses a given block of + * NUL-delimited data, within the given length and adds a pointer to each entry + * to the array of pointers. + */ +static int parse_xargs(char *data, int data_length, char ***output) +{ + int num = 0; + char *cur = data; + + if (!data || *output != NULL) + return -1; + + while (cur < data + data_length) { + num++; + *output = must_realloc(*output, (num + 1) * sizeof(**output)); + (*output)[num - 1] = cur; + cur += strlen(cur) + 1; + } + (*output)[num] = NULL; + return num; +} + +/* + * "Parse" out argv from /proc/self/cmdline. + * This is necessary because we are running in a context where we don't have a + * main() that we can just get the arguments from. + */ +static int fetchve(char ***argv) +{ + char *cmdline = NULL; + size_t cmdline_size; + + cmdline = read_file("/proc/self/cmdline", &cmdline_size); + if (!cmdline) + goto error; + + if (parse_xargs(cmdline, cmdline_size, argv) <= 0) + goto error; + + return 0; + +error: + free(cmdline); + return -EINVAL; +} + +enum { + EFD_NONE = 0, + EFD_MEMFD, + EFD_FILE, +}; + +/* + * This comes from . We can't hard-code __O_TMPFILE because it + * changes depending on the architecture. If we don't have O_TMPFILE we always + * have the mkostemp(3) fallback. + */ +#ifndef O_TMPFILE +# if defined(__O_TMPFILE) && defined(O_DIRECTORY) +# define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +# endif +#endif + +static int make_execfd(int *fdtype) +{ + int fd = -1; + char template[PATH_MAX] = {0}; + char *prefix = getenv("_LIBCONTAINER_STATEDIR"); + + if (!prefix || *prefix != '/') + prefix = "/tmp"; + if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0) + return -1; + + /* + * Now try memfd, it's much nicer than actually creating a file in STATEDIR + * since it's easily detected thanks to sealing and also doesn't require + * assumptions about STATEDIR. 
+ */ + *fdtype = EFD_MEMFD; + fd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING); + if (fd >= 0) + return fd; + if (errno != ENOSYS && errno != EINVAL) + goto error; + +#ifdef O_TMPFILE + /* + * Try O_TMPFILE to avoid races where someone might snatch our file. Note + * that O_EXCL isn't actually a security measure here (since you can just + * fd re-open it and clear O_EXCL). + */ + *fdtype = EFD_FILE; + fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700); + if (fd >= 0) { + struct stat statbuf = {}; + bool working_otmpfile = false; + + /* + * open(2) ignores unknown O_* flags -- yeah, I was surprised when I + * found this out too. As a result we can't check for EINVAL. However, + * if we get nlink != 0 (or EISDIR) then we know that this kernel + * doesn't support O_TMPFILE. + */ + if (fstat(fd, &statbuf) >= 0) + working_otmpfile = (statbuf.st_nlink == 0); + + if (working_otmpfile) + return fd; + + /* Pretend that we got EISDIR since O_TMPFILE failed. */ + close(fd); + errno = EISDIR; + } + if (errno != EISDIR) + goto error; +#endif /* defined(O_TMPFILE) */ + + /* + * Our final option is to create a temporary file the old-school way, and + * then unlink it so that nothing else sees it by accident. + */ + *fdtype = EFD_FILE; + fd = mkostemp(template, O_CLOEXEC); + if (fd >= 0) { + if (unlink(template) >= 0) + return fd; + close(fd); + } + +error: + *fdtype = EFD_NONE; + return -1; +} + +static int seal_execfd(int *fd, int fdtype) +{ + switch (fdtype) { + case EFD_MEMFD: + return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS); + case EFD_FILE: { + /* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. 
*/ + int newfd; + char fdpath[PATH_MAX] = {0}; + + if (fchmod(*fd, 0100) < 0) + return -1; + + if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0) + return -1; + + newfd = open(fdpath, O_PATH | O_CLOEXEC); + if (newfd < 0) + return -1; + + close(*fd); + *fd = newfd; + return 0; + } + default: + break; + } + return -1; +} + +static int try_bindfd(void) +{ + int fd, ret = -1; + char template[PATH_MAX] = {0}; + char *prefix = getenv("_LIBCONTAINER_STATEDIR"); + + if (!prefix || *prefix != '/') + prefix = "/tmp"; + if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0) + return ret; + + /* + * We need somewhere to mount it, mounting anything over /proc/self is a + * BAD idea on the host -- even if we do it temporarily. + */ + fd = mkstemp(template); + if (fd < 0) + return ret; + close(fd); + + /* + * For obvious reasons this won't work in rootless mode because we haven't + * created a userns+mntns -- but getting that to work will be a bit + * complicated and it's only worth doing if someone actually needs it. + */ + ret = -EPERM; + if (mount("/proc/self/exe", template, "", MS_BIND, "") < 0) + goto out; + if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0) + goto out_umount; + + + /* Get read-only handle that we're sure can't be made read-write. */ + ret = open(template, O_PATH | O_CLOEXEC); + +out_umount: + /* + * Make sure the MNT_DETACH works, otherwise we could get remounted + * read-write and that would be quite bad (the fd would be made read-write + * too, invalidating the protection). + */ + if (umount2(template, MNT_DETACH) < 0) { + if (ret >= 0) + close(ret); + ret = -ENOTRECOVERABLE; + } + +out: + /* + * We don't care about unlink errors, the worst that happens is that + * there's an empty file left around in STATEDIR. 
+ */ + unlink(template); + return ret; +} + +static ssize_t fd_to_fd(int outfd, int infd) +{ + ssize_t total = 0; + char buffer[4096]; + + for (;;) { + ssize_t nread, nwritten = 0; + + nread = read(infd, buffer, sizeof(buffer)); + if (nread < 0) + return -1; + if (!nread) + break; + + do { + ssize_t n = write(outfd, buffer + nwritten, nread - nwritten); + if (n < 0) + return -1; + nwritten += n; + } while(nwritten < nread); + + total += nwritten; + } + + return total; +} + +static int clone_binary(void) +{ + int binfd, execfd; + struct stat statbuf = {}; + size_t sent = 0; + int fdtype = EFD_NONE; + + /* + * Before we resort to copying, let's try creating an ro-binfd in one shot + * by getting a handle for a read-only bind-mount of the execfd. + */ + execfd = try_bindfd(); + if (execfd >= 0) + return execfd; + + /* + * Dammit, that didn't work -- time to copy the binary to a safe place we + * can seal the contents. + */ + execfd = make_execfd(&fdtype); + if (execfd < 0 || fdtype == EFD_NONE) + return -ENOTRECOVERABLE; + + binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC); + if (binfd < 0) + goto error; + + if (fstat(binfd, &statbuf) < 0) + goto error_binfd; + + while (sent < statbuf.st_size) { + int n = sendfile(execfd, binfd, NULL, statbuf.st_size - sent); + if (n < 0) { + /* sendfile can fail so we fallback to a dumb user-space copy. */ + n = fd_to_fd(execfd, binfd); + if (n < 0) + goto error_binfd; + } + sent += n; + } + close(binfd); + if (sent != statbuf.st_size) + goto error; + + if (seal_execfd(&execfd, fdtype) < 0) + goto error; + + return execfd; + +error_binfd: + close(binfd); +error: + close(execfd); + return -EIO; +} + +/* Get cheap access to the environment. */ +extern char **environ; + +int ensure_cloned_binary(void) +{ + int execfd; + char **argv = NULL; + + /* Check that we're not self-cloned, and if we are then bail. 
*/ + int cloned = is_self_cloned(); + if (cloned > 0 || cloned == -ENOTRECOVERABLE) + return cloned; + + if (fetchve(&argv) < 0) + return -EINVAL; + + execfd = clone_binary(); + if (execfd < 0) + return -EIO; + + if (putenv(CLONED_BINARY_ENV "=1")) + goto error; + + fexecve(execfd, argv, environ); +error: + close(execfd); + return -ENOEXEC; +} diff --git a/libcontainer/nsenter/namespace.h b/libcontainer/nsenter/namespace.h new file mode 100644 index 0000000..9e9bdca --- /dev/null +++ b/libcontainer/nsenter/namespace.h @@ -0,0 +1,32 @@ +#ifndef NSENTER_NAMESPACE_H +#define NSENTER_NAMESPACE_H + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif +#include + +/* All of these are taken from include/uapi/linux/sched.h */ +#ifndef CLONE_NEWNS +# define CLONE_NEWNS 0x00020000 /* New mount namespace group */ +#endif +#ifndef CLONE_NEWCGROUP +# define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ +#endif +#ifndef CLONE_NEWUTS +# define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ +#endif +#ifndef CLONE_NEWIPC +# define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ +#endif +#ifndef CLONE_NEWUSER +# define CLONE_NEWUSER 0x10000000 /* New user namespace */ +#endif +#ifndef CLONE_NEWPID +# define CLONE_NEWPID 0x20000000 /* New pid namespace */ +#endif +#ifndef CLONE_NEWNET +# define CLONE_NEWNET 0x40000000 /* New network namespace */ +#endif + +#endif /* NSENTER_NAMESPACE_H */ diff --git a/libcontainer/nsenter/nsenter.go b/libcontainer/nsenter/nsenter.go new file mode 100644 index 0000000..07f4d63 --- /dev/null +++ b/libcontainer/nsenter/nsenter.go @@ -0,0 +1,12 @@ +// +build linux,!gccgo + +package nsenter + +/* +#cgo CFLAGS: -Wall +extern void nsexec(); +void __attribute__((constructor)) init(void) { + nsexec(); +} +*/ +import "C" diff --git a/libcontainer/nsenter/nsenter_gccgo.go b/libcontainer/nsenter/nsenter_gccgo.go new file mode 100644 index 0000000..63c7a3e --- /dev/null +++ b/libcontainer/nsenter/nsenter_gccgo.go @@ -0,0 +1,25 @@ +// +build linux,gccgo 
+ +package nsenter + +/* +#cgo CFLAGS: -Wall +extern void nsexec(); +void __attribute__((constructor)) init(void) { + nsexec(); +} +*/ +import "C" + +// AlwaysFalse is here to stay false +// (and be exported so the compiler doesn't optimize out its reference) +var AlwaysFalse bool + +func init() { + if AlwaysFalse { + // by referencing this C init() in a noop test, it will ensure the compiler + // links in the C function. + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134 + C.init() + } +} diff --git a/libcontainer/nsenter/nsenter_test.go b/libcontainer/nsenter/nsenter_test.go new file mode 100644 index 0000000..c4d3c86 --- /dev/null +++ b/libcontainer/nsenter/nsenter_test.go @@ -0,0 +1,239 @@ +package nsenter + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer" + "github.com/vishvananda/netlink/nl" + + "golang.org/x/sys/unix" +) + +type pid struct { + Pid int `json:"Pid"` +} + +type logentry struct { + Msg string `json:"msg"` + Level string `json:"level"` +} + +func TestNsenterValidPaths(t *testing.T) { + args := []string{"nsenter-exec"} + parent, child, err := newPipe() + if err != nil { + t.Fatalf("failed to create pipe %v", err) + } + + namespaces := []string{ + // join pid ns of the current process + fmt.Sprintf("pid:/proc/%d/ns/pid", os.Getpid()), + } + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + ExtraFiles: []*os.File{child}, + Env: []string{"_LIBCONTAINER_INITPIPE=3"}, + Stdout: os.Stdout, + Stderr: os.Stderr, + } + + if err := cmd.Start(); err != nil { + t.Fatalf("nsenter failed to start %v", err) + } + // write cloneFlags + r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) + r.AddData(&libcontainer.Int32msg{ + Type: libcontainer.CloneFlagsAttr, + Value: uint32(unix.CLONE_NEWNET), + }) + r.AddData(&libcontainer.Bytemsg{ + Type: libcontainer.NsPathsAttr, + Value: []byte(strings.Join(namespaces, ",")), + }) + if _, err := 
io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { + t.Fatal(err) + } + + decoder := json.NewDecoder(parent) + var pid *pid + + if err := cmd.Wait(); err != nil { + t.Fatalf("nsenter exits with a non-zero exit status") + } + if err := decoder.Decode(&pid); err != nil { + dir, _ := ioutil.ReadDir(fmt.Sprintf("/proc/%d/ns", os.Getpid())) + for _, d := range dir { + t.Log(d.Name()) + } + t.Fatalf("%v", err) + } + + p, err := os.FindProcess(pid.Pid) + if err != nil { + t.Fatalf("%v", err) + } + p.Wait() +} + +func TestNsenterInvalidPaths(t *testing.T) { + args := []string{"nsenter-exec"} + parent, child, err := newPipe() + if err != nil { + t.Fatalf("failed to create pipe %v", err) + } + + namespaces := []string{ + // join pid ns of the current process + fmt.Sprintf("pid:/proc/%d/ns/pid", -1), + } + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + ExtraFiles: []*os.File{child}, + Env: []string{"_LIBCONTAINER_INITPIPE=3"}, + } + + if err := cmd.Start(); err != nil { + t.Fatal(err) + } + // write cloneFlags + r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) + r.AddData(&libcontainer.Int32msg{ + Type: libcontainer.CloneFlagsAttr, + Value: uint32(unix.CLONE_NEWNET), + }) + r.AddData(&libcontainer.Bytemsg{ + Type: libcontainer.NsPathsAttr, + Value: []byte(strings.Join(namespaces, ",")), + }) + if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { + t.Fatal(err) + } + + if err := cmd.Wait(); err == nil { + t.Fatalf("nsenter exits with a zero exit status") + } +} + +func TestNsenterIncorrectPathType(t *testing.T) { + args := []string{"nsenter-exec"} + parent, child, err := newPipe() + if err != nil { + t.Fatalf("failed to create pipe %v", err) + } + + namespaces := []string{ + // join pid ns of the current process + fmt.Sprintf("net:/proc/%d/ns/pid", os.Getpid()), + } + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + ExtraFiles: []*os.File{child}, + Env: []string{"_LIBCONTAINER_INITPIPE=3"}, + } + + if err := cmd.Start(); err 
!= nil { + t.Fatal(err) + } + // write cloneFlags + r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) + r.AddData(&libcontainer.Int32msg{ + Type: libcontainer.CloneFlagsAttr, + Value: uint32(unix.CLONE_NEWNET), + }) + r.AddData(&libcontainer.Bytemsg{ + Type: libcontainer.NsPathsAttr, + Value: []byte(strings.Join(namespaces, ",")), + }) + if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { + t.Fatal(err) + } + + if err := cmd.Wait(); err == nil { + t.Fatalf("nsenter exits with a zero exit status") + } +} + +func TestNsenterChildLogging(t *testing.T) { + args := []string{"nsenter-exec"} + parent, child, err := newPipe() + if err != nil { + t.Fatalf("failed to create exec pipe %v", err) + } + logread, logwrite, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create log pipe %v", err) + } + defer logread.Close() + defer logwrite.Close() + + namespaces := []string{ + // join pid ns of the current process + fmt.Sprintf("pid:/proc/%d/ns/pid", os.Getpid()), + } + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + ExtraFiles: []*os.File{child, logwrite}, + Env: []string{"_LIBCONTAINER_INITPIPE=3", "_LIBCONTAINER_LOGPIPE=4"}, + Stdout: os.Stdout, + Stderr: os.Stderr, + } + + if err := cmd.Start(); err != nil { + t.Fatalf("nsenter failed to start %v", err) + } + // write cloneFlags + r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) + r.AddData(&libcontainer.Int32msg{ + Type: libcontainer.CloneFlagsAttr, + Value: uint32(unix.CLONE_NEWNET), + }) + r.AddData(&libcontainer.Bytemsg{ + Type: libcontainer.NsPathsAttr, + Value: []byte(strings.Join(namespaces, ",")), + }) + if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { + t.Fatal(err) + } + + logsDecoder := json.NewDecoder(logread) + var logentry *logentry + + err = logsDecoder.Decode(&logentry) + if err != nil { + t.Fatalf("child log: %v", err) + } + if logentry.Level == "" || logentry.Msg == "" { + t.Fatalf("child log: empty log fileds: level=\"%s\" 
msg=\"%s\"", logentry.Level, logentry.Msg) + } + + if err := cmd.Wait(); err != nil { + t.Fatalf("nsenter exits with a non-zero exit status") + } +} + +func init() { + if strings.HasPrefix(os.Args[0], "nsenter-") { + os.Exit(0) + } + return +} + +func newPipe() (parent *os.File, child *os.File, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} diff --git a/libcontainer/nsenter/nsenter_unsupported.go b/libcontainer/nsenter/nsenter_unsupported.go new file mode 100644 index 0000000..2459c63 --- /dev/null +++ b/libcontainer/nsenter/nsenter_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux !cgo + +package nsenter diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c new file mode 100644 index 0000000..0726568 --- /dev/null +++ b/libcontainer/nsenter/nsexec.c @@ -0,0 +1,1032 @@ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Get all of the CLONE_NEW* flags. */ +#include "namespace.h" + +/* Synchronisation values. */ +enum sync_t { + SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */ + SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */ + SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */ + SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */ + SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */ + SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */ +}; + +/* + * Synchronisation value for cgroup namespace setup. + * The same constant is defined in process_linux.go as "createCgroupns". + */ +#define CREATECGROUPNS 0x80 + +/* longjmp() arguments. 
*/ +#define JUMP_PARENT 0x00 /* value of the direct setjmp() call in nsexec(), i.e. stage 0 */ +#define JUMP_CHILD 0xA0 /* longjmp() value selecting stage 1 */ +#define JUMP_INIT 0xA1 /* longjmp() value selecting stage 2 */ + +/* Assume the stack grows down, so arguments should be above it. */ +struct clone_t { + /* + * Reserve some space for clone() to locate arguments + * and retcode in this place + */ + char stack[4096] __attribute__ ((aligned(16))); + char stack_ptr[0]; /* end of stack[]; clone_parent() passes this to clone() as the child stack */ + + /* There are two children. child_func() hands env and jmpval to longjmp() so that each one resumes in its own stage. */ + jmp_buf *env; + int jmpval; +}; + +struct nlconfig_t { + char *data; /* malloc()ed netlink payload filled in by nl_parse(); released by nl_free() */ + + /* Process settings. */ + uint32_t cloneflags; + char *oom_score_adj; + size_t oom_score_adj_len; + + /* User namespace settings. */ + char *uidmap; + size_t uidmap_len; + char *gidmap; + size_t gidmap_len; + char *namespaces; + size_t namespaces_len; + uint8_t is_setgroup; /* boolean */ + + /* Rootless container settings. */ + uint8_t is_rootless_euid; /* boolean */ + char *uidmappath; /* tool path handed to try_mapping_tool() when writing uid_map fails with EPERM */ + size_t uidmappath_len; + char *gidmappath; /* tool path handed to try_mapping_tool() when writing gid_map fails with EPERM */ + size_t gidmappath_len; +}; + +#define PANIC "panic" /* log levels: emitted as the "level" field of each JSON log line */ +#define FATAL "fatal" +#define ERROR "error" +#define WARNING "warning" +#define INFO "info" +#define DEBUG "debug" + +static int logfd = -1; /* log pipe fd; stays -1 unless setup_logpipe() finds _LIBCONTAINER_LOGPIPE */ + +/* + * List of netlink message types sent to us as part of bootstrapping the init. + * These constants are defined in libcontainer/message_linux.go. + */ +#define INIT_MSG 62000 +#define CLONE_FLAGS_ATTR 27281 +#define NS_PATHS_ATTR 27282 +#define UIDMAP_ATTR 27283 +#define GIDMAP_ATTR 27284 +#define SETGROUP_ATTR 27285 +#define OOM_SCORE_ADJ_ATTR 27286 +#define ROOTLESS_EUID_ATTR 27287 +#define UIDMAPPATH_ATTR 27288 +#define GIDMAPPATH_ATTR 27289 + +/* + * Use the raw syscall for versions of glibc which don't include a function for + * it, namely (glibc 2.12). 
+ */ +#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 +# define _GNU_SOURCE +# include "syscall.h" +# if !defined(SYS_setns) && defined(__NR_setns) +# define SYS_setns __NR_setns +# endif + +#ifndef SYS_setns +# error "setns(2) syscall not supported by glibc version" +#endif + +int setns(int fd, int nstype) +{ + return syscall(SYS_setns, fd, nstype); +} +#endif + +static void write_log_with_info(const char *level, const char *function, int line, const char *format, ...) +{ + char message[1024] = {}; + + va_list args; + + if (logfd < 0 || level == NULL) + return; + + va_start(args, format); + if (vsnprintf(message, sizeof(message), format, args) < 0) + goto done; + + dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s:%d %s\"}\n", level, function, line, message); +done: + va_end(args); +} + +#define write_log(level, fmt, ...) \ + write_log_with_info((level), __FUNCTION__, __LINE__, (fmt), ##__VA_ARGS__) + +/* XXX: This is ugly. */ +static int syncfd = -1; + +#define bail(fmt, ...) \ + do { \ + write_log(FATAL, "nsenter: " fmt ": %m", ##__VA_ARGS__); \ + exit(1); \ + } while(0) + +static int write_file(char *data, size_t data_len, char *pathfmt, ...) +{ + int fd, len, ret = 0; + char path[PATH_MAX]; + + va_list ap; + va_start(ap, pathfmt); + len = vsnprintf(path, PATH_MAX, pathfmt, ap); + va_end(ap); + if (len < 0) + return -1; + + fd = open(path, O_RDWR); + if (fd < 0) { + return -1; + } + + len = write(fd, data, data_len); + if (len != data_len) { + ret = -1; + goto out; + } + + out: + close(fd); + return ret; +} + +enum policy_t { + SETGROUPS_DEFAULT = 0, + SETGROUPS_ALLOW, + SETGROUPS_DENY, +}; + +/* This *must* be called before we touch gid_map. */ +static void update_setgroups(int pid, enum policy_t setgroup) +{ + char *policy; + + switch (setgroup) { + case SETGROUPS_ALLOW: + policy = "allow"; + break; + case SETGROUPS_DENY: + policy = "deny"; + break; + case SETGROUPS_DEFAULT: + default: + /* Nothing to do. 
*/ + return; + } + + if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) { + /* + * If the kernel is too old to support /proc/pid/setgroups, + * open(2) or write(2) will return ENOENT. This is fine. + */ + if (errno != ENOENT) + bail("failed to write '%s' to /proc/%d/setgroups", policy, pid); + } +} + +static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len) +{ + int child; + + /* + * If @app is NULL, execve will segfault. Just check it here and bail (if + * we're in this path, the caller is already getting desperate and there + * isn't a backup to this failing). This usually would be a configuration + * or programming issue. + */ + if (!app) + bail("mapping tool not present"); + + child = fork(); + if (child < 0) + bail("failed to fork"); + + if (!child) { +#define MAX_ARGV 20 + char *argv[MAX_ARGV]; + char *envp[] = { NULL }; + char pid_fmt[16]; + int argc = 0; + char *next; + + snprintf(pid_fmt, 16, "%d", pid); + + argv[argc++] = (char *)app; + argv[argc++] = pid_fmt; + /* + * Convert the map string into a list of argument that + * newuidmap/newgidmap can understand. 
+ */ + + while (argc < MAX_ARGV) { + if (*map == '\0') { + argv[argc++] = NULL; + break; + } + argv[argc++] = map; + next = strpbrk(map, "\n "); + if (next == NULL) + break; + *next++ = '\0'; + map = next + strspn(next, "\n "); + } + + execve(app, argv, envp); + bail("failed to execv"); + } else { + int status; + + while (true) { + if (waitpid(child, &status, 0) < 0) { + if (errno == EINTR) + continue; + bail("failed to waitpid"); + } + if (WIFEXITED(status) || WIFSIGNALED(status)) + return WEXITSTATUS(status); + } + } + + return -1; +} + +static void update_uidmap(const char *path, int pid, char *map, size_t map_len) +{ + if (map == NULL || map_len <= 0) + return; + + if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) { + if (errno != EPERM) + bail("failed to update /proc/%d/uid_map", pid); + if (try_mapping_tool(path, pid, map, map_len)) + bail("failed to use newuid map on %d", pid); + } +} + +static void update_gidmap(const char *path, int pid, char *map, size_t map_len) +{ + if (map == NULL || map_len <= 0) + return; + + if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) { + if (errno != EPERM) + bail("failed to update /proc/%d/gid_map", pid); + if (try_mapping_tool(path, pid, map, map_len)) + bail("failed to use newgid map on %d", pid); + } +} + +static void update_oom_score_adj(char *data, size_t len) +{ + if (data == NULL || len <= 0) + return; + + if (write_file(data, len, "/proc/self/oom_score_adj") < 0) + bail("failed to update /proc/self/oom_score_adj"); +} + +/* A dummy function that just jumps to the given jumpval. 
*/ +static int child_func(void *arg) __attribute__ ((noinline)); +static int child_func(void *arg) +{ + struct clone_t *ca = (struct clone_t *)arg; + longjmp(*ca->env, ca->jmpval); +} + +static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline)); +static int clone_parent(jmp_buf *env, int jmpval) +{ + struct clone_t ca = { + .env = env, + .jmpval = jmpval, + }; + + return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca); +} + +/* + * Gets the init pipe fd from the environment, which is used to read the + * bootstrap data and tell the parent what the new pid is after we finish + * setting up the environment. + */ +static int initpipe(void) +{ + int pipenum; + char *initpipe, *endptr; + + initpipe = getenv("_LIBCONTAINER_INITPIPE"); + if (initpipe == NULL || *initpipe == '\0') + return -1; + + pipenum = strtol(initpipe, &endptr, 10); + if (*endptr != '\0') + bail("unable to parse _LIBCONTAINER_INITPIPE"); + + return pipenum; +} + +static void setup_logpipe(void) +{ + char *logpipe, *endptr; + + logpipe = getenv("_LIBCONTAINER_LOGPIPE"); + if (logpipe == NULL || *logpipe == '\0') { + return; + } + + logfd = strtol(logpipe, &endptr, 10); + if (logpipe == endptr || *endptr != '\0') { + fprintf(stderr, "unable to parse _LIBCONTAINER_LOGPIPE, value: %s\n", logpipe); + /* It is too early to use bail */ + exit(1); + } +} + +/* Returns the clone(2) flag for a namespace, given the name of a namespace. */ +static int nsflag(char *name) +{ + if (!strcmp(name, "cgroup")) + return CLONE_NEWCGROUP; + else if (!strcmp(name, "ipc")) + return CLONE_NEWIPC; + else if (!strcmp(name, "mnt")) + return CLONE_NEWNS; + else if (!strcmp(name, "net")) + return CLONE_NEWNET; + else if (!strcmp(name, "pid")) + return CLONE_NEWPID; + else if (!strcmp(name, "user")) + return CLONE_NEWUSER; + else if (!strcmp(name, "uts")) + return CLONE_NEWUTS; + + /* If we don't recognise a name, fallback to 0. 
*/ + return 0; +} + +static uint32_t readint32(char *buf) +{ + return *(uint32_t *) buf; +} + +static uint8_t readint8(char *buf) +{ + return *(uint8_t *) buf; +} + +static void nl_parse(int fd, struct nlconfig_t *config) +{ + size_t len, size; + struct nlmsghdr hdr; + char *data, *current; + + /* Retrieve the netlink header. */ + len = read(fd, &hdr, NLMSG_HDRLEN); + if (len != NLMSG_HDRLEN) + bail("invalid netlink header length %zu", len); + + if (hdr.nlmsg_type == NLMSG_ERROR) + bail("failed to read netlink message"); + + if (hdr.nlmsg_type != INIT_MSG) + bail("unexpected msg type %d", hdr.nlmsg_type); + + /* Retrieve data. */ + size = NLMSG_PAYLOAD(&hdr, 0); + current = data = malloc(size); + if (!data) + bail("failed to allocate %zu bytes of memory for nl_payload", size); + + len = read(fd, data, size); + if (len != size) + bail("failed to read netlink payload, %zu != %zu", len, size); + + /* Parse the netlink payload. */ + config->data = data; + while (current < data + size) { + struct nlattr *nlattr = (struct nlattr *)current; + size_t payload_len = nlattr->nla_len - NLA_HDRLEN; + + /* Advance to payload. */ + current += NLA_HDRLEN; + + /* Handle payload. 
*/ + switch (nlattr->nla_type) { + case CLONE_FLAGS_ATTR: + config->cloneflags = readint32(current); + break; + case ROOTLESS_EUID_ATTR: + config->is_rootless_euid = readint8(current); /* boolean */ + break; + case OOM_SCORE_ADJ_ATTR: + config->oom_score_adj = current; + config->oom_score_adj_len = payload_len; + break; + case NS_PATHS_ATTR: + config->namespaces = current; + config->namespaces_len = payload_len; + break; + case UIDMAP_ATTR: + config->uidmap = current; + config->uidmap_len = payload_len; + break; + case GIDMAP_ATTR: + config->gidmap = current; + config->gidmap_len = payload_len; + break; + case UIDMAPPATH_ATTR: + config->uidmappath = current; + config->uidmappath_len = payload_len; + break; + case GIDMAPPATH_ATTR: + config->gidmappath = current; + config->gidmappath_len = payload_len; + break; + case SETGROUP_ATTR: + config->is_setgroup = readint8(current); + break; + default: + bail("unknown netlink message type %d", nlattr->nla_type); + } + + current += NLA_ALIGN(payload_len); + } +} + +void nl_free(struct nlconfig_t *config) +{ + free(config->data); +} + +void join_namespaces(char *nslist) +{ + int num = 0, i; + char *saveptr = NULL; + char *namespace = strtok_r(nslist, ",", &saveptr); + struct namespace_t { + int fd; + int ns; + char type[PATH_MAX]; + char path[PATH_MAX]; + } *namespaces = NULL; + + if (!namespace || !strlen(namespace) || !strlen(nslist)) + bail("ns paths are empty"); + + /* + * We have to open the file descriptors first, since after + * we join the mnt namespace we might no longer be able to + * access the paths. + */ + do { + int fd; + char *path; + struct namespace_t *ns; + + /* Resize the namespace array. */ + namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t)); + if (!namespaces) + bail("failed to reallocate namespace array"); + ns = &namespaces[num - 1]; + + /* Split 'ns:path'. 
*/ + path = strstr(namespace, ":"); + if (!path) + bail("failed to parse %s", namespace); + *path++ = '\0'; + + fd = open(path, O_RDONLY); + if (fd < 0) + bail("failed to open %s", path); + + ns->fd = fd; + ns->ns = nsflag(namespace); + strncpy(ns->path, path, PATH_MAX - 1); + ns->path[PATH_MAX - 1] = '\0'; + } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); + + /* + * The ordering in which we join namespaces is important. We should + * always join the user namespace *first*. This is all guaranteed + * from the container_linux.go side of this, so we're just going to + * follow the order given to us. + */ + + for (i = 0; i < num; i++) { + struct namespace_t ns = namespaces[i]; + + if (setns(ns.fd, ns.ns) < 0) + bail("failed to setns to %s", ns.path); + + close(ns.fd); + } + + free(namespaces); +} + +/* Defined in cloned_binary.c. */ +extern int ensure_cloned_binary(void); + +void nsexec(void) +{ + int pipenum; + jmp_buf env; + int sync_child_pipe[2], sync_grandchild_pipe[2]; + struct nlconfig_t config = { 0 }; + + /* + * Setup a pipe to send logs to the parent. This should happen + * first, because bail will use that pipe. + */ + setup_logpipe(); + + /* + * If we don't have an init pipe, just return to the go routine. + * We'll only get an init pipe for start or exec. + */ + pipenum = initpipe(); + if (pipenum == -1) + return; + + /* + * We need to re-exec if we are not in a cloned binary. This is necessary + * to ensure that containers won't be able to access the host binary + * through /proc/self/exe. See CVE-2019-5736. + */ + if (ensure_cloned_binary() < 0) + bail("could not ensure we are a cloned binary"); + + write_log(DEBUG, "nsexec started"); + + /* Parse all of the netlink configuration. */ + nl_parse(pipenum, &config); + + /* Set oom_score_adj. This has to be done before !dumpable because + * /proc/self/oom_score_adj is not writeable unless you're an privileged + * user (if !dumpable is set). 
All children inherit their parent's + * oom_score_adj value on fork(2) so this will always be propagated + * properly. + */ + update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len); + + /* + * Make the process non-dumpable, to avoid various race conditions that + * could cause processes in namespaces we're joining to access host + * resources (or potentially execute code). + * + * However, if the number of namespaces we are joining is 0, we are not + * going to be switching to a different security context. Thus setting + * ourselves to be non-dumpable only breaks things (like rootless + * containers), which is the recommendation from the kernel folks. + */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + bail("failed to set process as non-dumpable"); + } + + /* Pipe so we can tell the child when we've finished setting up. */ + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0) + bail("failed to setup sync pipe between parent and child"); + + /* + * We need a new socketpair to sync with grandchild so we don't have + * race condition with child. + */ + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0) + bail("failed to setup sync pipe between parent and grandchild"); + + /* TODO: Currently we aren't dealing with child deaths properly. */ + + /* + * Okay, so this is quite annoying. + * + * In order for this unsharing code to be more extensible we need to split + * up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case + * would be if we did clone(CLONE_NEWUSER) and the other namespaces + * separately, but because of SELinux issues we cannot really do that. But + * we cannot just dump the namespace flags into clone(...) because several + * usecases (such as rootless containers) require more granularity around + * the namespace setup. 
In addition, some older kernels had issues where + * CLONE_NEWUSER wasn't handled before other namespaces (but we cannot + * handle this while also dealing with SELinux so we choose SELinux support + * over broken kernel support). + * + * However, if we unshare(2) the user namespace *before* we clone(2), then + * all hell breaks loose. + * + * The parent no longer has permissions to do many things (unshare(2) drops + * all capabilities in your old namespace), and the container cannot be set + * up to have more than one {uid,gid} mapping. This is obviously less than + * ideal. In order to fix this, we have to first clone(2) and then unshare. + * + * Unfortunately, it's not as simple as that. We have to fork to enter the + * PID namespace (the PID namespace only applies to children). Since we'll + * have to double-fork, this clone_parent() call won't be able to get the + * PID of the _actual_ init process (without doing more synchronisation than + * I can deal with at the moment). So we'll just get the parent to send it + * for us, the only job of this process is to update + * /proc/pid/{setgroups,uid_map,gid_map}. + * + * And as a result of the above, we also need to setns(2) in the first child + * because if we join a PID namespace in the topmost parent then our child + * will be in that namespace (and it will not be able to give us a PID value + * that makes sense without resorting to sending things with cmsg). + * + * This also deals with an older issue caused by dumping cloneflags into + * clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so + * we have to unshare(2) before clone(2) in order to do this. This was fixed + * in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was + * introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're + * aware, the last mainline kernel which had this bug was Linux 3.12. + * However, we cannot comment on which kernels the broken patch was + * backported to. 
+ * + * -- Aleksa "what has my life come to?" Sarai + */ + + switch (setjmp(env)) { + /* + * Stage 0: We're in the parent. Our job is just to create a new child + * (stage 1: JUMP_CHILD) process and write its uid_map and + * gid_map. That process will go on to create a new process, then + * it will send us its PID which we will send to the bootstrap + * process. + */ + case JUMP_PARENT:{ + int len; + pid_t child, first_child = -1; + bool ready = false; + + /* For debugging. */ + prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0); + + /* Start the process of getting a container. */ + child = clone_parent(&env, JUMP_CHILD); + if (child < 0) + bail("unable to fork: child_func"); + + /* + * State machine for synchronisation with the children. + * + * Father only return when both child and grandchild are + * ready, so we can receive all possible error codes + * generated by children. + */ + while (!ready) { + enum sync_t s; + + syncfd = sync_child_pipe[1]; + close(sync_child_pipe[0]); + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with child: next state"); + + switch (s) { + case SYNC_USERMAP_PLS: + /* + * Enable setgroups(2) if we've been asked to. But we also + * have to explicitly disable setgroups(2) if we're + * creating a rootless container for single-entry mapping. + * i.e. config.is_setgroup == false. + * (this is required since Linux 3.19). + * + * For rootless multi-entry mapping, config.is_setgroup shall be true and + * newuidmap/newgidmap shall be used. + */ + + if (config.is_rootless_euid && !config.is_setgroup) + update_setgroups(child, SETGROUPS_DENY); + + /* Set up mappings. 
*/ + update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len); + update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len); + + s = SYNC_USERMAP_ACK; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); + } + break; + case SYNC_RECVPID_PLS:{ + first_child = child; + + /* Get the init_func pid. */ + if (read(syncfd, &child, sizeof(child)) != sizeof(child)) { + kill(first_child, SIGKILL); + bail("failed to sync with child: read(childpid)"); + } + + /* Send ACK. */ + s = SYNC_RECVPID_ACK; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(first_child, SIGKILL); + kill(child, SIGKILL); + bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); + } + + /* Send the init_func pid back to our parent. + * + * Send the init_func pid and the pid of the first child back to our parent. + * We need to send both back because we can't reap the first child we created (CLONE_PARENT). + * It becomes the responsibility of our parent to reap the first child. + */ + len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); + if (len < 0) { + kill(child, SIGKILL); + bail("unable to generate JSON for child pid"); + } + } + break; + case SYNC_CHILD_READY: + ready = true; + break; + default: + bail("unexpected sync value: %u", s); + } + } + + /* Now sync with grandchild. 
*/ + + ready = false; + while (!ready) { + enum sync_t s; + + syncfd = sync_grandchild_pipe[1]; + close(sync_grandchild_pipe[0]); + + s = SYNC_GRANDCHILD; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with child: write(SYNC_GRANDCHILD)"); + } + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with child: next state"); + + switch (s) { + case SYNC_CHILD_READY: + ready = true; + break; + default: + bail("unexpected sync value: %u", s); + } + } + exit(0); + } + + /* + * Stage 1: We're in the first child process. Our job is to join any + * provided namespaces in the netlink payload and unshare all + * of the requested namespaces. If we've been asked to + * CLONE_NEWUSER, we will ask our parent (stage 0) to set up + * our user mappings for us. Then, we create a new child + * (stage 2: JUMP_INIT) for PID namespace. We then send the + * child's PID to our parent (stage 0). + */ + case JUMP_CHILD:{ + pid_t child; + enum sync_t s; + + /* We're in a child and thus need to tell the parent if we die. */ + syncfd = sync_child_pipe[0]; + close(sync_child_pipe[1]); + + /* For debugging. */ + prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0); + + /* + * We need to setns first. We cannot do this earlier (in stage 0) + * because of the fact that we forked to get here (the PID of + * [stage 2: JUMP_INIT]) would be meaningless). We could send it + * using cmsg(3) but that's just annoying. + */ + if (config.namespaces) + join_namespaces(config.namespaces); + + /* + * Deal with user namespaces first. They are quite special, as they + * affect our ability to unshare other namespaces and are used as + * context for privilege checks. + * + * We don't unshare all namespaces in one go. The reason for this + * is that, while the kernel documentation may claim otherwise, + * there are certain cases where unsharing all namespaces at once + * will result in namespace objects being owned incorrectly. 
+ * Ideally we should just fix these kernel bugs, but it's better to + * be safe than sorry, and fix them separately. + * + * A specific case of this is that the SELinux label of the + * internal kern-mount that mqueue uses will be incorrect if the + * UTS namespace is cloned before the USER namespace is mapped. + * I've also heard of similar problems with the network namespace + * in some scenarios. This also mirrors how LXC deals with this + * problem. + */ + if (config.cloneflags & CLONE_NEWUSER) { + if (unshare(CLONE_NEWUSER) < 0) + bail("failed to unshare user namespace"); + config.cloneflags &= ~CLONE_NEWUSER; + + /* + * We don't have the privileges to do any mapping here (see the + * clone_parent rant). So signal our parent to hook us up. + */ + + /* Switching is only necessary if we joined namespaces. */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) + bail("failed to set process as dumpable"); + } + s = SYNC_USERMAP_PLS; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); + + /* ... wait for mapping ... */ + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); + if (s != SYNC_USERMAP_ACK) + bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); + /* Switching is only necessary if we joined namespaces. */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + bail("failed to set process as dumpable"); + } + + /* Become root in the namespace proper. */ + if (setresuid(0, 0, 0) < 0) + bail("failed to become root in user namespace"); + } + /* + * Unshare all of the namespaces. Now, it should be noted that this + * ordering might break in the future (especially with rootless + * containers). But for now, it's not possible to split this into + * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. 
+ * + * Note that we don't merge this with clone() because there were + * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) + * was broken, so we'll just do it the long way anyway. + */ + if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) + bail("failed to unshare namespaces"); + + /* + * TODO: What about non-namespace clone flags that we're dropping here? + * + * We fork again because of PID namespace, setns(2) or unshare(2) don't + * change the PID namespace of the calling process, because doing so + * would change the caller's idea of its own PID (as reported by getpid()), + * which would break many applications and libraries, so we must fork + * to actually enter the new PID namespace. + */ + child = clone_parent(&env, JUMP_INIT); + if (child < 0) + bail("unable to fork: init_func"); + + /* Send the child to our parent, which knows what it's doing. */ + s = SYNC_RECVPID_PLS; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with parent: write(SYNC_RECVPID_PLS)"); + } + if (write(syncfd, &child, sizeof(child)) != sizeof(child)) { + kill(child, SIGKILL); + bail("failed to sync with parent: write(childpid)"); + } + + /* ... wait for parent to get the pid ... */ + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with parent: read(SYNC_RECVPID_ACK)"); + } + if (s != SYNC_RECVPID_ACK) { + kill(child, SIGKILL); + bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); + } + + s = SYNC_CHILD_READY; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with parent: write(SYNC_CHILD_READY)"); + } + + /* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */ + exit(0); + } + + /* + * Stage 2: We're the final child process, and the only process that will + * actually return to the Go runtime. 
Our job is to just do the + * final cleanup steps and then return to the Go runtime to allow + * init_linux.go to run. + */ + case JUMP_INIT:{ + /* + * We're inside the child now, having jumped from the + * start_child() code after forking in the parent. + */ + enum sync_t s; + + /* We're in a child and thus need to tell the parent if we die. */ + syncfd = sync_grandchild_pipe[0]; + close(sync_grandchild_pipe[1]); + close(sync_child_pipe[0]); + close(sync_child_pipe[1]); + + /* For debugging. */ + prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0); + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with parent: read(SYNC_GRANDCHILD)"); + if (s != SYNC_GRANDCHILD) + bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s); + + if (setsid() < 0) + bail("setsid failed"); + + if (setuid(0) < 0) + bail("setuid failed"); + + if (setgid(0) < 0) + bail("setgid failed"); + + if (!config.is_rootless_euid && config.is_setgroup) { + if (setgroups(0, NULL) < 0) + bail("setgroups failed"); + } + + /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */ + if (config.cloneflags & CLONE_NEWCGROUP) { + uint8_t value; + if (read(pipenum, &value, sizeof(value)) != sizeof(value)) + bail("read synchronisation value failed"); + if (value == CREATECGROUPNS) { + if (unshare(CLONE_NEWCGROUP) < 0) + bail("failed to unshare cgroup namespace"); + } else + bail("received unknown synchronisation value"); + } + + s = SYNC_CHILD_READY; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with parent: write(SYNC_CHILD_READY)"); + + /* Close sync pipes. */ + close(sync_grandchild_pipe[0]); + + /* Free netlink data. */ + nl_free(&config); + + /* Finish executing, let the Go runtime take over. */ + return; + } + default: + bail("unexpected jump value"); + } + + /* Should never be reached. 
*/ + bail("should never be reached"); +} diff --git a/libcontainer/process.go b/libcontainer/process.go new file mode 100644 index 0000000..d3e472a --- /dev/null +++ b/libcontainer/process.go @@ -0,0 +1,115 @@ +package libcontainer + +import ( + "fmt" + "io" + "math" + "os" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type processOperations interface { + wait() (*os.ProcessState, error) + signal(sig os.Signal) error + pid() int +} + +// Process specifies the configuration and IO for a process inside +// a container. +type Process struct { + // The command to be run followed by any arguments. + Args []string + + // Env specifies the environment variables for the process. + Env []string + + // User will set the uid and gid of the executing process running inside the container + // local to the container's user and group configuration. + User string + + // AdditionalGroups specifies the gids that should be added to supplementary groups + // in addition to those that the user belongs to. + AdditionalGroups []string + + // Cwd will change the processes current working directory inside the container's rootfs. + Cwd string + + // Stdin is a pointer to a reader which provides the standard input stream. + Stdin io.Reader + + // Stdout is a pointer to a writer which receives the standard output stream. + Stdout io.Writer + + // Stderr is a pointer to a writer which receives the standard error stream. 
+ Stderr io.Writer + + // ExtraFiles specifies additional open files to be inherited by the container + ExtraFiles []*os.File + + // Initial sizings for the console + ConsoleWidth uint16 + ConsoleHeight uint16 + + // Capabilities specify the capabilities to keep when executing the process inside the container + // All capabilities not specified will be dropped from the processes capability mask + Capabilities *configs.Capabilities + + // AppArmorProfile specifies the profile to apply to the process and is + // changed at the time the process is execed + AppArmorProfile string + + // Label specifies the label to apply to the process. It is commonly used by selinux + Label string + + // NoNewPrivileges controls whether processes can gain additional privileges. + NoNewPrivileges *bool + + // Rlimits specifies the resource limits, such as max open files, to set in the container + // If Rlimits are not set, the container will inherit rlimits from the parent process + Rlimits []configs.Rlimit + + // ConsoleSocket provides the masterfd console. + ConsoleSocket *os.File + + // Init specifies whether the process is the first process in the container. + Init bool + + ops processOperations + + LogLevel string +} + +// Wait waits for the process to exit. +// Wait releases any resources associated with the Process +func (p Process) Wait() (*os.ProcessState, error) { + if p.ops == nil { + return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps) + } + return p.ops.wait() +} + +// Pid returns the process ID +func (p Process) Pid() (int, error) { + // math.MinInt32 is returned here, because it's invalid value + // for the kill() system call. + if p.ops == nil { + return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps) + } + return p.ops.pid(), nil +} + +// Signal sends a signal to the Process. 
+func (p Process) Signal(sig os.Signal) error { + if p.ops == nil { + return newGenericError(fmt.Errorf("invalid process"), NoProcessOps) + } + return p.ops.signal(sig) +} + +// IO holds the process's STDIO +type IO struct { + Stdin io.WriteCloser + Stdout io.ReadCloser + Stderr io.ReadCloser +} diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go new file mode 100644 index 0000000..de989b5 --- /dev/null +++ b/libcontainer/process_linux.go @@ -0,0 +1,598 @@ +// +build linux + +package libcontainer + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strconv" + "syscall" // only for Signal + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/libcontainer/logs" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" + + "golang.org/x/sys/unix" +) + +// Synchronisation value for cgroup namespace setup. +// The same constant is defined in nsexec.c as "CREATECGROUPNS". +const createCgroupns = 0x80 + +type parentProcess interface { + // pid returns the pid for the running process. + pid() int + + // start starts the process execution. + start() error + + // send a SIGKILL to the process and wait for the exit. + terminate() error + + // wait waits on the process returning the process state. + wait() (*os.ProcessState, error) + + // startTime returns the process start time. 
+ startTime() (uint64, error) + + signal(os.Signal) error + + externalDescriptors() []string + + setExternalDescriptors(fds []string) + + forwardChildLogs() +} + +type filePair struct { + parent *os.File + child *os.File +} + +type setnsProcess struct { + cmd *exec.Cmd + messageSockPair filePair + logFilePair filePair + cgroupPaths map[string]string + rootlessCgroups bool + intelRdtPath string + config *initConfig + fds []string + process *Process + bootstrapData io.Reader +} + +func (p *setnsProcess) startTime() (uint64, error) { + stat, err := system.Stat(p.pid()) + return stat.StartTime, err +} + +func (p *setnsProcess) signal(sig os.Signal) error { + s, ok := sig.(syscall.Signal) + if !ok { + return errors.New("os: unsupported signal type") + } + return unix.Kill(p.pid(), s) +} + +func (p *setnsProcess) start() (err error) { + defer p.messageSockPair.parent.Close() + err = p.cmd.Start() + // close the write-side of the pipes (controlled by child) + p.messageSockPair.child.Close() + p.logFilePair.child.Close() + if err != nil { + return newSystemErrorWithCause(err, "starting setns process") + } + if p.bootstrapData != nil { + if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil { + return newSystemErrorWithCause(err, "copying bootstrap data to pipe") + } + } + if err = p.execSetns(); err != nil { + return newSystemErrorWithCause(err, "executing setns process") + } + if len(p.cgroupPaths) > 0 { + if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups { + return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) + } + } + if p.intelRdtPath != "" { + // if Intel RDT "resource control" filesystem path exists + _, err := os.Stat(p.intelRdtPath) + if err == nil { + if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil { + return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid()) + } + } + } + // set rlimits, this has to be done here 
because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return newSystemErrorWithCause(err, "setting rlimits for process") + } + if err := utils.WriteJSON(p.messageSockPair.parent, p.config); err != nil { + return newSystemErrorWithCause(err, "writing config to pipe") + } + + ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error { + switch sync.Type { + case procReady: + // This shouldn't happen. + panic("unexpected procReady in setns") + case procHooks: + // This shouldn't happen. + panic("unexpected procHooks in setns") + default: + return newSystemError(fmt.Errorf("invalid JSON payload from child")) + } + }) + + if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil { + return newSystemErrorWithCause(err, "calling shutdown on init pipe") + } + // Must be done after Shutdown so the child will exit and we can wait for it. + if ierr != nil { + p.wait() + return ierr + } + return nil +} + +// execSetns runs the process that executes C code to perform the setns calls +// because setns support requires the C process to fork off a child and perform the setns +// before the go runtime boots, we wait on the process to die and receive the child's pid +// over the provided pipe. +func (p *setnsProcess) execSetns() error { + status, err := p.cmd.Process.Wait() + if err != nil { + p.cmd.Wait() + return newSystemErrorWithCause(err, "waiting on setns process to finish") + } + if !status.Success() { + p.cmd.Wait() + return newSystemError(&exec.ExitError{ProcessState: status}) + } + var pid *pid + if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil { + p.cmd.Wait() + return newSystemErrorWithCause(err, "reading pid from init pipe") + } + + // Clean up the zombie parent process + // On Unix systems FindProcess always succeeds. 
+ firstChildProcess, _ := os.FindProcess(pid.PidFirstChild) + + // Ignore the error in case the child has already been reaped for any reason + _, _ = firstChildProcess.Wait() + + process, err := os.FindProcess(pid.Pid) + if err != nil { + return err + } + p.cmd.Process = process + p.process.ops = p + return nil +} + +// terminate sends a SIGKILL to the forked process for the setns routine then waits to +// avoid the process becoming a zombie. +func (p *setnsProcess) terminate() error { + if p.cmd.Process == nil { + return nil + } + err := p.cmd.Process.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *setnsProcess) wait() (*os.ProcessState, error) { + err := p.cmd.Wait() + + // Return actual ProcessState even on Wait error + return p.cmd.ProcessState, err +} + +func (p *setnsProcess) pid() int { + return p.cmd.Process.Pid +} + +func (p *setnsProcess) externalDescriptors() []string { + return p.fds +} + +func (p *setnsProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +func (p *setnsProcess) forwardChildLogs() { + go logs.ForwardLogs(p.logFilePair.parent) +} + +type initProcess struct { + cmd *exec.Cmd + messageSockPair filePair + logFilePair filePair + config *initConfig + manager cgroups.Manager + intelRdtManager intelrdt.Manager + container *linuxContainer + fds []string + process *Process + bootstrapData io.Reader + sharePidns bool +} + +func (p *initProcess) pid() int { + return p.cmd.Process.Pid +} + +func (p *initProcess) externalDescriptors() []string { + return p.fds +} + +// getChildPid receives the final child's pid over the provided pipe. +func (p *initProcess) getChildPid() (int, error) { + var pid pid + if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil { + p.cmd.Wait() + return -1, err + } + + // Clean up the zombie parent process + // On Unix systems FindProcess always succeeds. 
+ firstChildProcess, _ := os.FindProcess(pid.PidFirstChild) + + // Ignore the error in case the child has already been reaped for any reason + _, _ = firstChildProcess.Wait() + + return pid.Pid, nil +} + +func (p *initProcess) waitForChildExit(childPid int) error { + status, err := p.cmd.Process.Wait() + if err != nil { + p.cmd.Wait() + return err + } + if !status.Success() { + p.cmd.Wait() + return &exec.ExitError{ProcessState: status} + } + + process, err := os.FindProcess(childPid) + if err != nil { + return err + } + p.cmd.Process = process + p.process.ops = p + return nil +} + +func (p *initProcess) start() error { + defer p.messageSockPair.parent.Close() + err := p.cmd.Start() + p.process.ops = p + // close the write-side of the pipes (controlled by child) + p.messageSockPair.child.Close() + p.logFilePair.child.Close() + if err != nil { + p.process.ops = nil + return newSystemErrorWithCause(err, "starting init process command") + } + // Do this before syncing with child so that no children can escape the + // cgroup. We don't need to worry about not doing this and not being root + // because we'd be using the rootless cgroup manager in that case. 
+ if err := p.manager.Apply(p.pid()); err != nil { + return newSystemErrorWithCause(err, "applying cgroup configuration for process") + } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Apply(p.pid()); err != nil { + return newSystemErrorWithCause(err, "applying Intel RDT configuration for process") + } + } + defer func() { + if err != nil { + // TODO: should not be the responsibility to call here + p.manager.Destroy() + if p.intelRdtManager != nil { + p.intelRdtManager.Destroy() + } + } + }() + + if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil { + return newSystemErrorWithCause(err, "copying bootstrap data to pipe") + } + childPid, err := p.getChildPid() + if err != nil { + return newSystemErrorWithCause(err, "getting the final child's pid from pipe") + } + + // Save the standard descriptor names before the container process + // can potentially move them (e.g., via dup2()). If we don't do this now, + // we won't know at checkpoint time which file descriptor to look up. 
+ fds, err := getPipeFds(childPid) + if err != nil { + return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid) + } + p.setExternalDescriptors(fds) + // Do this before syncing with child so that no children + // can escape the cgroup + if err := p.manager.Apply(childPid); err != nil { + return newSystemErrorWithCause(err, "applying cgroup configuration for process") + } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Apply(childPid); err != nil { + return newSystemErrorWithCause(err, "applying Intel RDT configuration for process") + } + } + // Now it's time to setup cgroup namespace + if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" { + if _, err := p.messageSockPair.parent.Write([]byte{createCgroupns}); err != nil { + return newSystemErrorWithCause(err, "sending synchronization value to init process") + } + } + + // Wait for our first child to exit + if err := p.waitForChildExit(childPid); err != nil { + return newSystemErrorWithCause(err, "waiting for our first child to exit") + } + + // NOTE(review): this closure tests the function-scope err (assigned by + // getPipeFds above). The `if err := ...` statements that follow declare + // their own err, so their failures presumably never trigger this + // cleanup -- confirm whether that is intended. + defer func() { + if err != nil { + // TODO: should not be the responsibility to call here + p.manager.Destroy() + if p.intelRdtManager != nil { + p.intelRdtManager.Destroy() + } + } + }() + if err := p.createNetworkInterfaces(); err != nil { + return newSystemErrorWithCause(err, "creating network interfaces") + } + if err := p.sendConfig(); err != nil { + return newSystemErrorWithCause(err, "sending config to init process") + } + var ( + sentRun bool + sentResume bool + ) + + ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error { + switch sync.Type { + case procReady: + // set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return newSystemErrorWithCause(err, "setting rlimits for ready process") + } + // call prestart hooks + if 
!p.config.Config.Namespaces.Contains(configs.NEWNS) { + // Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions. + if err := p.manager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting cgroup config for ready process") + } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting Intel RDT config for ready process") + } + } + + if p.config.Config.Hooks != nil { + s, err := p.container.currentOCIState() + if err != nil { + return err + } + // initProcessStartTime hasn't been set yet. + s.Pid = p.cmd.Process.Pid + s.Status = "creating" + for i, hook := range p.config.Config.Hooks.Prestart { + if err := hook.Run(s); err != nil { + return newSystemErrorWithCausef(err, "running prestart hook %d", i) + } + } + } + } + // Sync with child. + if err := writeSync(p.messageSockPair.parent, procRun); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'run'") + } + sentRun = true + case procHooks: + // Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions. + if err := p.manager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting cgroup config for procHooks process") + } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process") + } + } + if p.config.Config.Hooks != nil { + s, err := p.container.currentOCIState() + if err != nil { + return err + } + // initProcessStartTime hasn't been set yet. + s.Pid = p.cmd.Process.Pid + s.Status = "creating" + for i, hook := range p.config.Config.Hooks.Prestart { + if err := hook.Run(s); err != nil { + return newSystemErrorWithCausef(err, "running prestart hook %d", i) + } + } + } + // Sync with child. 
+ if err := writeSync(p.messageSockPair.parent, procResume); err != nil { + return newSystemErrorWithCause(err, "writing syncT 'resume'") + } + sentResume = true + default: + return newSystemError(fmt.Errorf("invalid JSON payload from child")) + } + + return nil + }) + + if !sentRun { + return newSystemErrorWithCause(ierr, "container init") + } + if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume { + return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process")) + } + if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil { + return newSystemErrorWithCause(err, "shutting down init pipe") + } + + // Must be done after Shutdown so the child will exit and we can wait for it. + if ierr != nil { + p.wait() + return ierr + } + return nil +} + +func (p *initProcess) wait() (*os.ProcessState, error) { + err := p.cmd.Wait() + if err != nil { + return p.cmd.ProcessState, err + } + // we should kill all processes in cgroup when init is died if we use host PID namespace + if p.sharePidns { + signalAllProcesses(p.manager, unix.SIGKILL) + } + return p.cmd.ProcessState, nil +} + +func (p *initProcess) terminate() error { + if p.cmd.Process == nil { + return nil + } + err := p.cmd.Process.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *initProcess) startTime() (uint64, error) { + stat, err := system.Stat(p.pid()) + return stat.StartTime, err +} + +func (p *initProcess) sendConfig() error { + // send the config to the container's init process, we don't use JSON Encode + // here because there might be a problem in JSON decoder in some cases, see: + // https://github.com/docker/docker/issues/14203#issuecomment-174177790 + return utils.WriteJSON(p.messageSockPair.parent, p.config) +} + +func (p *initProcess) createNetworkInterfaces() error { + for _, config := range p.config.Config.Networks { + strategy, err := getStrategy(config.Type) + if err 
!= nil { + return err + } + n := &network{ + Network: *config, + } + if err := strategy.create(n, p.pid()); err != nil { + return err + } + p.config.Networks = append(p.config.Networks, n) + } + return nil +} + +func (p *initProcess) signal(sig os.Signal) error { + s, ok := sig.(syscall.Signal) + if !ok { + return errors.New("os: unsupported signal type") + } + return unix.Kill(p.pid(), s) +} + +func (p *initProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +func (p *initProcess) forwardChildLogs() { + go logs.ForwardLogs(p.logFilePair.parent) +} + +func getPipeFds(pid int) ([]string, error) { + fds := make([]string, 3) + + dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") + for i := 0; i < 3; i++ { + // XXX: This breaks if the path is not a valid symlink (which can + // happen in certain particularly unlucky mount namespace setups). + f := filepath.Join(dirPath, strconv.Itoa(i)) + target, err := os.Readlink(f) + if err != nil { + // Ignore permission errors, for rootless containers and other + // non-dumpable processes. if we can't get the fd for a particular + // file, there's not much we can do. + if os.IsPermission(err) { + continue + } + return fds, err + } + fds[i] = target + } + return fds, nil +} + +// InitializeIO creates pipes for use with the process's stdio and returns the +// opposite side for each. Do not use this if you want to have a pseudoterminal +// set up for you by libcontainer (TODO: fix that too). +// TODO: This is mostly unnecessary, and should be handled by clients. 
+func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) { + var fds []uintptr + i = &IO{} + // cleanup in case of an error + defer func() { + if err != nil { + for _, fd := range fds { + unix.Close(int(fd)) + } + } + }() + // STDIN + r, w, err := os.Pipe() + if err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stdin, i.Stdin = r, w + // STDOUT + if r, w, err = os.Pipe(); err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stdout, i.Stdout = w, r + // STDERR + if r, w, err = os.Pipe(); err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stderr, i.Stderr = w, r + // change ownership of the pipes in case we are in a user namespace + for _, fd := range fds { + if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil { + return nil, err + } + } + return i, nil +} diff --git a/libcontainer/restored_process.go b/libcontainer/restored_process.go new file mode 100644 index 0000000..28d52ad --- /dev/null +++ b/libcontainer/restored_process.go @@ -0,0 +1,128 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "os" + + "github.com/opencontainers/runc/libcontainer/system" +) + +func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) { + var ( + err error + ) + proc, err := os.FindProcess(pid) + if err != nil { + return nil, err + } + stat, err := system.Stat(pid) + if err != nil { + return nil, err + } + return &restoredProcess{ + proc: proc, + processStartTime: stat.StartTime, + fds: fds, + }, nil +} + +type restoredProcess struct { + proc *os.Process + processStartTime uint64 + fds []string +} + +func (p *restoredProcess) start() error { + return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) +} + +func (p *restoredProcess) pid() int { + return p.proc.Pid +} + +func (p *restoredProcess) terminate() error { + err := p.proc.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p 
*restoredProcess) wait() (*os.ProcessState, error) { + // TODO: how do we wait on the actual process? + // maybe use --exec-cmd in criu + st, err := p.proc.Wait() + if err != nil { + return nil, err + } + return st, nil +} + +func (p *restoredProcess) startTime() (uint64, error) { + return p.processStartTime, nil +} + +func (p *restoredProcess) signal(s os.Signal) error { + return p.proc.Signal(s) +} + +func (p *restoredProcess) externalDescriptors() []string { + return p.fds +} + +func (p *restoredProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +func (p *restoredProcess) forwardChildLogs() { +} + +// nonChildProcess represents a process where the calling process is not +// the parent process. This process is created when a factory loads a container from +// a persisted state. +type nonChildProcess struct { + processPid int + processStartTime uint64 + fds []string +} + +func (p *nonChildProcess) start() error { + return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) +} + +func (p *nonChildProcess) pid() int { + return p.processPid +} + +func (p *nonChildProcess) terminate() error { + return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError) +} + +func (p *nonChildProcess) wait() (*os.ProcessState, error) { + return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) +} + +func (p *nonChildProcess) startTime() (uint64, error) { + return p.processStartTime, nil +} + +func (p *nonChildProcess) signal(s os.Signal) error { + proc, err := os.FindProcess(p.processPid) + if err != nil { + return err + } + return proc.Signal(s) +} + +func (p *nonChildProcess) externalDescriptors() []string { + return p.fds +} + +func (p *nonChildProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +func (p *nonChildProcess) forwardChildLogs() { +} diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go new file mode 100644 index 
0000000..106c4c2 --- /dev/null +++ b/libcontainer/rootfs_linux.go @@ -0,0 +1,1009 @@ +// +build linux + +package libcontainer + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path" + "path/filepath" + "strings" + "time" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/mrunalp/fileutils" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/mount" + "github.com/opencontainers/runc/libcontainer/system" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/selinux/go-selinux/label" + + "golang.org/x/sys/unix" +) + +const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV + +// needsSetupDev returns true if /dev needs to be set up. +func needsSetupDev(config *configs.Config) bool { + for _, m := range config.Mounts { + if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" { + return false + } + } + return true +} + +// prepareRootfs sets up the devices, mount points, and filesystems for use +// inside a new mount namespace. It doesn't set anything as ro. You must call +// finalizeRootfs after this function to finish setting up the rootfs. 
+func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { + config := iConfig.Config + if err := prepareRoot(config); err != nil { + return newSystemErrorWithCause(err, "preparing rootfs") + } + + hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP) + setupDev := needsSetupDev(config) + for _, m := range config.Mounts { + for _, precmd := range m.PremountCmds { + if err := mountCmd(precmd); err != nil { + return newSystemErrorWithCause(err, "running premount command") + } + } + if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil { + return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination) + } + + for _, postcmd := range m.PostmountCmds { + if err := mountCmd(postcmd); err != nil { + return newSystemErrorWithCause(err, "running postmount command") + } + } + } + + if setupDev { + if err := createDevices(config); err != nil { + return newSystemErrorWithCause(err, "creating device nodes") + } + if err := setupPtmx(config); err != nil { + return newSystemErrorWithCause(err, "setting up ptmx") + } + if err := setupDevSymlinks(config.Rootfs); err != nil { + return newSystemErrorWithCause(err, "setting up /dev symlinks") + } + } + + // Signal the parent to run the pre-start hooks. + // The hooks are run after the mounts are setup, but before we switch to the new + // root, so that the old root is still available in the hooks for any mount + // manipulations. + // Note that iConfig.Cwd is not guaranteed to exist here. + if err := syncParentHooks(pipe); err != nil { + return err + } + + // The reason these operations are done here rather than in finalizeRootfs + // is because the console-handling code gets quite sticky if we have to set + // up the console before doing the pivot_root(2). 
This is because the + // Console API has to also work with the ExecIn case, which means that the + // API must be able to deal with being inside as well as outside the + // container. It's just cleaner to do this here (at the expense of the + // operation not being perfectly split). + + if err := unix.Chdir(config.Rootfs); err != nil { + return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs) + } + + if config.NoPivotRoot { + err = msMoveRoot(config.Rootfs) + } else if config.Namespaces.Contains(configs.NEWNS) { + err = pivotRoot(config.Rootfs) + } else { + err = chroot(config.Rootfs) + } + if err != nil { + return newSystemErrorWithCause(err, "jailing process inside rootfs") + } + + if setupDev { + if err := reOpenDevNull(); err != nil { + return newSystemErrorWithCause(err, "reopening /dev/null inside container") + } + } + + if cwd := iConfig.Cwd; cwd != "" { + // Note that spec.Process.Cwd can contain unclean value like "../../../../foo/bar...". + // However, we are safe to call MkDirAll directly because we are in the jail here. + if err := os.MkdirAll(cwd, 0755); err != nil { + return err + } + } + + return nil +} + +// finalizeRootfs sets anything to ro if necessary. You must call +// prepareRootfs first. 
+func finalizeRootfs(config *configs.Config) (err error) { + // remount dev as ro if specified + for _, m := range config.Mounts { + if libcontainerUtils.CleanPath(m.Destination) == "/dev" { + if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY { + if err := remountReadonly(m); err != nil { + return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination) + } + } + break + } + } + + // set rootfs ( / ) as readonly + if config.Readonlyfs { + if err := setReadonly(); err != nil { + return newSystemErrorWithCause(err, "setting rootfs as readonly") + } + } + + unix.Umask(0022) + return nil +} + +// /tmp has to be mounted as private to allow MS_MOVE to work in all situations +func prepareTmp(topTmpDir string) (string, error) { + tmpdir, err := ioutil.TempDir(topTmpDir, "runctop") + if err != nil { + return "", err + } + if err := unix.Mount(tmpdir, tmpdir, "bind", unix.MS_BIND, ""); err != nil { + return "", err + } + if err := unix.Mount("", tmpdir, "", uintptr(unix.MS_PRIVATE), ""); err != nil { + return "", err + } + return tmpdir, nil +} + +func cleanupTmp(tmpdir string) error { + unix.Unmount(tmpdir, 0) + return os.RemoveAll(tmpdir) +} + +func mountCmd(cmd configs.Command) error { + command := exec.Command(cmd.Path, cmd.Args[:]...) + command.Env = cmd.Env + command.Dir = cmd.Dir + if out, err := command.CombinedOutput(); err != nil { + return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err) + } + return nil +} + +func prepareBindMount(m *configs.Mount, rootfs string) error { + stat, err := os.Stat(m.Source) + if err != nil { + // error out if the source of a bind mount does not exist as we will be + // unable to bind anything to it. + return err + } + // ensure that the destination of the bind mount is resolved of symlinks at mount time because + // any previous mounts can invalidate the next mount's destination. 
+ // this can happen when a user specifies mounts within other mounts to cause breakouts or other + // evil stuff to try to escape the container's rootfs. + var dest string + if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil { + return err + } + if err := checkProcMount(rootfs, dest, m.Source); err != nil { + return err + } + // update the mount with the correct dest after symlinks are resolved. + m.Destination = dest + if err := createIfNotExists(dest, stat.IsDir()); err != nil { + return err + } + + return nil +} + +func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { + binds, err := getCgroupMounts(m) + if err != nil { + return err + } + var merged []string + for _, b := range binds { + ss := filepath.Base(b.Destination) + if strings.Contains(ss, ",") { + merged = append(merged, ss) + } + } + tmpfs := &configs.Mount{ + Source: "tmpfs", + Device: "tmpfs", + Destination: m.Destination, + Flags: defaultMountFlags, + Data: "mode=755", + PropagationFlags: m.PropagationFlags, + } + if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err != nil { + return err + } + for _, b := range binds { + if enableCgroupns { + subsystemPath := filepath.Join(rootfs, b.Destination) + if err := os.MkdirAll(subsystemPath, 0755); err != nil { + return err + } + flags := defaultMountFlags + if m.Flags&unix.MS_RDONLY != 0 { + flags = flags | unix.MS_RDONLY + } + cgroupmount := &configs.Mount{ + Source: "cgroup", + Device: "cgroup", + Destination: subsystemPath, + Flags: flags, + Data: filepath.Base(subsystemPath), + } + if err := mountNewCgroup(cgroupmount); err != nil { + return err + } + } else { + if err := mountToRootfs(b, rootfs, mountLabel, enableCgroupns); err != nil { + return err + } + } + } + for _, mc := range merged { + for _, ss := range strings.Split(mc, ",") { + // symlink(2) is very dumb, it will just shove the path into + // the link and doesn't do any checks or relative path + // conversion. 
Also, don't error out if the cgroup already exists. + if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) { + return err + } + } + } + return nil +} + +func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { + cgroupPath, err := securejoin.SecureJoin(rootfs, m.Destination) + if err != nil { + return err + } + if err := os.MkdirAll(cgroupPath, 0755); err != nil { + return err + } + if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil { + // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158) + if err == unix.EPERM || err == unix.EBUSY { + return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "") + } + return err + } + return nil +} + +func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { + var ( + dest = m.Destination + ) + if !strings.HasPrefix(dest, rootfs) { + dest = filepath.Join(rootfs, dest) + } + + switch m.Device { + case "proc", "sysfs": + // If the destination already exists and is not a directory, we bail + // out This is to avoid mounting through a symlink or similar -- which + // has been a "fun" attack scenario in the past. + // TODO: This won't be necessary once we switch to libpathrs and we can + // stop all of these symlink-exchange attacks. 
+ if fi, err := os.Lstat(dest); err != nil { + if !os.IsNotExist(err) { + return err + } + } else if fi.Mode()&os.ModeDir == 0 { + return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) + } + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + // Selinux kernels do not support labeling of /proc or /sys + return mountPropagate(m, rootfs, "") + case "mqueue": + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + if err := mountPropagate(m, rootfs, mountLabel); err != nil { + // older kernels do not support labeling of /dev/mqueue + if err := mountPropagate(m, rootfs, ""); err != nil { + return err + } + return label.SetFileLabel(dest, mountLabel) + } + return nil + case "tmpfs": + copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP + tmpDir := "" + stat, err := os.Stat(dest) + if err != nil { + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + } + if copyUp { + tmpdir, err := prepareTmp("/tmp") + if err != nil { + return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir") + } + defer cleanupTmp(tmpdir) + tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir") + if err != nil { + return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir") + } + defer os.RemoveAll(tmpDir) + m.Destination = tmpDir + } + if err := mountPropagate(m, rootfs, mountLabel); err != nil { + return err + } + if copyUp { + if err := fileutils.CopyDirectory(dest, tmpDir); err != nil { + errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err) + if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil { + return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) + } + return errMsg + } + if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil { + errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err) + if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil { + return 
newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) + } + return errMsg + } + } + if stat != nil { + if err = os.Chmod(dest, stat.Mode()); err != nil { + return err + } + } + return nil + case "bind": + if err := prepareBindMount(m, rootfs); err != nil { + return err + } + if err := mountPropagate(m, rootfs, mountLabel); err != nil { + return err + } + // bind mount won't change mount options, we need remount to make mount options effective. + // first check that we have non-default options required before attempting a remount + if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 { + // only remount if unique mount options are set + if err := remount(m, rootfs); err != nil { + return err + } + } + + if m.Relabel != "" { + if err := label.Validate(m.Relabel); err != nil { + return err + } + shared := label.IsShared(m.Relabel) + if err := label.Relabel(m.Source, mountLabel, shared); err != nil { + return err + } + } + case "cgroup": + if cgroups.IsCgroup2UnifiedMode() { + if err := mountCgroupV2(m, rootfs, mountLabel, enableCgroupns); err != nil { + return err + } + } else { + + if err := mountCgroupV1(m, rootfs, mountLabel, enableCgroupns); err != nil { + return err + } + } + if m.Flags&unix.MS_RDONLY != 0 { + // remount cgroup root as readonly + mcgrouproot := &configs.Mount{ + Source: m.Destination, + Device: "bind", + Destination: m.Destination, + Flags: defaultMountFlags | unix.MS_RDONLY | unix.MS_BIND, + } + if err := remount(mcgrouproot, rootfs); err != nil { + return err + } + } + default: + // ensure that the destination of the mount is resolved of symlinks at mount time because + // any previous mounts can invalidate the next mount's destination. + // this can happen when a user specifies mounts within other mounts to cause breakouts or other + // evil stuff to try to escape the container's rootfs. 
+ var err error + if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil { + return err + } + if err := checkProcMount(rootfs, dest, m.Source); err != nil { + return err + } + // update the mount with the correct dest after symlinks are resolved. + m.Destination = dest + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + return mountPropagate(m, rootfs, mountLabel) + } + return nil +} + +func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { + mounts, err := cgroups.GetCgroupMounts(false) + if err != nil { + return nil, err + } + + cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + var binds []*configs.Mount + + for _, mm := range mounts { + dir, err := mm.GetOwnCgroup(cgroupPaths) + if err != nil { + return nil, err + } + relDir, err := filepath.Rel(mm.Root, dir) + if err != nil { + return nil, err + } + binds = append(binds, &configs.Mount{ + Device: "bind", + Source: filepath.Join(mm.Mountpoint, relDir), + Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)), + Flags: unix.MS_BIND | unix.MS_REC | m.Flags, + PropagationFlags: m.PropagationFlags, + }) + } + + return binds, nil +} + +// checkProcMount checks to ensure that the mount destination is not over the top of /proc. +// dest is required to be an abs path and have any symlinks resolved before calling this function. +// +// if source is nil, don't stat the filesystem. This is used for restore of a checkpoint. +func checkProcMount(rootfs, dest, source string) error { + const procPath = "/proc" + // White list, it should be sub directories of invalid destinations + validDestinations := []string{ + // These entries can be bind mounted by files emulated by fuse, + // so commands like top, free displays stats in container. 
+ "/proc/cpuinfo", + "/proc/diskstats", + "/proc/meminfo", + "/proc/stat", + "/proc/swaps", + "/proc/uptime", + "/proc/loadavg", + "/proc/net/dev", + } + for _, valid := range validDestinations { + path, err := filepath.Rel(filepath.Join(rootfs, valid), dest) + if err != nil { + return err + } + if path == "." { + return nil + } + } + path, err := filepath.Rel(filepath.Join(rootfs, procPath), dest) + if err != nil { + return err + } + // pass if the mount path is located outside of /proc + if strings.HasPrefix(path, "..") { + return nil + } + if path == "." { + // an empty source is pasted on restore + if source == "" { + return nil + } + // only allow a mount on-top of proc if it's source is "proc" + isproc, err := isProc(source) + if err != nil { + return err + } + // pass if the mount is happening on top of /proc and the source of + // the mount is a proc filesystem + if isproc { + return nil + } + return fmt.Errorf("%q cannot be mounted because it is not of type proc", dest) + } + return fmt.Errorf("%q cannot be mounted because it is inside /proc", dest) +} + +func isProc(path string) (bool, error) { + var s unix.Statfs_t + if err := unix.Statfs(path, &s); err != nil { + return false, err + } + return s.Type == unix.PROC_SUPER_MAGIC, nil +} + +func setupDevSymlinks(rootfs string) error { + var links = [][2]string{ + {"/proc/self/fd", "/dev/fd"}, + {"/proc/self/fd/0", "/dev/stdin"}, + {"/proc/self/fd/1", "/dev/stdout"}, + {"/proc/self/fd/2", "/dev/stderr"}, + } + // kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink + // in /dev if it exists in /proc. 
+ if _, err := os.Stat("/proc/kcore"); err == nil { + links = append(links, [2]string{"/proc/kcore", "/dev/core"}) + } + for _, link := range links { + var ( + src = link[0] + dst = filepath.Join(rootfs, link[1]) + ) + if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) { + return fmt.Errorf("symlink %s %s %s", src, dst, err) + } + } + return nil +} + +// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs +// this method will make them point to `/dev/null` in this container's rootfs. This +// needs to be called after we chroot/pivot into the container's rootfs so that any +// symlinks are resolved locally. +func reOpenDevNull() error { + var stat, devNullStat unix.Stat_t + file, err := os.OpenFile("/dev/null", os.O_RDWR, 0) + if err != nil { + return fmt.Errorf("Failed to open /dev/null - %s", err) + } + defer file.Close() + if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil { + return err + } + for fd := 0; fd < 3; fd++ { + if err := unix.Fstat(fd, &stat); err != nil { + return err + } + if stat.Rdev == devNullStat.Rdev { + // Close and re-open the fd. + if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil { + return err + } + } + } + return nil +} + +// Create the device nodes in the container. +func createDevices(config *configs.Config) error { + useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER) + oldMask := unix.Umask(0000) + for _, node := range config.Devices { + // containers running in a user namespace are not allowed to mknod + // devices so we can just bind mount it from the host. 
+ if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil { + unix.Umask(oldMask) + return err + } + } + unix.Umask(oldMask) + return nil +} + +func bindMountDeviceNode(dest string, node *configs.Device) error { + f, err := os.Create(dest) + if err != nil && !os.IsExist(err) { + return err + } + if f != nil { + f.Close() + } + return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "") +} + +// Creates the device node in the rootfs of the container. +func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { + dest := filepath.Join(rootfs, node.Path) + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + return err + } + + if bind { + return bindMountDeviceNode(dest, node) + } + if err := mknodDevice(dest, node); err != nil { + if os.IsExist(err) { + return nil + } else if os.IsPermission(err) { + return bindMountDeviceNode(dest, node) + } + return err + } + return nil +} + +func mknodDevice(dest string, node *configs.Device) error { + fileMode := node.FileMode + switch node.Type { + case 'c', 'u': + fileMode |= unix.S_IFCHR + case 'b': + fileMode |= unix.S_IFBLK + case 'p': + fileMode |= unix.S_IFIFO + default: + return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) + } + if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil { + return err + } + return unix.Chown(dest, int(node.Uid), int(node.Gid)) +} + +func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { + for _, m := range mountinfo { + if m.Mountpoint == dir { + return m + } + } + return nil +} + +// Get the parent mount point of directory passed in as argument. Also return +// optional fields. 
+func getParentMount(rootfs string) (string, string, error) { + var path string + + mountinfos, err := mount.GetMounts() + if err != nil { + return "", "", err + } + + mountinfo := getMountInfo(mountinfos, rootfs) + if mountinfo != nil { + return rootfs, mountinfo.Optional, nil + } + + path = rootfs + for { + path = filepath.Dir(path) + + mountinfo = getMountInfo(mountinfos, path) + if mountinfo != nil { + return path, mountinfo.Optional, nil + } + + if path == "/" { + break + } + } + + // If we are here, we did not find parent mount. Something is wrong. + return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs) +} + +// Make parent mount private if it was shared +func rootfsParentMountPrivate(rootfs string) error { + sharedMount := false + + parentMount, optionalOpts, err := getParentMount(rootfs) + if err != nil { + return err + } + + optsSplit := strings.Split(optionalOpts, " ") + for _, opt := range optsSplit { + if strings.HasPrefix(opt, "shared:") { + sharedMount = true + break + } + } + + // Make parent mount PRIVATE if it was shared. It is needed for two + // reasons. First of all pivot_root() will fail if parent mount is + // shared. Secondly when we bind mount rootfs it will propagate to + // parent namespace and we don't want that to happen. + if sharedMount { + return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "") + } + + return nil +} + +func prepareRoot(config *configs.Config) error { + flag := unix.MS_SLAVE | unix.MS_REC + if config.RootPropagation != 0 { + flag = config.RootPropagation + } + if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil { + return err + } + + // Make parent mount private to make sure following bind mount does + // not propagate in other namespaces. Also it will help with kernel + // check pass in pivot_root. 
(IS_SHARED(new_mnt->mnt_parent)) + if err := rootfsParentMountPrivate(config.Rootfs); err != nil { + return err + } + + return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "") +} + +func setReadonly() error { + return unix.Mount("/", "/", "bind", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "") +} + +func setupPtmx(config *configs.Config) error { + ptmx := filepath.Join(config.Rootfs, "dev/ptmx") + if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink("pts/ptmx", ptmx); err != nil { + return fmt.Errorf("symlink dev ptmx %s", err) + } + return nil +} + +// pivotRoot will call pivot_root such that rootfs becomes the new root +// filesystem, and everything else is cleaned up. +func pivotRoot(rootfs string) error { + // While the documentation may claim otherwise, pivot_root(".", ".") is + // actually valid. What this results in is / being the new root but + // /proc/self/cwd being the old root. Since we can play around with the cwd + // with pivot_root this allows us to pivot without creating directories in + // the rootfs. Shout-outs to the LXC developers for giving us this idea. + + oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return err + } + defer unix.Close(oldroot) + + newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return err + } + defer unix.Close(newroot) + + // Change to the new root so that the pivot_root actually acts on it. + if err := unix.Fchdir(newroot); err != nil { + return err + } + + if err := unix.PivotRoot(".", "."); err != nil { + return fmt.Errorf("pivot_root %s", err) + } + + // Currently our "." is oldroot (according to the current kernel code). + // However, purely for safety, we will fchdir(oldroot) since there isn't + // really any guarantee from the kernel what /proc/self/cwd will be after a + // pivot_root(2). 
+ + if err := unix.Fchdir(oldroot); err != nil { + return err + } + + // Make oldroot rslave to make sure our unmounts don't propagate to the + // host (and thus bork the machine). We don't use rprivate because this is + // known to cause issues due to races where we still have a reference to a + // mount while a process in the host namespace are trying to operate on + // something they think has no mounts (devicemapper in particular). + if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { + return err + } + // Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. + if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { + return err + } + + // Switch back to our shiny new root. + if err := unix.Chdir("/"); err != nil { + return fmt.Errorf("chdir / %s", err) + } + return nil +} + +func msMoveRoot(rootfs string) error { + mountinfos, err := mount.GetMounts() + if err != nil { + return err + } + + absRootfs, err := filepath.Abs(rootfs) + if err != nil { + return err + } + + for _, info := range mountinfos { + p, err := filepath.Abs(info.Mountpoint) + if err != nil { + return err + } + // Umount every syfs and proc file systems, except those under the container rootfs + if (info.Fstype != "proc" && info.Fstype != "sysfs") || filepath.HasPrefix(p, absRootfs) { + continue + } + // Be sure umount events are not propagated to the host. + if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { + return err + } + if err := unix.Unmount(p, unix.MNT_DETACH); err != nil { + if err != unix.EINVAL && err != unix.EPERM { + return err + } else { + // If we have not privileges for umounting (e.g. rootless), then + // cover the path. 
+ if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil { + return err + } + } + } + } + if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil { + return err + } + return chroot(rootfs) +} + +func chroot(rootfs string) error { + if err := unix.Chroot("."); err != nil { + return err + } + return unix.Chdir("/") +} + +// createIfNotExists creates a file or a directory only if it does not already exist. +func createIfNotExists(path string, isDir bool) error { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + if isDir { + return os.MkdirAll(path, 0755) + } + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return err + } + f, err := os.OpenFile(path, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + return nil +} + +// readonlyPath will make a path read only. +func readonlyPath(path string) error { + if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "") +} + +// remountReadonly will remount an existing mount point and ensure that it is read-only. +func remountReadonly(m *configs.Mount) error { + var ( + dest = m.Destination + flags = m.Flags + ) + for i := 0; i < 5; i++ { + // There is a special case in the kernel for + // MS_REMOUNT | MS_BIND, which allows us to change only the + // flags even as an unprivileged user (i.e. user namespace) + // assuming we don't drop any security related flags (nodev, + // nosuid, etc.). So, let's use that case so that we can do + // this re-mount without failing in a userns. 
+ flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY + if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil { + switch err { + case unix.EBUSY: + time.Sleep(100 * time.Millisecond) + continue + default: + return err + } + } + return nil + } + return fmt.Errorf("unable to mount %s as readonly max retries reached", dest) +} + +// maskPath masks the top of the specified path inside a container to avoid +// security issues from processes reading information from non-namespace aware +// mounts ( proc/kcore ). +// For files, maskPath bind mounts /dev/null over the top of the specified path. +// For directories, maskPath mounts read-only tmpfs over the top of the specified path. +func maskPath(path string, mountLabel string) error { + if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) { + if err == unix.ENOTDIR { + return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel)) + } + return err + } + return nil +} + +// writeSystemProperty writes the value to a path under /proc/sys as determined from the key. +// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward. +func writeSystemProperty(key, value string) error { + keyPath := strings.Replace(key, ".", "/", -1) + return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644) +} + +func remount(m *configs.Mount, rootfs string) error { + var ( + dest = m.Destination + ) + if !strings.HasPrefix(dest, rootfs) { + dest = filepath.Join(rootfs, dest) + } + return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "") +} + +// Do the mount operation followed by additional mounts required to take care +// of propagation flags. 
+func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error { + var ( + dest = m.Destination + data = label.FormatMountLabel(m.Data, mountLabel) + flags = m.Flags + ) + if libcontainerUtils.CleanPath(dest) == "/dev" { + flags &= ^unix.MS_RDONLY + } + + copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP + if !(copyUp || strings.HasPrefix(dest, rootfs)) { + dest = filepath.Join(rootfs, dest) + } + + if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil { + return err + } + + for _, pflag := range m.PropagationFlags { + if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil { + return err + } + } + return nil +} + +func mountNewCgroup(m *configs.Mount) error { + var ( + data = m.Data + source = m.Source + ) + if data == "systemd" { + data = cgroups.CgroupNamePrefix + data + source = "systemd" + } + if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil { + return err + } + return nil +} diff --git a/libcontainer/rootfs_linux_test.go b/libcontainer/rootfs_linux_test.go new file mode 100644 index 0000000..1bfe7c6 --- /dev/null +++ b/libcontainer/rootfs_linux_test.go @@ -0,0 +1,101 @@ +// +build linux + +package libcontainer + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +func TestCheckMountDestOnProc(t *testing.T) { + dest := "/rootfs/proc/sys" + err := checkProcMount("/rootfs", dest, "") + if err == nil { + t.Fatal("destination inside proc should return an error") + } +} + +func TestCheckMountDestOnProcChroot(t *testing.T) { + dest := "/rootfs/proc/" + err := checkProcMount("/rootfs", dest, "/proc") + if err != nil { + t.Fatal("destination inside proc when using chroot should not return an error") + } +} + +func TestCheckMountDestInSys(t *testing.T) { + dest := "/rootfs//sys/fs/cgroup" + err := checkProcMount("/rootfs", dest, "") + if err != nil { + t.Fatal("destination inside /sys should not return an error") + } +} + +func 
TestCheckMountDestFalsePositive(t *testing.T) { + dest := "/rootfs/sysfiles/fs/cgroup" + err := checkProcMount("/rootfs", dest, "") + if err != nil { + t.Fatal(err) + } +} + +func TestNeedsSetupDev(t *testing.T) { + config := &configs.Config{ + Mounts: []*configs.Mount{ + { + Device: "bind", + Source: "/dev", + Destination: "/dev", + }, + }, + } + if needsSetupDev(config) { + t.Fatal("expected needsSetupDev to be false, got true") + } +} + +func TestNeedsSetupDevStrangeSource(t *testing.T) { + config := &configs.Config{ + Mounts: []*configs.Mount{ + { + Device: "bind", + Source: "/devx", + Destination: "/dev", + }, + }, + } + if needsSetupDev(config) { + t.Fatal("expected needsSetupDev to be false, got true") + } +} + +func TestNeedsSetupDevStrangeDest(t *testing.T) { + config := &configs.Config{ + Mounts: []*configs.Mount{ + { + Device: "bind", + Source: "/dev", + Destination: "/devx", + }, + }, + } + if !needsSetupDev(config) { + t.Fatal("expected needsSetupDev to be true, got false") + } +} + +func TestNeedsSetupDevStrangeSourceDest(t *testing.T) { + config := &configs.Config{ + Mounts: []*configs.Mount{ + { + Device: "bind", + Source: "/devx", + Destination: "/devx", + }, + }, + } + if !needsSetupDev(config) { + t.Fatal("expected needsSetupDev to be true, got false") + } +} diff --git a/libcontainer/seccomp/config.go b/libcontainer/seccomp/config.go new file mode 100644 index 0000000..c321227 --- /dev/null +++ b/libcontainer/seccomp/config.go @@ -0,0 +1,77 @@ +package seccomp + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var operators = map[string]configs.Operator{ + "SCMP_CMP_NE": configs.NotEqualTo, + "SCMP_CMP_LT": configs.LessThan, + "SCMP_CMP_LE": configs.LessThanOrEqualTo, + "SCMP_CMP_EQ": configs.EqualTo, + "SCMP_CMP_GE": configs.GreaterThanOrEqualTo, + "SCMP_CMP_GT": configs.GreaterThan, + "SCMP_CMP_MASKED_EQ": configs.MaskEqualTo, +} + +var actions = map[string]configs.Action{ + "SCMP_ACT_KILL": configs.Kill, + 
"SCMP_ACT_ERRNO": configs.Errno, + "SCMP_ACT_TRAP": configs.Trap, + "SCMP_ACT_ALLOW": configs.Allow, + "SCMP_ACT_TRACE": configs.Trace, + "SCMP_ACT_LOG": configs.Log, +} + +var archs = map[string]string{ + "SCMP_ARCH_X86": "x86", + "SCMP_ARCH_X86_64": "amd64", + "SCMP_ARCH_X32": "x32", + "SCMP_ARCH_ARM": "arm", + "SCMP_ARCH_AARCH64": "arm64", + "SCMP_ARCH_MIPS": "mips", + "SCMP_ARCH_MIPS64": "mips64", + "SCMP_ARCH_MIPS64N32": "mips64n32", + "SCMP_ARCH_MIPSEL": "mipsel", + "SCMP_ARCH_MIPSEL64": "mipsel64", + "SCMP_ARCH_MIPSEL64N32": "mipsel64n32", + "SCMP_ARCH_PPC": "ppc", + "SCMP_ARCH_PPC64": "ppc64", + "SCMP_ARCH_PPC64LE": "ppc64le", + "SCMP_ARCH_S390": "s390", + "SCMP_ARCH_S390X": "s390x", +} + +// ConvertStringToOperator converts a string into a Seccomp comparison operator. +// Comparison operators use the names they are assigned by Libseccomp's header. +// Attempting to convert a string that is not a valid operator results in an +// error. +func ConvertStringToOperator(in string) (configs.Operator, error) { + if op, ok := operators[in]; ok == true { + return op, nil + } + return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in) +} + +// ConvertStringToAction converts a string into a Seccomp rule match action. +// Actions use the names they are assigned in Libseccomp's header, though some +// (notable, SCMP_ACT_TRACE) are not available in this implementation and will +// return errors. +// Attempting to convert a string that is not a valid action results in an +// error. +func ConvertStringToAction(in string) (configs.Action, error) { + if act, ok := actions[in]; ok == true { + return act, nil + } + return 0, fmt.Errorf("string %s is not a valid action for seccomp", in) +} + +// ConvertStringToArch converts a string into a Seccomp comparison arch. 
+func ConvertStringToArch(in string) (string, error) { + if arch, ok := archs[in]; ok == true { + return arch, nil + } + return "", fmt.Errorf("string %s is not a valid arch for seccomp", in) +} diff --git a/libcontainer/seccomp/fixtures/proc_self_status b/libcontainer/seccomp/fixtures/proc_self_status new file mode 100644 index 0000000..0e0084f --- /dev/null +++ b/libcontainer/seccomp/fixtures/proc_self_status @@ -0,0 +1,47 @@ +Name: cat +State: R (running) +Tgid: 19383 +Ngid: 0 +Pid: 19383 +PPid: 19275 +TracerPid: 0 +Uid: 1000 1000 1000 1000 +Gid: 1000 1000 1000 1000 +FDSize: 256 +Groups: 24 25 27 29 30 44 46 102 104 108 111 1000 1001 +NStgid: 19383 +NSpid: 19383 +NSpgid: 19383 +NSsid: 19275 +VmPeak: 5944 kB +VmSize: 5944 kB +VmLck: 0 kB +VmPin: 0 kB +VmHWM: 744 kB +VmRSS: 744 kB +VmData: 324 kB +VmStk: 136 kB +VmExe: 48 kB +VmLib: 1776 kB +VmPTE: 32 kB +VmPMD: 12 kB +VmSwap: 0 kB +Threads: 1 +SigQ: 0/30067 +SigPnd: 0000000000000000 +ShdPnd: 0000000000000000 +SigBlk: 0000000000000000 +SigIgn: 0000000000000080 +SigCgt: 0000000000000000 +CapInh: 0000000000000000 +CapPrm: 0000000000000000 +CapEff: 0000000000000000 +CapBnd: 0000003fffffffff +CapAmb: 0000000000000000 +Seccomp: 0 +Cpus_allowed: f +Cpus_allowed_list: 0-3 +Mems_allowed: 00000000,00000001 +Mems_allowed_list: 0 +voluntary_ctxt_switches: 0 +nonvoluntary_ctxt_switches: 1 diff --git a/libcontainer/seccomp/seccomp_linux.go b/libcontainer/seccomp/seccomp_linux.go new file mode 100644 index 0000000..1b7a071 --- /dev/null +++ b/libcontainer/seccomp/seccomp_linux.go @@ -0,0 +1,261 @@ +// +build linux,cgo,seccomp + +package seccomp + +import ( + "bufio" + "fmt" + "os" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" + libseccomp "github.com/seccomp/libseccomp-golang" + + "golang.org/x/sys/unix" +) + +var ( + actAllow = libseccomp.ActAllow + actTrap = libseccomp.ActTrap + actKill = libseccomp.ActKill + actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM)) + actLog = libseccomp.ActLog + 
actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) +) + +const ( + // Linux system calls can have at most 6 arguments + syscallMaxArguments int = 6 +) + +// Filters given syscalls in a container, preventing them from being used +// Started in the container init process, and carried over to all child processes +// Setns calls, however, require a separate invocation, as they are not children +// of the init until they join the namespace +func InitSeccomp(config *configs.Seccomp) error { + if config == nil { + return fmt.Errorf("cannot initialize Seccomp - nil config passed") + } + + defaultAction, err := getAction(config.DefaultAction) + if err != nil { + return fmt.Errorf("error initializing seccomp - invalid default action") + } + + filter, err := libseccomp.NewFilter(defaultAction) + if err != nil { + return fmt.Errorf("error creating filter: %s", err) + } + + // Add extra architectures + for _, arch := range config.Architectures { + scmpArch, err := libseccomp.GetArchFromString(arch) + if err != nil { + return fmt.Errorf("error validating Seccomp architecture: %s", err) + } + + if err := filter.AddArch(scmpArch); err != nil { + return fmt.Errorf("error adding architecture to seccomp filter: %s", err) + } + } + + // Unset no new privs bit + if err := filter.SetNoNewPrivsBit(false); err != nil { + return fmt.Errorf("error setting no new privileges: %s", err) + } + + // Add a rule for each syscall + for _, call := range config.Syscalls { + if call == nil { + return fmt.Errorf("encountered nil syscall while initializing Seccomp") + } + + if err = matchCall(filter, call); err != nil { + return err + } + } + + if err = filter.Load(); err != nil { + return fmt.Errorf("error loading seccomp filter into kernel: %s", err) + } + + return nil +} + +// IsEnabled returns if the kernel has been configured to support seccomp. 
+func IsEnabled() bool { + // Try to read from /proc/self/status for kernels > 3.8 + s, err := parseStatusFile("/proc/self/status") + if err != nil { + // Check if Seccomp is supported, via CONFIG_SECCOMP. + if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL { + // Make sure the kernel has CONFIG_SECCOMP_FILTER. + if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL { + return true + } + } + return false + } + _, ok := s["Seccomp"] + return ok +} + +// Convert Libcontainer Action to Libseccomp ScmpAction +func getAction(act configs.Action) (libseccomp.ScmpAction, error) { + switch act { + case configs.Kill: + return actKill, nil + case configs.Errno: + return actErrno, nil + case configs.Trap: + return actTrap, nil + case configs.Allow: + return actAllow, nil + case configs.Trace: + return actTrace, nil + case configs.Log: + return actLog, nil + default: + return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule") + } +} + +// Convert Libcontainer Operator to Libseccomp ScmpCompareOp +func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { + switch op { + case configs.EqualTo: + return libseccomp.CompareEqual, nil + case configs.NotEqualTo: + return libseccomp.CompareNotEqual, nil + case configs.GreaterThan: + return libseccomp.CompareGreater, nil + case configs.GreaterThanOrEqualTo: + return libseccomp.CompareGreaterEqual, nil + case configs.LessThan: + return libseccomp.CompareLess, nil + case configs.LessThanOrEqualTo: + return libseccomp.CompareLessOrEqual, nil + case configs.MaskEqualTo: + return libseccomp.CompareMaskedEqual, nil + default: + return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule") + } +} + +// Convert Libcontainer Arg to Libseccomp ScmpCondition +func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { + cond := libseccomp.ScmpCondition{} + + if arg == nil { + return cond, fmt.Errorf("cannot 
convert nil to syscall condition") + } + + op, err := getOperator(arg.Op) + if err != nil { + return cond, err + } + + return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) +} + +// Add a rule to match a single syscall +func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { + if call == nil || filter == nil { + return fmt.Errorf("cannot use nil as syscall to block") + } + + if len(call.Name) == 0 { + return fmt.Errorf("empty string is not a valid syscall") + } + + // If we can't resolve the syscall, assume it's not supported on this kernel + // Ignore it, don't error out + callNum, err := libseccomp.GetSyscallFromName(call.Name) + if err != nil { + return nil + } + + // Convert the call's action to the libseccomp equivalent + callAct, err := getAction(call.Action) + if err != nil { + return fmt.Errorf("action in seccomp profile is invalid: %s", err) + } + + // Unconditional match - just add the rule + if len(call.Args) == 0 { + if err = filter.AddRule(callNum, callAct); err != nil { + return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err) + } + } else { + // If two or more arguments have the same condition, + // Revert to old behavior, adding each condition as a separate rule + argCounts := make([]uint, syscallMaxArguments) + conditions := []libseccomp.ScmpCondition{} + + for _, cond := range call.Args { + newCond, err := getCondition(cond) + if err != nil { + return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err) + } + + argCounts[cond.Index] += 1 + + conditions = append(conditions, newCond) + } + + hasMultipleArgs := false + for _, count := range argCounts { + if count > 1 { + hasMultipleArgs = true + break + } + } + + if hasMultipleArgs { + // Revert to old behavior + // Add each condition attached to a separate rule + for _, cond := range conditions { + condArr := []libseccomp.ScmpCondition{cond} + + if err = filter.AddRuleConditional(callNum, 
// parseStatusFile reads the file at path, which is expected to consist of
// "key:value" lines (the layout of /proc/self/status), and returns the pairs
// as a map.
//
// Each line is split at its first ':' only, so a value that itself contains a
// ':' is kept intact. The value is stored verbatim, including any whitespace
// that follows the separator. Lines without a ':' are skipped. An error is
// returned when the file cannot be opened or when scanning it fails.
func parseStatusFile(path string) (map[string]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	s := bufio.NewScanner(f)
	status := make(map[string]string)

	for s.Scan() {
		// Limit the split to two fields: everything after the first ':'
		// belongs to the value.
		parts := strings.SplitN(s.Text(), ":", 2)
		if len(parts) != 2 {
			continue
		}

		status[parts[0]] = parts[1]
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	return status, nil
}
// Init prepares the calling process to run l.config.Args and then replaces the
// process image via system.Execv.
//
// The steps run in a fixed order: session keyring (unless NoNewKeyring),
// console, no_new_privs, process label, seccomp (early or late, see below),
// finalizeNamespace, AppArmor profile, exec. Any failing step aborts the
// sequence and its error is returned. The value returned on the last line is
// whatever system.Execv returns.
func (l *linuxSetnsInit) Init() error {
	// Pin this goroutine to its OS thread for the whole setup sequence.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	if !l.config.Config.NoNewKeyring {
		// Apply the process label to keys created below, and reset the key
		// label on the way out.
		if err := label.SetKeyLabel(l.config.ProcessLabel); err != nil {
			return err
		}
		defer label.SetKeyLabel("")
		// Do not inherit the parent's session keyring.
		if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
			// Same justification as in standard_init_linux.go as to why we
			// don't bail on ENOSYS.
			//
			// TODO(cyphar): And we should have logging here too.
			if errors.Cause(err) != unix.ENOSYS {
				return errors.Wrap(err, "join session keyring")
			}
		}
	}
	if l.config.CreateConsole {
		// Set up the console using the console socket, then make it the
		// controlling terminal.
		if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
			return err
		}
		if err := system.Setctty(); err != nil {
			return err
		}
	}
	if l.config.NoNewPrivileges {
		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
			return err
		}
	}
	// Set the process label; the deferred call resets it if Init returns.
	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
		return err
	}
	defer label.SetProcessLabel("")
	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
	// do this before dropping capabilities; otherwise do it as late as possible
	// just before execve so as few syscalls take place after it as possible.
	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
			return err
		}
	}
	// NOTE(review): per the comment above, finalizeNamespace is presumably
	// where capabilities are dropped -- confirm against its definition.
	if err := finalizeNamespace(l.config); err != nil {
		return err
	}
	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
		return err
	}
	// Set seccomp as close to execve as possible, so as few syscalls take
	// place afterward (reducing the amount of syscalls that users need to
	// enable in their seccomp profiles).
	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
			return newSystemErrorWithCause(err, "init seccomp")
		}
	}
	// Args[0] is used both as the program to execute and as argv[0]; the
	// current process environment is passed through unchanged.
	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}
+func Example() *specs.Spec { + return &specs.Spec{ + Version: specs.Version, + Root: &specs.Root{ + Path: "rootfs", + Readonly: true, + }, + Process: &specs.Process{ + Terminal: true, + User: specs.User{}, + Args: []string{ + "sh", + }, + Env: []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm", + }, + Cwd: "/", + NoNewPrivileges: true, + Capabilities: &specs.LinuxCapabilities{ + Bounding: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + Permitted: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + Inheritable: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + Ambient: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + Effective: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + }, + Rlimits: []specs.POSIXRlimit{ + { + Type: "RLIMIT_NOFILE", + Hard: uint64(1024), + Soft: uint64(1024), + }, + }, + }, + Hostname: "runc", + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "proc", + Source: "proc", + Options: nil, + }, + { + Destination: "/dev", + Type: "tmpfs", + Source: "tmpfs", + Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, + }, + { + Destination: "/dev/pts", + Type: "devpts", + Source: "devpts", + Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, + }, + { + Destination: "/dev/shm", + Type: "tmpfs", + Source: "shm", + Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, + }, + { + Destination: "/dev/mqueue", + Type: "mqueue", + Source: "mqueue", + Options: []string{"nosuid", "noexec", "nodev"}, + }, + { + Destination: "/sys", + Type: "sysfs", + Source: "sysfs", + Options: []string{"nosuid", "noexec", "nodev", "ro"}, + }, + { + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Source: "cgroup", + Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, + }, + }, + Linux: 
&specs.Linux{ + MaskedPaths: []string{ + "/proc/acpi", + "/proc/asound", + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/proc/scsi", + }, + ReadonlyPaths: []string{ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger", + }, + Resources: &specs.LinuxResources{ + Devices: []specs.LinuxDeviceCgroup{ + { + Allow: false, + Access: "rwm", + }, + }, + }, + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + }, + { + Type: "network", + }, + { + Type: "ipc", + }, + { + Type: "uts", + }, + { + Type: "mount", + }, + }, + }, + } +} + +// ToRootless converts the given spec file into one that should work with +// rootless containers (euid != 0), by removing incompatible options and adding others that +// are needed. +func ToRootless(spec *specs.Spec) { + var namespaces []specs.LinuxNamespace + + // Remove networkns from the spec. + for _, ns := range spec.Linux.Namespaces { + switch ns.Type { + case specs.NetworkNamespace, specs.UserNamespace: + // Do nothing. + default: + namespaces = append(namespaces, ns) + } + } + // Add userns to the spec. + namespaces = append(namespaces, specs.LinuxNamespace{ + Type: specs.UserNamespace, + }) + spec.Linux.Namespaces = namespaces + + // Add mappings for the current user. + spec.Linux.UIDMappings = []specs.LinuxIDMapping{{ + HostID: uint32(os.Geteuid()), + ContainerID: 0, + Size: 1, + }} + spec.Linux.GIDMappings = []specs.LinuxIDMapping{{ + HostID: uint32(os.Getegid()), + ContainerID: 0, + Size: 1, + }} + + // Fix up mounts. + var mounts []specs.Mount + for _, mount := range spec.Mounts { + // Ignore all mounts that are under /sys. + if strings.HasPrefix(mount.Destination, "/sys") { + continue + } + + // Remove all gid= and uid= mappings. 
+ var options []string + for _, option := range mount.Options { + if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { + options = append(options, option) + } + } + + mount.Options = options + mounts = append(mounts, mount) + } + // Add the sysfs mount as an rbind. + mounts = append(mounts, specs.Mount{ + Source: "/sys", + Destination: "/sys", + Type: "none", + Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, + }) + spec.Mounts = mounts + + // Remove cgroup settings. + spec.Linux.Resources = nil +} diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go new file mode 100644 index 0000000..d9e73c4 --- /dev/null +++ b/libcontainer/specconv/spec_linux.go @@ -0,0 +1,839 @@ +// +build linux + +// Package specconv implements conversion of specifications to libcontainer +// configurations +package specconv + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/seccomp" + libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/runtime-spec/specs-go" + + "golang.org/x/sys/unix" +) + +const wildcard = -1 + +var namespaceMapping = map[specs.LinuxNamespaceType]configs.NamespaceType{ + specs.PIDNamespace: configs.NEWPID, + specs.NetworkNamespace: configs.NEWNET, + specs.MountNamespace: configs.NEWNS, + specs.UserNamespace: configs.NEWUSER, + specs.IPCNamespace: configs.NEWIPC, + specs.UTSNamespace: configs.NEWUTS, + specs.CgroupNamespace: configs.NEWCGROUP, +} + +var mountPropagationMapping = map[string]int{ + "rprivate": unix.MS_PRIVATE | unix.MS_REC, + "private": unix.MS_PRIVATE, + "rslave": unix.MS_SLAVE | unix.MS_REC, + "slave": unix.MS_SLAVE, + "rshared": unix.MS_SHARED | unix.MS_REC, + "shared": unix.MS_SHARED, + "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, + "unbindable": unix.MS_UNBINDABLE, + "": 0, +} + +// AllowedDevices is exposed for 
// CreateOpts carries the inputs that CreateLibcontainerConfig and
// CreateCgroupConfig use to build a libcontainer configuration.
type CreateOpts struct {
	// CgroupName becomes the cgroup name when the spec does not supply a
	// cgroups path.
	CgroupName string
	// UseSystemdCgroup makes CreateCgroupConfig interpret the spec's cgroups
	// path as "slice:prefix:name" instead of a filesystem-style path.
	UseSystemdCgroup bool
	// NoPivotRoot is copied to the resulting config's NoPivotRoot field.
	NoPivotRoot bool
	// NoNewKeyring is copied to the resulting config's NoNewKeyring field.
	NoNewKeyring bool
	// Spec is the runtime specification being converted.
	Spec *specs.Spec
	// RootlessEUID is copied to the resulting config's RootlessEUID field.
	RootlessEUID bool
	// RootlessCgroups is copied to the resulting config's RootlessCgroups
	// field.
	RootlessCgroups bool
}
fmt.Errorf("Root must be specified") + } + rootfsPath := spec.Root.Path + if !filepath.IsAbs(rootfsPath) { + rootfsPath = filepath.Join(cwd, rootfsPath) + } + labels := []string{} + for k, v := range spec.Annotations { + labels = append(labels, fmt.Sprintf("%s=%s", k, v)) + } + config := &configs.Config{ + Rootfs: rootfsPath, + NoPivotRoot: opts.NoPivotRoot, + Readonlyfs: spec.Root.Readonly, + Hostname: spec.Hostname, + Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)), + NoNewKeyring: opts.NoNewKeyring, + RootlessEUID: opts.RootlessEUID, + RootlessCgroups: opts.RootlessCgroups, + } + + exists := false + for _, m := range spec.Mounts { + config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m)) + } + if err := createDevices(spec, config); err != nil { + return nil, err + } + c, err := CreateCgroupConfig(opts) + if err != nil { + return nil, err + } + config.Cgroups = c + // set linux-specific config + if spec.Linux != nil { + if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists { + return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation) + } + if config.NoPivotRoot && (config.RootPropagation&unix.MS_PRIVATE != 0) { + return nil, fmt.Errorf("rootfsPropagation of [r]private is not safe without pivot_root") + } + + for _, ns := range spec.Linux.Namespaces { + t, exists := namespaceMapping[ns.Type] + if !exists { + return nil, fmt.Errorf("namespace %q does not exist", ns) + } + if config.Namespaces.Contains(t) { + return nil, fmt.Errorf("malformed spec file: duplicated ns %q", ns) + } + config.Namespaces.Add(t, ns.Path) + } + if config.Namespaces.Contains(configs.NEWNET) && config.Namespaces.PathOf(configs.NEWNET) == "" { + config.Networks = []*configs.Network{ + { + Type: "loopback", + }, + } + } + if config.Namespaces.Contains(configs.NEWUSER) { + if err := setupUserNamespace(spec, config); err != nil { + return nil, err + } + } + config.MaskPaths = 
spec.Linux.MaskedPaths + config.ReadonlyPaths = spec.Linux.ReadonlyPaths + config.MountLabel = spec.Linux.MountLabel + config.Sysctl = spec.Linux.Sysctl + if spec.Linux.Seccomp != nil { + seccomp, err := SetupSeccomp(spec.Linux.Seccomp) + if err != nil { + return nil, err + } + config.Seccomp = seccomp + } + if spec.Linux.IntelRdt != nil { + config.IntelRdt = &configs.IntelRdt{} + if spec.Linux.IntelRdt.L3CacheSchema != "" { + config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema + } + if spec.Linux.IntelRdt.MemBwSchema != "" { + config.IntelRdt.MemBwSchema = spec.Linux.IntelRdt.MemBwSchema + } + } + } + if spec.Process != nil { + config.OomScoreAdj = spec.Process.OOMScoreAdj + if spec.Process.SelinuxLabel != "" { + config.ProcessLabel = spec.Process.SelinuxLabel + } + if spec.Process.Capabilities != nil { + config.Capabilities = &configs.Capabilities{ + Bounding: spec.Process.Capabilities.Bounding, + Effective: spec.Process.Capabilities.Effective, + Permitted: spec.Process.Capabilities.Permitted, + Inheritable: spec.Process.Capabilities.Inheritable, + Ambient: spec.Process.Capabilities.Ambient, + } + } + } + createHooks(spec, config) + config.Version = specs.Version + return config, nil +} + +func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount { + flags, pgflags, data, ext := parseMountOptions(m.Options) + source := m.Source + device := m.Type + if flags&unix.MS_BIND != 0 { + // Any "type" the user specified is meaningless (and ignored) for + // bind-mounts -- so we set it to "bind" because rootfs_linux.go + // (incorrectly) relies on this for some checks. 
+ device = "bind" + if !filepath.IsAbs(source) { + source = filepath.Join(cwd, m.Source) + } + } + return &configs.Mount{ + Device: device, + Source: source, + Destination: m.Destination, + Data: data, + Flags: flags, + PropagationFlags: pgflags, + Extensions: ext, + } +} + +func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { + var ( + myCgroupPath string + + spec = opts.Spec + useSystemdCgroup = opts.UseSystemdCgroup + name = opts.CgroupName + ) + + c := &configs.Cgroup{ + Resources: &configs.Resources{}, + } + + if spec.Linux != nil && spec.Linux.CgroupsPath != "" { + myCgroupPath = libcontainerUtils.CleanPath(spec.Linux.CgroupsPath) + if useSystemdCgroup { + myCgroupPath = spec.Linux.CgroupsPath + } + } + + if useSystemdCgroup { + if myCgroupPath == "" { + c.Parent = "system.slice" + c.ScopePrefix = "runc" + c.Name = name + } else { + // Parse the path from expected "slice:prefix:name" + // for e.g. "system.slice:docker:1234" + parts := strings.Split(myCgroupPath, ":") + if len(parts) != 3 { + return nil, fmt.Errorf("expected cgroupsPath to be of format \"slice:prefix:name\" for systemd cgroups, got %q instead", myCgroupPath) + } + c.Parent = parts[0] + c.ScopePrefix = parts[1] + c.Name = parts[2] + } + } else { + if myCgroupPath == "" { + c.Name = name + } + c.Path = myCgroupPath + } + + // In rootless containers, any attempt to make cgroup changes is likely to fail. + // libcontainer will validate this but ignores the error. 
+ c.Resources.AllowedDevices = AllowedDevices + if spec.Linux != nil { + r := spec.Linux.Resources + if r == nil { + return c, nil + } + for i, d := range spec.Linux.Resources.Devices { + var ( + t = "a" + major = int64(-1) + minor = int64(-1) + ) + if d.Type != "" { + t = d.Type + } + if d.Major != nil { + major = *d.Major + } + if d.Minor != nil { + minor = *d.Minor + } + if d.Access == "" { + return nil, fmt.Errorf("device access at %d field cannot be empty", i) + } + dt, err := stringToCgroupDeviceRune(t) + if err != nil { + return nil, err + } + dd := &configs.Device{ + Type: dt, + Major: major, + Minor: minor, + Permissions: d.Access, + Allow: d.Allow, + } + c.Resources.Devices = append(c.Resources.Devices, dd) + } + if r.Memory != nil { + if r.Memory.Limit != nil { + c.Resources.Memory = *r.Memory.Limit + } + if r.Memory.Reservation != nil { + c.Resources.MemoryReservation = *r.Memory.Reservation + } + if r.Memory.Swap != nil { + c.Resources.MemorySwap = *r.Memory.Swap + } + if r.Memory.Kernel != nil { + c.Resources.KernelMemory = *r.Memory.Kernel + } + if r.Memory.KernelTCP != nil { + c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP + } + if r.Memory.Swappiness != nil { + c.Resources.MemorySwappiness = r.Memory.Swappiness + } + if r.Memory.DisableOOMKiller != nil { + c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller + } + } + if r.CPU != nil { + if r.CPU.Shares != nil { + c.Resources.CpuShares = *r.CPU.Shares + } + if r.CPU.Quota != nil { + c.Resources.CpuQuota = *r.CPU.Quota + } + if r.CPU.Period != nil { + c.Resources.CpuPeriod = *r.CPU.Period + } + if r.CPU.RealtimeRuntime != nil { + c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime + } + if r.CPU.RealtimePeriod != nil { + c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod + } + if r.CPU.Cpus != "" { + c.Resources.CpusetCpus = r.CPU.Cpus + } + if r.CPU.Mems != "" { + c.Resources.CpusetMems = r.CPU.Mems + } + } + if r.Pids != nil { + c.Resources.PidsLimit = r.Pids.Limit + } + if r.BlockIO != nil { + 
if r.BlockIO.Weight != nil { + c.Resources.BlkioWeight = *r.BlockIO.Weight + } + if r.BlockIO.LeafWeight != nil { + c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight + } + if r.BlockIO.WeightDevice != nil { + for _, wd := range r.BlockIO.WeightDevice { + var weight, leafWeight uint16 + if wd.Weight != nil { + weight = *wd.Weight + } + if wd.LeafWeight != nil { + leafWeight = *wd.LeafWeight + } + weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight) + c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice) + } + } + if r.BlockIO.ThrottleReadBpsDevice != nil { + for _, td := range r.BlockIO.ThrottleReadBpsDevice { + rate := td.Rate + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) + c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice) + } + } + if r.BlockIO.ThrottleWriteBpsDevice != nil { + for _, td := range r.BlockIO.ThrottleWriteBpsDevice { + rate := td.Rate + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) + c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice) + } + } + if r.BlockIO.ThrottleReadIOPSDevice != nil { + for _, td := range r.BlockIO.ThrottleReadIOPSDevice { + rate := td.Rate + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) + c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice) + } + } + if r.BlockIO.ThrottleWriteIOPSDevice != nil { + for _, td := range r.BlockIO.ThrottleWriteIOPSDevice { + rate := td.Rate + throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) + c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice) + } + } + } + for _, l := range r.HugepageLimits { + c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{ + Pagesize: l.Pagesize, + Limit: l.Limit, + }) + } + if 
// stringToDeviceRune maps a device type given as a one-character string
// ("p", "u", "b" or "c") to the corresponding rune. Any other input yields
// rune 0 and an error naming the rejected value.
func stringToDeviceRune(s string) (rune, error) {
	for _, kind := range [...]rune{'p', 'u', 'b', 'c'} {
		if s == string(kind) {
			return kind, nil
		}
	}
	return 0, fmt.Errorf("invalid device type %q", s)
}
stringToDeviceRune(d.Type) + if err != nil { + return err + } + if d.FileMode != nil { + filemode = *d.FileMode + } + device := &configs.Device{ + Type: dt, + Path: d.Path, + Major: d.Major, + Minor: d.Minor, + FileMode: filemode, + Uid: uid, + Gid: gid, + } + config.Devices = append(config.Devices, device) + } + } + return nil +} + +func setupUserNamespace(spec *specs.Spec, config *configs.Config) error { + create := func(m specs.LinuxIDMapping) configs.IDMap { + return configs.IDMap{ + HostID: int(m.HostID), + ContainerID: int(m.ContainerID), + Size: int(m.Size), + } + } + if spec.Linux != nil { + for _, m := range spec.Linux.UIDMappings { + config.UidMappings = append(config.UidMappings, create(m)) + } + for _, m := range spec.Linux.GIDMappings { + config.GidMappings = append(config.GidMappings, create(m)) + } + } + rootUID, err := config.HostRootUID() + if err != nil { + return err + } + rootGID, err := config.HostRootGID() + if err != nil { + return err + } + for _, node := range config.Devices { + node.Uid = uint32(rootUID) + node.Gid = uint32(rootGID) + } + return nil +} + +// parseMountOptions parses the string and returns the flags, propagation +// flags and any mount data that it contains. 
+func parseMountOptions(options []string) (int, []int, string, int) { + var ( + flag int + pgflag []int + data []string + extFlags int + ) + flags := map[string]struct { + clear bool + flag int + }{ + "acl": {false, unix.MS_POSIXACL}, + "async": {true, unix.MS_SYNCHRONOUS}, + "atime": {true, unix.MS_NOATIME}, + "bind": {false, unix.MS_BIND}, + "defaults": {false, 0}, + "dev": {true, unix.MS_NODEV}, + "diratime": {true, unix.MS_NODIRATIME}, + "dirsync": {false, unix.MS_DIRSYNC}, + "exec": {true, unix.MS_NOEXEC}, + "iversion": {false, unix.MS_I_VERSION}, + "lazytime": {false, unix.MS_LAZYTIME}, + "loud": {true, unix.MS_SILENT}, + "mand": {false, unix.MS_MANDLOCK}, + "noacl": {true, unix.MS_POSIXACL}, + "noatime": {false, unix.MS_NOATIME}, + "nodev": {false, unix.MS_NODEV}, + "nodiratime": {false, unix.MS_NODIRATIME}, + "noexec": {false, unix.MS_NOEXEC}, + "noiversion": {true, unix.MS_I_VERSION}, + "nolazytime": {true, unix.MS_LAZYTIME}, + "nomand": {true, unix.MS_MANDLOCK}, + "norelatime": {true, unix.MS_RELATIME}, + "nostrictatime": {true, unix.MS_STRICTATIME}, + "nosuid": {false, unix.MS_NOSUID}, + "rbind": {false, unix.MS_BIND | unix.MS_REC}, + "relatime": {false, unix.MS_RELATIME}, + "remount": {false, unix.MS_REMOUNT}, + "ro": {false, unix.MS_RDONLY}, + "rw": {true, unix.MS_RDONLY}, + "silent": {false, unix.MS_SILENT}, + "strictatime": {false, unix.MS_STRICTATIME}, + "suid": {true, unix.MS_NOSUID}, + "sync": {false, unix.MS_SYNCHRONOUS}, + } + propagationFlags := map[string]int{ + "private": unix.MS_PRIVATE, + "shared": unix.MS_SHARED, + "slave": unix.MS_SLAVE, + "unbindable": unix.MS_UNBINDABLE, + "rprivate": unix.MS_PRIVATE | unix.MS_REC, + "rshared": unix.MS_SHARED | unix.MS_REC, + "rslave": unix.MS_SLAVE | unix.MS_REC, + "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, + } + extensionFlags := map[string]struct { + clear bool + flag int + }{ + "tmpcopyup": {false, configs.EXT_COPYUP}, + } + for _, o := range options { + // If the option does not exist in the 
flags table or the flag + // is not supported on the platform, + // then it is a data value for a specific fs type + if f, exists := flags[o]; exists && f.flag != 0 { + if f.clear { + flag &= ^f.flag + } else { + flag |= f.flag + } + } else if f, exists := propagationFlags[o]; exists && f != 0 { + pgflag = append(pgflag, f) + } else if f, exists := extensionFlags[o]; exists && f.flag != 0 { + if f.clear { + extFlags &= ^f.flag + } else { + extFlags |= f.flag + } + } else { + data = append(data, o) + } + } + return flag, pgflag, strings.Join(data, ","), extFlags +} + +func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) { + if config == nil { + return nil, nil + } + + // No default action specified, no syscalls listed, assume seccomp disabled + if config.DefaultAction == "" && len(config.Syscalls) == 0 { + return nil, nil + } + + newConfig := new(configs.Seccomp) + newConfig.Syscalls = []*configs.Syscall{} + + if len(config.Architectures) > 0 { + newConfig.Architectures = []string{} + for _, arch := range config.Architectures { + newArch, err := seccomp.ConvertStringToArch(string(arch)) + if err != nil { + return nil, err + } + newConfig.Architectures = append(newConfig.Architectures, newArch) + } + } + + // Convert default action from string representation + newDefaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction)) + if err != nil { + return nil, err + } + newConfig.DefaultAction = newDefaultAction + + // Loop through all syscall blocks and convert them to libcontainer format + for _, call := range config.Syscalls { + newAction, err := seccomp.ConvertStringToAction(string(call.Action)) + if err != nil { + return nil, err + } + + for _, name := range call.Names { + newCall := configs.Syscall{ + Name: name, + Action: newAction, + Args: []*configs.Arg{}, + } + // Loop through all the arguments of the syscall and convert them + for _, arg := range call.Args { + newOp, err := seccomp.ConvertStringToOperator(string(arg.Op)) + 
if err != nil { + return nil, err + } + + newArg := configs.Arg{ + Index: arg.Index, + Value: arg.Value, + ValueTwo: arg.ValueTwo, + Op: newOp, + } + + newCall.Args = append(newCall.Args, &newArg) + } + newConfig.Syscalls = append(newConfig.Syscalls, &newCall) + } + } + + return newConfig, nil +} + +func createHooks(rspec *specs.Spec, config *configs.Config) { + config.Hooks = &configs.Hooks{} + if rspec.Hooks != nil { + + for _, h := range rspec.Hooks.Prestart { + cmd := createCommandHook(h) + config.Hooks.Prestart = append(config.Hooks.Prestart, configs.NewCommandHook(cmd)) + } + for _, h := range rspec.Hooks.Poststart { + cmd := createCommandHook(h) + config.Hooks.Poststart = append(config.Hooks.Poststart, configs.NewCommandHook(cmd)) + } + for _, h := range rspec.Hooks.Poststop { + cmd := createCommandHook(h) + config.Hooks.Poststop = append(config.Hooks.Poststop, configs.NewCommandHook(cmd)) + } + } +} + +func createCommandHook(h specs.Hook) configs.Command { + cmd := configs.Command{ + Path: h.Path, + Args: h.Args, + Env: h.Env, + } + if h.Timeout != nil { + d := time.Duration(*h.Timeout) * time.Second + cmd.Timeout = &d + } + return cmd +} diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go new file mode 100644 index 0000000..da6a43a --- /dev/null +++ b/libcontainer/specconv/spec_linux_test.go @@ -0,0 +1,452 @@ +// +build linux + +package specconv + +import ( + "os" + "strings" + "testing" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/configs/validate" + "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestCreateCommandHookTimeout(t *testing.T) { + timeout := 3600 + hook := specs.Hook{ + Path: "/some/hook/path", + Args: []string{"--some", "thing"}, + Env: []string{"SOME=value"}, + Timeout: &timeout, + } + command := createCommandHook(hook) + timeoutStr := command.Timeout.String() + if timeoutStr != "1h0m0s" { + 
t.Errorf("Expected the Timeout to be 1h0m0s, got: %s", timeoutStr) + } +} + +func TestCreateHooks(t *testing.T) { + rspec := &specs.Spec{ + Hooks: &specs.Hooks{ + Prestart: []specs.Hook{ + { + Path: "/some/hook/path", + }, + { + Path: "/some/hook2/path", + Args: []string{"--some", "thing"}, + }, + }, + Poststart: []specs.Hook{ + { + Path: "/some/hook/path", + Args: []string{"--some", "thing"}, + Env: []string{"SOME=value"}, + }, + { + Path: "/some/hook2/path", + }, + { + Path: "/some/hook3/path", + }, + }, + Poststop: []specs.Hook{ + { + Path: "/some/hook/path", + Args: []string{"--some", "thing"}, + Env: []string{"SOME=value"}, + }, + { + Path: "/some/hook2/path", + }, + { + Path: "/some/hook3/path", + }, + { + Path: "/some/hook4/path", + Args: []string{"--some", "thing"}, + }, + }, + }, + } + conf := &configs.Config{} + createHooks(rspec, conf) + + prestart := conf.Hooks.Prestart + + if len(prestart) != 2 { + t.Error("Expected 2 Prestart hooks") + } + + poststart := conf.Hooks.Poststart + + if len(poststart) != 3 { + t.Error("Expected 3 Poststart hooks") + } + + poststop := conf.Hooks.Poststop + + if len(poststop) != 4 { + t.Error("Expected 4 Poststop hooks") + } + +} +func TestSetupSeccomp(t *testing.T) { + conf := &specs.LinuxSeccomp{ + DefaultAction: "SCMP_ACT_ERRNO", + Architectures: []specs.Arch{specs.ArchX86_64, specs.ArchARM}, + Syscalls: []specs.LinuxSyscall{ + { + Names: []string{"clone"}, + Action: "SCMP_ACT_ALLOW", + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP, + ValueTwo: 0, + Op: "SCMP_CMP_MASKED_EQ", + }, + }, + }, + { + Names: []string{ + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "send", + "sendfile", + }, + Action: "SCMP_ACT_ALLOW", + }, + }, + } + seccomp, err := SetupSeccomp(conf) + + if err != nil { + t.Errorf("Couldn't create Seccomp config: %v", err) + } + + if 
seccomp.DefaultAction != 2 { // SCMP_ACT_ERRNO + t.Error("Wrong conversion for DefaultAction") + } + + if len(seccomp.Architectures) != 2 { + t.Error("Wrong number of architectures") + } + + if seccomp.Architectures[0] != "amd64" || seccomp.Architectures[1] != "arm" { + t.Error("Expected architectures are not found") + } + + calls := seccomp.Syscalls + + callsLength := len(calls) + if callsLength != 8 { + t.Errorf("Expected 8 syscalls, got :%d", callsLength) + } + + for i, call := range calls { + if i == 0 { + expectedCloneSyscallArgs := configs.Arg{ + Index: 0, + Op: 7, // SCMP_CMP_MASKED_EQ + Value: unix.CLONE_NEWNS | unix.CLONE_NEWUTS | unix.CLONE_NEWIPC | unix.CLONE_NEWUSER | unix.CLONE_NEWPID | unix.CLONE_NEWNET | unix.CLONE_NEWCGROUP, + ValueTwo: 0, + } + if expectedCloneSyscallArgs != *call.Args[0] { + t.Errorf("Wrong arguments conversion for the clone syscall under test") + } + } + if call.Action != 4 { + t.Error("Wrong conversion for the clone syscall action") + } + + } + +} + +func TestLinuxCgroupWithMemoryResource(t *testing.T) { + cgroupsPath := "/user/cgroups/path/id" + + spec := &specs.Spec{} + devices := []specs.LinuxDeviceCgroup{ + { + Allow: false, + Access: "rwm", + }, + } + + limit := int64(100) + reservation := int64(50) + swap := int64(20) + kernel := int64(40) + kernelTCP := int64(45) + swappiness := uint64(1) + swappinessPtr := &swappiness + disableOOMKiller := true + resources := &specs.LinuxResources{ + Devices: devices, + Memory: &specs.LinuxMemory{ + Limit: &limit, + Reservation: &reservation, + Swap: &swap, + Kernel: &kernel, + KernelTCP: &kernelTCP, + Swappiness: swappinessPtr, + DisableOOMKiller: &disableOOMKiller, + }, + } + spec.Linux = &specs.Linux{ + CgroupsPath: cgroupsPath, + Resources: resources, + } + + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + } + + cgroup, err := CreateCgroupConfig(opts) + if err != nil { + t.Errorf("Couldn't create Cgroup config: %v", err) + } + + if 
cgroup.Path != cgroupsPath { + t.Errorf("Wrong cgroupsPath, expected '%s' got '%s'", cgroupsPath, cgroup.Path) + } + if cgroup.Resources.Memory != limit { + t.Errorf("Expected to have %d as memory limit, got %d", limit, cgroup.Resources.Memory) + } + if cgroup.Resources.MemoryReservation != reservation { + t.Errorf("Expected to have %d as memory reservation, got %d", reservation, cgroup.Resources.MemoryReservation) + } + if cgroup.Resources.MemorySwap != swap { + t.Errorf("Expected to have %d as swap, got %d", swap, cgroup.Resources.MemorySwap) + } + if cgroup.Resources.KernelMemory != kernel { + t.Errorf("Expected to have %d as Kernel Memory, got %d", kernel, cgroup.Resources.KernelMemory) + } + if cgroup.Resources.KernelMemoryTCP != kernelTCP { + t.Errorf("Expected to have %d as TCP Kernel Memory, got %d", kernelTCP, cgroup.Resources.KernelMemoryTCP) + } + if cgroup.Resources.MemorySwappiness != swappinessPtr { + t.Errorf("Expected to have %d as memory swappiness, got %d", swappinessPtr, cgroup.Resources.MemorySwappiness) + } + if cgroup.Resources.OomKillDisable != disableOOMKiller { + t.Errorf("The OOMKiller should be enabled") + } +} + +func TestLinuxCgroupSystemd(t *testing.T) { + cgroupsPath := "parent:scopeprefix:name" + + spec := &specs.Spec{} + spec.Linux = &specs.Linux{ + CgroupsPath: cgroupsPath, + } + + opts := &CreateOpts{ + UseSystemdCgroup: true, + Spec: spec, + } + + cgroup, err := CreateCgroupConfig(opts) + + if err != nil { + t.Errorf("Couldn't create Cgroup config: %v", err) + } + + expectedParent := "parent" + if cgroup.Parent != expectedParent { + t.Errorf("Expected to have %s as Parent instead of %s", expectedParent, cgroup.Parent) + } + + expectedScopePrefix := "scopeprefix" + if cgroup.ScopePrefix != expectedScopePrefix { + t.Errorf("Expected to have %s as ScopePrefix instead of %s", expectedScopePrefix, cgroup.ScopePrefix) + } + + expectedName := "name" + if cgroup.Name != expectedName { + t.Errorf("Expected to have %s as Name instead of 
%s", expectedName, cgroup.Name) + } +} + +func TestLinuxCgroupSystemdWithEmptyPath(t *testing.T) { + cgroupsPath := "" + + spec := &specs.Spec{} + spec.Linux = &specs.Linux{ + CgroupsPath: cgroupsPath, + } + + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: true, + Spec: spec, + } + + cgroup, err := CreateCgroupConfig(opts) + + if err != nil { + t.Errorf("Couldn't create Cgroup config: %v", err) + } + + expectedParent := "system.slice" + if cgroup.Parent != expectedParent { + t.Errorf("Expected to have %s as Parent instead of %s", expectedParent, cgroup.Parent) + } + + expectedScopePrefix := "runc" + if cgroup.ScopePrefix != expectedScopePrefix { + t.Errorf("Expected to have %s as ScopePrefix instead of %s", expectedScopePrefix, cgroup.ScopePrefix) + } + + if cgroup.Name != opts.CgroupName { + t.Errorf("Expected to have %s as Name instead of %s", opts.CgroupName, cgroup.Name) + } +} + +func TestLinuxCgroupSystemdWithInvalidPath(t *testing.T) { + cgroupsPath := "/user/cgroups/path/id" + + spec := &specs.Spec{} + spec.Linux = &specs.Linux{ + CgroupsPath: cgroupsPath, + } + + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: true, + Spec: spec, + } + + _, err := CreateCgroupConfig(opts) + if err == nil { + t.Error("Expected to produce an error if not using the correct format for cgroup paths belonging to systemd") + } +} +func TestLinuxCgroupsPathSpecified(t *testing.T) { + cgroupsPath := "/user/cgroups/path/id" + + spec := &specs.Spec{} + spec.Linux = &specs.Linux{ + CgroupsPath: cgroupsPath, + } + + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + } + + cgroup, err := CreateCgroupConfig(opts) + if err != nil { + t.Errorf("Couldn't create Cgroup config: %v", err) + } + + if cgroup.Path != cgroupsPath { + t.Errorf("Wrong cgroupsPath, expected '%s' got '%s'", cgroupsPath, cgroup.Path) + } +} + +func TestLinuxCgroupsPathNotSpecified(t *testing.T) { + spec := &specs.Spec{} + opts := 
&CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + } + + cgroup, err := CreateCgroupConfig(opts) + if err != nil { + t.Errorf("Couldn't create Cgroup config: %v", err) + } + + if cgroup.Path != "" { + t.Errorf("Wrong cgroupsPath, expected it to be empty string, got '%s'", cgroup.Path) + } +} + +func TestSpecconvExampleValidate(t *testing.T) { + spec := Example() + spec.Root.Path = "/" + + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + } + + config, err := CreateLibcontainerConfig(opts) + if err != nil { + t.Errorf("Couldn't create libcontainer config: %v", err) + } + + validator := validate.New() + if err := validator.Validate(config); err != nil { + t.Errorf("Expected specconv to produce valid container config: %v", err) + } +} + +func TestDupNamespaces(t *testing.T) { + spec := &specs.Spec{ + Root: &specs.Root{ + Path: "rootfs", + }, + Linux: &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + }, + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, + }, + }, + } + + _, err := CreateLibcontainerConfig(&CreateOpts{ + Spec: spec, + }) + + if !strings.Contains(err.Error(), "malformed spec file: duplicated ns") { + t.Errorf("Duplicated namespaces should be forbidden") + } +} + +func TestNonZeroEUIDCompatibleSpecconvValidate(t *testing.T) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + t.Skip("userns is unsupported") + } + + spec := Example() + spec.Root.Path = "/" + ToRootless(spec) + + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + RootlessEUID: true, + RootlessCgroups: true, + } + + config, err := CreateLibcontainerConfig(opts) + if err != nil { + t.Errorf("Couldn't create libcontainer config: %v", err) + } + + validator := validate.New() + if err := validator.Validate(config); err != nil { + t.Errorf("Expected specconv to produce valid rootless container config: %v", err) + } +} diff --git 
a/libcontainer/stacktrace/capture.go b/libcontainer/stacktrace/capture.go new file mode 100644 index 0000000..0bbe149 --- /dev/null +++ b/libcontainer/stacktrace/capture.go @@ -0,0 +1,27 @@ +package stacktrace + +import "runtime" + +// Capture captures a stacktrace for the current calling go program +// +// skip is the number of frames to skip +func Capture(userSkip int) Stacktrace { + var ( + skip = userSkip + 1 // add one for our own function + frames []Frame + prevPc uintptr + ) + for i := skip; ; i++ { + pc, file, line, ok := runtime.Caller(i) + //detect if caller is repeated to avoid loop, gccgo + //currently runs into a loop without this check + if !ok || pc == prevPc { + break + } + frames = append(frames, NewFrame(pc, file, line)) + prevPc = pc + } + return Stacktrace{ + Frames: frames, + } +} diff --git a/libcontainer/stacktrace/capture_test.go b/libcontainer/stacktrace/capture_test.go new file mode 100644 index 0000000..978f6c4 --- /dev/null +++ b/libcontainer/stacktrace/capture_test.go @@ -0,0 +1,31 @@ +package stacktrace + +import ( + "strings" + "testing" +) + +func captureFunc() Stacktrace { + return Capture(0) +} + +func TestCaptureTestFunc(t *testing.T) { + stack := captureFunc() + + if len(stack.Frames) == 0 { + t.Fatal("expected stack frames to be returned") + } + + // the first frame is the caller + frame := stack.Frames[0] + if expected := "captureFunc"; frame.Function != expected { + t.Fatalf("expected function %q but received %q", expected, frame.Function) + } + expected := "/runc/libcontainer/stacktrace" + if !strings.HasSuffix(frame.Package, expected) { + t.Fatalf("expected package %q but received %q", expected, frame.Package) + } + if expected := "capture_test.go"; frame.File != expected { + t.Fatalf("expected file %q but received %q", expected, frame.File) + } +} diff --git a/libcontainer/stacktrace/frame.go b/libcontainer/stacktrace/frame.go new file mode 100644 index 0000000..0d590d9 --- /dev/null +++ b/libcontainer/stacktrace/frame.go 
// parseFunctionName splits a fully qualified function name at its last dot
// and returns the part before it (the package) and the part after it (the
// function). A name without any dot is returned as the function with an
// empty package.
func parseFunctionName(name string) (string, string) {
	dot := strings.LastIndexByte(name, '.')
	if dot >= 0 {
		return name[:dot], name[dot+1:]
	}
	return "", name
}
// Init runs the setup steps of the standard container init process in order
// and finally replaces the current process with the user's command through
// syscall.Exec.
//
// The sequence is: session keyring, network and routes, rootfs and console,
// hostname, AppArmor, sysctls, read-only and masked paths, no-new-privileges,
// synchronisation with the parent, process label, seccomp, namespace
// finalisation, the exec FIFO handshake, and exec. The first failing step
// aborts the sequence and its error is returned; nil is only reachable if
// syscall.Exec itself returns without an error.
func (l *linuxStandardInit) Init() error {
	// Pin the goroutine to its OS thread for the duration of the setup.
	// NOTE(review): presumably because the keyring and label calls below
	// affect per-thread state -- confirm.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	if !l.config.Config.NoNewKeyring {
		if err := label.SetKeyLabel(l.config.ProcessLabel); err != nil {
			return err
		}
		// Reset the key label again when Init returns.
		defer label.SetKeyLabel("")
		ringname, keepperms, newperms := l.getSessionRingParams()

		// Do not inherit the parent's session keyring.
		if sessKeyId, err := keys.JoinSessionKeyring(ringname); err != nil {
			// If keyrings aren't supported then it is likely we are on an
			// older kernel (or inside an LXC container). While we could bail,
			// the security feature we are using here is best-effort (it only
			// really provides marginal protection since VFS credentials are
			// the only significant protection of keyrings).
			//
			// TODO(cyphar): Log this so people know what's going on, once we
			//               have proper logging in 'runc init'.
			if errors.Cause(err) != unix.ENOSYS {
				return errors.Wrap(err, "join session keyring")
			}
		} else {
			// Make session keyring searchable. If we've gotten this far we
			// bail on any error -- we don't want to have a keyring with bad
			// permissions.
			if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
				return errors.Wrap(err, "mod keyring permissions")
			}
		}
	}

	if err := setupNetwork(l.config); err != nil {
		return err
	}
	if err := setupRoute(l.config.Config); err != nil {
		return err
	}

	label.Init()
	if err := prepareRootfs(l.pipe, l.config); err != nil {
		return err
	}
	// Set up the console. This has to be done *before* we finalize the rootfs,
	// but *after* we've given the user the chance to set up all of the mounts
	// they wanted.
	if l.config.CreateConsole {
		if err := setupConsole(l.consoleSocket, l.config, true); err != nil {
			return err
		}
		if err := system.Setctty(); err != nil {
			return errors.Wrap(err, "setctty")
		}
	}

	// Finish the rootfs setup.
	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
		if err := finalizeRootfs(l.config.Config); err != nil {
			return err
		}
	}

	// The hostname is only set when the configuration provides one.
	if hostname := l.config.Config.Hostname; hostname != "" {
		if err := unix.Sethostname([]byte(hostname)); err != nil {
			return errors.Wrap(err, "sethostname")
		}
	}
	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
		return errors.Wrap(err, "apply apparmor profile")
	}

	// Apply the configured sysctls, then the read-only and masked paths.
	for key, value := range l.config.Config.Sysctl {
		if err := writeSystemProperty(key, value); err != nil {
			return errors.Wrapf(err, "write sysctl key %s", key)
		}
	}
	for _, path := range l.config.Config.ReadonlyPaths {
		if err := readonlyPath(path); err != nil {
			return errors.Wrapf(err, "readonly path %s", path)
		}
	}
	for _, path := range l.config.Config.MaskPaths {
		if err := maskPath(path, l.config.Config.MountLabel); err != nil {
			return errors.Wrapf(err, "mask path %s", path)
		}
	}
	// Remember the parent death signal now; it is restored after
	// finalizeNamespace below.
	pdeath, err := system.GetParentDeathSignal()
	if err != nil {
		return errors.Wrap(err, "get pdeath signal")
	}
	if l.config.NoNewPrivileges {
		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
			return errors.Wrap(err, "set nonewprivileges")
		}
	}
	// Tell our parent that we're ready to Execv. This must be done before the
	// Seccomp rules have been applied, because we need to be able to read and
	// write to a socket.
	if err := syncParentReady(l.pipe); err != nil {
		return errors.Wrap(err, "sync ready")
	}
	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
		return errors.Wrap(err, "set process label")
	}
	// Reset the process label again when Init returns.
	defer label.SetProcessLabel("")
	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
	// do this before dropping capabilities; otherwise do it as late as possible
	// just before execve so as few syscalls take place after it as possible.
	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
			return err
		}
	}
	if err := finalizeNamespace(l.config); err != nil {
		return err
	}
	// finalizeNamespace can change user/group which clears the parent death
	// signal, so we restore it here.
	if err := pdeath.Restore(); err != nil {
		return errors.Wrap(err, "restore pdeath signal")
	}
	// Compare the parent from the initial start of the init process and make
	// sure that it did not change.  if the parent changes that means it died
	// and we were reparented to something else so we should just kill ourself
	// and not cause problems for someone else.
	if unix.Getppid() != l.parentPid {
		return unix.Kill(unix.Getpid(), unix.SIGKILL)
	}
	// Check for the arg before waiting to make sure it exists and it is
	// returned as a create time error.
	name, err := exec.LookPath(l.config.Args[0])
	if err != nil {
		return err
	}
	// Close the pipe to signal that we have completed our init.
	l.pipe.Close()
	// Wait for the FIFO to be opened on the other side before exec-ing the
	// user process. We open it through /proc/self/fd/$fd, because the fd that
	// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
	// re-open an O_PATH fd through /proc.
	fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
	if err != nil {
		return newSystemErrorWithCause(err, "open exec fifo")
	}
	if _, err := unix.Write(fd, []byte("0")); err != nil {
		return newSystemErrorWithCause(err, "write 0 exec fifo")
	}
	// Close the O_PATH fifofd fd before exec because the kernel resets
	// dumpable in the wrong order. This has been fixed in newer kernels, but
	// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
	// N.B. the core issue itself (passing dirfds to the host filesystem) has
	// since been resolved.
	// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
	unix.Close(l.fifoFd)
	// Set seccomp as close to execve as possible, so as few syscalls take
	// place afterward (reducing the amount of syscalls that users need to
	// enable in their seccomp profiles).
	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
			return newSystemErrorWithCause(err, "init seccomp")
		}
	}
	// Replace this process with the user's command, passing the full
	// argument vector and the current environment.
	if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
		return newSystemErrorWithCause(err, "exec user process")
	}
	return nil
}
+type stateTransitionError struct { + From string + To string +} + +func (s *stateTransitionError) Error() string { + return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To) +} + +type containerState interface { + transition(containerState) error + destroy() error + status() Status +} + +func destroy(c *linuxContainer) error { + if !c.config.Namespaces.Contains(configs.NEWPID) { + if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil { + logrus.Warn(err) + } + } + err := c.cgroupManager.Destroy() + if c.intelRdtManager != nil { + if ierr := c.intelRdtManager.Destroy(); err == nil { + err = ierr + } + } + if rerr := os.RemoveAll(c.root); err == nil { + err = rerr + } + c.initProcess = nil + if herr := runPoststopHooks(c); err == nil { + err = herr + } + c.state = &stoppedState{c: c} + return err +} + +func runPoststopHooks(c *linuxContainer) error { + if c.config.Hooks != nil { + s, err := c.currentOCIState() + if err != nil { + return err + } + for _, hook := range c.config.Hooks.Poststop { + if err := hook.Run(s); err != nil { + return err + } + } + } + return nil +} + +// stoppedState represents a container is a stopped/destroyed state. +type stoppedState struct { + c *linuxContainer +} + +func (b *stoppedState) status() Status { + return Stopped +} + +func (b *stoppedState) transition(s containerState) error { + switch s.(type) { + case *runningState, *restoredState: + b.c.state = s + return nil + case *stoppedState: + return nil + } + return newStateTransitionError(b, s) +} + +func (b *stoppedState) destroy() error { + return destroy(b.c) +} + +// runningState represents a container that is currently running. 
+type runningState struct { + c *linuxContainer +} + +func (r *runningState) status() Status { + return Running +} + +func (r *runningState) transition(s containerState) error { + switch s.(type) { + case *stoppedState: + t, err := r.c.runType() + if err != nil { + return err + } + if t == Running { + return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped) + } + r.c.state = s + return nil + case *pausedState: + r.c.state = s + return nil + case *runningState: + return nil + } + return newStateTransitionError(r, s) +} + +func (r *runningState) destroy() error { + t, err := r.c.runType() + if err != nil { + return err + } + if t == Running { + return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped) + } + return destroy(r.c) +} + +type createdState struct { + c *linuxContainer +} + +func (i *createdState) status() Status { + return Created +} + +func (i *createdState) transition(s containerState) error { + switch s.(type) { + case *runningState, *pausedState, *stoppedState: + i.c.state = s + return nil + case *createdState: + return nil + } + return newStateTransitionError(i, s) +} + +func (i *createdState) destroy() error { + i.c.initProcess.signal(unix.SIGKILL) + return destroy(i.c) +} + +// pausedState represents a container that is currently pause. It cannot be destroyed in a +// paused state and must transition back to running first. 
+type pausedState struct { + c *linuxContainer +} + +func (p *pausedState) status() Status { + return Paused +} + +func (p *pausedState) transition(s containerState) error { + switch s.(type) { + case *runningState, *stoppedState: + p.c.state = s + return nil + case *pausedState: + return nil + } + return newStateTransitionError(p, s) +} + +func (p *pausedState) destroy() error { + t, err := p.c.runType() + if err != nil { + return err + } + if t != Running && t != Created { + if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil { + return err + } + return destroy(p.c) + } + return newGenericError(fmt.Errorf("container is paused"), ContainerPaused) +} + +// restoredState is the same as the running state but also has associated checkpoint +// information that maybe need destroyed when the container is stopped and destroy is called. +type restoredState struct { + imageDir string + c *linuxContainer +} + +func (r *restoredState) status() Status { + return Running +} + +func (r *restoredState) transition(s containerState) error { + switch s.(type) { + case *stoppedState, *runningState: + return nil + } + return newStateTransitionError(r, s) +} + +func (r *restoredState) destroy() error { + if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil { + if !os.IsNotExist(err) { + return err + } + } + return destroy(r.c) +} + +// loadedState is used whenever a container is restored, loaded, or setting additional +// processes inside and it should not be destroyed when it is exiting. 
+type loadedState struct { + c *linuxContainer + s Status +} + +func (n *loadedState) status() Status { + return n.s +} + +func (n *loadedState) transition(s containerState) error { + n.c.state = s + return nil +} + +func (n *loadedState) destroy() error { + if err := n.c.refreshState(); err != nil { + return err + } + return n.c.state.destroy() +} diff --git a/libcontainer/state_linux_test.go b/libcontainer/state_linux_test.go new file mode 100644 index 0000000..6ef516b --- /dev/null +++ b/libcontainer/state_linux_test.go @@ -0,0 +1,116 @@ +// +build linux + +package libcontainer + +import ( + "reflect" + "testing" +) + +var states = map[containerState]Status{ + &createdState{}: Created, + &runningState{}: Running, + &restoredState{}: Running, + &pausedState{}: Paused, + &stoppedState{}: Stopped, + &loadedState{s: Running}: Running, +} + +func TestStateStatus(t *testing.T) { + for s, status := range states { + if s.status() != status { + t.Fatalf("state returned %s but expected %s", s.status(), status) + } + } +} + +func isStateTransitionError(err error) bool { + _, ok := err.(*stateTransitionError) + return ok +} + +func testTransitions(t *testing.T, initialState containerState, valid []containerState) { + validMap := map[reflect.Type]interface{}{} + for _, validState := range valid { + validMap[reflect.TypeOf(validState)] = nil + t.Run(validState.status().String(), func(t *testing.T) { + if err := initialState.transition(validState); err != nil { + t.Fatal(err) + } + }) + } + for state := range states { + if _, ok := validMap[reflect.TypeOf(state)]; ok { + continue + } + t.Run(state.status().String(), func(t *testing.T) { + err := initialState.transition(state) + if err == nil { + t.Fatal("transition should fail") + } + if !isStateTransitionError(err) { + t.Fatal("expected stateTransitionError") + } + }) + } +} + +func TestStoppedStateTransition(t *testing.T) { + testTransitions( + t, + &stoppedState{c: &linuxContainer{}}, + []containerState{ + &stoppedState{}, 
// Stats groups the statistics collected for a container: a list of network
// interface entries, the cgroup statistics and the Intel RDT statistics.
// NOTE(review): which of these may be nil/empty depends on the code that
// fills the struct, which is not visible here -- confirm before relying on it.
type Stats struct {
	Interfaces    []*types.NetworkInterface
	CgroupStats   *cgroups.Stats
	IntelRdtStats *intelrdt.Stats
}
+// +// [ child ] <-> [ parent ] +// +// procHooks --> [run hooks] +// <-- procResume +// +// procConsole --> +// <-- procConsoleReq +// [send(fd)] --> [recv(fd)] +// <-- procConsoleAck +// +// procReady --> [final setup] +// <-- procRun +const ( + procError syncType = "procError" + procReady syncType = "procReady" + procRun syncType = "procRun" + procHooks syncType = "procHooks" + procResume syncType = "procResume" +) + +type syncT struct { + Type syncType `json:"type"` +} + +// writeSync is used to write to a synchronisation pipe. An error is returned +// if there was a problem writing the payload. +func writeSync(pipe io.Writer, sync syncType) error { + return utils.WriteJSON(pipe, syncT{sync}) +} + +// readSync is used to read from a synchronisation pipe. An error is returned +// if we got a genericError, the pipe was closed, or we got an unexpected flag. +func readSync(pipe io.Reader, expected syncType) error { + var procSync syncT + if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { + if err == io.EOF { + return fmt.Errorf("parent closed synchronisation channel") + } + + if procSync.Type == procError { + var ierr genericError + + if err := json.NewDecoder(pipe).Decode(&ierr); err != nil { + return fmt.Errorf("failed reading error from parent: %v", err) + } + + return &ierr + } + + if procSync.Type != expected { + return fmt.Errorf("invalid synchronisation flag from parent") + } + } + return nil +} + +// parseSync runs the given callback function on each syncT received from the +// child. It will return once io.EOF is returned from the given pipe. +func parseSync(pipe io.Reader, fn func(*syncT) error) error { + dec := json.NewDecoder(pipe) + for { + var sync syncT + if err := dec.Decode(&sync); err != nil { + if err == io.EOF { + break + } + return err + } + + // We handle this case outside fn for cleanliness reasons. 
+ var ierr *genericError + if sync.Type == procError { + if err := dec.Decode(&ierr); err != nil && err != io.EOF { + return newSystemErrorWithCause(err, "decoding proc error from init") + } + if ierr != nil { + return ierr + } + // Programmer error. + panic("No error following JSON procError payload.") + } + + if err := fn(&sync); err != nil { + return err + } + } + return nil +} diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go new file mode 100644 index 0000000..a4ae890 --- /dev/null +++ b/libcontainer/system/linux.go @@ -0,0 +1,155 @@ +// +build linux + +package system + +import ( + "os" + "os/exec" + "syscall" // only for exec + "unsafe" + + "github.com/opencontainers/runc/libcontainer/user" + "golang.org/x/sys/unix" +) + +// If arg2 is nonzero, set the "child subreaper" attribute of the +// calling process; if arg2 is zero, unset the attribute. When a +// process is marked as a child subreaper, all of the children +// that it creates, and their descendants, will be marked as +// having a subreaper. In effect, a subreaper fulfills the role +// of init(1) for its descendant processes. Upon termination of +// a process that is orphaned (i.e., its immediate parent has +// already terminated) and marked as having a subreaper, the +// nearest still living ancestor subreaper will receive a SIGCHLD +// signal and be able to wait(2) on the process to discover its +// termination status. 
+const PR_SET_CHILD_SUBREAPER = 36 + +type ParentDeathSignal int + +func (p ParentDeathSignal) Restore() error { + if p == 0 { + return nil + } + current, err := GetParentDeathSignal() + if err != nil { + return err + } + if p == current { + return nil + } + return p.Set() +} + +func (p ParentDeathSignal) Set() error { + return SetParentDeathSignal(uintptr(p)) +} + +func Execv(cmd string, args []string, env []string) error { + name, err := exec.LookPath(cmd) + if err != nil { + return err + } + + return syscall.Exec(name, args, env) +} + +func Prlimit(pid, resource int, limit unix.Rlimit) error { + _, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0) + if err != 0 { + return err + } + return nil +} + +func SetParentDeathSignal(sig uintptr) error { + if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil { + return err + } + return nil +} + +func GetParentDeathSignal() (ParentDeathSignal, error) { + var sig int + if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil { + return -1, err + } + return ParentDeathSignal(sig), nil +} + +func SetKeepCaps() error { + if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil { + return err + } + + return nil +} + +func ClearKeepCaps() error { + if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil { + return err + } + + return nil +} + +func Setctty() error { + if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil { + return err + } + return nil +} + +// RunningInUserNS detects whether we are currently running in a user namespace. 
+// Originally copied from github.com/lxc/lxd/shared/util.go +func RunningInUserNS() bool { + uidmap, err := user.CurrentProcessUIDMap() + if err != nil { + // This kernel-provided file only exists if user namespaces are supported + return false + } + return UIDMapInUserNS(uidmap) +} + +func UIDMapInUserNS(uidmap []user.IDMap) bool { + /* + * We assume we are in the initial user namespace if we have a full + * range - 4294967295 uids starting at uid 0. + */ + if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 { + return false + } + return true +} + +// GetParentNSeuid returns the euid within the parent user namespace +func GetParentNSeuid() int64 { + euid := int64(os.Geteuid()) + uidmap, err := user.CurrentProcessUIDMap() + if err != nil { + // This kernel-provided file only exists if user namespaces are supported + return euid + } + for _, um := range uidmap { + if um.ID <= euid && euid <= um.ID+um.Count-1 { + return um.ParentID + euid - um.ID + } + } + return euid +} + +// SetSubreaper sets the value i as the subreaper setting for the calling process +func SetSubreaper(i int) error { + return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) +} + +// GetSubreaper returns the subreaper setting for the calling process +func GetSubreaper() (int, error) { + var i uintptr + + if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { + return -1, err + } + + return int(i), nil +} diff --git a/libcontainer/system/linux_test.go b/libcontainer/system/linux_test.go new file mode 100644 index 0000000..4d613d8 --- /dev/null +++ b/libcontainer/system/linux_test.go @@ -0,0 +1,45 @@ +// +build linux + +package system + +import ( + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer/user" +) + +func TestUIDMapInUserNS(t *testing.T) { + cases := []struct { + s string + expected bool + }{ + { + s: " 0 0 4294967295\n", + expected: false, + }, + { + s: " 0 0 1\n", + 
// State is the status of a process.
type State rune

const ( // Only values for Linux 3.14 and later are listed here
	Dead        State = 'X'
	DiskSleep   State = 'D'
	Running     State = 'R'
	Sleeping    State = 'S'
	Stopped     State = 'T'
	TracingStop State = 't'
	Zombie      State = 'Z'
)

// String forms of the state from proc(5)'s documentation for
// /proc/[pid]/status' "State" field.
func (s State) String() string {
	switch s {
	case Dead:
		return "dead"
	case DiskSleep:
		return "disk sleep"
	case Running:
		return "running"
	case Sleeping:
		return "sleeping"
	case Stopped:
		return "stopped"
	case TracingStop:
		return "tracing stop"
	case Zombie:
		return "zombie"
	default:
		return fmt.Sprintf("unknown (%c)", s)
	}
}

// Stat_t represents the information from /proc/[pid]/stat, as
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
	// PID is the process ID.
	PID uint

	// Name is the command run by the process.
	Name string

	// State is the state of the process.
	State State

	// StartTime is the number of clock ticks after system boot (since
	// Linux 2.6).
	StartTime uint64
}

// Stat returns a Stat_t instance for the specified process.
func Stat(pid int) (stat Stat_t, err error) {
	bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
	if err != nil {
		return stat, err
	}
	return parseStat(string(bytes))
}

// GetProcessStartTime returns the start time of process pid formatted as a
// decimal string.
//
// Deprecated: use Stat(pid) and Stat_t.StartTime instead.
func GetProcessStartTime(pid int) (string, error) {
	stat, err := Stat(pid)
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%d", stat.StartTime), nil
}

// parseStat extracts the PID, name, state and start time from the contents
// of a /proc/[pid]/stat file. It returns an error, and a zero Stat_t, when
// data is truncated or one of those fields cannot be parsed.
func parseStat(data string) (stat Stat_t, err error) {
	// From proc(5), field 2 could contain space and is inside `(` and `)`.
	// The following is an example:
	// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
	i := strings.LastIndex(data, ")")
	if i <= 2 || i >= len(data)-1 {
		return Stat_t{}, fmt.Errorf("invalid stat data: %q", data)
	}

	parts := strings.SplitN(data[:i], "(", 2)
	if len(parts) != 2 {
		return Stat_t{}, fmt.Errorf("invalid stat data: %q", data)
	}

	stat.Name = parts[1]
	if _, err := fmt.Sscanf(parts[0], "%d", &stat.PID); err != nil {
		return Stat_t{}, err
	}

	// parts indexes should be offset by 3 from the field number given
	// proc(5), because parts is zero-indexed and we've removed fields
	// one (PID) and two (Name) in the paren-split.
	parts = strings.Split(data[i+2:], " ")
	if len(parts) <= 22-3 {
		// Without this check a truncated line would index past the end of
		// parts and panic.
		return Stat_t{}, fmt.Errorf("invalid stat data: %q", data)
	}

	var state rune
	if _, err := fmt.Sscanf(parts[3-3], "%c", &state); err != nil {
		return Stat_t{}, fmt.Errorf("invalid state in stat data %q: %v", data, err)
	}
	stat.State = State(state)

	if _, err := fmt.Sscanf(parts[22-3], "%d", &stat.StartTime); err != nil {
		return Stat_t{}, fmt.Errorf("invalid start time in stat data %q: %v", data, err)
	}
	return stat, nil
}
expected.StartTime { + t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime) + } + } +} diff --git a/libcontainer/system/syscall_linux_32.go b/libcontainer/system/syscall_linux_32.go new file mode 100644 index 0000000..c5ca5d8 --- /dev/null +++ b/libcontainer/system/syscall_linux_32.go @@ -0,0 +1,26 @@ +// +build linux +// +build 386 arm + +package system + +import ( + "golang.org/x/sys/unix" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/libcontainer/system/syscall_linux_64.go b/libcontainer/system/syscall_linux_64.go new file mode 100644 index 0000000..e05e30a --- /dev/null +++ b/libcontainer/system/syscall_linux_64.go @@ -0,0 +1,26 @@ +// +build linux +// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv64 s390x + +package system + +import ( + "golang.org/x/sys/unix" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. 
+func Setgid(gid int) (err error) { + _, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/libcontainer/system/sysconfig.go b/libcontainer/system/sysconfig.go new file mode 100644 index 0000000..b8434f1 --- /dev/null +++ b/libcontainer/system/sysconfig.go @@ -0,0 +1,12 @@ +// +build cgo,linux + +package system + +/* +#include +*/ +import "C" + +func GetClockTicks() int { + return int(C.sysconf(C._SC_CLK_TCK)) +} diff --git a/libcontainer/system/sysconfig_notcgo.go b/libcontainer/system/sysconfig_notcgo.go new file mode 100644 index 0000000..d93b5d5 --- /dev/null +++ b/libcontainer/system/sysconfig_notcgo.go @@ -0,0 +1,15 @@ +// +build !cgo windows + +package system + +func GetClockTicks() int { + // TODO figure out a better alternative for platforms where we're missing cgo + // + // TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency(). + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx + // + // An example of its usage can be found here. 
+ // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx + + return 100 +} diff --git a/libcontainer/system/unsupported.go b/libcontainer/system/unsupported.go new file mode 100644 index 0000000..b94be74 --- /dev/null +++ b/libcontainer/system/unsupported.go @@ -0,0 +1,27 @@ +// +build !linux + +package system + +import ( + "os" + + "github.com/opencontainers/runc/libcontainer/user" +) + +// RunningInUserNS is a stub for non-Linux systems +// Always returns false +func RunningInUserNS() bool { + return false +} + +// UIDMapInUserNS is a stub for non-Linux systems +// Always returns false +func UIDMapInUserNS(uidmap []user.IDMap) bool { + return false +} + +// GetParentNSeuid returns the euid within the parent user namespace +// Always returns os.Geteuid on non-linux +func GetParentNSeuid() int { + return os.Geteuid() +} diff --git a/libcontainer/system/xattrs_linux.go b/libcontainer/system/xattrs_linux.go new file mode 100644 index 0000000..a6823fc --- /dev/null +++ b/libcontainer/system/xattrs_linux.go @@ -0,0 +1,35 @@ +package system + +import "golang.org/x/sys/unix" + +// Returns a []byte slice if the xattr is set and nil otherwise +// Requires path and its attribute as arguments +func Lgetxattr(path string, attr string) ([]byte, error) { + var sz int + // Start with a 128 length byte array + dest := make([]byte, 128) + sz, errno := unix.Lgetxattr(path, attr, dest) + + switch { + case errno == unix.ENODATA: + return nil, errno + case errno == unix.ENOTSUP: + return nil, errno + case errno == unix.ERANGE: + // 128 byte array might just not be good enough, + // A dummy buffer is used to get the real size + // of the xattrs on disk + sz, errno = unix.Lgetxattr(path, attr, []byte{}) + if errno != nil { + return nil, errno + } + dest = make([]byte, sz) + sz, errno = unix.Lgetxattr(path, attr, dest) + if errno != nil { + return nil, errno + } + case errno != nil: + return nil, errno + } + return dest[:sz], nil +} diff --git 
var (
	// ErrUnsupported indicates that the current operating system does not
	// provide the required data for user lookups.
	ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
	// ErrNoPasswdEntries and ErrNoGroupEntries indicate that no matching
	// entries were found in the passwd or group file respectively.
	ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
	ErrNoGroupEntries  = errors.New("no matching entries in group file")
)

// LookupUser looks up a user by their username in /etc/passwd. If the user
// cannot be found (or there is no /etc/passwd file on the filesystem), then
// LookupUser returns an error. The lookup itself is delegated to the
// platform-specific lookupUser.
func LookupUser(username string) (User, error) {
	return lookupUser(username)
}

// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
// be found (or there is no /etc/passwd file on the filesystem), then LookupUid
// returns an error. The lookup itself is delegated to the platform-specific
// lookupUid.
func LookupUid(uid int) (User, error) {
	return lookupUid(uid)
}

// LookupGroup looks up a group by its name in /etc/group. If the group cannot
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
// returns an error. The lookup itself is delegated to the platform-specific
// lookupGroup.
func LookupGroup(groupname string) (Group, error) {
	return lookupGroup(groupname)
}
+func LookupGid(gid int) (Group, error) { + return lookupGid(gid) +} diff --git a/libcontainer/user/lookup_unix.go b/libcontainer/user/lookup_unix.go new file mode 100644 index 0000000..92b5ae8 --- /dev/null +++ b/libcontainer/user/lookup_unix.go @@ -0,0 +1,144 @@ +// +build darwin dragonfly freebsd linux netbsd openbsd solaris + +package user + +import ( + "io" + "os" + "strconv" + + "golang.org/x/sys/unix" +) + +// Unix-specific path to the passwd and group formatted files. +const ( + unixPasswdPath = "/etc/passwd" + unixGroupPath = "/etc/group" +) + +func lookupUser(username string) (User, error) { + return lookupUserFunc(func(u User) bool { + return u.Name == username + }) +} + +func lookupUid(uid int) (User, error) { + return lookupUserFunc(func(u User) bool { + return u.Uid == uid + }) +} + +func lookupUserFunc(filter func(u User) bool) (User, error) { + // Get operating system-specific passwd reader-closer. + passwd, err := GetPasswd() + if err != nil { + return User{}, err + } + defer passwd.Close() + + // Get the users. + users, err := ParsePasswdFilter(passwd, filter) + if err != nil { + return User{}, err + } + + // No user entries found. + if len(users) == 0 { + return User{}, ErrNoPasswdEntries + } + + // Assume the first entry is the "correct" one. + return users[0], nil +} + +func lookupGroup(groupname string) (Group, error) { + return lookupGroupFunc(func(g Group) bool { + return g.Name == groupname + }) +} + +func lookupGid(gid int) (Group, error) { + return lookupGroupFunc(func(g Group) bool { + return g.Gid == gid + }) +} + +func lookupGroupFunc(filter func(g Group) bool) (Group, error) { + // Get operating system-specific group reader-closer. + group, err := GetGroup() + if err != nil { + return Group{}, err + } + defer group.Close() + + // Get the users. + groups, err := ParseGroupFilter(group, filter) + if err != nil { + return Group{}, err + } + + // No user entries found. 
+ if len(groups) == 0 { + return Group{}, ErrNoGroupEntries + } + + // Assume the first entry is the "correct" one. + return groups[0], nil +} + +func GetPasswdPath() (string, error) { + return unixPasswdPath, nil +} + +func GetPasswd() (io.ReadCloser, error) { + return os.Open(unixPasswdPath) +} + +func GetGroupPath() (string, error) { + return unixGroupPath, nil +} + +func GetGroup() (io.ReadCloser, error) { + return os.Open(unixGroupPath) +} + +// CurrentUser looks up the current user by their user id in /etc/passwd. If the +// user cannot be found (or there is no /etc/passwd file on the filesystem), +// then CurrentUser returns an error. +func CurrentUser() (User, error) { + return LookupUid(unix.Getuid()) +} + +// CurrentGroup looks up the current user's group by their primary group id's +// entry in /etc/passwd. If the group cannot be found (or there is no +// /etc/group file on the filesystem), then CurrentGroup returns an error. +func CurrentGroup() (Group, error) { + return LookupGid(unix.Getgid()) +} + +func currentUserSubIDs(fileName string) ([]SubID, error) { + u, err := CurrentUser() + if err != nil { + return nil, err + } + filter := func(entry SubID) bool { + return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid) + } + return ParseSubIDFileFilter(fileName, filter) +} + +func CurrentUserSubUIDs() ([]SubID, error) { + return currentUserSubIDs("/etc/subuid") +} + +func CurrentUserSubGIDs() ([]SubID, error) { + return currentUserSubIDs("/etc/subgid") +} + +func CurrentProcessUIDMap() ([]IDMap, error) { + return ParseIDMapFile("/proc/self/uid_map") +} + +func CurrentProcessGIDMap() ([]IDMap, error) { + return ParseIDMapFile("/proc/self/gid_map") +} diff --git a/libcontainer/user/lookup_windows.go b/libcontainer/user/lookup_windows.go new file mode 100644 index 0000000..65cd40e --- /dev/null +++ b/libcontainer/user/lookup_windows.go @@ -0,0 +1,40 @@ +// +build windows + +package user + +import ( + "fmt" + "os/user" +) + +func lookupUser(username 
string) (User, error) { + u, err := user.Lookup(username) + if err != nil { + return User{}, err + } + return userFromOS(u) +} + +func lookupUid(uid int) (User, error) { + u, err := user.LookupId(fmt.Sprintf("%d", uid)) + if err != nil { + return User{}, err + } + return userFromOS(u) +} + +func lookupGroup(groupname string) (Group, error) { + g, err := user.LookupGroup(groupname) + if err != nil { + return Group{}, err + } + return groupFromOS(g) +} + +func lookupGid(gid int) (Group, error) { + g, err := user.LookupGroupId(fmt.Sprintf("%d", gid)) + if err != nil { + return Group{}, err + } + return groupFromOS(g) +} diff --git a/libcontainer/user/user.go b/libcontainer/user/user.go new file mode 100644 index 0000000..7b912bb --- /dev/null +++ b/libcontainer/user/user.go @@ -0,0 +1,608 @@ +package user + +import ( + "bufio" + "fmt" + "io" + "os" + "os/user" + "strconv" + "strings" +) + +const ( + minId = 0 + maxId = 1<<31 - 1 //for 32-bit systems compatibility +) + +var ( + ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId) +) + +type User struct { + Name string + Pass string + Uid int + Gid int + Gecos string + Home string + Shell string +} + +// userFromOS converts an os/user.(*User) to local User +// +// (This does not include Pass, Shell or Gecos) +func userFromOS(u *user.User) (User, error) { + newUser := User{ + Name: u.Username, + Home: u.HomeDir, + } + id, err := strconv.Atoi(u.Uid) + if err != nil { + return newUser, err + } + newUser.Uid = id + + id, err = strconv.Atoi(u.Gid) + if err != nil { + return newUser, err + } + newUser.Gid = id + return newUser, nil +} + +type Group struct { + Name string + Pass string + Gid int + List []string +} + +// groupFromOS converts an os/user.(*Group) to local Group +// +// (This does not include Pass, Shell or Gecos) +func groupFromOS(g *user.Group) (Group, error) { + newGroup := Group{ + Name: g.Name, + } + + id, err := strconv.Atoi(g.Gid) + if err != nil { + return newGroup, err + } + 
newGroup.Gid = id + + return newGroup, nil +} + +// SubID represents an entry in /etc/sub{u,g}id +type SubID struct { + Name string + SubID int64 + Count int64 +} + +// IDMap represents an entry in /proc/PID/{u,g}id_map +type IDMap struct { + ID int64 + ParentID int64 + Count int64 +} + +func parseLine(line string, v ...interface{}) { + parseParts(strings.Split(line, ":"), v...) +} + +func parseParts(parts []string, v ...interface{}) { + if len(parts) == 0 { + return + } + + for i, p := range parts { + // Ignore cases where we don't have enough fields to populate the arguments. + // Some configuration files like to misbehave. + if len(v) <= i { + break + } + + // Use the type of the argument to figure out how to parse it, scanf() style. + // This is legit. + switch e := v[i].(type) { + case *string: + *e = p + case *int: + // "numbers", with conversion errors ignored because of some misbehaving configuration files. + *e, _ = strconv.Atoi(p) + case *int64: + *e, _ = strconv.ParseInt(p, 10, 64) + case *[]string: + // Comma-separated lists. + if p != "" { + *e = strings.Split(p, ",") + } else { + *e = []string{} + } + default: + // Someone goof'd when writing code using this function. Scream so they can hear us. + panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! 
%#v is not a pointer!", e)) + } + } +} + +func ParsePasswdFile(path string) ([]User, error) { + passwd, err := os.Open(path) + if err != nil { + return nil, err + } + defer passwd.Close() + return ParsePasswd(passwd) +} + +func ParsePasswd(passwd io.Reader) ([]User, error) { + return ParsePasswdFilter(passwd, nil) +} + +func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) { + passwd, err := os.Open(path) + if err != nil { + return nil, err + } + defer passwd.Close() + return ParsePasswdFilter(passwd, filter) +} + +func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) { + if r == nil { + return nil, fmt.Errorf("nil source for passwd-formatted data") + } + + var ( + s = bufio.NewScanner(r) + out = []User{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // see: man 5 passwd + // name:password:UID:GID:GECOS:directory:shell + // Name:Pass:Uid:Gid:Gecos:Home:Shell + // root:x:0:0:root:/root:/bin/bash + // adm:x:3:4:adm:/var/adm:/bin/false + p := User{} + parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell) + + if filter == nil || filter(p) { + out = append(out, p) + } + } + + return out, nil +} + +func ParseGroupFile(path string) ([]Group, error) { + group, err := os.Open(path) + if err != nil { + return nil, err + } + + defer group.Close() + return ParseGroup(group) +} + +func ParseGroup(group io.Reader) ([]Group, error) { + return ParseGroupFilter(group, nil) +} + +func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) { + group, err := os.Open(path) + if err != nil { + return nil, err + } + defer group.Close() + return ParseGroupFilter(group, filter) +} + +func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) { + if r == nil { + return nil, fmt.Errorf("nil source for group-formatted data") + } + + var ( + s = bufio.NewScanner(r) + out = 
[]Group{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + text := s.Text() + if text == "" { + continue + } + + // see: man 5 group + // group_name:password:GID:user_list + // Name:Pass:Gid:List + // root:x:0:root + // adm:x:4:root,adm,daemon + p := Group{} + parseLine(text, &p.Name, &p.Pass, &p.Gid, &p.List) + + if filter == nil || filter(p) { + out = append(out, p) + } + } + + return out, nil +} + +type ExecUser struct { + Uid int + Gid int + Sgids []int + Home string +} + +// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the +// given file paths and uses that data as the arguments to GetExecUser. If the +// files cannot be opened for any reason, the error is ignored and a nil +// io.Reader is passed instead. +func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { + var passwd, group io.Reader + + if passwdFile, err := os.Open(passwdPath); err == nil { + passwd = passwdFile + defer passwdFile.Close() + } + + if groupFile, err := os.Open(groupPath); err == nil { + group = groupFile + defer groupFile.Close() + } + + return GetExecUser(userSpec, defaults, passwd, group) +} + +// GetExecUser parses a user specification string (using the passwd and group +// readers as sources for /etc/passwd and /etc/group data, respectively). In +// the case of blank fields or missing data from the sources, the values in +// defaults is used. +// +// GetExecUser will return an error if a user or group literal could not be +// found in any entry in passwd and group respectively. +// +// Examples of valid user specifications are: +// * "" +// * "user" +// * "uid" +// * "user:group" +// * "uid:gid +// * "user:gid" +// * "uid:group" +// +// It should be noted that if you specify a numeric user or group id, they will +// not be evaluated as usernames (only the metadata will be filled). 
So attempting +// to parse a user with user.Name = "1337" will produce the user with a UID of +// 1337. +func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) { + if defaults == nil { + defaults = new(ExecUser) + } + + // Copy over defaults. + user := &ExecUser{ + Uid: defaults.Uid, + Gid: defaults.Gid, + Sgids: defaults.Sgids, + Home: defaults.Home, + } + + // Sgids slice *cannot* be nil. + if user.Sgids == nil { + user.Sgids = []int{} + } + + // Allow for userArg to have either "user" syntax, or optionally "user:group" syntax + var userArg, groupArg string + parseLine(userSpec, &userArg, &groupArg) + + // Convert userArg and groupArg to be numeric, so we don't have to execute + // Atoi *twice* for each iteration over lines. + uidArg, uidErr := strconv.Atoi(userArg) + gidArg, gidErr := strconv.Atoi(groupArg) + + // Find the matching user. + users, err := ParsePasswdFilter(passwd, func(u User) bool { + if userArg == "" { + // Default to current state of the user. + return u.Uid == user.Uid + } + + if uidErr == nil { + // If the userArg is numeric, always treat it as a UID. + return uidArg == u.Uid + } + + return u.Name == userArg + }) + + // If we can't find the user, we have to bail. + if err != nil && passwd != nil { + if userArg == "" { + userArg = strconv.Itoa(user.Uid) + } + return nil, fmt.Errorf("unable to find user %s: %v", userArg, err) + } + + var matchedUserName string + if len(users) > 0 { + // First match wins, even if there's more than one matching entry. + matchedUserName = users[0].Name + user.Uid = users[0].Uid + user.Gid = users[0].Gid + user.Home = users[0].Home + } else if userArg != "" { + // If we can't find a user with the given username, the only other valid + // option is if it's a numeric username with no associated entry in passwd. + + if uidErr != nil { + // Not numeric. 
+ return nil, fmt.Errorf("unable to find user %s: %v", userArg, ErrNoPasswdEntries) + } + user.Uid = uidArg + + // Must be inside valid uid range. + if user.Uid < minId || user.Uid > maxId { + return nil, ErrRange + } + + // Okay, so it's numeric. We can just roll with this. + } + + // On to the groups. If we matched a username, we need to do this because of + // the supplementary group IDs. + if groupArg != "" || matchedUserName != "" { + groups, err := ParseGroupFilter(group, func(g Group) bool { + // If the group argument isn't explicit, we'll just search for it. + if groupArg == "" { + // Check if user is a member of this group. + for _, u := range g.List { + if u == matchedUserName { + return true + } + } + return false + } + + if gidErr == nil { + // If the groupArg is numeric, always treat it as a GID. + return gidArg == g.Gid + } + + return g.Name == groupArg + }) + if err != nil && group != nil { + return nil, fmt.Errorf("unable to find groups for spec %v: %v", matchedUserName, err) + } + + // Only start modifying user.Gid if it is in explicit form. + if groupArg != "" { + if len(groups) > 0 { + // First match wins, even if there's more than one matching entry. + user.Gid = groups[0].Gid + } else { + // If we can't find a group with the given name, the only other valid + // option is if it's a numeric group name with no associated entry in group. + + if gidErr != nil { + // Not numeric. + return nil, fmt.Errorf("unable to find group %s: %v", groupArg, ErrNoGroupEntries) + } + user.Gid = gidArg + + // Must be inside valid gid range. + if user.Gid < minId || user.Gid > maxId { + return nil, ErrRange + } + + // Okay, so it's numeric. We can just roll with this. + } + } else if len(groups) > 0 { + // Supplementary group ids only make sense if in the implicit form. 
+ user.Sgids = make([]int, len(groups)) + for i, group := range groups { + user.Sgids[i] = group.Gid + } + } + } + + return user, nil +} + +// GetAdditionalGroups looks up a list of groups by name or group id +// against the given /etc/group formatted data. If a group name cannot +// be found, an error will be returned. If a group id cannot be found, +// or the given group data is nil, the id will be returned as-is +// provided it is in the legal range. +func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) { + var groups = []Group{} + if group != nil { + var err error + groups, err = ParseGroupFilter(group, func(g Group) bool { + for _, ag := range additionalGroups { + if g.Name == ag || strconv.Itoa(g.Gid) == ag { + return true + } + } + return false + }) + if err != nil { + return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err) + } + } + + gidMap := make(map[int]struct{}) + for _, ag := range additionalGroups { + var found bool + for _, g := range groups { + // if we found a matched group either by name or gid, take the + // first matched as correct + if g.Name == ag || strconv.Itoa(g.Gid) == ag { + if _, ok := gidMap[g.Gid]; !ok { + gidMap[g.Gid] = struct{}{} + found = true + break + } + } + } + // we asked for a group but didn't find it. let's check to see + // if we wanted a numeric group + if !found { + gid, err := strconv.Atoi(ag) + if err != nil { + return nil, fmt.Errorf("Unable to find group %s", ag) + } + // Ensure gid is inside gid range. + if gid < minId || gid > maxId { + return nil, ErrRange + } + gidMap[gid] = struct{}{} + } + } + gids := []int{} + for gid := range gidMap { + gids = append(gids, gid) + } + return gids, nil +} + +// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups +// that opens the groupPath given and gives it as an argument to +// GetAdditionalGroups. 
+func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) { + var group io.Reader + + if groupFile, err := os.Open(groupPath); err == nil { + group = groupFile + defer groupFile.Close() + } + return GetAdditionalGroups(additionalGroups, group) +} + +func ParseSubIDFile(path string) ([]SubID, error) { + subid, err := os.Open(path) + if err != nil { + return nil, err + } + defer subid.Close() + return ParseSubID(subid) +} + +func ParseSubID(subid io.Reader) ([]SubID, error) { + return ParseSubIDFilter(subid, nil) +} + +func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) { + subid, err := os.Open(path) + if err != nil { + return nil, err + } + defer subid.Close() + return ParseSubIDFilter(subid, filter) +} + +func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) { + if r == nil { + return nil, fmt.Errorf("nil source for subid-formatted data") + } + + var ( + s = bufio.NewScanner(r) + out = []SubID{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // see: man 5 subuid + p := SubID{} + parseLine(line, &p.Name, &p.SubID, &p.Count) + + if filter == nil || filter(p) { + out = append(out, p) + } + } + + return out, nil +} + +func ParseIDMapFile(path string) ([]IDMap, error) { + r, err := os.Open(path) + if err != nil { + return nil, err + } + defer r.Close() + return ParseIDMap(r) +} + +func ParseIDMap(r io.Reader) ([]IDMap, error) { + return ParseIDMapFilter(r, nil) +} + +func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) { + r, err := os.Open(path) + if err != nil { + return nil, err + } + defer r.Close() + return ParseIDMapFilter(r, filter) +} + +func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) { + if r == nil { + return nil, fmt.Errorf("nil source for idmap-formatted data") + } + + var ( + s = bufio.NewScanner(r) + out = 
[]IDMap{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // see: man 7 user_namespaces + p := IDMap{} + parseParts(strings.Fields(line), &p.ID, &p.ParentID, &p.Count) + + if filter == nil || filter(p) { + out = append(out, p) + } + } + + return out, nil +} diff --git a/libcontainer/user/user_test.go b/libcontainer/user/user_test.go new file mode 100644 index 0000000..24ee559 --- /dev/null +++ b/libcontainer/user/user_test.go @@ -0,0 +1,507 @@ +package user + +import ( + "io" + "reflect" + "sort" + "strconv" + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer/utils" +) + +func TestUserParseLine(t *testing.T) { + var ( + a, b string + c []string + d int + ) + + parseLine("", &a, &b) + if a != "" || b != "" { + t.Fatalf("a and b should be empty ('%v', '%v')", a, b) + } + + parseLine("a", &a, &b) + if a != "a" || b != "" { + t.Fatalf("a should be 'a' and b should be empty ('%v', '%v')", a, b) + } + + parseLine("bad boys:corny cows", &a, &b) + if a != "bad boys" || b != "corny cows" { + t.Fatalf("a should be 'bad boys' and b should be 'corny cows' ('%v', '%v')", a, b) + } + + parseLine("", &c) + if len(c) != 0 { + t.Fatalf("c should be empty (%#v)", c) + } + + parseLine("d,e,f:g:h:i,j,k", &c, &a, &b, &c) + if a != "g" || b != "h" || len(c) != 3 || c[0] != "i" || c[1] != "j" || c[2] != "k" { + t.Fatalf("a should be 'g', b should be 'h', and c should be ['i','j','k'] ('%v', '%v', '%#v')", a, b, c) + } + + parseLine("::::::::::", &a, &b, &c) + if a != "" || b != "" || len(c) != 0 { + t.Fatalf("a, b, and c should all be empty ('%v', '%v', '%#v')", a, b, c) + } + + parseLine("not a number", &d) + if d != 0 { + t.Fatalf("d should be 0 (%v)", d) + } + + parseLine("b:12:c", &a, &d, &b) + if a != "b" || b != "c" || d != 12 { + t.Fatalf("a should be 'b' and b should be 'c', and d should be 12 ('%v', '%v', %v)", a, b, d) + } +} + +func 
TestUserParsePasswd(t *testing.T) { + users, err := ParsePasswdFilter(strings.NewReader(` +root:x:0:0:root:/root:/bin/bash +adm:x:3:4:adm:/var/adm:/bin/false +this is just some garbage data +`), nil) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if len(users) != 3 { + t.Fatalf("Expected 3 users, got %v", len(users)) + } + if users[0].Uid != 0 || users[0].Name != "root" { + t.Fatalf("Expected users[0] to be 0 - root, got %v - %v", users[0].Uid, users[0].Name) + } + if users[1].Uid != 3 || users[1].Name != "adm" { + t.Fatalf("Expected users[1] to be 3 - adm, got %v - %v", users[1].Uid, users[1].Name) + } +} + +func TestUserParseGroup(t *testing.T) { + groups, err := ParseGroupFilter(strings.NewReader(` +root:x:0:root +adm:x:4:root,adm,daemon +this is just some garbage data +`), nil) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if len(groups) != 3 { + t.Fatalf("Expected 3 groups, got %v", len(groups)) + } + if groups[0].Gid != 0 || groups[0].Name != "root" || len(groups[0].List) != 1 { + t.Fatalf("Expected groups[0] to be 0 - root - 1 member, got %v - %v - %v", groups[0].Gid, groups[0].Name, len(groups[0].List)) + } + if groups[1].Gid != 4 || groups[1].Name != "adm" || len(groups[1].List) != 3 { + t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - %v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List)) + } +} + +func TestValidGetExecUser(t *testing.T) { + const passwdContent = ` +root:x:0:0:root user:/root:/bin/bash +adm:x:42:43:adm:/var/adm:/bin/false +111:x:222:333::/var/garbage +odd:x:111:112::/home/odd::::: +this is just some garbage data +` + const groupContent = ` +root:x:0:root +adm:x:43: +grp:x:1234:root,adm +444:x:555:111 +odd:x:444: +this is just some garbage data +` + defaultExecUser := ExecUser{ + Uid: 8888, + Gid: 8888, + Sgids: []int{8888}, + Home: "/8888", + } + + tests := []struct { + ref string + expected ExecUser + }{ + { + ref: "root", + expected: ExecUser{ + Uid: 0, + Gid: 0, + Sgids: 
[]int{0, 1234}, + Home: "/root", + }, + }, + { + ref: "adm", + expected: ExecUser{ + Uid: 42, + Gid: 43, + Sgids: []int{1234}, + Home: "/var/adm", + }, + }, + { + ref: "root:adm", + expected: ExecUser{ + Uid: 0, + Gid: 43, + Sgids: defaultExecUser.Sgids, + Home: "/root", + }, + }, + { + ref: "adm:1234", + expected: ExecUser{ + Uid: 42, + Gid: 1234, + Sgids: defaultExecUser.Sgids, + Home: "/var/adm", + }, + }, + { + ref: "42:1234", + expected: ExecUser{ + Uid: 42, + Gid: 1234, + Sgids: defaultExecUser.Sgids, + Home: "/var/adm", + }, + }, + { + ref: "1337:1234", + expected: ExecUser{ + Uid: 1337, + Gid: 1234, + Sgids: defaultExecUser.Sgids, + Home: defaultExecUser.Home, + }, + }, + { + ref: "1337", + expected: ExecUser{ + Uid: 1337, + Gid: defaultExecUser.Gid, + Sgids: defaultExecUser.Sgids, + Home: defaultExecUser.Home, + }, + }, + { + ref: "", + expected: ExecUser{ + Uid: defaultExecUser.Uid, + Gid: defaultExecUser.Gid, + Sgids: defaultExecUser.Sgids, + Home: defaultExecUser.Home, + }, + }, + + // Regression tests for #695. 
+ { + ref: "111", + expected: ExecUser{ + Uid: 111, + Gid: 112, + Sgids: defaultExecUser.Sgids, + Home: "/home/odd", + }, + }, + { + ref: "111:444", + expected: ExecUser{ + Uid: 111, + Gid: 444, + Sgids: defaultExecUser.Sgids, + Home: "/home/odd", + }, + }, + } + + for _, test := range tests { + passwd := strings.NewReader(passwdContent) + group := strings.NewReader(groupContent) + + execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group) + if err != nil { + t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error()) + t.Fail() + continue + } + + if !reflect.DeepEqual(test.expected, *execUser) { + t.Logf("ref: %v", test.ref) + t.Logf("got: %#v", execUser) + t.Logf("expected: %#v", test.expected) + t.Fail() + continue + } + } +} + +func TestInvalidGetExecUser(t *testing.T) { + const passwdContent = ` +root:x:0:0:root user:/root:/bin/bash +adm:x:42:43:adm:/var/adm:/bin/false +-42:x:12:13:broken:/very/broken +this is just some garbage data +` + const groupContent = ` +root:x:0:root +adm:x:43: +grp:x:1234:root,adm +this is just some garbage data +` + + tests := []string{ + // No such user/group. + "notuser", + "notuser:notgroup", + "root:notgroup", + "notuser:adm", + "8888:notgroup", + "notuser:8888", + + // Invalid user/group values. 
+ "-1:0", + "0:-3", + "-5:-2", + "-42", + "-43", + } + + for _, test := range tests { + passwd := strings.NewReader(passwdContent) + group := strings.NewReader(groupContent) + + execUser, err := GetExecUser(test, nil, passwd, group) + if err == nil { + t.Logf("got unexpected success when parsing '%s': %#v", test, execUser) + t.Fail() + continue + } + } +} + +func TestGetExecUserNilSources(t *testing.T) { + const passwdContent = ` +root:x:0:0:root user:/root:/bin/bash +adm:x:42:43:adm:/var/adm:/bin/false +this is just some garbage data +` + const groupContent = ` +root:x:0:root +adm:x:43: +grp:x:1234:root,adm +this is just some garbage data +` + + defaultExecUser := ExecUser{ + Uid: 8888, + Gid: 8888, + Sgids: []int{8888}, + Home: "/8888", + } + + tests := []struct { + ref string + passwd, group bool + expected ExecUser + }{ + { + ref: "", + passwd: false, + group: false, + expected: ExecUser{ + Uid: 8888, + Gid: 8888, + Sgids: []int{8888}, + Home: "/8888", + }, + }, + { + ref: "root", + passwd: true, + group: false, + expected: ExecUser{ + Uid: 0, + Gid: 0, + Sgids: []int{8888}, + Home: "/root", + }, + }, + { + ref: "0", + passwd: false, + group: false, + expected: ExecUser{ + Uid: 0, + Gid: 8888, + Sgids: []int{8888}, + Home: "/8888", + }, + }, + { + ref: "0:0", + passwd: false, + group: false, + expected: ExecUser{ + Uid: 0, + Gid: 0, + Sgids: []int{8888}, + Home: "/8888", + }, + }, + } + + for _, test := range tests { + var passwd, group io.Reader + + if test.passwd { + passwd = strings.NewReader(passwdContent) + } + + if test.group { + group = strings.NewReader(groupContent) + } + + execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group) + if err != nil { + t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error()) + t.Fail() + continue + } + + if !reflect.DeepEqual(test.expected, *execUser) { + t.Logf("got: %#v", execUser) + t.Logf("expected: %#v", test.expected) + t.Fail() + continue + } + } +} + +func 
TestGetAdditionalGroups(t *testing.T) { + type foo struct { + groups []string + expected []int + hasError bool + } + + const groupContent = ` +root:x:0:root +adm:x:43: +grp:x:1234:root,adm +adm:x:4343:root,adm-duplicate +this is just some garbage data +` + tests := []foo{ + { + // empty group + groups: []string{}, + expected: []int{}, + }, + { + // single group + groups: []string{"adm"}, + expected: []int{43}, + }, + { + // multiple groups + groups: []string{"adm", "grp"}, + expected: []int{43, 1234}, + }, + { + // invalid group + groups: []string{"adm", "grp", "not-exist"}, + expected: nil, + hasError: true, + }, + { + // group with numeric id + groups: []string{"43"}, + expected: []int{43}, + }, + { + // group with unknown numeric id + groups: []string{"adm", "10001"}, + expected: []int{43, 10001}, + }, + { + // groups specified twice with numeric and name + groups: []string{"adm", "43"}, + expected: []int{43}, + }, + { + // groups with too small id + groups: []string{"-1"}, + expected: nil, + hasError: true, + }, + } + + if utils.GetIntSize() > 4 { + tests = append(tests, foo{ + // groups with too large id + groups: []string{strconv.Itoa(1 << 31)}, + expected: nil, + hasError: true, + }) + } + + for _, test := range tests { + group := strings.NewReader(groupContent) + + gids, err := GetAdditionalGroups(test.groups, group) + if test.hasError && err == nil { + t.Errorf("Parse(%#v) expects error but has none", test) + continue + } + if !test.hasError && err != nil { + t.Errorf("Parse(%#v) has error %v", test, err) + continue + } + sort.Sort(sort.IntSlice(gids)) + if !reflect.DeepEqual(gids, test.expected) { + t.Errorf("Gids(%v), expect %v from groups %v", gids, test.expected, test.groups) + } + } +} + +func TestGetAdditionalGroupsNumeric(t *testing.T) { + tests := []struct { + groups []string + expected []int + hasError bool + }{ + { + // numeric groups only + groups: []string{"1234", "5678"}, + expected: []int{1234, 5678}, + }, + { + // numeric and alphabetic + 
groups: []string{"1234", "fake"}, + expected: nil, + hasError: true, + }, + } + + for _, test := range tests { + gids, err := GetAdditionalGroups(test.groups, nil) + if test.hasError && err == nil { + t.Errorf("Parse(%#v) expects error but has none", test) + continue + } + if !test.hasError && err != nil { + t.Errorf("Parse(%#v) has error %v", test, err) + continue + } + sort.Sort(sort.IntSlice(gids)) + if !reflect.DeepEqual(gids, test.expected) { + t.Errorf("Gids(%v), expect %v from groups %v", gids, test.expected, test.groups) + } + } +} diff --git a/libcontainer/utils/cmsg.go b/libcontainer/utils/cmsg.go new file mode 100644 index 0000000..c8a9364 --- /dev/null +++ b/libcontainer/utils/cmsg.go @@ -0,0 +1,93 @@ +// +build linux + +package utils + +/* + * Copyright 2016, 2017 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import ( + "fmt" + "os" + + "golang.org/x/sys/unix" +) + +// MaxSendfdLen is the maximum length of the name of a file descriptor being +// sent using SendFd. The name of the file handle returned by RecvFd will never +// be larger than this value. +const MaxNameLen = 4096 + +// oobSpace is the size of the oob slice required to store a single FD. Note +// that unix.UnixRights appears to make the assumption that fd is always int32, +// so sizeof(fd) = 4. +var oobSpace = unix.CmsgSpace(4) + +// RecvFd waits for a file descriptor to be sent over the given AF_UNIX +// socket. 
The file name of the remote file descriptor will be recreated +// locally (it is sent as non-auxiliary data in the same payload). +func RecvFd(socket *os.File) (*os.File, error) { + // For some reason, unix.Recvmsg uses the length rather than the capacity + // when passing the msg_controllen and other attributes to recvmsg. So we + // have to actually set the length. + name := make([]byte, MaxNameLen) + oob := make([]byte, oobSpace) + + sockfd := socket.Fd() + n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) + if err != nil { + return nil, err + } + + if n >= MaxNameLen || oobn != oobSpace { + return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) + } + + // Truncate. + name = name[:n] + oob = oob[:oobn] + + scms, err := unix.ParseSocketControlMessage(oob) + if err != nil { + return nil, err + } + if len(scms) != 1 { + return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) + } + scm := scms[0] + + fds, err := unix.ParseUnixRights(&scm) + if err != nil { + return nil, err + } + if len(fds) != 1 { + return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds)) + } + fd := uintptr(fds[0]) + + return os.NewFile(fd, string(name)), nil +} + +// SendFd sends a file descriptor over the given AF_UNIX socket. In +// addition, the file.Name() of the given file will also be sent as +// non-auxiliary data in the same payload (allowing to send contextual +// information for a file descriptor). 
+func SendFd(socket *os.File, name string, fd uintptr) error { + if len(name) >= MaxNameLen { + return fmt.Errorf("sendfd: filename too long: %s", name) + } + oob := unix.UnixRights(int(fd)) + return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0) +} diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go new file mode 100644 index 0000000..40ccfaa --- /dev/null +++ b/libcontainer/utils/utils.go @@ -0,0 +1,112 @@ +package utils + +import ( + "encoding/json" + "io" + "os" + "path/filepath" + "strings" + "unsafe" + + "golang.org/x/sys/unix" +) + +const ( + exitSignalOffset = 128 +) + +// ResolveRootfs ensures that the current working directory is +// not a symlink and returns the absolute path to the rootfs +func ResolveRootfs(uncleanRootfs string) (string, error) { + rootfs, err := filepath.Abs(uncleanRootfs) + if err != nil { + return "", err + } + return filepath.EvalSymlinks(rootfs) +} + +// ExitStatus returns the correct exit status for a process based on if it +// was signaled or exited cleanly +func ExitStatus(status unix.WaitStatus) int { + if status.Signaled() { + return exitSignalOffset + int(status.Signal()) + } + return status.ExitStatus() +} + +// WriteJSON writes the provided struct v to w using standard json marshaling +func WriteJSON(w io.Writer, v interface{}) error { + data, err := json.Marshal(v) + if err != nil { + return err + } + _, err = w.Write(data) + return err +} + +// CleanPath makes a path safe for use with filepath.Join. This is done by not +// only cleaning the path, but also (if the path is relative) adding a leading +// '/' and cleaning it (then removing the leading '/'). This ensures that a +// path resulting from prepending another path will always resolve to lexically +// be a subdirectory of the prefixed path. This is all done lexically, so paths +// that include symlinks won't be safe as a result of using CleanPath. +func CleanPath(path string) string { + // Deal with empty strings nicely. 
+ if path == "" { + return "" + } + + // Ensure that all paths are cleaned (especially problematic ones like + // "/../../../../../" which can cause lots of issues). + path = filepath.Clean(path) + + // If the path isn't absolute, we need to do more processing to fix paths + // such as "../../../..//some/path". We also shouldn't convert absolute + // paths to relative ones. + if !filepath.IsAbs(path) { + path = filepath.Clean(string(os.PathSeparator) + path) + // This can't fail, as (by definition) all paths are relative to root. + path, _ = filepath.Rel(string(os.PathSeparator), path) + } + + // Clean the path again for good measure. + return filepath.Clean(path) +} + +// SearchLabels searches a list of key-value pairs for the provided key and +// returns the corresponding value. The pairs must be separated with '='. +func SearchLabels(labels []string, query string) string { + for _, l := range labels { + parts := strings.SplitN(l, "=", 2) + if len(parts) < 2 { + continue + } + if parts[0] == query { + return parts[1] + } + } + return "" +} + +// Annotations returns the bundle path and user defined annotations from the +// libcontainer state. We need to remove the bundle because that is a label +// added by libcontainer. 
+func Annotations(labels []string) (bundle string, userAnnotations map[string]string) { + userAnnotations = make(map[string]string) + for _, l := range labels { + parts := strings.SplitN(l, "=", 2) + if len(parts) < 2 { + continue + } + if parts[0] == "bundle" { + bundle = parts[1] + } else { + userAnnotations[parts[0]] = parts[1] + } + } + return +} + +func GetIntSize() int { + return int(unsafe.Sizeof(1)) +} diff --git a/libcontainer/utils/utils_test.go b/libcontainer/utils/utils_test.go new file mode 100644 index 0000000..395eedc --- /dev/null +++ b/libcontainer/utils/utils_test.go @@ -0,0 +1,142 @@ +package utils + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "testing" + + "golang.org/x/sys/unix" +) + +var labelTest = []struct { + labels []string + query string + expectedValue string +}{ + {[]string{"bundle=/path/to/bundle"}, "bundle", "/path/to/bundle"}, + {[]string{"test=a", "test=b"}, "bundle", ""}, + {[]string{"bundle=a", "test=b", "bundle=c"}, "bundle", "a"}, + {[]string{"", "test=a", "bundle=b"}, "bundle", "b"}, + {[]string{"test", "bundle=a"}, "bundle", "a"}, + {[]string{"test=a", "bundle="}, "bundle", ""}, +} + +func TestSearchLabels(t *testing.T) { + for _, tt := range labelTest { + if v := SearchLabels(tt.labels, tt.query); v != tt.expectedValue { + t.Errorf("expected value '%s' for query '%s'; got '%s'", tt.expectedValue, tt.query, v) + } + } +} + +func TestResolveRootfs(t *testing.T) { + dir := "rootfs" + os.Mkdir(dir, 0600) + defer os.Remove(dir) + + path, err := ResolveRootfs(dir) + if err != nil { + t.Fatal(err) + } + pwd, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + if path != fmt.Sprintf("%s/%s", pwd, "rootfs") { + t.Errorf("expected rootfs to be abs and was %s", path) + } +} + +func TestResolveRootfsWithSymlink(t *testing.T) { + dir := "rootfs" + tmpDir, _ := filepath.EvalSymlinks(os.TempDir()) + os.Symlink(tmpDir, dir) + defer os.Remove(dir) + + path, err := ResolveRootfs(dir) + if err != nil { + t.Fatal(err) + } + + if 
path != tmpDir { + t.Errorf("expected rootfs to be the real path %s and was %s", path, os.TempDir()) + } +} + +func TestResolveRootfsWithNonExistingDir(t *testing.T) { + _, err := ResolveRootfs("foo") + if err == nil { + t.Error("expected error to happen but received nil") + } +} + +func TestExitStatus(t *testing.T) { + status := unix.WaitStatus(0) + ex := ExitStatus(status) + if ex != 0 { + t.Errorf("expected exit status to equal 0 and received %d", ex) + } +} + +func TestExitStatusSignaled(t *testing.T) { + status := unix.WaitStatus(2) + ex := ExitStatus(status) + if ex != 130 { + t.Errorf("expected exit status to equal 130 and received %d", ex) + } +} + +func TestWriteJSON(t *testing.T) { + person := struct { + Name string + Age int + }{ + Name: "Alice", + Age: 30, + } + + var b bytes.Buffer + err := WriteJSON(&b, person) + if err != nil { + t.Fatal(err) + } + + expected := `{"Name":"Alice","Age":30}` + if b.String() != expected { + t.Errorf("expected to write %s but was %s", expected, b.String()) + } +} + +func TestCleanPath(t *testing.T) { + path := CleanPath("") + if path != "" { + t.Errorf("expected to receive empty string and received %s", path) + } + + path = CleanPath("rootfs") + if path != "rootfs" { + t.Errorf("expected to receive 'rootfs' and received %s", path) + } + + path = CleanPath("../../../var") + if path != "var" { + t.Errorf("expected to receive 'var' and received %s", path) + } + + path = CleanPath("/../../../var") + if path != "/var" { + t.Errorf("expected to receive '/var' and received %s", path) + } + + path = CleanPath("/foo/bar/") + if path != "/foo/bar" { + t.Errorf("expected to receive '/foo/bar' and received %s", path) + } + + path = CleanPath("/foo/bar/../") + if path != "/foo" { + t.Errorf("expected to receive '/foo' and received %s", path) + } +} diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go new file mode 100644 index 0000000..1576f2d --- /dev/null +++ b/libcontainer/utils/utils_unix.go @@ -0,0 
+1,68 @@ +// +build !windows + +package utils + +import ( + "fmt" + "os" + "strconv" + + "golang.org/x/sys/unix" +) + +// EnsureProcHandle returns whether or not the given file handle is on procfs. +func EnsureProcHandle(fh *os.File) error { + var buf unix.Statfs_t + if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil { + return fmt.Errorf("ensure %s is on procfs: %v", fh.Name(), err) + } + if buf.Type != unix.PROC_SUPER_MAGIC { + return fmt.Errorf("%s is not on procfs", fh.Name()) + } + return nil +} + +// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for +// the process (except for those below the given fd value). +func CloseExecFrom(minFd int) error { + fdDir, err := os.Open("/proc/self/fd") + if err != nil { + return err + } + defer fdDir.Close() + + if err := EnsureProcHandle(fdDir); err != nil { + return err + } + + fdList, err := fdDir.Readdirnames(-1) + if err != nil { + return err + } + for _, fdStr := range fdList { + fd, err := strconv.Atoi(fdStr) + // Ignore non-numeric file names. + if err != nil { + continue + } + // Ignore descriptors lower than our specified minimum. + if fd < minFd { + continue + } + // Intentionally ignore errors from unix.CloseOnExec -- the cases where + // this might fail are basically file descriptors that have already + // been closed (including and especially the one that was created when + // ioutil.ReadDir did the "opendir" syscall). 
+ unix.CloseOnExec(fd) + } + return nil +} + +// NewSockPair returns a new unix socket pair +func NewSockPair(name string) (parent *os.File, child *os.File, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil +} diff --git a/list.go b/list.go new file mode 100644 index 0000000..0313d8c --- /dev/null +++ b/list.go @@ -0,0 +1,175 @@ +// +build linux + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "syscall" + "text/tabwriter" + "time" + + "encoding/json" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/user" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/urfave/cli" +) + +const formatOptions = `table or json` + +// containerState represents the platform agnostic pieces relating to a +// running container's status and state +type containerState struct { + // Version is the OCI version for the container + Version string `json:"ociVersion"` + // ID is the container ID + ID string `json:"id"` + // InitProcessPid is the init process id in the parent namespace + InitProcessPid int `json:"pid"` + // Status is the current status of the container, running, paused, ... + Status string `json:"status"` + // Bundle is the path on the filesystem to the bundle + Bundle string `json:"bundle"` + // Rootfs is a path to a directory containing the container's root filesystem. + Rootfs string `json:"rootfs"` + // Created is the unix timestamp for the creation time of the container in UTC + Created time.Time `json:"created"` + // Annotations is the user defined annotations added to the config. + Annotations map[string]string `json:"annotations,omitempty"` + // The owner of the state directory (the owner of the container). 
+ Owner string `json:"owner"` +} + +var listCommand = cli.Command{ + Name: "list", + Usage: "lists containers started by runc with the given root", + ArgsUsage: ` + +Where the given root is specified via the global option "--root" +(default: "/run/runc"). + +EXAMPLE 1: +To list containers created via the default "--root": + # runc list + +EXAMPLE 2: +To list containers created using a non-default value for "--root": + # runc --root value list`, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "format, f", + Value: "table", + Usage: `select one of: ` + formatOptions, + }, + cli.BoolFlag{ + Name: "quiet, q", + Usage: "display only container IDs", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 0, exactArgs); err != nil { + return err + } + s, err := getContainers(context) + if err != nil { + return err + } + + if context.Bool("quiet") { + for _, item := range s { + fmt.Println(item.ID) + } + return nil + } + + switch context.String("format") { + case "table": + w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0) + fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER\n") + for _, item := range s { + fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\t%s\n", + item.ID, + item.InitProcessPid, + item.Status, + item.Bundle, + item.Created.Format(time.RFC3339Nano), + item.Owner) + } + if err := w.Flush(); err != nil { + return err + } + case "json": + if err := json.NewEncoder(os.Stdout).Encode(s); err != nil { + return err + } + default: + return fmt.Errorf("invalid format option") + } + return nil + }, +} + +func getContainers(context *cli.Context) ([]containerState, error) { + factory, err := loadFactory(context) + if err != nil { + return nil, err + } + root := context.GlobalString("root") + absRoot, err := filepath.Abs(root) + if err != nil { + return nil, err + } + list, err := ioutil.ReadDir(absRoot) + if err != nil { + fatal(err) + } + + var s []containerState + for _, item := range list { + if item.IsDir() { + // This cast is safe on Linux. 
+ stat := item.Sys().(*syscall.Stat_t) + owner, err := user.LookupUid(int(stat.Uid)) + if err != nil { + owner.Name = fmt.Sprintf("#%d", stat.Uid) + } + + container, err := factory.Load(item.Name()) + if err != nil { + fmt.Fprintf(os.Stderr, "load container %s: %v\n", item.Name(), err) + continue + } + containerStatus, err := container.Status() + if err != nil { + fmt.Fprintf(os.Stderr, "status for %s: %v\n", item.Name(), err) + continue + } + state, err := container.State() + if err != nil { + fmt.Fprintf(os.Stderr, "state for %s: %v\n", item.Name(), err) + continue + } + pid := state.BaseState.InitProcessPid + if containerStatus == libcontainer.Stopped { + pid = 0 + } + bundle, annotations := utils.Annotations(state.Config.Labels) + s = append(s, containerState{ + Version: state.BaseState.Config.Version, + ID: state.BaseState.ID, + InitProcessPid: pid, + Status: containerStatus.String(), + Bundle: bundle, + Rootfs: state.BaseState.Config.Rootfs, + Created: state.BaseState.Created, + Annotations: annotations, + Owner: owner.Name, + }) + } + } + return s, nil +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..3a8c163 --- /dev/null +++ b/main.go @@ -0,0 +1,176 @@ +package main + +import ( + "fmt" + "io" + "os" + "strings" + + "github.com/opencontainers/runc/libcontainer/logs" + + "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +// version will be populated by the Makefile, read from +// VERSION file of the source code. +var version = "" + +// gitCommit will be the hash that the binary was built from +// and will be populated by the Makefile +var gitCommit = "" + +const ( + specConfig = "config.json" + usage = `Open Container Initiative runtime + +runc is a command line client for running applications packaged according to +the Open Container Initiative (OCI) format and is a compliant implementation of the +Open Container Initiative specification. 
+ +runc integrates well with existing process supervisors to provide a production +container runtime environment for applications. It can be used with your +existing process monitoring tools and the container will be spawned as a +direct child of the process supervisor. + +Containers are configured using bundles. A bundle for a container is a directory +that includes a specification file named "` + specConfig + `" and a root filesystem. +The root filesystem contains the contents of the container. + +To start a new instance of a container: + + # runc run [ -b bundle ] + +Where "" is your name for the instance of the container that you +are starting. The name you provide for the container instance must be unique on +your host. Providing the bundle directory using "-b" is optional. The default +value for "bundle" is the current directory.` +) + +func main() { + app := cli.NewApp() + app.Name = "runc" + app.Usage = usage + + var v []string + if version != "" { + v = append(v, version) + } + if gitCommit != "" { + v = append(v, fmt.Sprintf("commit: %s", gitCommit)) + } + v = append(v, fmt.Sprintf("spec: %s", specs.Version)) + app.Version = strings.Join(v, "\n") + + root := "/run/runc" + if shouldHonorXDGRuntimeDir() { + if runtimeDir := os.Getenv("XDG_RUNTIME_DIR"); runtimeDir != "" { + root = runtimeDir + "/runc" + // According to the XDG specification, we need to set anything in + // XDG_RUNTIME_DIR to have a sticky bit if we don't want it to get + // auto-pruned. 
+ if err := os.MkdirAll(root, 0700); err != nil { + fatal(err) + } + if err := os.Chmod(root, 0700|os.ModeSticky); err != nil { + fatal(err) + } + } + } + + app.Flags = []cli.Flag{ + cli.BoolFlag{ + Name: "debug", + Usage: "enable debug output for logging", + }, + cli.StringFlag{ + Name: "log", + Value: "", + Usage: "set the log file path where internal debug information is written", + }, + cli.StringFlag{ + Name: "log-format", + Value: "text", + Usage: "set the format used by logs ('text' (default), or 'json')", + }, + cli.StringFlag{ + Name: "root", + Value: root, + Usage: "root directory for storage of container state (this should be located in tmpfs)", + }, + cli.StringFlag{ + Name: "criu", + Value: "criu", + Usage: "path to the criu binary used for checkpoint and restore", + }, + cli.BoolFlag{ + Name: "systemd-cgroup", + Usage: "enable systemd cgroup support, expects cgroupsPath to be of form \"slice:prefix:name\" for e.g. \"system.slice:runc:434234\"", + }, + cli.StringFlag{ + Name: "rootless", + Value: "auto", + Usage: "ignore cgroup permission errors ('true', 'false', or 'auto')", + }, + } + app.Commands = []cli.Command{ + checkpointCommand, + createCommand, + deleteCommand, + eventsCommand, + execCommand, + initCommand, + killCommand, + listCommand, + pauseCommand, + psCommand, + restoreCommand, + resumeCommand, + runCommand, + specCommand, + startCommand, + stateCommand, + updateCommand, + } + app.Before = func(context *cli.Context) error { + return logs.ConfigureLogging(createLogConfig(context)) + } + + // If the command returns an error, cli takes upon itself to print + // the error on cli.ErrWriter and exit. + // Use our own writer here to ensure the log gets sent to the right location. 
+ cli.ErrWriter = &FatalWriter{cli.ErrWriter} + if err := app.Run(os.Args); err != nil { + fatal(err) + } +} + +type FatalWriter struct { + cliErrWriter io.Writer +} + +func (f *FatalWriter) Write(p []byte) (n int, err error) { + logrus.Error(string(p)) + return f.cliErrWriter.Write(p) +} + +func createLogConfig(context *cli.Context) logs.Config { + logFilePath := context.GlobalString("log") + logPipeFd := "" + if logFilePath == "" { + logPipeFd = "2" + } + config := logs.Config{ + LogPipeFd: logPipeFd, + LogLevel: logrus.InfoLevel, + LogFilePath: logFilePath, + LogFormat: context.GlobalString("log-format"), + } + if context.GlobalBool("debug") { + config.LogLevel = logrus.DebugLevel + } + + return config +} diff --git a/man/README.md b/man/README.md new file mode 100644 index 0000000..1d7a54f --- /dev/null +++ b/man/README.md @@ -0,0 +1,11 @@ +runc man pages +==================== + +This directory contains man pages for runc in markdown format. + +To generate man pages from it, use this command + + ./md2man-all.sh + +You will see man pages generated under the man8 directory. + diff --git a/man/md2man-all.sh b/man/md2man-all.sh new file mode 100755 index 0000000..f850ddf --- /dev/null +++ b/man/md2man-all.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +# get into this script's directory +cd "$(dirname "$(readlink -f "$BASH_SOURCE")")" + +[ "$1" = '-q' ] || { + set -x + pwd +} + +if ! ( which go-md2man &>/dev/null ); then + echo "To install man pages, please install 'go-md2man'." 
+ exit 0 +fi + +for FILE in *.md; do + base="$(basename "$FILE")" + name="${base%.md}" + num="${name##*.}" + if [ -z "$num" -o "$name" = "$num" ]; then + # skip files that aren't of the format xxxx.N.md (like README.md) + continue + fi + mkdir -p "./man${num}" + go-md2man -in "$FILE" -out "./man${num}/${name}" +done diff --git a/man/runc-checkpoint.8.md b/man/runc-checkpoint.8.md new file mode 100644 index 0000000..08e6b1f --- /dev/null +++ b/man/runc-checkpoint.8.md @@ -0,0 +1,30 @@ +% runc-checkpoint "8" + +# NAME + runc checkpoint - checkpoint a running container + +# SYNOPSIS + runc checkpoint [command options] `<container-id>` + +Where "`<container-id>`" is the name for the instance of the container to be +checkpointed. + +# DESCRIPTION + The checkpoint command saves the state of the container instance. + +# OPTIONS + --image-path value path for saving criu image files + --work-path value path for saving work files and logs + --parent-path value path for previous criu image files in pre-dump + --leave-running leave the process running after checkpointing + --tcp-established allow open tcp connections + --ext-unix-sk allow external unix sockets + --shell-job allow shell jobs + --lazy-pages use userfaultfd to lazily restore memory pages + --status-fd value criu writes \0 to this FD once lazy-pages is ready + --page-server value ADDRESS:PORT of the page server + --file-locks handle file locks, for safety + --pre-dump dump container's memory information only, leave the container running after this + --manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict' + --empty-ns value create a namespace, but don't restore its properties + --auto-dedup enable auto deduplication of memory images diff --git a/man/runc-create.8.md b/man/runc-create.8.md new file mode 100644 index 0000000..99c0a2c --- /dev/null +++ b/man/runc-create.8.md @@ -0,0 +1,29 @@ +% runc-create "8" + +# NAME + runc create - create a container + +# SYNOPSIS + runc create [command options] `<container-id>` + +Where "`<container-id>`" is
your name for the instance of the container that you +are starting. The name you provide for the container instance must be unique on +your host. + +# DESCRIPTION + The create command creates an instance of a container for a bundle. The bundle +is a directory with a specification file named "config.json" and a root +filesystem. + +The specification file includes an args parameter. The args parameter is used +to specify command(s) that get run when the container is started. To change the +command(s) that get executed on start, edit the args parameter of the spec. See +"runc spec --help" for more explanation. + +# OPTIONS + --bundle value, -b value path to the root of the bundle directory, defaults to the current directory + --console-socket value path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal + --pid-file value specify the file to write the process id to + --no-pivot do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk + --no-new-keyring do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key + --preserve-fds value Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) (default: 0) diff --git a/man/runc-delete.8.md b/man/runc-delete.8.md new file mode 100644 index 0000000..84922a0 --- /dev/null +++ b/man/runc-delete.8.md @@ -0,0 +1,19 @@ +% runc-delete "8" + +# NAME + runc delete - delete any resources held by the container often used with detached container + +# SYNOPSIS + runc delete [command options] `<container-id>` + +Where "`<container-id>`" is the name for the instance of the container.
+ +# OPTIONS + --force, -f Forcibly deletes the container if it is still running (uses SIGKILL) + +# EXAMPLE +For example, if the container id is "ubuntu01" and runc list currently shows the +status of "ubuntu01" as "stopped" the following will delete resources held for +"ubuntu01" removing "ubuntu01" from the runc list of containers: + + # runc delete ubuntu01 diff --git a/man/runc-events.8.md b/man/runc-events.8.md new file mode 100644 index 0000000..d998a38 --- /dev/null +++ b/man/runc-events.8.md @@ -0,0 +1,17 @@ +% runc-events "8" + +# NAME + runc events - display container events such as OOM notifications, cpu, memory, and IO usage statistics + +# SYNOPSIS + runc events [command options] `<container-id>` + +Where "`<container-id>`" is the name for the instance of the container. + +# DESCRIPTION + The events command displays information about the container. By default the +information is displayed once every 5 seconds. + +# OPTIONS + --interval value set the stats collection interval (default: 5s) + --stats display the container's stats then exit diff --git a/man/runc-exec.8.md b/man/runc-exec.8.md new file mode 100644 index 0000000..dbaaefe --- /dev/null +++ b/man/runc-exec.8.md @@ -0,0 +1,33 @@ +% runc-exec "8" + +# NAME + runc exec - execute new process inside the container + +# SYNOPSIS + runc exec [command options] `<container-id>` -- `<container command>` [args...] + +Where "`<container-id>`" is the name for the instance of the container and +"`<container command>`" is the command to be executed in the container.
+ +# EXAMPLE +For example, if the container is configured to run the linux ps command the +following will output a list of processes running in the container: + + # runc exec <container-id> ps + +# OPTIONS + --console value specify the pty slave path for use with the container + --cwd value current working directory in the container + --env value, -e value set environment variables + --tty, -t allocate a pseudo-TTY + --user value, -u value UID (format: <uid>[:<gid>]) + --additional-gids value, -g value additional gids + --process value, -p value path to the process.json + --detach, -d detach from the container's process + --pid-file value specify the file to write the process id to + --process-label value set the asm process label for the process commonly used with selinux + --apparmor value set the apparmor profile for the process + --no-new-privs set the no new privileges value for the process + --cap value, -c value add a capability to the bounding set for the process + --no-subreaper disable the use of the subreaper used to reap reparented processes + --preserve-fds value pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) (default: 0) diff --git a/man/runc-kill.8.md b/man/runc-kill.8.md new file mode 100644 index 0000000..1ea579a --- /dev/null +++ b/man/runc-kill.8.md @@ -0,0 +1,20 @@ +% runc-kill "8" + +# NAME + runc kill - kill sends the specified signal (default: SIGTERM) to the container's init process + +# SYNOPSIS + runc kill [command options] `<container-id>` `<signal>` + +Where "`<container-id>`" is the name for the instance of the container and +"`<signal>`" is the signal to be sent to the init process.
+ +# OPTIONS + --all, -a send the specified signal to all processes inside the container + +# EXAMPLE + +For example, if the container id is "ubuntu01" the following will send a "KILL" +signal to the init process of the "ubuntu01" container: + + # runc kill ubuntu01 KILL diff --git a/man/runc-list.8.md b/man/runc-list.8.md new file mode 100644 index 0000000..46cd5d0 --- /dev/null +++ b/man/runc-list.8.md @@ -0,0 +1,21 @@ +% runc-list "8" + +# NAME + runc list - lists containers started by runc with the given root + +# SYNOPSIS + runc list [command options] + +# EXAMPLE +Where the given root is specified via the global option "--root" +(default: "/run/runc"). + +To list containers created via the default "--root": + # runc list + +To list containers created using a non-default value for "--root": + # runc --root value list + +# OPTIONS + --format value, -f value select one of: table or json (default: "table") + --quiet, -q display only container IDs diff --git a/man/runc-pause.8.md b/man/runc-pause.8.md new file mode 100644 index 0000000..965f7da --- /dev/null +++ b/man/runc-pause.8.md @@ -0,0 +1,14 @@ +% runc-pause "8" + +# NAME + runc pause - pause suspends all processes inside the container + +# SYNOPSIS + runc pause `<container-id>` + +Where "`<container-id>`" is the name for the instance of the container to be +paused. + +# DESCRIPTION + The pause command suspends all processes in the instance of the container. +Use runc list to identify instances of containers and their current status. diff --git a/man/runc-ps.8.md b/man/runc-ps.8.md new file mode 100644 index 0000000..1fad467 --- /dev/null +++ b/man/runc-ps.8.md @@ -0,0 +1,15 @@ +% runc-ps "8" + +# NAME + runc ps - ps displays the processes running inside a container + +# SYNOPSIS + runc ps [command options] `<container-id>` [ps options] + +# OPTIONS + --format value, -f value select one of: table(default) or json + +The default format is table.
The following will output the processes of a container +in json format: + + # runc ps -f json <container-id> diff --git a/man/runc-restore.8.md b/man/runc-restore.8.md new file mode 100644 index 0000000..e475bd5 --- /dev/null +++ b/man/runc-restore.8.md @@ -0,0 +1,28 @@ +% runc-restore "8" + +# NAME + runc restore - restore a container from a previous checkpoint + +# SYNOPSIS + runc restore [command options] `<container-id>` + +Where "`<container-id>`" is the name for the instance of the container to be +restored. + +# DESCRIPTION + Restores the saved state of the container instance that was previously saved +using the runc checkpoint command. + +# OPTIONS + --image-path value path to criu image files for restoring + --work-path value path for saving work files and logs + --tcp-established allow open tcp connections + --ext-unix-sk allow external unix sockets + --shell-job allow shell jobs + --file-locks handle file locks, for safety + --manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict' + --bundle value, -b value path to the root of the bundle directory + --detach, -d detach from the container's process + --pid-file value specify the file to write the process id to + --no-subreaper disable the use of the subreaper used to reap reparented processes + --no-pivot do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk diff --git a/man/runc-resume.8.md b/man/runc-resume.8.md new file mode 100644 index 0000000..25d342f --- /dev/null +++ b/man/runc-resume.8.md @@ -0,0 +1,14 @@ +% runc-resume "8" + +# NAME + runc resume - resumes all processes that have been previously paused + +# SYNOPSIS + runc resume `<container-id>` + +Where "`<container-id>`" is the name for the instance of the container to be +resumed. + +# DESCRIPTION + The resume command resumes all processes in the instance of the container. +Use runc list to identify instances of containers and their current status.
diff --git a/man/runc-run.8.md b/man/runc-run.8.md new file mode 100644 index 0000000..ad2b8b2 --- /dev/null +++ b/man/runc-run.8.md @@ -0,0 +1,31 @@ +% runc-run "8" + +# NAME + runc run - create and run a container + +# SYNOPSIS + runc run [command options] `<container-id>` + +Where "`<container-id>`" is your name for the instance of the container that you +are starting. The name you provide for the container instance must be unique on +your host. + +# DESCRIPTION + The run command creates an instance of a container for a bundle. The bundle +is a directory with a specification file named "config.json" and a root +filesystem. + +The specification file includes an args parameter. The args parameter is used +to specify command(s) that get run when the container is started. To change the +command(s) that get executed on start, edit the args parameter of the spec. See +"runc spec --help" for more explanation. + +# OPTIONS + --bundle value, -b value path to the root of the bundle directory, defaults to the current directory + --console-socket value path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal + --detach, -d detach from the container's process + --pid-file value specify the file to write the process id to + --no-subreaper disable the use of the subreaper used to reap reparented processes + --no-pivot do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk + --no-new-keyring do not create a new session keyring for the container.
This will cause the container to inherit the calling processes session key + --preserve-fds value Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) (default: 0) diff --git a/man/runc-spec.8.md b/man/runc-spec.8.md new file mode 100644 index 0000000..6a181cd --- /dev/null +++ b/man/runc-spec.8.md @@ -0,0 +1,56 @@ +% runc-spec "8" + +# NAME + runc spec - create a new specification file + +# SYNOPSIS + runc spec [command options] [arguments...] + +# DESCRIPTION + The spec command creates the new specification file named "config.json" for +the bundle. + +The spec generated is just a starter file. Editing of the spec is required to +achieve desired results. For example, the newly generated spec includes an args +parameter that is initially set to call the "sh" command when the container is +started. Calling "sh" may work for an ubuntu container or busybox, but will not +work for containers that do not include the "sh" program. + +# EXAMPLE + To run docker's hello-world container one needs to set the args parameter +in the spec to call hello. This can be done using the sed command or a text +editor. The following commands create a bundle for hello-world, change the +default args parameter in the spec from "sh" to "/hello", then run the hello +command in a new hello-world container named container1: + + mkdir hello + cd hello + docker pull hello-world + docker export $(docker create hello-world) > hello-world.tar + mkdir rootfs + tar -C rootfs -xf hello-world.tar + runc spec + sed -i 's;"sh";"/hello";' config.json + runc start container1 + +In the start command above, "container1" is the name for the instance of the +container that you are starting. The name you provide for the container instance +must be unique on your host. 
+ +An alternative for generating a customized spec config is to use "oci-runtime-tool", the +sub-command "oci-runtime-tool generate" has lots of options that can be used to do any +customizations as you want, see [runtime-tools](https://github.com/opencontainers/runtime-tools) +to get more information. + +When starting a container through runc, runc needs root privilege. If not +already running as root, you can use sudo to give runc root privilege. For +example: "sudo runc start container1" will give runc root privilege to start the +container on your host. + +Alternatively, you can start a rootless container, which has the ability to run without root privileges. +For this to work, the specification file needs to be adjusted accordingly. +You can pass the parameter **--rootless** to this command to generate a proper rootless spec file. + +# OPTIONS + --bundle value, -b value path to the root of the bundle directory + --rootless generate a configuration for a rootless container diff --git a/man/runc-start.8.md b/man/runc-start.8.md new file mode 100644 index 0000000..e4bbacc --- /dev/null +++ b/man/runc-start.8.md @@ -0,0 +1,14 @@ +% runc-start "8" + +# NAME + runc start - start executes the user defined process in a created container + +# SYNOPSIS + runc start `<container-id>` + +Where "`<container-id>`" is your name for the instance of the container that you +are starting. The name you provide for the container instance must be unique on +your host. + +# DESCRIPTION + The start command executes the user defined process in a created container. diff --git a/man/runc-state.8.md b/man/runc-state.8.md new file mode 100644 index 0000000..768f79f --- /dev/null +++ b/man/runc-state.8.md @@ -0,0 +1,13 @@ +% runc-state "8" + +# NAME + runc state - output the state of a container + +# SYNOPSIS + runc state `<container-id>` + +Where "`<container-id>`" is your name for the instance of the container. + +# DESCRIPTION + The state command outputs current state information for the +instance of a container.
diff --git a/man/runc-update.8.md b/man/runc-update.8.md new file mode 100644 index 0000000..fa269d6 --- /dev/null +++ b/man/runc-update.8.md @@ -0,0 +1,55 @@ +% runc-update "8" + +# NAME + runc update - update container resource constraints + +# SYNOPSIS + runc update [command options] `<container-id>` + +# DESCRIPTION + The data can be read from a file or the standard input, the +accepted format is as follows (unchanged values can be omitted): + + { + "memory": { + "limit": 0, + "reservation": 0, + "swap": 0, + "kernel": 0, + "kernelTCP": 0 + }, + "cpu": { + "shares": 0, + "quota": 0, + "period": 0, + "realtimeRuntime": 0, + "realtimePeriod": 0, + "cpus": "", + "mems": "" + }, + "blockIO": { + "blkioWeight": 0 + } + } + +Note: if data is to be read from a file or the standard input, all +other options are ignored. + +# OPTIONS + --resources value, -r value path to the file containing the resources to update or '-' to read from the standard input + --blkio-weight value Specifies per cgroup weight, range is from 10 to 1000 (default: 0) + --cpu-period value CPU CFS period to be used for hardcapping (in usecs). 0 to use system default + --cpu-quota value CPU CFS hardcap limit (in usecs). Allowed cpu time in a given period + --cpu-rt-period value CPU realtime period to be used for hardcapping (in usecs). 0 to use system default + --cpu-rt-runtime value CPU realtime hardcap limit (in usecs). Allowed cpu time in a given period + --cpu-share value CPU shares (relative weight vs.
other containers) + --cpuset-cpus value CPU(s) to use + --cpuset-mems value Memory node(s) to use + --kernel-memory value Kernel memory limit (in bytes) + --kernel-memory-tcp value Kernel memory limit (in bytes) for tcp buffer + --memory value Memory limit (in bytes) + --memory-reservation value Memory reservation or soft_limit (in bytes) + --memory-swap value Total memory usage (memory + swap); set '-1' to enable unlimited swap + --pids-limit value Maximum number of pids allowed in the container (default: 0) + --l3-cache-schema The string of Intel RDT/CAT L3 cache schema + --mem-bw-schema The string of Intel RDT/MBA memory bandwidth schema diff --git a/man/runc.8.md b/man/runc.8.md new file mode 100644 index 0000000..49df525 --- /dev/null +++ b/man/runc.8.md @@ -0,0 +1,61 @@ +% runc "8" + +# NAME + runc - Open Container Initiative runtime + +# SYNOPSIS + runc [global options] command [command options] [arguments...] + +# DESCRIPTION +runc is a command line client for running applications packaged according to +the Open Container Initiative (OCI) format and is a compliant implementation of the +Open Container Initiative specification. + +runc integrates well with existing process supervisors to provide a production +container runtime environment for applications. It can be used with your +existing process monitoring tools and the container will be spawned as a +direct child of the process supervisor. + +Containers are configured using bundles. A bundle for a container is a directory +that includes a specification file named "config.json" and a root filesystem. +The root filesystem contains the contents of the container. + +To start a new instance of a container: + + # runc start [ -b bundle ] <container-id> + +Where "`<container-id>`" is your name for the instance of the container that you +are starting. The name you provide for the container instance must be unique on +your host. Providing the bundle directory using "-b" is optional. The default +value for "bundle" is the current directory.
+ +# COMMANDS + checkpoint checkpoint a running container + create create a container + delete delete any resources held by the container often used with detached containers + events display container events such as OOM notifications, cpu, memory, IO and network stats + exec execute new process inside the container + init initialize the namespaces and launch the process (do not call it outside of runc) + kill kill sends the specified signal (default: SIGTERM) to the container's init process + list lists containers started by runc with the given root + pause pause suspends all processes inside the container + ps displays the processes running inside a container + restore restore a container from a previous checkpoint + resume resumes all processes that have been previously paused + run create and run a container + spec create a new specification file + start executes the user defined process in a created container + state output the state of a container + update update container resource constraints + help, h Shows a list of commands or help for one command + +# GLOBAL OPTIONS + --debug enable debug output for logging + --log value set the log file path where internal debug information is written (default: "/dev/null") + --log-format value set the format used by logs ('text' (default), or 'json') (default: "text") + --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc" or $XDG_RUNTIME_DIR/runc for rootless containers) + --criu value path to the criu binary used for checkpoint and restore (default: "criu") + --systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g.
"system.slice:runc:434234" + --rootless value enable rootless mode ('true', 'false', or 'auto') (default: "auto") + --help, -h show help + --version, -v print the version diff --git a/notify_socket.go b/notify_socket.go new file mode 100644 index 0000000..e7453c6 --- /dev/null +++ b/notify_socket.go @@ -0,0 +1,116 @@ +// +build linux + +package main + +import ( + "bytes" + "fmt" + "net" + "os" + "path/filepath" + + "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +type notifySocket struct { + socket *net.UnixConn + host string + socketPath string +} + +func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *notifySocket { + if notifySocketHost == "" { + return nil + } + + root := filepath.Join(context.GlobalString("root"), id) + path := filepath.Join(root, "notify.sock") + + notifySocket := &notifySocket{ + socket: nil, + host: notifySocketHost, + socketPath: path, + } + + return notifySocket +} + +func (s *notifySocket) Close() error { + return s.socket.Close() +} + +// If systemd is supporting sd_notify protocol, this function will add support +// for sd_notify protocol from within the container.
+func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) { + mount := specs.Mount{Destination: s.host, Source: s.socketPath, Options: []string{"bind"}} + spec.Mounts = append(spec.Mounts, mount) + spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host)) +} + +func (s *notifySocket) setupSocket() error { + addr := net.UnixAddr{ + Name: s.socketPath, + Net: "unixgram", + } + + socket, err := net.ListenUnixgram("unixgram", &addr) + if err != nil { + return err + } + + err = os.Chmod(s.socketPath, 0777) + if err != nil { + socket.Close() + return err + } + + s.socket = socket + return nil +} + +// pid1 must be set only with -d, as it is used to set the new process as the main process +// for the service in systemd +func (s *notifySocket) run(pid1 int) { + buf := make([]byte, 512) + notifySocketHostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"} + client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr) + if err != nil { + logrus.Error(err) + return + } + for { + r, err := s.socket.Read(buf) + if err != nil { + break + } + var out bytes.Buffer + for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) { + if bytes.HasPrefix(line, []byte("READY=")) { + _, err = out.Write(line) + if err != nil { + return + } + + _, err = out.Write([]byte{'\n'}) + if err != nil { + return + } + + _, err = client.Write(out.Bytes()) + if err != nil { + return + } + + // now we can inform systemd to use pid1 as the pid to monitor + if pid1 > 0 { + newPid := fmt.Sprintf("MAINPID=%d\n", pid1) + client.Write([]byte(newPid)) + } + return + } + } + } +} diff --git a/pause.go b/pause.go new file mode 100644 index 0000000..224c79f --- /dev/null +++ b/pause.go @@ -0,0 +1,66 @@ +// +build linux + +package main + +import ( + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +var pauseCommand = cli.Command{ + Name: "pause", + Usage: "pause suspends all processes inside the container", + ArgsUsage: ` + +Where "" is the name for the
instance of the container to be +paused. `, + Description: `The pause command suspends all processes in the instance of the container. + +Use runc list to identify instances of containers and their current status.`, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + rootlessCg, err := shouldUseRootlessCgroupManager(context) + if err != nil { + return err + } + if rootlessCg { + logrus.Warnf("runc pause may fail if you don't have the full access to cgroups") + } + container, err := getContainer(context) + if err != nil { + return err + } + return container.Pause() + }, +} + +var resumeCommand = cli.Command{ + Name: "resume", + Usage: "resumes all processes that have been previously paused", + ArgsUsage: ` + +Where "" is the name for the instance of the container to be +resumed.`, + Description: `The resume command resumes all processes in the instance of the container. + +Use runc list to identify instances of containers and their current status.`, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + rootlessCg, err := shouldUseRootlessCgroupManager(context) + if err != nil { + return err + } + if rootlessCg { + logrus.Warn("runc resume may fail if you don't have the full access to cgroups") + } + container, err := getContainer(context) + if err != nil { + return err + } + return container.Resume() + }, +} diff --git a/ps.go b/ps.go new file mode 100644 index 0000000..e7f635f --- /dev/null +++ b/ps.go @@ -0,0 +1,113 @@ +// +build linux + +package main + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "strconv" + "strings" + + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +var psCommand = cli.Command{ + Name: "ps", + Usage: "ps displays the processes running inside a container", + ArgsUsage: ` [ps options]`, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "format, f", + Value: "table", + Usage: `select one 
of: ` + formatOptions, + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, minArgs); err != nil { + return err + } + rootlessCg, err := shouldUseRootlessCgroupManager(context) + if err != nil { + return err + } + if rootlessCg { + logrus.Warn("runc ps may fail if you don't have the full access to cgroups") + } + + container, err := getContainer(context) + if err != nil { + return err + } + + pids, err := container.Processes() + if err != nil { + return err + } + + switch context.String("format") { + case "table": + case "json": + return json.NewEncoder(os.Stdout).Encode(pids) + default: + return fmt.Errorf("invalid format option") + } + + // [1:] is to remove command name, ex: + // context.Args(): [containet_id ps_arg1 ps_arg2 ...] + // psArgs: [ps_arg1 ps_arg2 ...] + // + psArgs := context.Args()[1:] + if len(psArgs) == 0 { + psArgs = []string{"-ef"} + } + + cmd := exec.Command("ps", psArgs...) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("%s: %s", err, output) + } + + lines := strings.Split(string(output), "\n") + pidIndex, err := getPidIndex(lines[0]) + if err != nil { + return err + } + + fmt.Println(lines[0]) + for _, line := range lines[1:] { + if len(line) == 0 { + continue + } + fields := strings.Fields(line) + p, err := strconv.Atoi(fields[pidIndex]) + if err != nil { + return fmt.Errorf("unexpected pid '%s': %s", fields[pidIndex], err) + } + + for _, pid := range pids { + if pid == p { + fmt.Println(line) + break + } + } + } + return nil + }, + SkipArgReorder: true, +} + +func getPidIndex(title string) (int, error) { + titles := strings.Fields(title) + + pidIndex := -1 + for i, name := range titles { + if name == "PID" { + return i, nil + } + } + + return pidIndex, fmt.Errorf("couldn't find PID field in ps output") +} diff --git a/restore.go b/restore.go new file mode 100644 index 0000000..53f50d2 --- /dev/null +++ b/restore.go @@ -0,0 +1,142 @@ +// +build linux + +package main + +import 
( + "os" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +var restoreCommand = cli.Command{ + Name: "restore", + Usage: "restore a container from a previous checkpoint", + ArgsUsage: ` + +Where "" is the name for the instance of the container to be +restored.`, + Description: `Restores the saved state of the container instance that was previously saved +using the runc checkpoint command.`, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "console-socket", + Value: "", + Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal", + }, + cli.StringFlag{ + Name: "image-path", + Value: "", + Usage: "path to criu image files for restoring", + }, + cli.StringFlag{ + Name: "work-path", + Value: "", + Usage: "path for saving work files and logs", + }, + cli.BoolFlag{ + Name: "tcp-established", + Usage: "allow open tcp connections", + }, + cli.BoolFlag{ + Name: "ext-unix-sk", + Usage: "allow external unix sockets", + }, + cli.BoolFlag{ + Name: "shell-job", + Usage: "allow shell jobs", + }, + cli.BoolFlag{ + Name: "file-locks", + Usage: "handle file locks, for safety", + }, + cli.StringFlag{ + Name: "manage-cgroups-mode", + Value: "", + Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'", + }, + cli.StringFlag{ + Name: "bundle, b", + Value: "", + Usage: "path to the root of the bundle directory", + }, + cli.BoolFlag{ + Name: "detach,d", + Usage: "detach from the container's process", + }, + cli.StringFlag{ + Name: "pid-file", + Value: "", + Usage: "specify the file to write the process id to", + }, + cli.BoolFlag{ + Name: "no-subreaper", + Usage: "disable the use of the subreaper used to reap reparented processes", + }, + cli.BoolFlag{ + Name: "no-pivot", + Usage: "do not use pivot root to jail process inside rootfs. 
This should be used whenever the rootfs is on top of a ramdisk", + }, + cli.StringSliceFlag{ + Name: "empty-ns", + Usage: "create a namespace, but don't restore its properties", + }, + cli.BoolFlag{ + Name: "auto-dedup", + Usage: "enable auto deduplication of memory images", + }, + cli.BoolFlag{ + Name: "lazy-pages", + Usage: "use userfaultfd to lazily restore memory pages", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + // XXX: Currently this is untested with rootless containers. + if os.Geteuid() != 0 || system.RunningInUserNS() { + logrus.Warn("runc checkpoint is untested with rootless containers") + } + + spec, err := setupSpec(context) + if err != nil { + return err + } + options := criuOptions(context) + if err := setEmptyNsMask(context, options); err != nil { + return err + } + status, err := startContainer(context, spec, CT_ACT_RESTORE, options) + if err != nil { + return err + } + // exit with the container's exit status so any external supervisor is + // notified of the exit with the correct exit status. 
+ os.Exit(status) + return nil + }, +} + +func criuOptions(context *cli.Context) *libcontainer.CriuOpts { + imagePath := getCheckpointImagePath(context) + if err := os.MkdirAll(imagePath, 0755); err != nil { + fatal(err) + } + return &libcontainer.CriuOpts{ + ImagesDirectory: imagePath, + WorkDirectory: context.String("work-path"), + ParentImage: context.String("parent-path"), + LeaveRunning: context.Bool("leave-running"), + TcpEstablished: context.Bool("tcp-established"), + ExternalUnixConnections: context.Bool("ext-unix-sk"), + ShellJob: context.Bool("shell-job"), + FileLocks: context.Bool("file-locks"), + PreDump: context.Bool("pre-dump"), + AutoDedup: context.Bool("auto-dedup"), + LazyPages: context.Bool("lazy-pages"), + StatusFd: context.String("status-fd"), + } +} diff --git a/rlimit_linux.go b/rlimit_linux.go new file mode 100644 index 0000000..c97a0fb --- /dev/null +++ b/rlimit_linux.go @@ -0,0 +1,49 @@ +package main + +import "fmt" + +const ( + RLIMIT_CPU = iota // CPU time in sec + RLIMIT_FSIZE // Maximum filesize + RLIMIT_DATA // max data size + RLIMIT_STACK // max stack size + RLIMIT_CORE // max core file size + RLIMIT_RSS // max resident set size + RLIMIT_NPROC // max number of processes + RLIMIT_NOFILE // max number of open files + RLIMIT_MEMLOCK // max locked-in-memory address space + RLIMIT_AS // address space limit + RLIMIT_LOCKS // maximum file locks held + RLIMIT_SIGPENDING // max number of pending signals + RLIMIT_MSGQUEUE // maximum bytes in POSIX mqueues + RLIMIT_NICE // max nice prio allowed to raise to + RLIMIT_RTPRIO // maximum realtime priority + RLIMIT_RTTIME // timeout for RT tasks in us +) + +var rlimitMap = map[string]int{ + "RLIMIT_CPU": RLIMIT_CPU, + "RLIMIT_FSIZE": RLIMIT_FSIZE, + "RLIMIT_DATA": RLIMIT_DATA, + "RLIMIT_STACK": RLIMIT_STACK, + "RLIMIT_CORE": RLIMIT_CORE, + "RLIMIT_RSS": RLIMIT_RSS, + "RLIMIT_NPROC": RLIMIT_NPROC, + "RLIMIT_NOFILE": RLIMIT_NOFILE, + "RLIMIT_MEMLOCK": RLIMIT_MEMLOCK, + "RLIMIT_AS": RLIMIT_AS, + 
"RLIMIT_LOCKS": RLIMIT_LOCKS, + "RLIMIT_SIGPENDING": RLIMIT_SIGPENDING, + "RLIMIT_MSGQUEUE": RLIMIT_MSGQUEUE, + "RLIMIT_NICE": RLIMIT_NICE, + "RLIMIT_RTPRIO": RLIMIT_RTPRIO, + "RLIMIT_RTTIME": RLIMIT_RTTIME, +} + +func strToRlimit(key string) (int, error) { + rl, ok := rlimitMap[key] + if !ok { + return 0, fmt.Errorf("wrong rlimit value: %s", key) + } + return rl, nil +} diff --git a/rootless_linux.go b/rootless_linux.go new file mode 100644 index 0000000..3c425dc --- /dev/null +++ b/rootless_linux.go @@ -0,0 +1,58 @@ +// +build linux + +package main + +import ( + "os" + + "github.com/opencontainers/runc/libcontainer/system" + "github.com/urfave/cli" +) + +func shouldUseRootlessCgroupManager(context *cli.Context) (bool, error) { + if context != nil { + b, err := parseBoolOrAuto(context.GlobalString("rootless")) + if err != nil { + return false, err + } + // nil b stands for "auto detect" + if b != nil { + return *b, nil + } + + if context.GlobalBool("systemd-cgroup") { + return false, nil + } + } + if os.Geteuid() != 0 { + return true, nil + } + if !system.RunningInUserNS() { + // euid == 0 , in the initial ns (i.e. the real root) + return false, nil + } + // euid = 0, in a userns. + // As we are unaware of cgroups path, we can't determine whether we have the full + // access to the cgroups path. + // Either way, we can safely decide to use the rootless cgroups manager. + return true, nil +} + +func shouldHonorXDGRuntimeDir() bool { + if os.Getenv("XDG_RUNTIME_DIR") == "" { + return false + } + if os.Geteuid() != 0 { + return true + } + if !system.RunningInUserNS() { + // euid == 0 , in the initial ns (i.e. the real root) + // in this case, we should use /run/runc and ignore + // $XDG_RUNTIME_DIR (e.g. /run/user/0) for backward + // compatibility. + return false + } + // euid = 0, in a userns. 
+ u, ok := os.LookupEnv("USER") + return !ok || u != "root" +} diff --git a/run.go b/run.go new file mode 100644 index 0000000..f8d6317 --- /dev/null +++ b/run.go @@ -0,0 +1,84 @@ +// +build linux + +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +// default action is to start a container +var runCommand = cli.Command{ + Name: "run", + Usage: "create and run a container", + ArgsUsage: ` + +Where "" is your name for the instance of the container that you +are starting. The name you provide for the container instance must be unique on +your host.`, + Description: `The run command creates an instance of a container for a bundle. The bundle +is a directory with a specification file named "` + specConfig + `" and a root +filesystem. + +The specification file includes an args parameter. The args parameter is used +to specify command(s) that get run when the container is started. To change the +command(s) that get executed on start, edit the args parameter of the spec. See +"runc spec --help" for more explanation.`, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "bundle, b", + Value: "", + Usage: `path to the root of the bundle directory, defaults to the current directory`, + }, + cli.StringFlag{ + Name: "console-socket", + Value: "", + Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal", + }, + cli.BoolFlag{ + Name: "detach, d", + Usage: "detach from the container's process", + }, + cli.StringFlag{ + Name: "pid-file", + Value: "", + Usage: "specify the file to write the process id to", + }, + cli.BoolFlag{ + Name: "no-subreaper", + Usage: "disable the use of the subreaper used to reap reparented processes", + }, + cli.BoolFlag{ + Name: "no-pivot", + Usage: "do not use pivot root to jail process inside rootfs. 
This should be used whenever the rootfs is on top of a ramdisk", + }, + cli.BoolFlag{ + Name: "no-new-keyring", + Usage: "do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key", + }, + cli.IntFlag{ + Name: "preserve-fds", + Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + if err := revisePidFile(context); err != nil { + return err + } + spec, err := setupSpec(context) + if err != nil { + return err + } + status, err := startContainer(context, spec, CT_ACT_RUN, nil) + if err == nil { + // exit with the container's exit status so any external supervisor is + // notified of the exit with the correct exit status. + os.Exit(status) + } + return err + }, +} diff --git a/script/.validate b/script/.validate new file mode 100644 index 0000000..170d674 --- /dev/null +++ b/script/.validate @@ -0,0 +1,33 @@ +#!/bin/bash + +if [ -z "$VALIDATE_UPSTREAM" ]; then + # this is kind of an expensive check, so let's not do this twice if we + # are running more than one validate bundlescript + + VALIDATE_REPO='https://github.com/opencontainers/runc.git' + VALIDATE_BRANCH='master' + + if [ "$TRAVIS" = 'true' -a "$TRAVIS_PULL_REQUEST" != 'false' ]; then + VALIDATE_REPO="https://github.com/${TRAVIS_REPO_SLUG}.git" + VALIDATE_BRANCH="${TRAVIS_BRANCH}" + fi + + VALIDATE_HEAD="$(git rev-parse --verify HEAD)" + + git fetch -q "$VALIDATE_REPO" "refs/heads/$VALIDATE_BRANCH" + VALIDATE_UPSTREAM="$(git rev-parse --verify FETCH_HEAD)" + + VALIDATE_COMMIT_LOG="$VALIDATE_UPSTREAM..$VALIDATE_HEAD" + VALIDATE_COMMIT_DIFF="$VALIDATE_UPSTREAM...$VALIDATE_HEAD" + + validate_diff() { + if [ "$VALIDATE_UPSTREAM" != "$VALIDATE_HEAD" ]; then + git diff "$VALIDATE_COMMIT_DIFF" "$@" + fi + } + validate_log() { + if [ "$VALIDATE_UPSTREAM" != "$VALIDATE_HEAD" 
]; then + git log "$VALIDATE_COMMIT_LOG" "$@" + fi + } +fi diff --git a/script/check-config.sh b/script/check-config.sh new file mode 100755 index 0000000..6b8158e --- /dev/null +++ b/script/check-config.sh @@ -0,0 +1,253 @@ +#!/usr/bin/env bash +set -e + +# bits of this were adapted from check_config.sh in docker +# see also https://github.com/docker/docker/blob/master/contrib/check-config.sh + +possibleConfigs=( + '/proc/config.gz' + "/boot/config-$(uname -r)" + "/usr/src/linux-$(uname -r)/.config" + '/usr/src/linux/.config' +) +possibleConfigFiles=( + 'config.gz' + "config-$(uname -r)" + '.config' +) + +if ! command -v zgrep &>/dev/null; then + zgrep() { + zcat "$2" | grep "$1" + } +fi + +kernelVersion="$(uname -r)" +kernelMajor="${kernelVersion%%.*}" +kernelMinor="${kernelVersion#$kernelMajor.}" +kernelMinor="${kernelMinor%%.*}" + +is_set() { + zgrep "CONFIG_$1=[y|m]" "$CONFIG" >/dev/null +} +is_set_in_kernel() { + zgrep "CONFIG_$1=y" "$CONFIG" >/dev/null +} +is_set_as_module() { + zgrep "CONFIG_$1=m" "$CONFIG" >/dev/null +} + +color() { + local codes=() + if [ "$1" = 'bold' ]; then + codes=("${codes[@]}" '1') + shift + fi + if [ "$#" -gt 0 ]; then + local code + case "$1" in + # see https://en.wikipedia.org/wiki/ANSI_escape_code#Colors + black) code=30 ;; + red) code=31 ;; + green) code=32 ;; + yellow) code=33 ;; + blue) code=34 ;; + magenta) code=35 ;; + cyan) code=36 ;; + white) code=37 ;; + esac + if [ "$code" ]; then + codes=("${codes[@]}" "$code") + fi + fi + local IFS=';' + echo -en '\033['"${codes[*]}"'m' +} +wrap_color() { + text="$1" + shift + color "$@" + echo -n "$text" + color reset + echo +} + +wrap_good() { + echo "$(wrap_color "$1" white): $(wrap_color "$2" green)" +} +wrap_bad() { + echo "$(wrap_color "$1" bold): $(wrap_color "$2" bold red)" +} +wrap_warning() { + wrap_color >&2 "$*" red +} + +check_flag() { + if is_set_in_kernel "$1"; then + wrap_good "CONFIG_$1" 'enabled' + elif is_set_as_module "$1"; then + wrap_good "CONFIG_$1" 'enabled (as 
module)' + else + wrap_bad "CONFIG_$1" 'missing' + fi +} + +check_flags() { + for flag in "$@"; do + echo "- $(check_flag "$flag")" + done +} + +check_distro_userns() { + source /etc/os-release 2>/dev/null || /bin/true + if [[ "${ID}" =~ ^(centos|rhel)$ && "${VERSION_ID}" =~ ^7 ]]; then + # this is a CentOS7 or RHEL7 system + grep -q "user_namespace.enable=1" /proc/cmdline || { + # no user namespace support enabled + wrap_bad " (RHEL7/CentOS7" "User namespaces disabled; add 'user_namespace.enable=1' to boot command line)" + } + fi +} + +is_config() { + local config="$1" + + # Todo: more check + [[ -f "$config" ]] && return 0 + return 1 +} + +search_config() { + local target_dir="$1" + [[ "$target_dir" ]] || target_dir=("${possibleConfigs[@]}") + + local tryConfig + for tryConfig in "${target_dir[@]}"; do + is_config "$tryConfig" && { + CONFIG="$tryConfig" + return + } + [[ -d "$tryConfig" ]] && { + for tryFile in "${possibleConfigFiles[@]}"; do + is_config "$tryConfig/$tryFile" && { + CONFIG="$tryConfig/$tryFile" + return + } + done + } + done + + wrap_warning "error: cannot find kernel config" + wrap_warning " try running this script again, specifying the kernel config:" + wrap_warning " CONFIG=/path/to/kernel/.config $0 or $0 /path/to/kernel/.config" + exit 1 +} + +CONFIG="$1" + +is_config "$CONFIG" || { + if [[ ! "$CONFIG" ]]; then + wrap_color "info: no config specified, searching for kernel config ..." white + search_config + elif [[ -d "$CONFIG" ]]; then + wrap_color "info: input is a directory, searching for kernel config in this directory..." white + search_config "$CONFIG" + else + wrap_warning "warning: $CONFIG seems not a kernel config, searching other paths for kernel config ..." + search_config + fi +} + +wrap_color "info: reading kernel config from $CONFIG ..." 
white +echo + +echo 'Generally Necessary:' + +echo -n '- ' +cgroupSubsystemDir="$(awk '/[, ](cpu|cpuacct|cpuset|devices|freezer|memory)[, ]/ && $3 == "cgroup" { print $2 }' /proc/mounts | head -n1)" +cgroupDir="$(dirname "$cgroupSubsystemDir")" +if [ -d "$cgroupDir/cpu" -o -d "$cgroupDir/cpuacct" -o -d "$cgroupDir/cpuset" -o -d "$cgroupDir/devices" -o -d "$cgroupDir/freezer" -o -d "$cgroupDir/memory" ]; then + echo "$(wrap_good 'cgroup hierarchy' 'properly mounted') [$cgroupDir]" +else + if [ "$cgroupSubsystemDir" ]; then + echo "$(wrap_bad 'cgroup hierarchy' 'single mountpoint!') [$cgroupSubsystemDir]" + else + echo "$(wrap_bad 'cgroup hierarchy' 'nonexistent??')" + fi + echo " $(wrap_color '(see https://github.com/tianon/cgroupfs-mount)' yellow)" +fi + +if [ "$(cat /sys/module/apparmor/parameters/enabled 2>/dev/null)" = 'Y' ]; then + echo -n '- ' + if command -v apparmor_parser &>/dev/null; then + echo "$(wrap_good 'apparmor' 'enabled and tools installed')" + else + echo "$(wrap_bad 'apparmor' 'enabled, but apparmor_parser missing')" + echo -n ' ' + if command -v apt-get &>/dev/null; then + echo "$(wrap_color '(use "apt-get install apparmor" to fix this)')" + elif command -v yum &>/dev/null; then + echo "$(wrap_color '(your best bet is "yum install apparmor-parser")')" + else + echo "$(wrap_color '(look for an "apparmor" package for your distribution)')" + fi + fi +fi + +flags=( + NAMESPACES {NET,PID,IPC,UTS}_NS + CGROUPS CGROUP_CPUACCT CGROUP_DEVICE CGROUP_FREEZER CGROUP_SCHED CPUSETS MEMCG + KEYS + VETH BRIDGE BRIDGE_NETFILTER + NF_NAT_IPV4 IP_NF_FILTER IP_NF_TARGET_MASQUERADE + NETFILTER_XT_MATCH_{ADDRTYPE,CONNTRACK,IPVS} + IP_NF_NAT NF_NAT NF_NAT_NEEDED + + # required for bind-mounting /dev/mqueue into containers + POSIX_MQUEUE +) +check_flags "${flags[@]}" +echo + +echo 'Optional Features:' +{ + check_flags USER_NS + check_distro_userns + + check_flags SECCOMP + check_flags CGROUP_PIDS + + check_flags MEMCG_SWAP MEMCG_SWAP_ENABLED + if is_set MEMCG_SWAP && ! 
is_set MEMCG_SWAP_ENABLED; then + echo " $(wrap_color '(note that cgroup swap accounting is not enabled in your kernel config, you can enable it by setting boot option "swapaccount=1")' bold black)" + fi +} + +if [ "$kernelMajor" -lt 4 ] || [ "$kernelMajor" -eq 4 -a "$kernelMinor" -le 5 ]; then + check_flags MEMCG_KMEM +fi + +if [ "$kernelMajor" -lt 3 ] || [ "$kernelMajor" -eq 3 -a "$kernelMinor" -le 18 ]; then + check_flags RESOURCE_COUNTERS +fi + +if [ "$kernelMajor" -lt 3 ] || [ "$kernelMajor" -eq 3 -a "$kernelMinor" -le 13 ]; then + netprio=NETPRIO_CGROUP +else + netprio=CGROUP_NET_PRIO +fi + +flags=( + BLK_CGROUP BLK_DEV_THROTTLING IOSCHED_CFQ CFQ_GROUP_IOSCHED + CGROUP_PERF + CGROUP_HUGETLB + NET_CLS_CGROUP $netprio + CFS_BANDWIDTH FAIR_GROUP_SCHED RT_GROUP_SCHED + IP_NF_TARGET_REDIRECT + IP_VS + IP_VS_NFCT + IP_VS_PROTO_TCP + IP_VS_PROTO_UDP + IP_VS_RR +) +check_flags "${flags[@]}" diff --git a/script/release.sh b/script/release.sh new file mode 100755 index 0000000..a1ebc95 --- /dev/null +++ b/script/release.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# Copyright (C) 2017 SUSE LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +## ---> +# Project-specific options and functions. In *theory* you shouldn't need to +# touch anything else in this script in order to use this elsewhere. 
+project="runc" +root="$(readlink -f "$(dirname "${BASH_SOURCE}")/..")" + +# This function takes an output path as an argument, where the built +# (preferably static) binary should be placed. +function build_project() { + builddir="$(dirname "$1")" + + # Build with all tags enabled. + make -C "$root" COMMIT_NO= BUILDTAGS="seccomp selinux apparmor" static + mv "$root/$project" "$1" +} + +# End of the easy-to-configure portion. +## <--- + +# Print usage information. +function usage() { + echo "usage: release.sh [-S ] [-c ] [-r ] [-v ]" >&2 + exit 1 +} + +# Log something to stderr. +function log() { + echo "[*] $*" >&2 +} + +# Log something to stderr and then exit with 0. +function bail() { + log "$@" + exit 0 +} + +# Conduct a sanity-check to make sure that GPG provided with the given +# arguments can sign something. Inability to sign things is not a fatal error. +function gpg_cansign() { + gpg "$@" --clear-sign /dev/null +} + +# When creating releases we need to build static binaries, an archive of the +# current commit, and generate detached signatures for both. +keyid="" +commit="HEAD" +version="" +releasedir="" +hashcmd="" +while getopts "S:c:r:v:h:" opt; do + case "$opt" in + S) + keyid="$OPTARG" + ;; + c) + commit="$OPTARG" + ;; + r) + releasedir="$OPTARG" + ;; + v) + version="$OPTARG" + ;; + h) + hashcmd="$OPTARG" + ;; + \:) + echo "Missing argument: -$OPTARG" >&2 + usage + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + usage + ;; + esac +done + +version="${version:-$(<"$root/VERSION")}" +releasedir="${releasedir:-release/$version}" +hashcmd="${hashcmd:-sha256sum}" +goarch="$(go env GOARCH || echo "amd64")" + +log "creating $project release in '$releasedir'" +log " version: $version" +log " commit: $commit" +log " key: ${keyid:-DEFAULT}" +log " hash: $hashcmd" + +# Make explicit what we're doing. +set -x + +# Make the release directory. +rm -rf "$releasedir" && mkdir -p "$releasedir" + +# Build project. 
+build_project "$releasedir/$project.$goarch" + +# Generate new archive. +git archive --format=tar --prefix="$project-$version/" "$commit" | xz > "$releasedir/$project.tar.xz" + +# Generate sha256 checksums for both. +( cd "$releasedir" ; "$hashcmd" "$project".{"$goarch",tar.xz} > "$project.$hashcmd" ; ) + +# Set up the gpgflags. +[[ "$keyid" ]] && export gpgflags="--default-key $keyid" +gpg_cansign $gpgflags || bail "Could not find suitable GPG key, skipping signing step." + +# Sign everything. +gpg $gpgflags --detach-sign --armor "$releasedir/$project.$goarch" +gpg $gpgflags --detach-sign --armor "$releasedir/$project.tar.xz" +gpg $gpgflags --clear-sign --armor \ + --output "$releasedir/$project.$hashcmd"{.tmp,} && \ + mv "$releasedir/$project.$hashcmd"{.tmp,} diff --git a/script/tmpmount b/script/tmpmount new file mode 100755 index 0000000..5ac6bc2 --- /dev/null +++ b/script/tmpmount @@ -0,0 +1,4 @@ +#!/bin/bash + +mount -t tmpfs none /tmp +exec "$@" diff --git a/script/validate-c b/script/validate-c new file mode 100755 index 0000000..7c01b51 --- /dev/null +++ b/script/validate-c @@ -0,0 +1,42 @@ +#!/bin/bash + +source "$(dirname "$BASH_SOURCE")/.validate" + +IFS=$'\n' +files=($(validate_diff --diff-filter=ACMR --name-only -- '*.c' | grep -v '^vendor/' || true)) +unset IFS + +# indent(1): "You must use the ‘-T’ option to tell indent the name of all the typenames in your program that are defined by typedef." +INDENT="indent -linux -l120 -T size_t -T jmp_buf" +if [ -z "$(indent --version 2>&1 | grep GNU)" ]; then + echo "Skipping C indentation checks, as GNU indent is not installed." 
+ exit 0 +fi + +badFiles=() +for f in "${files[@]}"; do + orig=$(mktemp) + formatted=$(mktemp) + # we use "git show" here to validate that what's committed is formatted + git show "$VALIDATE_HEAD:$f" > ${orig} + ${INDENT} ${orig} -o ${formatted} + if [ "$(diff -u ${orig} ${formatted})" ]; then + badFiles+=("$f") + fi + rm -f ${orig} ${formatted} +done + +if [ ${#badFiles[@]} -eq 0 ]; then + echo 'Congratulations! All C source files are properly formatted.' +else + { + echo "These files are not properly formatted:" + for f in "${badFiles[@]}"; do + echo " - $f" + done + echo + echo "Please reformat the above files using \"${INDENT}\" and commit the result." + echo + } >&2 + false +fi diff --git a/script/validate-gofmt b/script/validate-gofmt new file mode 100755 index 0000000..8337ed2 --- /dev/null +++ b/script/validate-gofmt @@ -0,0 +1,30 @@ +#!/bin/bash + +source "$(dirname "$BASH_SOURCE")/.validate" + +IFS=$'\n' +files=($(validate_diff --diff-filter=ACMR --name-only -- '*.go' | grep -v '^vendor/' || true)) +unset IFS + +badFiles=() +for f in "${files[@]}"; do + # we use "git show" here to validate that what's committed is formatted + if [ "$(git show "$VALIDATE_HEAD:$f" | gofmt -s -l)" ]; then + badFiles+=("$f") + fi +done + +if [ ${#badFiles[@]} -eq 0 ]; then + echo 'Congratulations! All Go source files are properly formatted.' +else + { + echo "These files are not properly gofmt'd:" + for f in "${badFiles[@]}"; do + echo " - $f" + done + echo + echo 'Please reformat the above files using "gofmt -s -w" and commit the result.' 
+ echo + } >&2 + false +fi diff --git a/signalmap.go b/signalmap.go new file mode 100644 index 0000000..f9a6347 --- /dev/null +++ b/signalmap.go @@ -0,0 +1,47 @@ +// +build linux +// +build !mips,!mipsle,!mips64,!mips64le + +package main + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +var signalMap = map[string]syscall.Signal{ + "ABRT": unix.SIGABRT, + "ALRM": unix.SIGALRM, + "BUS": unix.SIGBUS, + "CHLD": unix.SIGCHLD, + "CLD": unix.SIGCLD, + "CONT": unix.SIGCONT, + "FPE": unix.SIGFPE, + "HUP": unix.SIGHUP, + "ILL": unix.SIGILL, + "INT": unix.SIGINT, + "IO": unix.SIGIO, + "IOT": unix.SIGIOT, + "KILL": unix.SIGKILL, + "PIPE": unix.SIGPIPE, + "POLL": unix.SIGPOLL, + "PROF": unix.SIGPROF, + "PWR": unix.SIGPWR, + "QUIT": unix.SIGQUIT, + "SEGV": unix.SIGSEGV, + "STKFLT": unix.SIGSTKFLT, + "STOP": unix.SIGSTOP, + "SYS": unix.SIGSYS, + "TERM": unix.SIGTERM, + "TRAP": unix.SIGTRAP, + "TSTP": unix.SIGTSTP, + "TTIN": unix.SIGTTIN, + "TTOU": unix.SIGTTOU, + "URG": unix.SIGURG, + "USR1": unix.SIGUSR1, + "USR2": unix.SIGUSR2, + "VTALRM": unix.SIGVTALRM, + "WINCH": unix.SIGWINCH, + "XCPU": unix.SIGXCPU, + "XFSZ": unix.SIGXFSZ, +} diff --git a/signalmap_mipsx.go b/signalmap_mipsx.go new file mode 100644 index 0000000..046bf15 --- /dev/null +++ b/signalmap_mipsx.go @@ -0,0 +1,45 @@ +// +build linux,mips linux,mipsle linux,mips64 linux,mips64le + +package main + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +var signalMap = map[string]syscall.Signal{ + "ABRT": unix.SIGABRT, + "ALRM": unix.SIGALRM, + "BUS": unix.SIGBUS, + "CHLD": unix.SIGCHLD, + "CLD": unix.SIGCLD, + "CONT": unix.SIGCONT, + "FPE": unix.SIGFPE, + "HUP": unix.SIGHUP, + "ILL": unix.SIGILL, + "INT": unix.SIGINT, + "IO": unix.SIGIO, + "IOT": unix.SIGIOT, + "KILL": unix.SIGKILL, + "PIPE": unix.SIGPIPE, + "POLL": unix.SIGPOLL, + "PROF": unix.SIGPROF, + "PWR": unix.SIGPWR, + "QUIT": unix.SIGQUIT, + "SEGV": unix.SIGSEGV, + "STOP": unix.SIGSTOP, + "SYS": unix.SIGSYS, + "TERM": unix.SIGTERM, + "TRAP": 
unix.SIGTRAP, + "TSTP": unix.SIGTSTP, + "TTIN": unix.SIGTTIN, + "TTOU": unix.SIGTTOU, + "URG": unix.SIGURG, + "USR1": unix.SIGUSR1, + "USR2": unix.SIGUSR2, + "VTALRM": unix.SIGVTALRM, + "WINCH": unix.SIGWINCH, + "XCPU": unix.SIGXCPU, + "XFSZ": unix.SIGXFSZ, +} diff --git a/signals.go b/signals.go new file mode 100644 index 0000000..b67f65a --- /dev/null +++ b/signals.go @@ -0,0 +1,139 @@ +// +build linux + +package main + +import ( + "os" + "os/signal" + "syscall" // only for Signal + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" + + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +const signalBufferSize = 2048 + +// newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals +// while still forwarding all other signals to the process. +// If notifySocket is present, use it to read systemd notifications from the container and +// forward them to notifySocketHost. +func newSignalHandler(enableSubreaper bool, notifySocket *notifySocket) *signalHandler { + if enableSubreaper { + // set us as the subreaper before registering the signal handler for the container + if err := system.SetSubreaper(1); err != nil { + logrus.Warn(err) + } + } + // ensure that we have a large buffer size so that we do not miss any signals + // in case we are not processing them fast enough. + s := make(chan os.Signal, signalBufferSize) + // handle all signals for the process. + signal.Notify(s) + return &signalHandler{ + signals: s, + notifySocket: notifySocket, + } +} + +// exit models a process exit status with the pid and +// exit status. +type exit struct { + pid int + status int +} + +type signalHandler struct { + signals chan os.Signal + notifySocket *notifySocket +} + +// forward handles the main signal event loop forwarding, resizing, or reaping depending +// on the signal received. 
+func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach bool) (int, error) { + // make sure we know the pid of our main process so that we can return + // after it dies. + if detach && h.notifySocket == nil { + return 0, nil + } + + pid1, err := process.Pid() + if err != nil { + return -1, err + } + + if h.notifySocket != nil { + if detach { + h.notifySocket.run(pid1) + return 0, nil + } + go h.notifySocket.run(0) + } + + // Perform the initial tty resize. Always ignore errors resizing because + // stdout might have disappeared (due to races with when SIGHUP is sent). + _ = tty.resize() + // Handle and forward signals. + for s := range h.signals { + switch s { + case unix.SIGWINCH: + // Ignore errors resizing, as above. + _ = tty.resize() + case unix.SIGCHLD: + exits, err := h.reap() + if err != nil { + logrus.Error(err) + } + for _, e := range exits { + logrus.WithFields(logrus.Fields{ + "pid": e.pid, + "status": e.status, + }).Debug("process exited") + if e.pid == pid1 { + // call Wait() on the process even though we already have the exit + // status because we must ensure that any of the go specific process + // fun such as flushing pipes are complete before we return. + process.Wait() + if h.notifySocket != nil { + h.notifySocket.Close() + } + return e.status, nil + } + } + default: + logrus.Debugf("sending signal to process %s", s) + if err := unix.Kill(pid1, s.(syscall.Signal)); err != nil { + logrus.Error(err) + } + } + } + return -1, nil +} + +// reap runs wait4 in a loop until we have finished processing any existing exits +// then returns all exits to the main event loop for further processing. 
+func (h *signalHandler) reap() (exits []exit, err error) { + var ( + ws unix.WaitStatus + rus unix.Rusage + ) + for { + pid, err := unix.Wait4(-1, &ws, unix.WNOHANG, &rus) + if err != nil { + if err == unix.ECHILD { + return exits, nil + } + return nil, err + } + if pid <= 0 { + return exits, nil + } + exits = append(exits, exit{ + pid: pid, + status: utils.ExitStatus(ws), + }) + } +} diff --git a/spec.go b/spec.go new file mode 100644 index 0000000..322a83d --- /dev/null +++ b/spec.go @@ -0,0 +1,145 @@ +// +build linux + +package main + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/specconv" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/urfave/cli" +) + +var specCommand = cli.Command{ + Name: "spec", + Usage: "create a new specification file", + ArgsUsage: "", + Description: `The spec command creates the new specification file named "` + specConfig + `" for +the bundle. + +The spec generated is just a starter file. Editing of the spec is required to +achieve desired results. For example, the newly generated spec includes an args +parameter that is initially set to call the "sh" command when the container is +started. Calling "sh" may work for an ubuntu container or busybox, but will not +work for containers that do not include the "sh" program. + +EXAMPLE: + To run docker's hello-world container one needs to set the args parameter +in the spec to call hello. This can be done using the sed command or a text +editor. 
The following commands create a bundle for hello-world, change the +default args parameter in the spec from "sh" to "/hello", then run the hello +command in a new hello-world container named container1: + + mkdir hello + cd hello + docker pull hello-world + docker export $(docker create hello-world) > hello-world.tar + mkdir rootfs + tar -C rootfs -xf hello-world.tar + runc spec + sed -i 's;"sh";"/hello";' ` + specConfig + ` + runc run container1 + +In the run command above, "container1" is the name for the instance of the +container that you are starting. The name you provide for the container instance +must be unique on your host. + +An alternative for generating a customized spec config is to use "oci-runtime-tool", the +sub-command "oci-runtime-tool generate" has lots of options that can be used to do any +customizations as you want, see runtime-tools (https://github.com/opencontainers/runtime-tools) +to get more information. + +When starting a container through runc, runc needs root privilege. If not +already running as root, you can use sudo to give runc root privilege. For +example: "sudo runc start container1" will give runc root privilege to start the +container on your host. + +Alternatively, you can start a rootless container, which has the ability to run +without root privileges. For this to work, the specification file needs to be +adjusted accordingly. You can pass the parameter --rootless to this command to +generate a proper rootless spec file. + +Note that --rootless is not needed when you execute runc as the root in a user namespace +created by an unprivileged user. 
+`, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "bundle, b", + Value: "", + Usage: "path to the root of the bundle directory", + }, + cli.BoolFlag{ + Name: "rootless", + Usage: "generate a configuration for a rootless container", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 0, exactArgs); err != nil { + return err + } + spec := specconv.Example() + + rootless := context.Bool("rootless") + if rootless { + specconv.ToRootless(spec) + } + + checkNoFile := func(name string) error { + _, err := os.Stat(name) + if err == nil { + return fmt.Errorf("File %s exists. Remove it first", name) + } + if !os.IsNotExist(err) { + return err + } + return nil + } + bundle := context.String("bundle") + if bundle != "" { + if err := os.Chdir(bundle); err != nil { + return err + } + } + if err := checkNoFile(specConfig); err != nil { + return err + } + data, err := json.MarshalIndent(spec, "", "\t") + if err != nil { + return err + } + return ioutil.WriteFile(specConfig, data, 0666) + }, +} + +// loadSpec loads the specification from the provided path. 
+func loadSpec(cPath string) (spec *specs.Spec, err error) { + cf, err := os.Open(cPath) + if err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("JSON specification file %s not found", cPath) + } + return nil, err + } + defer cf.Close() + + if err = json.NewDecoder(cf).Decode(&spec); err != nil { + return nil, err + } + return spec, validateProcessSpec(spec.Process) +} + +func createLibContainerRlimit(rlimit specs.POSIXRlimit) (configs.Rlimit, error) { + rl, err := strToRlimit(rlimit.Type) + if err != nil { + return configs.Rlimit{}, err + } + return configs.Rlimit{ + Type: rl, + Hard: rlimit.Hard, + Soft: rlimit.Soft, + }, nil +} diff --git a/start.go b/start.go new file mode 100644 index 0000000..2bb698b --- /dev/null +++ b/start.go @@ -0,0 +1,43 @@ +package main + +import ( + "errors" + "fmt" + + "github.com/opencontainers/runc/libcontainer" + "github.com/urfave/cli" +) + +var startCommand = cli.Command{ + Name: "start", + Usage: "executes the user defined process in a created container", + ArgsUsage: ` + +Where "" is your name for the instance of the container that you +are starting. 
The name you provide for the container instance must be unique on +your host.`, + Description: `The start command executes the user defined process in a created container.`, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + container, err := getContainer(context) + if err != nil { + return err + } + status, err := container.Status() + if err != nil { + return err + } + switch status { + case libcontainer.Created: + return container.Exec() + case libcontainer.Stopped: + return errors.New("cannot start a container that has stopped") + case libcontainer.Running: + return errors.New("cannot start an already running container") + default: + return fmt.Errorf("cannot start a container in the %s state\n", status) + } + }, +} diff --git a/state.go b/state.go new file mode 100644 index 0000000..718813c --- /dev/null +++ b/state.go @@ -0,0 +1,60 @@ +// +build linux + +package main + +import ( + "encoding/json" + "os" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/utils" + "github.com/urfave/cli" +) + +var stateCommand = cli.Command{ + Name: "state", + Usage: "output the state of a container", + ArgsUsage: ` + +Where "" is your name for the instance of the container.`, + Description: `The state command outputs current state information for the +instance of a container.`, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + container, err := getContainer(context) + if err != nil { + return err + } + containerStatus, err := container.Status() + if err != nil { + return err + } + state, err := container.State() + if err != nil { + return err + } + pid := state.BaseState.InitProcessPid + if containerStatus == libcontainer.Stopped { + pid = 0 + } + bundle, annotations := utils.Annotations(state.Config.Labels) + cs := containerState{ + Version: state.BaseState.Config.Version, + ID: 
state.BaseState.ID, + InitProcessPid: pid, + Status: containerStatus.String(), + Bundle: bundle, + Rootfs: state.BaseState.Config.Rootfs, + Created: state.BaseState.Created, + Annotations: annotations, + } + data, err := json.MarshalIndent(cs, "", " ") + if err != nil { + return err + } + os.Stdout.Write(data) + return nil + }, +} diff --git a/tests/integration/README.md b/tests/integration/README.md new file mode 100644 index 0000000..8ee6ebf --- /dev/null +++ b/tests/integration/README.md @@ -0,0 +1,83 @@ +# runc Integration Tests + +Integration tests provide end-to-end testing of runc. + +Note that integration tests do **not** replace unit tests. + +As a rule of thumb, code should be tested thoroughly with unit tests. +Integration tests on the other hand are meant to test a specific feature end +to end. + +Integration tests are written in *bash* using the +[bats](https://github.com/sstephenson/bats) framework. + +## Running integration tests + +The easiest way to run integration tests is with Docker: +``` +$ make integration +``` +Alternatively, you can run integration tests directly on your host through make: +``` +$ sudo make localintegration +``` +Or you can just run them directly using bats +``` +$ sudo bats tests/integration +``` +To run a single test bucket: +``` +$ make integration TESTPATH="/checkpoint.bats" +``` + + +To run them on your host, you will need to setup a development environment plus +[bats](https://github.com/sstephenson/bats#installing-bats-from-source) +For example: +``` +$ cd ~/go/src/github.com +$ git clone https://github.com/sstephenson/bats.git +$ cd bats +$ ./install.sh /usr/local +``` + +> **Note**: There are known issues running the integration tests using +> **devicemapper** as a storage driver, make sure that your docker daemon +> is using **aufs** if you want to successfully run the integration tests. 
+ +## Writing integration tests + +[helper functions] +(https://github.com/opencontainers/runc/blob/master/tests/integration/helpers.bash) +are provided in order to facilitate writing tests. + +```sh +#!/usr/bin/env bats + +# This will load the helpers. +load helpers + +# setup is called at the beginning of every test. +function setup() { + # see functions teardown_hello and setup_hello in helpers.bash, used to + # create a pristine environment for running your tests + teardown_hello + setup_hello +} + +# teardown is called at the end of every test. +function teardown() { + teardown_hello +} + +@test "this is a simple test" { + runc run containerid + # "The runc macro" automatically populates $status, $output and $lines. + # Please refer to bats documentation to find out more. + [ "$status" -eq 0 ] + + # check expected output + [[ "${output}" == *"Hello"* ]] +} + +``` diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats new file mode 100644 index 0000000..17812ab --- /dev/null +++ b/tests/integration/cgroups.bats @@ -0,0 +1,127 @@ +#!/usr/bin/env bats + +load helpers + +function teardown() { + rm -f $BATS_TMPDIR/runc-cgroups-integration-test.json + teardown_running_container test_cgroups_kmem + teardown_running_container test_cgroups_permissions + teardown_busybox +} + +function setup() { + teardown + setup_busybox +} + +function check_cgroup_value() { + cgroup=$1 + source=$2 + expected=$3 + + current=$(cat $cgroup/$source) + echo $cgroup/$source + echo "current" $current "!?" 
"$expected" + [ "$current" -eq "$expected" ] +} + +@test "runc update --kernel-memory (initialized)" { + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup + requires cgroups_kmem + + set_cgroups_path "$BUSYBOX_BUNDLE" + + # Set some initial known values + DATA=$(cat <<-EOF + "memory": { + "kernel": 16777216 + }, +EOF + ) + DATA=$(echo ${DATA} | sed 's/\n/\\n/g') + sed -i "s/\(\"resources\": {\)/\1\n${DATA}/" ${BUSYBOX_BUNDLE}/config.json + + # run a detached busybox to work with + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_kmem + [ "$status" -eq 0 ] + + # update kernel memory limit + runc update test_cgroups_kmem --kernel-memory 50331648 + [ "$status" -eq 0 ] + + # check the value + check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 50331648 +} + +@test "runc update --kernel-memory (uninitialized)" { + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup + requires cgroups_kmem + + set_cgroups_path "$BUSYBOX_BUNDLE" + + # run a detached busybox to work with + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_kmem + [ "$status" -eq 0 ] + + # update kernel memory limit + runc update test_cgroups_kmem --kernel-memory 50331648 + # Since kernel 4.6, we can update kernel memory without initialization + # because it's accounted by default. + if [ "$KERNEL_MAJOR" -lt 4 ] || [ "$KERNEL_MAJOR" -eq 4 -a "$KERNEL_MINOR" -le 5 ]; then + [ ! 
"$status" -eq 0 ] + else + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 50331648 + fi +} + +@test "runc create (no limits + no cgrouppath + no permission) succeeds" { + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions + [ "$status" -eq 0 ] +} + +@test "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error" { + requires rootless + requires rootless_no_cgroup + + set_cgroups_path "$BUSYBOX_BUNDLE" + + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions + [ "$status" -eq 1 ] + [[ ${lines[1]} == *"permission denied"* ]] +} + +@test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" { + requires rootless + requires rootless_no_cgroup + + set_resources_limit "$BUSYBOX_BUNDLE" + + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions + [ "$status" -eq 1 ] + [[ ${lines[1]} == *"cannot set pids limit: container could not join or create cgroup"* ]] +} + +@test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" { + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup + + set_cgroups_path "$BUSYBOX_BUNDLE" + set_resources_limit "$BUSYBOX_BUNDLE" + + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions + [ "$status" -eq 0 ] +} + +@test "runc exec (limits + cgrouppath + permission on the cgroup dir) succeeds" { + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup + + set_cgroups_path "$BUSYBOX_BUNDLE" + set_resources_limit "$BUSYBOX_BUNDLE" + + runc run -d --console-socket $CONSOLE_SOCKET test_cgroups_permissions + [ "$status" -eq 0 ] + + runc exec test_cgroups_permissions echo "cgroups_exec" + [ "$status" -eq 0 ] + [[ ${lines[0]} == *"cgroups_exec"* ]] +} diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats new file mode 100644 index 0000000..87696df --- /dev/null +++ b/tests/integration/checkpoint.bats @@ -0,0 +1,348 @@ 
+#!/usr/bin/env bats + +load helpers + +function setup() { + if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then + skip "CRIU test suite is skipped on systemd cgroup driver for now." + fi + + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "checkpoint and restore" { + # XXX: currently criu require root containers. + requires criu root + + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running + + for i in `seq 2`; do + # checkpoint the running container + runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox + ret=$? + # if you are having problems getting criu to work uncomment the following dump: + #cat /run/opencontainer/containers/test_busybox/criu.work/dump.log + cat ./work-dir/dump.log | grep -B 5 Error || true + [ "$ret" -eq 0 ] + + # after checkpoint busybox is no longer running + runc state test_busybox + [ "$status" -ne 0 ] + + # restore from checkpoint + runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox + ret=$? + cat ./work-dir/restore.log | grep -B 5 Error || true + [ "$ret" -eq 0 ] + + # busybox should be back up and running + testcontainer test_busybox running + done +} + +@test "checkpoint --pre-dump and restore" { + # XXX: currently criu require root containers. + requires criu root + + # The changes to 'terminal' are needed for running in detached mode + sed -i 's;"terminal": true;"terminal": false;' config.json + sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json + + # The following code creates pipes for stdin and stdout. + # CRIU can't handle fifo-s, so we need all these tricks. + fifo=`mktemp -u /tmp/runc-fifo-XXXXXX` + mkfifo $fifo + + # stdout + cat $fifo | cat $fifo & + pid=$! + exec 50/proc/$pid/fd/0 + + # stdin + cat $fifo | cat $fifo & + pid=$! 
+ exec 60/proc/$pid/fd/0 + + echo -n > $fifo + unlink $fifo + + # run busybox + __runc run -d test_busybox <&60 >&51 2>&51 + [ $? -eq 0 ] + + testcontainer test_busybox running + + #test checkpoint pre-dump + mkdir parent-dir + runc --criu "$CRIU" checkpoint --pre-dump --image-path ./parent-dir test_busybox + [ "$status" -eq 0 ] + + # busybox should still be running + runc state test_busybox + [ "$status" -eq 0 ] + [[ "${output}" == *"running"* ]] + + # checkpoint the running container + mkdir image-dir + mkdir work-dir + runc --criu "$CRIU" checkpoint --parent-path ./parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox + cat ./work-dir/dump.log | grep -B 5 Error || true + [ "$status" -eq 0 ] + + # after checkpoint busybox is no longer running + runc state test_busybox + [ "$status" -ne 0 ] + + # restore from checkpoint + __runc --criu "$CRIU" restore -d --work-path ./work-dir --image-path ./image-dir test_busybox <&60 >&51 2>&51 + ret=$? + cat ./work-dir/restore.log | grep -B 5 Error || true + [ $ret -eq 0 ] + + # busybox should be back up and running + testcontainer test_busybox running + + runc exec --cwd /bin test_busybox echo ok + [ "$status" -eq 0 ] + [[ ${output} == "ok" ]] + + echo Ping >&61 + exec 61>&- + exec 51>&- + run cat <&50 + [ "$status" -eq 0 ] + [[ "${output}" == *"ponG Ping"* ]] +} + +@test "checkpoint --lazy-pages and restore" { + # XXX: currently criu require root containers. 
+ requires criu root + + # check if lazy-pages is supported + run ${CRIU} check --feature uffd-noncoop + if [ "$status" -eq 1 ]; then + # this criu does not support lazy migration; skip the test + skip "this criu does not support lazy migration" + fi + + # The changes to 'terminal' are needed for running in detached mode + sed -i 's;"terminal": true;"terminal": false;' config.json + # This should not be necessary: https://github.com/checkpoint-restore/criu/issues/575 + sed -i 's;"readonly": true;"readonly": false;' config.json + sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json + + # The following code creates pipes for stdin and stdout. + # CRIU can't handle fifo-s, so we need all these tricks. + fifo=`mktemp -u /tmp/runc-fifo-XXXXXX` + mkfifo $fifo + + # For lazy migration we need to know when CRIU is ready to serve + # the memory pages via TCP. + lazy_pipe=`mktemp -u /tmp/lazy-pipe-XXXXXX` + mkfifo $lazy_pipe + + # TCP port for lazy migration + port=27277 + + # stdout + cat $fifo | cat $fifo & + pid=$! + exec 50/proc/$pid/fd/0 + + # stdin + cat $fifo | cat $fifo & + pid=$! + exec 60/proc/$pid/fd/0 + + echo -n > $fifo + unlink $fifo + + # run busybox + __runc run -d test_busybox <&60 >&51 2>&51 + [ $? -eq 0 ] + + testcontainer test_busybox running + + # checkpoint the running container + mkdir image-dir + mkdir work-dir + # Double fork taken from helpers.bats + # We need to start 'runc checkpoint --lazy-pages' in the background, + # so we double fork in the shell. + (runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_pipe} --work-path ./work-dir --image-path ./image-dir test_busybox & ) & + # Sleeping here. This is ugly, but not sure how else to handle it. + # The return code of the in the background running runc is needed, if + # there is some basic error. If the lazy migration is ready can + # be handled by $lazy_pipe. 
Which probably will always be ready + # after sleeping two seconds. + sleep 2 + # Check if inventory.img was written + [ -e image-dir/inventory.img ] + # If the inventory.img exists criu checkpointed some things, let's see + # if there were other errors in the log file. + run grep -B 5 Error ./work-dir/dump.log -q + [ "$status" -eq 1 ] + + # This will block until CRIU is ready to serve memory pages + cat $lazy_pipe + [ "$status" -eq 1 ] + + unlink $lazy_pipe + + # Double fork taken from helpers.bats + # We need to start 'criu lazy-pages' in the background, + # so we double fork in the shell. + # Start CRIU in lazy-daemon mode + $(${CRIU} lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &) & + + # Restore lazily from checkpoint. + # The restored container needs a different name as the checkpointed + # container is not yet destroyed. It is only destroyed at that point + # in time when the last page is lazily transferred to the destination. + # Killing the CRIU on the checkpoint side will let the container + # continue to run if the migration failed at some point. + __runc --criu "$CRIU" restore -d --work-path ./image-dir --image-path ./image-dir --lazy-pages test_busybox_restore <&60 >&51 2>&51 + ret=$? + [ $ret -eq 0 ] + run grep -B 5 Error ./work-dir/dump.log -q + [ "$status" -eq 1 ] + + # busybox should be back up and running + testcontainer test_busybox_restore running + + runc exec --cwd /bin test_busybox_restore echo ok + [ "$status" -eq 0 ] + [[ ${output} == "ok" ]] + + echo Ping >&61 + exec 61>&- + exec 51>&- + run cat <&50 + [ "$status" -eq 0 ] + [[ "${output}" == *"ponG Ping"* ]] +} + +@test "checkpoint and restore in external network namespace" { + # XXX: currently criu require root containers. 
+ requires criu root + + # check if external_net_ns is supported; only with criu 3.10++ + run ${CRIU} check --feature external_net_ns + if [ "$status" -eq 1 ]; then + # this criu does not support external_net_ns; skip the test + skip "this criu does not support external network namespaces" + fi + + # create a temporary name for the test network namespace + tmp=`mktemp` + rm -f $tmp + ns_name=`basename $tmp` + # create network namespace + ip netns add $ns_name + ns_path=`ip netns add $ns_name 2>&1 | sed -e 's/.*"\(.*\)".*/\1/'` + + ns_inode=`ls -iL $ns_path | awk '{ print $1 }'` + + # tell runc which network namespace to use + sed -i "s;\"type\": \"network\";\"type\": \"network\",\"path\": \"$ns_path\";" config.json + + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running + + for i in `seq 2`; do + # checkpoint the running container; this automatically tells CRIU to + # handle the network namespace defined in config.json as an external + runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox + ret=$? + # if you are having problems getting criu to work uncomment the following dump: + #cat /run/opencontainer/containers/test_busybox/criu.work/dump.log + cat ./work-dir/dump.log | grep -B 5 Error || true + [ "$ret" -eq 0 ] + + # after checkpoint busybox is no longer running + runc state test_busybox + [ "$status" -ne 0 ] + + # restore from checkpoint; this should restore the container into the existing network namespace + runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox + ret=$? 
+ cat ./work-dir/restore.log | grep -B 5 Error || true + [ "$ret" -eq 0 ] + + # busybox should be back up and running + testcontainer test_busybox running + + # container should be running in same network namespace as before + pid=`__runc state test_busybox | jq '.pid'` + ns_inode_new=`readlink /proc/$pid/ns/net | sed -e 's/.*\[\(.*\)\]/\1/'` + echo "old network namespace inode $ns_inode" + echo "new network namespace inode $ns_inode_new" + [ "$ns_inode" -eq "$ns_inode_new" ] + done + ip netns del $ns_name +} + +@test "checkpoint and restore with container specific CRIU config" { + # XXX: currently criu require root containers. + requires criu root + + tmp=`mktemp /tmp/runc-criu-XXXXXX.conf` + # This is the file we write to /etc/criu/default.conf + tmplog1=`mktemp /tmp/runc-criu-log-XXXXXX.log` + unlink $tmplog1 + tmplog1=`basename $tmplog1` + # That is the actual configuration file to be used + tmplog2=`mktemp /tmp/runc-criu-log-XXXXXX.log` + unlink $tmplog2 + tmplog2=`basename $tmplog2` + # This adds the annotation 'org.criu.config' to set a container + # specific CRIU config file. + sed -i "s;\"process\";\"annotations\":{\"org.criu.config\": \"$tmp\"},\"process\";" config.json + # Tell CRIU to use another configuration file + mkdir -p /etc/criu + echo "log-file=$tmplog1" > /etc/criu/default.conf + # Make sure the RPC defined configuration file overwrites the previous + echo "log-file=$tmplog2" > $tmp + + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running + + # checkpoint the running container + runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox + [ "$status" -eq 0 ] + ! 
test -f ./work-dir/$tmplog1 + test -f ./work-dir/$tmplog2 + + # after checkpoint busybox is no longer running + runc state test_busybox + [ "$status" -ne 0 ] + + test -f ./work-dir/$tmplog2 && unlink ./work-dir/$tmplog2 + # restore from checkpoint + runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + ! test -f ./work-dir/$tmplog1 + test -f ./work-dir/$tmplog2 + + # busybox should be back up and running + testcontainer test_busybox running + unlink $tmp + test -f ./work-dir/$tmplog2 && unlink ./work-dir/$tmplog2 +} + diff --git a/tests/integration/config.json b/tests/integration/config.json new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/create.bats b/tests/integration/create.bats new file mode 100644 index 0000000..abd4da2 --- /dev/null +++ b/tests/integration/create.bats @@ -0,0 +1,89 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc create" { + runc create --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox created + + # start the command + runc start test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running +} + +@test "runc create exec" { + runc create --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox created + + runc exec test_busybox true + [ "$status" -eq 0 ] + + testcontainer test_busybox created + + # start the command + runc start test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running +} + +@test "runc create --pid-file" { + runc create --pid-file pid.txt --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox created + + # check pid.txt was generated + [ -e pid.txt ] + + run cat pid.txt + [ "$status" -eq 0 ] + [[ ${lines[0]} == $(__runc state test_busybox | jq '.pid') ]] + + # 
start the command + runc start test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running +} + +@test "runc create --pid-file with new CWD" { + # create pid_file directory as the CWD + run mkdir pid_file + [ "$status" -eq 0 ] + run cd pid_file + [ "$status" -eq 0 ] + + runc create --pid-file pid.txt -b $BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox created + + # check pid.txt was generated + [ -e pid.txt ] + + run cat pid.txt + [ "$status" -eq 0 ] + [[ ${lines[0]} == $(__runc state test_busybox | jq '.pid') ]] + + # start the command + runc start test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running +} diff --git a/tests/integration/debug.bats b/tests/integration/debug.bats new file mode 100644 index 0000000..e02cf4a --- /dev/null +++ b/tests/integration/debug.bats @@ -0,0 +1,81 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_hello + setup_hello +} + +function teardown() { + teardown_hello +} + +@test "global --debug" { + # run hello-world + runc --debug run test_hello + echo "${output}" + [ "$status" -eq 0 ] + + # check expected debug output was sent to stderr + [[ "${output}" == *"level=debug"* ]] + [[ "${output}" == *"nsexec started"* ]] + [[ "${output}" == *"child process in init()"* ]] +} + +@test "global --debug to --log" { + # run hello-world + runc --log log.out --debug run test_hello + [ "$status" -eq 0 ] + + # check output does not include debug info + [[ "${output}" != *"level=debug"* ]] + + # check log.out was generated + [ -e log.out ] + + # check expected debug output was sent to log.out + run cat log.out + [ "$status" -eq 0 ] + [[ "${output}" == *"level=debug"* ]] + [[ "${output}" == *"nsexec started"* ]] + [[ "${output}" == *"child process in init()"* ]] +} + +@test "global --debug to --log --log-format 'text'" { + # run hello-world + runc --log log.out --log-format "text" --debug run test_hello + [ "$status" -eq 0 ] + + # 
check output does not include debug info + [[ "${output}" != *"level=debug"* ]] + + # check log.out was generated + [ -e log.out ] + + # check expected debug output was sent to log.out + run cat log.out + [ "$status" -eq 0 ] + [[ "${output}" == *"level=debug"* ]] + [[ "${output}" == *"nsexec started"* ]] + [[ "${output}" == *"child process in init()"* ]] +} + +@test "global --debug to --log --log-format 'json'" { + # run hello-world + runc --log log.out --log-format "json" --debug run test_hello + [ "$status" -eq 0 ] + + # check output does not include debug info + [[ "${output}" != *"level=debug"* ]] + + # check log.out was generated + [ -e log.out ] + + # check expected debug output was sent to log.out + run cat log.out + [ "$status" -eq 0 ] + [[ "${output}" == *'"level":"debug"'* ]] + [[ "${output}" == *"nsexec started"* ]] + [[ "${output}" == *"child process in init()"* ]] +} diff --git a/tests/integration/delete.bats b/tests/integration/delete.bats new file mode 100644 index 0000000..c5ed215 --- /dev/null +++ b/tests/integration/delete.bats @@ -0,0 +1,53 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc delete" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + runc kill test_busybox KILL + [ "$status" -eq 0 ] + # wait for busybox to be in the destroyed state + retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" + + # delete test_busybox + runc delete test_busybox + [ "$status" -eq 0 ] + + runc state test_busybox + [ "$status" -ne 0 ] +} + +@test "runc delete --force" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + # force delete test_busybox + runc delete --force test_busybox + + runc state test_busybox + 
[ "$status" -ne 0 ] +} + +@test "runc delete --force ignore not exist" { + runc delete --force notexists + [ "$status" -eq 0 ] +} diff --git a/tests/integration/events.bats b/tests/integration/events.bats new file mode 100644 index 0000000..b3e6315 --- /dev/null +++ b/tests/integration/events.bats @@ -0,0 +1,109 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "events --stats" { + # XXX: currently cgroups require root containers. + requires root + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # generate stats + runc events --stats test_busybox + [ "$status" -eq 0 ] + [[ "${lines[0]}" == [\{]"\"type\""[:]"\"stats\""[,]"\"id\""[:]"\"test_busybox\""[,]* ]] + [[ "${lines[0]}" == *"data"* ]] +} + +@test "events --interval default " { + # XXX: currently cgroups require root containers. + requires root + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # spawn two sub processes (shells) + # the first sub process is an event logger that sends stats events to events.log + # the second sub process waits for an event that includes test_busybox then + # kills the test_busybox container which causes the event logger to exit + (__runc events test_busybox > events.log) & + ( + retry 10 1 eval "grep -q 'test_busybox' events.log" + teardown_running_container test_busybox + ) & + wait # wait for the above sub shells to finish + + [ -e events.log ] + + run cat events.log + [ "$status" -eq 0 ] + [[ "${lines[0]}" == [\{]"\"type\""[:]"\"stats\""[,]"\"id\""[:]"\"test_busybox\""[,]* ]] + [[ "${lines[0]}" == *"data"* ]] +} + +@test "events --interval 1s " { + # XXX: currently cgroups require root containers. 
+ requires root + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # spawn two sub processes (shells) + # the first sub process is an event logger that sends stats events to events.log once a second + # the second sub process tries 3 times for an event that includes test_busybox + # pausing 1s between each attempt then kills the test_busybox container which + # causes the event logger to exit + (__runc events --interval 1s test_busybox > events.log) & + ( + retry 3 1 eval "grep -q 'test_busybox' events.log" + teardown_running_container test_busybox + ) & + wait # wait for the above sub shells to finish + + [ -e events.log ] + + run eval "grep -q 'test_busybox' events.log" + [ "$status" -eq 0 ] +} + +@test "events --interval 100ms " { + # XXX: currently cgroups require root containers. + requires root + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + #prove there is no carry over of events.log from a prior test + [ ! 
-e events.log ] + + # spawn two sub processes (shells) + # the first sub process is an event logger that sends stats events to events.log once every 100ms + # the second sub process tries 3 times for an event that includes test_busybox + # pausing 100ms between each attempt then kills the test_busybox container which + # causes the event logger to exit + (__runc events --interval 100ms test_busybox > events.log) & + ( + retry 3 0.100 eval "grep -q 'test_busybox' events.log" + teardown_running_container test_busybox + ) & + wait # wait for the above sub shells to finish + + [ -e events.log ] + + run eval "grep -q 'test_busybox' events.log" + [ "$status" -eq 0 ] +} diff --git a/tests/integration/exec.bats b/tests/integration/exec.bats new file mode 100644 index 0000000..19647c1 --- /dev/null +++ b/tests/integration/exec.bats @@ -0,0 +1,140 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc exec" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec test_busybox echo Hello from exec + [ "$status" -eq 0 ] + echo text echoed = "'""${output}""'" + [[ "${output}" == *"Hello from exec"* ]] +} + +@test "runc exec --pid-file" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec --pid-file pid.txt test_busybox echo Hello from exec + [ "$status" -eq 0 ] + echo text echoed = "'""${output}""'" + [[ "${output}" == *"Hello from exec"* ]] + + # check pid.txt was generated + [ -e pid.txt ] + + run cat pid.txt + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ [0-9]+ ]] + [[ ${lines[0]} != $(__runc state test_busybox | jq '.pid') ]] +} + +@test "runc exec --pid-file with new CWD" { + # create pid_file directory as the CWD + run mkdir pid_file + [ "$status" -eq 0 ] + run cd pid_file + [ "$status" -eq 0 ] + + # run busybox detached + runc run -d -b 
$BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec --pid-file pid.txt test_busybox echo Hello from exec + [ "$status" -eq 0 ] + echo text echoed = "'""${output}""'" + [[ "${output}" == *"Hello from exec"* ]] + + # check pid.txt was generated + [ -e pid.txt ] + + run cat pid.txt + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ [0-9]+ ]] + [[ ${lines[0]} != $(__runc state test_busybox | jq '.pid') ]] +} + +@test "runc exec ls -la" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec test_busybox ls -la + [ "$status" -eq 0 ] + [[ ${lines[0]} == *"total"* ]] + [[ ${lines[1]} == *"."* ]] + [[ ${lines[2]} == *".."* ]] +} + +@test "runc exec ls -la with --cwd" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec --cwd /bin test_busybox pwd + [ "$status" -eq 0 ] + [[ ${output} == "/bin"* ]] +} + +@test "runc exec --env" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec --env RUNC_EXEC_TEST=true test_busybox env + [ "$status" -eq 0 ] + + [[ ${output} == *"RUNC_EXEC_TEST=true"* ]] +} + +@test "runc exec --user" { + # --user can't work in rootless containers that don't have idmap. 
+ [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec --user 1000:1000 test_busybox id + [ "$status" -eq 0 ] + + [[ "${output}" == "uid=1000 gid=1000"* ]] +} + +@test "runc exec --additional-gids" { + requires root + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + wait_for_container 15 1 test_busybox + + runc exec --user 1000:1000 --additional-gids 100 --additional-gids 65534 test_busybox id + [ "$status" -eq 0 ] + + [[ ${output} == "uid=1000 gid=1000 groups=100(users),65534(nogroup)" ]] +} + +@test "runc exec --preserve-fds" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + run bash -c "cat hello > preserve-fds.test; exec 3/ { print $5; exit }') +CGROUP_CPU_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\/ { print $5; exit }') +if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then + CGROUPS_PATH="/machine.slice/runc-cgroups-integration-test.scope" +else + CGROUPS_PATH="/runc-cgroups-integration-test/test-cgroup" +fi +CGROUP_MEMORY="${CGROUP_MEMORY_BASE_PATH}${CGROUPS_PATH}" + +# CONFIG_MEMCG_KMEM support +KMEM="${CGROUP_MEMORY_BASE_PATH}/memory.kmem.limit_in_bytes" +RT_PERIOD="${CGROUP_CPU_BASE_PATH}/cpu.rt_period_us" + +# Check if we're in rootless mode. +ROOTLESS=$(id -u) + +# Wrapper for runc. +function runc() { + run __runc "$@" + + # Some debug information to make life easier. bats will only print it if the + # test failed, in which case the output is useful. + echo "runc $@ (status=$status):" >&2 + echo "$output" >&2 +} + +# Raw wrapper for runc. +function __runc() { + "$RUNC" ${RUNC_USE_SYSTEMD+--systemd-cgroup} --root "$ROOT" "$@" +} + +# Wrapper for runc spec, which takes only one argument (the bundle path). +function runc_spec() { + ! 
[[ "$#" > 1 ]] + + local args=() + local bundle="" + + if [ "$ROOTLESS" -ne 0 ]; then + args+=("--rootless") + fi + if [ "$#" -ne 0 ]; then + bundle="$1" + args+=("--bundle" "$bundle") + fi + + runc spec "${args[@]}" + + # Always add additional mappings if we have idmaps. + if [[ "$ROOTLESS" -ne 0 ]] && [[ "$ROOTLESS_FEATURES" == *"idmap"* ]]; then + runc_rootless_idmap "$bundle" + fi + + # Ensure config.json contains linux.resources + if [[ "$ROOTLESS" -ne 0 ]] && [[ "$ROOTLESS_FEATURES" == *"cgroup"* ]]; then + runc_rootless_cgroup "$bundle" + fi +} + +# Shortcut to add additional uids and gids, based on the values set as part of +# a rootless configuration. +function runc_rootless_idmap() { + bundle="${1:-.}" + cat "$bundle/config.json" \ + | jq '.mounts |= map((select(.type == "devpts") | .options += ["gid=5"]) // .)' \ + | jq '.linux.uidMappings |= .+ [{"hostID": '"$ROOTLESS_UIDMAP_START"', "containerID": 1000, "size": '"$ROOTLESS_UIDMAP_LENGTH"'}]' \ + | jq '.linux.gidMappings |= .+ [{"hostID": '"$ROOTLESS_GIDMAP_START"', "containerID": 100, "size": 1}]' \ + | jq '.linux.gidMappings |= .+ [{"hostID": '"$(($ROOTLESS_GIDMAP_START+10))"', "containerID": 1, "size": 20}]' \ + | jq '.linux.gidMappings |= .+ [{"hostID": '"$(($ROOTLESS_GIDMAP_START+100))"', "containerID": 1000, "size": '"$(($ROOTLESS_GIDMAP_LENGTH-1000))"'}]' \ + >"$bundle/config.json.tmp" + mv "$bundle/config.json"{.tmp,} +} + +# Shortcut to add empty resources as part of a rootless configuration. 
+function runc_rootless_cgroup() { + bundle="${1:-.}" + cat "$bundle/config.json" \ + | jq '.linux.resources |= .+ {"memory":{},"cpu":{},"blockio":{},"pids":{}}' \ + >"$bundle/config.json.tmp" + mv "$bundle/config.json"{.tmp,} +} + +# Helper function to set cgroupsPath in config.json to a fixed test cgroup path, or to machine.slice:runc-cgroups:integration-test when RUNC_USE_SYSTEMD is set (hard-coded here; $CGROUPS_PATH itself is not read) +function set_cgroups_path() { + bundle="${1:-.}" + cgroups_path="/runc-cgroups-integration-test/test-cgroup" + if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then + cgroups_path="machine.slice:runc-cgroups:integration-test" + fi + sed -i 's#\("linux": {\)#\1\n "cgroupsPath": "'"${cgroups_path}"'",#' "$bundle/config.json" +} + +# Helper function to set a resources limit (inserts a pids limit of 100 into config.json) +function set_resources_limit() { + bundle="${1:-.}" + sed -i 's/\("linux": {\)/\1\n "resources": { "pids": { "limit": 100 } },/' "$bundle/config.json" +} + +# Fails the current test: prints the given error to stderr and exits with status 1. +function fail() { + echo "$@" >&2 + exit 1 +} + +# Allows a test to specify what things it requires. If the environment can't +# support it, the test is skipped with a message. An unknown requirement name is treated as a bug and fails the test. +function requires() { + for var in "$@"; do + case $var in + criu) + if [ ! -e "$CRIU" ]; then + skip "test requires ${var}" + fi + ;; + root) + if [ "$ROOTLESS" -ne 0 ]; then + skip "test requires ${var}" + fi + ;; + rootless) + if [ "$ROOTLESS" -eq 0 ]; then + skip "test requires ${var}" + fi + ;; + rootless_idmap) + if [[ "$ROOTLESS_FEATURES" != *"idmap"* ]]; then + skip "test requires ${var}" + fi + ;; + rootless_cgroup) + if [[ "$ROOTLESS_FEATURES" != *"cgroup"* ]]; then + skip "test requires ${var}" + fi + ;; + rootless_no_cgroup) + if [[ "$ROOTLESS_FEATURES" == *"cgroup"* ]]; then + skip "test requires ${var}" + fi + ;; + cgroups_kmem) + if [ ! -e "$KMEM" ]; then + skip "Test requires ${var}" + fi + ;; + cgroups_rt) + if [ ! -e "$RT_PERIOD" ]; then + skip "Test requires ${var}" + fi + ;; + *) + fail "BUG: Invalid requires ${var}." + ;; + esac + done +} + +# Retry a command $1 times until it succeeds.
Wait $2 seconds between retries. +function retry() { + local attempts=$1 + shift + local delay=$1 + shift + local i + + for ((i = 0; i < attempts; i++)); do + run "$@" + if [[ "$status" -eq 0 ]]; then + return 0 + fi + sleep $delay + done + + echo "Command \"$@\" failed $attempts times. Output: $output" + false +} + +# retry until runc state succeeds for the given container and its output contains the optional status $4. NOTE(review): the failure message prints $statecheck, which is not set in this function -- confirm +function wait_for_container() { + local attempts=$1 + local delay=$2 + local cid=$3 + # optionally wait for a specific status + local wait_for_status="${4:-}" + local i + + for ((i = 0; i < attempts; i++)); do + runc state $cid + if [[ "$status" -eq 0 ]]; then + if [[ "${output}" == *"${wait_for_status}"* ]]; then + return 0 + fi + fi + sleep $delay + done + + echo "runc state failed to return state $statecheck $attempts times. Output: $output" + false +} + +# like wait_for_container, but runs runc state with ROOT=$4. NOTE(review): $4 is also read as the status to wait for, and $statecheck is not set in this function -- confirm intent +function wait_for_container_inroot() { + local attempts=$1 + local delay=$2 + local cid=$3 + # optionally wait for a specific status + local wait_for_status="${4:-}" + local i + + for ((i = 0; i < attempts; i++)); do + ROOT=$4 runc state $cid + if [[ "$status" -eq 0 ]]; then + if [[ "${output}" == *"${wait_for_status}"* ]]; then + return 0 + fi + fi + sleep $delay + done + + echo "runc state failed to return state $statecheck $attempts times. Output: $output" + false +} + +function testcontainer() { + # test that runc state succeeds for container $1 and that its output contains $2 + runc state $1 + [ "$status" -eq 0 ] + [[ "${output}" == *"$2"* ]] +} + +function setup_recvtty() { + # We need to start recvtty in the background, so we double fork in the shell. + ("$RECVTTY" --pid-file "$BATS_TMPDIR/recvtty.pid" --mode null "$CONSOLE_SOCKET" &) & +} + +function teardown_recvtty() { + # When we kill recvtty, the container will also be killed. + if [ -f "$BATS_TMPDIR/recvtty.pid" ]; then + kill -9 $(cat "$BATS_TMPDIR/recvtty.pid") + fi + + # Clean up the files that might be left over.
+ rm -f "$BATS_TMPDIR/recvtty.pid" + rm -f "$CONSOLE_SOCKET" +} + +function setup_busybox() { + setup_recvtty + run mkdir "$BUSYBOX_BUNDLE" + run mkdir "$BUSYBOX_BUNDLE"/rootfs + if [ -e "/testdata/busybox.tar" ]; then + BUSYBOX_IMAGE="/testdata/busybox.tar" + fi + if [ ! -e $BUSYBOX_IMAGE ]; then + curl -o $BUSYBOX_IMAGE -sSL `get_busybox` + fi + tar --exclude './dev/*' -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE" + cd "$BUSYBOX_BUNDLE" + runc_spec +} + +function setup_hello() { + setup_recvtty + run mkdir "$HELLO_BUNDLE" + run mkdir "$HELLO_BUNDLE"/rootfs + tar --exclude './dev/*' -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE" + cd "$HELLO_BUNDLE" + runc_spec + sed -i 's;"sh";"/hello";' config.json +} + +function teardown_running_container() { + runc list + # $1 should be a container name such as "test_busybox" + # here we detect "test_busybox "(with one extra blank) to avoid conflict prefix + # e.g. "test_busybox" and "test_busybox_update" + if [[ "${output}" == *"$1 "* ]]; then + runc kill $1 KILL + retry 10 1 eval "__runc state '$1' | grep -q 'stopped'" + runc delete $1 + fi +} + +function teardown_running_container_inroot() { + ROOT=$2 runc list + # $1 should be a container name such as "test_busybox" + # here we detect "test_busybox "(with one extra blank) to avoid conflict prefix + # e.g. 
"test_busybox" and "test_busybox_update" + if [[ "${output}" == *"$1 "* ]]; then + ROOT=$2 runc kill $1 KILL + retry 10 1 eval "ROOT='$2' __runc state '$1' | grep -q 'stopped'" + ROOT=$2 runc delete $1 + fi +} + +function teardown_busybox() { + cd "$INTEGRATION_ROOT" + teardown_recvtty + teardown_running_container test_busybox + run rm -f -r "$BUSYBOX_BUNDLE" +} + +function teardown_hello() { + cd "$INTEGRATION_ROOT" + teardown_recvtty + teardown_running_container test_hello + run rm -f -r "$HELLO_BUNDLE" +} diff --git a/tests/integration/kill.bats b/tests/integration/kill.bats new file mode 100644 index 0000000..d9afe92 --- /dev/null +++ b/tests/integration/kill.bats @@ -0,0 +1,30 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + + +@test "kill detached busybox" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + runc kill test_busybox KILL + [ "$status" -eq 0 ] + + retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" + + runc delete test_busybox + [ "$status" -eq 0 ] +} diff --git a/tests/integration/list.bats b/tests/integration/list.bats new file mode 100644 index 0000000..0a938c0 --- /dev/null +++ b/tests/integration/list.bats @@ -0,0 +1,56 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_running_container_inroot test_box1 $HELLO_BUNDLE + teardown_running_container_inroot test_box2 $HELLO_BUNDLE + teardown_running_container_inroot test_box3 $HELLO_BUNDLE + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_running_container_inroot test_box1 $HELLO_BUNDLE + teardown_running_container_inroot test_box2 $HELLO_BUNDLE + teardown_running_container_inroot test_box3 $HELLO_BUNDLE + teardown_busybox +} + +@test "list" { + # run a few busyboxes detached + ROOT=$HELLO_BUNDLE runc run -d --console-socket 
$CONSOLE_SOCKET test_box1 + [ "$status" -eq 0 ] + + ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_box2 + [ "$status" -eq 0 ] + + ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_box3 + [ "$status" -eq 0 ] + + ROOT=$HELLO_BUNDLE runc list + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ ID\ +PID\ +STATUS\ +BUNDLE\ +CREATED+ ]] + [[ "${lines[1]}" == *"test_box1"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] + [[ "${lines[2]}" == *"test_box2"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] + [[ "${lines[3]}" == *"test_box3"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] + + ROOT=$HELLO_BUNDLE runc list -q + [ "$status" -eq 0 ] + [[ "${lines[0]}" == "test_box1" ]] + [[ "${lines[1]}" == "test_box2" ]] + [[ "${lines[2]}" == "test_box3" ]] + + ROOT=$HELLO_BUNDLE runc list --format table + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ ID\ +PID\ +STATUS\ +BUNDLE\ +CREATED+ ]] + [[ "${lines[1]}" == *"test_box1"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] + [[ "${lines[2]}" == *"test_box2"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] + [[ "${lines[3]}" == *"test_box3"*[0-9]*"running"*$BUSYBOX_BUNDLE*[0-9]* ]] + + ROOT=$HELLO_BUNDLE runc list --format json + [ "$status" -eq 0 ] + [[ "${lines[0]}" == [\[][\{]"\"ociVersion\""[:]"\""*[0-9][\.]*[0-9][\.]*[0-9]*"\""[,]"\"id\""[:]"\"test_box1\""[,]"\"pid\""[:]*[0-9][,]"\"status\""[:]*"\"running\""[,]"\"bundle\""[:]*$BUSYBOX_BUNDLE*[,]"\"rootfs\""[:]"\""*"\""[,]"\"created\""[:]*[0-9]*[\}]* ]] + [[ "${lines[0]}" == *[,][\{]"\"ociVersion\""[:]"\""*[0-9][\.]*[0-9][\.]*[0-9]*"\""[,]"\"id\""[:]"\"test_box2\""[,]"\"pid\""[:]*[0-9][,]"\"status\""[:]*"\"running\""[,]"\"bundle\""[:]*$BUSYBOX_BUNDLE*[,]"\"rootfs\""[:]"\""*"\""[,]"\"created\""[:]*[0-9]*[\}]* ]] + [[ "${lines[0]}" == *[,][\{]"\"ociVersion\""[:]"\""*[0-9][\.]*[0-9][\.]*[0-9]*"\""[,]"\"id\""[:]"\"test_box3\""[,]"\"pid\""[:]*[0-9][,]"\"status\""[:]*"\"running\""[,]"\"bundle\""[:]*$BUSYBOX_BUNDLE*[,]"\"rootfs\""[:]"\""*"\""[,]"\"created\""[:]*[0-9]*[\}][\]] ]] +} diff 
--git a/tests/integration/mask.bats b/tests/integration/mask.bats new file mode 100644 index 0000000..aaa8042 --- /dev/null +++ b/tests/integration/mask.bats @@ -0,0 +1,59 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox + + # Create fake rootfs. + mkdir rootfs/testdir + echo "Forbidden information!" > rootfs/testfile + + # add extra masked paths + sed -i 's;"maskedPaths": \[;"maskedPaths": \["/testdir","/testfile",;g' config.json +} + +function teardown() { + teardown_busybox +} + +@test "mask paths [file]" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec test_busybox cat /testfile + [ "$status" -eq 0 ] + [[ "${output}" == "" ]] + + runc exec test_busybox rm -f /testfile + [ "$status" -eq 1 ] + [[ "${output}" == *"Read-only file system"* ]] + + runc exec test_busybox umount /testfile + [ "$status" -eq 1 ] + [[ "${output}" == *"Operation not permitted"* ]] +} + +@test "mask paths [directory]" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc exec test_busybox ls /testdir + [ "$status" -eq 0 ] + [[ "${output}" == "" ]] + + runc exec test_busybox touch /testdir/foo + [ "$status" -eq 1 ] + [[ "${output}" == *"Read-only file system"* ]] + + runc exec test_busybox rm -rf /testdir + [ "$status" -eq 1 ] + [[ "${output}" == *"Read-only file system"* ]] + + runc exec test_busybox umount /testdir + [ "$status" -eq 1 ] + [[ "${output}" == *"Operation not permitted"* ]] +} diff --git a/tests/integration/mounts.bats b/tests/integration/mounts.bats new file mode 100755 index 0000000..c35b3c5 --- /dev/null +++ b/tests/integration/mounts.bats @@ -0,0 +1,21 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc run [bind mount]" { + CONFIG=$(jq '.mounts |= . 
+ [{"source": ".", "destination": "/tmp/bind", "options": ["bind"]}] | .process.args = ["ls", "/tmp/bind/config.json"]' config.json) + echo "${CONFIG}" >config.json + + runc run test_bind_mount + [ "$status" -eq 0 ] + [[ "${lines[0]}" =~ '/tmp/bind/config.json' ]] +} diff --git a/tests/integration/multi-arch.bash b/tests/integration/multi-arch.bash new file mode 100644 index 0000000..5616bf7 --- /dev/null +++ b/tests/integration/multi-arch.bash @@ -0,0 +1,22 @@ +#!/bin/bash +get_busybox(){ + case $(go env GOARCH) in + arm64) + echo 'https://github.com/docker-library/busybox/raw/dist-arm64v8/glibc/busybox.tar.xz' + ;; + *) + echo 'https://github.com/docker-library/busybox/raw/dist-amd64/glibc/busybox.tar.xz' + ;; + esac +} + +get_hello(){ + case $(go env GOARCH) in + arm64) + echo 'hello-world-aarch64.tar' + ;; + *) + echo 'hello-world.tar' + ;; + esac +} diff --git a/tests/integration/pause.bats b/tests/integration/pause.bats new file mode 100644 index 0000000..4e25e59 --- /dev/null +++ b/tests/integration/pause.bats @@ -0,0 +1,72 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc pause and resume" { + # XXX: currently cgroups require root containers. + requires root + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running + + # pause busybox + runc pause test_busybox + [ "$status" -eq 0 ] + + # test state of busybox is paused + testcontainer test_busybox paused + + # resume busybox + runc resume test_busybox + [ "$status" -eq 0 ] + + # test state of busybox is back to running + testcontainer test_busybox running +} + +@test "runc pause and resume with nonexist container" { + # XXX: currently cgroups require root containers. 
+ requires root + + # run test_busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox running + + # pause test_busybox and nonexistent container + runc pause test_busybox + [ "$status" -eq 0 ] + runc pause nonexistent + [ "$status" -ne 0 ] + + # test state of test_busybox is paused + testcontainer test_busybox paused + + # resume test_busybox and nonexistent container + runc resume test_busybox + [ "$status" -eq 0 ] + runc resume nonexistent + [ "$status" -ne 0 ] + + # test state of test_busybox is back to running + testcontainer test_busybox running + + # delete test_busybox + runc delete --force test_busybox + + runc state test_busybox + [ "$status" -ne 0 ] +} diff --git a/tests/integration/ps.bats b/tests/integration/ps.bats new file mode 100644 index 0000000..646b5ab --- /dev/null +++ b/tests/integration/ps.bats @@ -0,0 +1,62 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "ps" { + # ps is not supported, it requires cgroups + requires root + + # start busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + runc ps test_busybox + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ UID\ +PID\ +PPID\ +C\ +STIME\ +TTY\ +TIME\ +CMD+ ]] + [[ "${lines[1]}" == *"$(id -un 2>/dev/null)"*[0-9]* ]] +} + +@test "ps -f json" { + # ps is not supported, it requires cgroups + requires root + + # start busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + runc ps -f json test_busybox + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ [0-9]+ ]] +} + +@test "ps -e -x" { + # ps is not supported, it requires cgroups + requires root + + # start busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + 
[ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + runc ps test_busybox -e -x + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ \ +PID\ +TTY\ +STAT\ +TIME\ +COMMAND+ ]] + [[ "${lines[1]}" =~ [0-9]+ ]] +} diff --git a/tests/integration/root.bats b/tests/integration/root.bats new file mode 100644 index 0000000..90b53b4 --- /dev/null +++ b/tests/integration/root.bats @@ -0,0 +1,50 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_running_container_inroot test_dotbox $HELLO_BUNDLE + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_running_container_inroot test_dotbox $HELLO_BUNDLE + teardown_busybox +} + +@test "global --root" { + # run busybox detached using $HELLO_BUNDLE for state + ROOT=$HELLO_BUNDLE runc run -d --console-socket $CONSOLE_SOCKET test_dotbox + [ "$status" -eq 0 ] + + # run busybox detached in default root + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + runc state test_busybox + [ "$status" -eq 0 ] + [[ "${output}" == *"running"* ]] + + ROOT=$HELLO_BUNDLE runc state test_dotbox + [ "$status" -eq 0 ] + [[ "${output}" == *"running"* ]] + + ROOT=$HELLO_BUNDLE runc state test_busybox + [ "$status" -ne 0 ] + + runc state test_dotbox + [ "$status" -ne 0 ] + + runc kill test_busybox KILL + [ "$status" -eq 0 ] + retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" + runc delete test_busybox + [ "$status" -eq 0 ] + + ROOT=$HELLO_BUNDLE runc kill test_dotbox KILL + [ "$status" -eq 0 ] + retry 10 1 eval "ROOT='$HELLO_BUNDLE' __runc state test_dotbox | grep -q 'stopped'" + ROOT=$HELLO_BUNDLE runc delete test_dotbox + [ "$status" -eq 0 ] +} diff --git a/tests/integration/spec.bats b/tests/integration/spec.bats new file mode 100644 index 0000000..5df8f70 --- /dev/null +++ b/tests/integration/spec.bats @@ -0,0 +1,96 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + # initial cleanup in case a prior test exited and did not cleanup + cd 
"$INTEGRATION_ROOT" + run rm -f -r "$HELLO_BUNDLE" + + # setup hello-world for spec generation testing + run mkdir "$HELLO_BUNDLE" + run mkdir "$HELLO_BUNDLE"/rootfs + run tar -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE" +} + +function teardown() { + cd "$INTEGRATION_ROOT" + run rm -f -r "$HELLO_BUNDLE" +} + +@test "spec generation cwd" { + cd "$HELLO_BUNDLE" + # note this test runs from the bundle not the integration root + + # test that config.json does not exist after the above partial setup + [ ! -e config.json ] + + # test generation of spec does not return an error + runc_spec + [ "$status" -eq 0 ] + + # test generation of spec created our config.json (spec) + [ -e config.json ] + + # test existence of required args parameter in the generated config.json + run bash -c "grep -A2 'args' config.json | grep 'sh'" + [[ "${output}" == *"sh"* ]] + + # change the default args parameter from sh to hello + sed -i 's;"sh";"/hello";' config.json + + # ensure the generated spec works by running hello-world + runc run test_hello + [ "$status" -eq 0 ] +} + +@test "spec generation --bundle" { + # note this test runs from the integration root not the bundle + + # test that config.json does not exist after the above partial setup + [ ! 
-e "$HELLO_BUNDLE"/config.json ] + + # test generation of spec does not return an error + runc_spec "$HELLO_BUNDLE" + [ "$status" -eq 0 ] + + # test generation of spec created our config.json (spec) + [ -e "$HELLO_BUNDLE"/config.json ] + + # change the default args parameter from sh to hello + sed -i 's;"sh";"/hello";' "$HELLO_BUNDLE"/config.json + + # ensure the generated spec works by running hello-world + runc run --bundle "$HELLO_BUNDLE" test_hello + [ "$status" -eq 0 ] +} + +@test "spec validator" { + TESTDIR=$(pwd) + cd "$HELLO_BUNDLE" + + run git clone https://github.com/opencontainers/runtime-spec.git src/runtime-spec + [ "$status" -eq 0 ] + + SPEC_COMMIT=$(grep '^github.com/opencontainers/runtime-spec' ${TESTDIR}/../../vendor.conf | tr -s ' ' | cut -d ' ' -f 2) + run git -C src/runtime-spec reset --hard "${SPEC_COMMIT}" + + [ "$status" -eq 0 ] + [ -e src/runtime-spec/schema/config-schema.json ] + + run bash -c "GOPATH='$GOPATH' go get github.com/xeipuuv/gojsonschema" + [ "$status" -eq 0 ] + + run git -C "${GOPATH}/src/github.com/xeipuuv/gojsonschema" reset --hard 6637feb73ee44cd4640bb3def285c29774234c7f + [ "$status" -eq 0 ] + + GOPATH="$GOPATH" go build src/runtime-spec/schema/validate.go + [ -e ./validate ] + + runc spec + [ -e config.json ] + + run ./validate src/runtime-spec/schema/config-schema.json config.json + [ "$status" -eq 0 ] + [[ "${lines[0]}" == *"The document is valid"* ]] +} diff --git a/tests/integration/start.bats b/tests/integration/start.bats new file mode 100644 index 0000000..1f0ea8e --- /dev/null +++ b/tests/integration/start.bats @@ -0,0 +1,31 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc start" { + runc create --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + testcontainer test_busybox created + + # start container test_busybox + runc start test_busybox + [ "$status" -eq 0 ] + + testcontainer 
test_busybox running + + # delete test_busybox + runc delete --force test_busybox + + runc state test_busybox + [ "$status" -ne 0 ] +} diff --git a/tests/integration/start_detached.bats b/tests/integration/start_detached.bats new file mode 100644 index 0000000..7f177b8 --- /dev/null +++ b/tests/integration/start_detached.bats @@ -0,0 +1,76 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc run detached" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running +} + +@test "runc run detached ({u,g}id != 0)" { + # cannot start containers as another user in rootless setup without idmap + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap + + # replace "uid": 0 with "uid": 1000 + # and do a similar thing for gid. + sed -i 's;"uid": 0;"uid": 1000;g' config.json + sed -i 's;"gid": 0;"gid": 100;g' config.json + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running +} + +@test "runc run detached --pid-file" { + # run busybox detached + runc run --pid-file pid.txt -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + # check pid.txt was generated + [ -e pid.txt ] + + run cat pid.txt + [ "$status" -eq 0 ] + [[ ${lines[0]} == $(__runc state test_busybox | jq '.pid') ]] +} + +@test "runc run detached --pid-file with new CWD" { + # create pid_file directory as the CWD + run mkdir pid_file + [ "$status" -eq 0 ] + run cd pid_file + [ "$status" -eq 0 ] + + # run busybox detached + runc run --pid-file pid.txt -d -b $BUSYBOX_BUNDLE --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + # check pid.txt was generated + [ 
-e pid.txt ] + + run cat pid.txt + [ "$status" -eq 0 ] + [[ ${lines[0]} == $(__runc state test_busybox | jq '.pid') ]] +} diff --git a/tests/integration/start_hello.bats b/tests/integration/start_hello.bats new file mode 100644 index 0000000..a706be2 --- /dev/null +++ b/tests/integration/start_hello.bats @@ -0,0 +1,64 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_hello + setup_hello +} + +function teardown() { + teardown_hello +} + +@test "runc run" { + # run hello-world + runc run test_hello + [ "$status" -eq 0 ] + + # check expected output + [[ "${output}" == *"Hello"* ]] +} + +@test "runc run ({u,g}id != 0)" { + # cannot start containers as another user in rootless setup without idmap + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap + + # replace "uid": 0 with "uid": 1000 + # and do a similar thing for gid. + sed -i 's;"uid": 0;"uid": 1000;g' config.json + sed -i 's;"gid": 0;"gid": 100;g' config.json + + # run hello-world + runc run test_hello + [ "$status" -eq 0 ] + + # check expected output + [[ "${output}" == *"Hello"* ]] +} + +@test "runc run with rootfs set to ." { + cp config.json rootfs/. 
+ rm config.json + cd rootfs + sed -i 's;"rootfs";".";' config.json + + # run hello-world + runc run test_hello + [ "$status" -eq 0 ] + [[ "${output}" == *"Hello"* ]] +} + +@test "runc run --pid-file" { + # run hello-world + runc run --pid-file pid.txt test_hello + [ "$status" -eq 0 ] + [[ "${output}" == *"Hello"* ]] + + # check pid.txt was generated + [ -e pid.txt ] + + run cat pid.txt + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ [0-9]+ ]] +} diff --git a/tests/integration/state.bats b/tests/integration/state.bats new file mode 100644 index 0000000..68dae38 --- /dev/null +++ b/tests/integration/state.bats @@ -0,0 +1,66 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "state (kill + delete)" { + runc state test_busybox + [ "$status" -ne 0 ] + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + runc kill test_busybox KILL + [ "$status" -eq 0 ] + + # wait for busybox to be in the destroyed state + retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" + + # delete test_busybox + runc delete test_busybox + [ "$status" -eq 0 ] + + runc state test_busybox + [ "$status" -ne 0 ] +} + +@test "state (pause + resume)" { + # XXX: pause and resume require cgroups. 
+ requires root + + runc state test_busybox + [ "$status" -ne 0 ] + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + testcontainer test_busybox running + + # pause busybox + runc pause test_busybox + [ "$status" -eq 0 ] + + # test state of busybox is paused + testcontainer test_busybox paused + + # resume busybox + runc resume test_busybox + [ "$status" -eq 0 ] + + # test state of busybox is back to running + testcontainer test_busybox running +} diff --git a/tests/integration/testdata/hello-world-aarch64.tar b/tests/integration/testdata/hello-world-aarch64.tar new file mode 100644 index 0000000..186c8ae Binary files /dev/null and b/tests/integration/testdata/hello-world-aarch64.tar differ diff --git a/tests/integration/testdata/hello-world.tar b/tests/integration/testdata/hello-world.tar new file mode 100644 index 0000000..aec830e Binary files /dev/null and b/tests/integration/testdata/hello-world.tar differ diff --git a/tests/integration/tty.bats b/tests/integration/tty.bats new file mode 100644 index 0000000..688875d --- /dev/null +++ b/tests/integration/tty.bats @@ -0,0 +1,230 @@ +#!/usr/bin/env bats + +load helpers + +function setup() { + teardown_busybox + setup_busybox +} + +function teardown() { + teardown_busybox +} + +@test "runc run [tty ptsname]" { + # Replace sh script with readlink. + sed -i 's|"sh"|"sh", "-c", "for file in /proc/self/fd/[012]; do readlink $file; done"|' config.json + + # run busybox + runc run test_busybox + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ /dev/pts/+ ]] + [[ ${lines[1]} =~ /dev/pts/+ ]] + [[ ${lines[2]} =~ /dev/pts/+ ]] +} + +@test "runc run [tty owner]" { + # tty chmod is not doable in rootless containers without idmap. + # TODO: this can be made as a change to the gid test. + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap + + # Replace sh script with stat. 
+ sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json + + # run busybox + runc run test_busybox + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ 0 ]] + # This is set by the default config.json (it corresponds to the standard tty group). + [[ ${lines[1]} =~ 5 ]] +} + +@test "runc run [tty owner] ({u,g}id != 0)" { + # tty chmod is not doable in rootless containers without idmap. + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap + + # replace "uid": 0 with "uid": 1000 + # and do a similar thing for gid. + sed -i 's;"uid": 0;"uid": 1000;g' config.json + sed -i 's;"gid": 0;"gid": 100;g' config.json + + # Replace sh script with stat. + sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json + + # run busybox + runc run test_busybox + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ 1000 ]] + # This is set by the default config.json (it corresponds to the standard tty group). + [[ ${lines[1]} =~ 5 ]] +} + +@test "runc exec [tty ptsname]" { + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # make sure we're running + testcontainer test_busybox running + + # run the exec + runc exec test_busybox sh -c 'for file in /proc/self/fd/[012]; do readlink $file; done' + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ /dev/pts/+ ]] + [[ ${lines[1]} =~ /dev/pts/+ ]] + [[ ${lines[2]} =~ /dev/pts/+ ]] +} + +@test "runc exec [tty owner]" { + # tty chmod is not doable in rootless containers without idmap. + # TODO: this can be made as a change to the gid test. 
+ [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # make sure we're running + testcontainer test_busybox running + + # run the exec + runc exec test_busybox sh -c 'stat -c %u:%g $(tty) | tr : \\n' + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ 0 ]] + [[ ${lines[1]} =~ 5 ]] +} + +@test "runc exec [tty owner] ({u,g}id != 0)" { + # tty chmod is not doable in rootless containers without idmap. + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_idmap + + # replace "uid": 0 with "uid": 1000 + # and do a similar thing for gid. + sed -i 's;"uid": 0;"uid": 1000;g' config.json + sed -i 's;"gid": 0;"gid": 100;g' config.json + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # make sure we're running + testcontainer test_busybox running + + # run the exec + runc exec test_busybox sh -c 'stat -c %u:%g $(tty) | tr : \\n' + [ "$status" -eq 0 ] + [[ ${lines[0]} =~ 1000 ]] + [[ ${lines[1]} =~ 5 ]] +} + +@test "runc exec [tty consolesize]" { + # allow writing to filesystem + sed -i 's/"readonly": true/"readonly": false/' config.json + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # make sure we're running + testcontainer test_busybox running + + tty_info_with_consize_size=$( cat < /tmp/tty-info" + ], + "cwd": "/" +} +EOF + ) + + # run the exec + runc exec --pid-file pid.txt -d --console-socket $CONSOLE_SOCKET -p <( echo $tty_info_with_consize_size ) test_busybox + [ "$status" -eq 0 ] + + # check the pid was generated + [ -e pid.txt ] + + #wait user process to finish + timeout 1 tail --pid=$(head -n 1 pid.txt) -f /dev/null + + tty_info=$( cat </ { print $5; exit }') + eval CGROUP_${g}="${base_path}${CGROUPS_PATH}" + done + + CGROUP_SYSTEM_MEMORY=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ /\<'MEMORY'\>/ { print $5; exit }') + + # check 
that initial values were properly set + check_cgroup_value $CGROUP_CPU "cpu.cfs_period_us" 1000000 + check_cgroup_value $CGROUP_CPU "cpu.cfs_quota_us" 500000 + check_cgroup_value $CGROUP_CPU "cpu.shares" 100 + check_cgroup_value $CGROUP_CPUSET "cpuset.cpus" 0 + check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 16777216 + check_cgroup_value $CGROUP_MEMORY "memory.kmem.tcp.limit_in_bytes" 11534336 + check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" 33554432 + check_cgroup_value $CGROUP_MEMORY "memory.soft_limit_in_bytes" 25165824 + check_cgroup_value $CGROUP_PIDS "pids.max" 20 + + # update cpu-period + runc update test_update --cpu-period 900000 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_CPU "cpu.cfs_period_us" 900000 + + # update cpu-quota + runc update test_update --cpu-quota 600000 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_CPU "cpu.cfs_quota_us" 600000 + + # update cpu-shares + runc update test_update --cpu-share 200 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_CPU "cpu.shares" 200 + + # update cpuset if supported (i.e. 
we're running on a multicore cpu) + cpu_count=$(grep '^processor' /proc/cpuinfo | wc -l) + if [ $cpu_count -gt 1 ]; then + runc update test_update --cpuset-cpus "1" + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_CPUSET "cpuset.cpus" 1 + fi + + # update memory limit + runc update test_update --memory 67108864 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" 67108864 + + runc update test_update --memory 50M + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" 52428800 + + # update memory soft limit + runc update test_update --memory-reservation 33554432 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_MEMORY "memory.soft_limit_in_bytes" 33554432 + + # Run swap memory tests if swap is available + if [ -f "$CGROUP_MEMORY/memory.memsw.limit_in_bytes" ]; then + # try to remove memory swap limit + runc update test_update --memory-swap -1 + [ "$status" -eq 0 ] + # Get System memory swap limit + SYSTEM_MEMORY_SW=$(cat "${CGROUP_SYSTEM_MEMORY}/memory.memsw.limit_in_bytes") + check_cgroup_value $CGROUP_MEMORY "memory.memsw.limit_in_bytes" ${SYSTEM_MEMORY_SW} + + # update memory swap + runc update test_update --memory-swap 96468992 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_MEMORY "memory.memsw.limit_in_bytes" 96468992 + fi; + + # try to remove memory limit + runc update test_update --memory -1 + [ "$status" -eq 0 ] + + # Get System memory limit + SYSTEM_MEMORY=$(cat "${CGROUP_SYSTEM_MEMORY}/memory.limit_in_bytes") + # check memory limited is gone + check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" ${SYSTEM_MEMORY} + + # check swap memory limited is gone + if [ -f "$CGROUP_MEMORY/memory.memsw.limit_in_bytes" ]; then + check_cgroup_value $CGROUP_MEMORY "memory.memsw.limit_in_bytes" ${SYSTEM_MEMORY} + fi + + # update kernel memory limit + runc update test_update --kernel-memory 50331648 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 50331648 + + # update 
kernel memory tcp limit + runc update test_update --kernel-memory-tcp 41943040 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_MEMORY "memory.kmem.tcp.limit_in_bytes" 41943040 + + # update pids limit + runc update test_update --pids-limit 10 + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_PIDS "pids.max" 10 + + # Revert to the test initial value via json on stding + runc update -r - test_update < $BATS_TMPDIR/runc-cgroups-integration-test.json + + runc update -r $BATS_TMPDIR/runc-cgroups-integration-test.json test_update + [ "$status" -eq 0 ] + check_cgroup_value $CGROUP_CPU "cpu.cfs_period_us" 1000000 + check_cgroup_value $CGROUP_CPU "cpu.cfs_quota_us" 500000 + check_cgroup_value $CGROUP_CPU "cpu.shares" 100 + check_cgroup_value $CGROUP_CPUSET "cpuset.cpus" 0 + check_cgroup_value $CGROUP_MEMORY "memory.kmem.limit_in_bytes" 16777216 + check_cgroup_value $CGROUP_MEMORY "memory.kmem.tcp.limit_in_bytes" 11534336 + check_cgroup_value $CGROUP_MEMORY "memory.limit_in_bytes" 33554432 + check_cgroup_value $CGROUP_MEMORY "memory.soft_limit_in_bytes" 25165824 + check_cgroup_value $CGROUP_PIDS "pids.max" 20 +} + +@test "update rt period and runtime" { + [[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup + requires cgroups_kmem cgroups_rt + + # run a detached busybox + runc run -d --console-socket $CONSOLE_SOCKET test_update_rt + [ "$status" -eq 0 ] + + # get the cgroup paths + eval CGROUP_CPU="${CGROUP_CPU_BASE_PATH}${CGROUPS_PATH}" + + runc update -r - test_update_rt < /etc/subuid.tmp + mv /etc/subuid{.tmp,} + [ -e /etc/subgid.tmp ] && mv /etc/subgid{.tmp,} + ( grep -v '^rootless' /etc/subgid ; echo "rootless:$ROOTLESS_GIDMAP_START:$ROOTLESS_GIDMAP_LENGTH" ) > /etc/subgid.tmp + mv /etc/subgid{.tmp,} + + # Reactivate new{uid,gid}map helpers if applicable. 
# disable_idmap turns the "idmap" feature off for the next test run by moving
# the sub{uid,gid} databases and the new{uid,gid}map helpers out of the way.
# Nothing is deleted: the files are only renamed (to *.tmp and unused-*).
function disable_idmap() {
	export ROOTLESS_UIDMAP_START ROOTLESS_UIDMAP_LENGTH
	export ROOTLESS_GIDMAP_START ROOTLESS_GIDMAP_LENGTH

	# Deactivate sub{uid,gid} mappings.
	# Brace expansion: /etc/subuid{,.tmp} expands to "/etc/subuid /etc/subuid.tmp".
	[ -e /etc/subuid ] && mv /etc/subuid{,.tmp}
	[ -e /etc/subgid ] && mv /etc/subgid{,.tmp}

	# Deactivate new{uid,gid}map helpers. setuid is preserved with mv(1).
	[ -e /usr/bin/newuidmap ] && mv /usr/bin/{,unused-}newuidmap
	[ -e /usr/bin/newgidmap ] && mv /usr/bin/{,unused-}newgidmap
}
# Create a powerset of $ALL_FEATURES (the set of all subsets of $ALL_FEATURES).
# We test all of the possible combinations (as long as we don't add too many
# feature knobs this shouldn't take too long -- but the number of tested
# combinations is O(2^n)).
#
# How it works: the inner printf turns every argument X into the brace
# pattern "{,X+}" and the patterns are concatenated, with a trailing ":".
# eval re-parses that text so brace expansion runs, yielding every
# combination of "nothing" or "X+" per feature, each terminated by ":".
# The outer printf '%s' prints those words back to back, so the result is a
# ':'-separated list in which each entry is a '+'-joined feature set with a
# trailing '+' (the empty set is an empty entry).
function powerset() {
	eval printf '%s' $(printf '{,%s+}' "$@"):
}
+ set -e + echo path: $PATH + export ROOTLESS_FEATURES="$enabled_features" + sudo -HE -u rootless PATH="$PATH" bats -t "$ROOT/tests/integration$TESTFLAGS" + set +e +done diff --git a/tty.go b/tty.go new file mode 100644 index 0000000..6106c2d --- /dev/null +++ b/tty.go @@ -0,0 +1,170 @@ +// +build linux + +package main + +import ( + "fmt" + "io" + "os" + "os/signal" + "sync" + + "github.com/containerd/console" + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/utils" +) + +type tty struct { + epoller *console.Epoller + console *console.EpollConsole + stdin console.Console + closers []io.Closer + postStart []io.Closer + wg sync.WaitGroup + consoleC chan error +} + +func (t *tty) copyIO(w io.Writer, r io.ReadCloser) { + defer t.wg.Done() + io.Copy(w, r) + r.Close() +} + +// setup pipes for the process so that advanced features like c/r are able to easily checkpoint +// and restore the process's IO without depending on a host specific path or device +func setupProcessPipes(p *libcontainer.Process, rootuid, rootgid int) (*tty, error) { + i, err := p.InitializeIO(rootuid, rootgid) + if err != nil { + return nil, err + } + t := &tty{ + closers: []io.Closer{ + i.Stdin, + i.Stdout, + i.Stderr, + }, + } + // add the process's io to the post start closers if they support close + for _, cc := range []interface{}{ + p.Stdin, + p.Stdout, + p.Stderr, + } { + if c, ok := cc.(io.Closer); ok { + t.postStart = append(t.postStart, c) + } + } + go func() { + io.Copy(i.Stdin, os.Stdin) + i.Stdin.Close() + }() + t.wg.Add(2) + go t.copyIO(os.Stdout, i.Stdout) + go t.copyIO(os.Stderr, i.Stderr) + return t, nil +} + +func inheritStdio(process *libcontainer.Process) error { + process.Stdin = os.Stdin + process.Stdout = os.Stdout + process.Stderr = os.Stderr + return nil +} + +func (t *tty) recvtty(process *libcontainer.Process, socket *os.File) (Err error) { + f, err := utils.RecvFd(socket) + if err != nil { + return err + } + cons, err := 
console.ConsoleFromFile(f) + if err != nil { + return err + } + console.ClearONLCR(cons.Fd()) + epoller, err := console.NewEpoller() + if err != nil { + return err + } + epollConsole, err := epoller.Add(cons) + if err != nil { + return err + } + defer func() { + if Err != nil { + epollConsole.Close() + } + }() + go epoller.Wait() + go io.Copy(epollConsole, os.Stdin) + t.wg.Add(1) + go t.copyIO(os.Stdout, epollConsole) + + // set raw mode to stdin and also handle interrupt + stdin, err := console.ConsoleFromFile(os.Stdin) + if err != nil { + return err + } + if err := stdin.SetRaw(); err != nil { + return fmt.Errorf("failed to set the terminal from the stdin: %v", err) + } + go handleInterrupt(stdin) + + t.epoller = epoller + t.stdin = stdin + t.console = epollConsole + t.closers = []io.Closer{epollConsole} + return nil +} + +func handleInterrupt(c console.Console) { + sigchan := make(chan os.Signal, 1) + signal.Notify(sigchan, os.Interrupt) + <-sigchan + c.Reset() + os.Exit(0) +} + +func (t *tty) waitConsole() error { + if t.consoleC != nil { + return <-t.consoleC + } + return nil +} + +// ClosePostStart closes any fds that are provided to the container and dup2'd +// so that we no longer have copy in our process. 
+func (t *tty) ClosePostStart() error { + for _, c := range t.postStart { + c.Close() + } + return nil +} + +// Close closes all open fds for the tty and/or restores the original +// stdin state to what it was prior to the container execution +func (t *tty) Close() error { + // ensure that our side of the fds are always closed + for _, c := range t.postStart { + c.Close() + } + // the process is gone at this point, shutting down the console if we have + // one and wait for all IO to be finished + if t.console != nil && t.epoller != nil { + t.console.Shutdown(t.epoller.CloseConsole) + } + t.wg.Wait() + for _, c := range t.closers { + c.Close() + } + if t.stdin != nil { + t.stdin.Reset() + } + return nil +} + +func (t *tty) resize() error { + if t.console == nil { + return nil + } + return t.console.ResizeFrom(console.Current()) +} diff --git a/types/events.go b/types/events.go new file mode 100644 index 0000000..c6f0e97 --- /dev/null +++ b/types/events.go @@ -0,0 +1,130 @@ +package types + +// Event struct for encoding the event data to json. +type Event struct { + Type string `json:"type"` + ID string `json:"id"` + Data interface{} `json:"data,omitempty"` +} + +// stats is the runc specific stats structure for stability when encoding and decoding stats. 
// Blkio groups the block IO statistics reported in a Stats event. Every field
// is a list of BlkioEntry values and is left out of the JSON output when the
// list is empty.
type Blkio struct {
	IoServiceBytesRecursive []BlkioEntry `json:"ioServiceBytesRecursive,omitempty"`
	IoServicedRecursive     []BlkioEntry `json:"ioServicedRecursive,omitempty"`
	// NOTE: the JSON key is "ioQueueRecursive" (no "d"), which differs from the
	// Go field name. It is part of the encoded format, so it must not be
	// "corrected" without considering existing consumers.
	IoQueuedRecursive      []BlkioEntry `json:"ioQueueRecursive,omitempty"`
	IoServiceTimeRecursive []BlkioEntry `json:"ioServiceTimeRecursive,omitempty"`
	IoWaitTimeRecursive    []BlkioEntry `json:"ioWaitTimeRecursive,omitempty"`
	IoMergedRecursive      []BlkioEntry `json:"ioMergedRecursive,omitempty"`
	IoTimeRecursive        []BlkioEntry `json:"ioTimeRecursive,omitempty"`
	SectorsRecursive       []BlkioEntry `json:"sectorsRecursive,omitempty"`
}
// Memory is the memory section of a Stats event.
//
// NOTE: "omitempty" only takes effect on Cache and Raw. encoding/json never
// treats a struct value as empty, so the MemoryEntry fields below are always
// encoded even when all of their members are zero.
type Memory struct {
	// Cache is omitted from the JSON output when it is zero.
	Cache     uint64      `json:"cache,omitempty"`
	Usage     MemoryEntry `json:"usage,omitempty"`
	Swap      MemoryEntry `json:"swap,omitempty"`
	Kernel    MemoryEntry `json:"kernel,omitempty"`
	KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
	// Raw is a free-form name -> value map; omitted when empty.
	// NOTE(review): presumably the unparsed cgroup memory counters; confirm
	// against the code that fills it in.
	Raw map[string]uint64 `json:"raw,omitempty"`
}
+ Name string + + RxBytes uint64 + RxPackets uint64 + RxErrors uint64 + RxDropped uint64 + TxBytes uint64 + TxPackets uint64 + TxErrors uint64 + TxDropped uint64 +} diff --git a/update.go b/update.go new file mode 100644 index 0000000..05dc4b5 --- /dev/null +++ b/update.go @@ -0,0 +1,304 @@ +// +build linux + +package main + +import ( + "encoding/json" + "fmt" + "os" + "strconv" + + "github.com/docker/go-units" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/urfave/cli" +) + +func i64Ptr(i int64) *int64 { return &i } +func u64Ptr(i uint64) *uint64 { return &i } +func u16Ptr(i uint16) *uint16 { return &i } + +var updateCommand = cli.Command{ + Name: "update", + Usage: "update container resource constraints", + ArgsUsage: ``, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "resources, r", + Value: "", + Usage: `path to the file containing the resources to update or '-' to read from the standard input + +The accepted format is as follow (unchanged values can be omitted): + +{ + "memory": { + "limit": 0, + "reservation": 0, + "swap": 0, + "kernel": 0, + "kernelTCP": 0 + }, + "cpu": { + "shares": 0, + "quota": 0, + "period": 0, + "realtimeRuntime": 0, + "realtimePeriod": 0, + "cpus": "", + "mems": "" + }, + "blockIO": { + "weight": 0 + } +} + +Note: if data is to be read from a file or the standard input, all +other options are ignored. +`, + }, + + cli.IntFlag{ + Name: "blkio-weight", + Usage: "Specifies per cgroup weight, range is from 10 to 1000", + }, + cli.StringFlag{ + Name: "cpu-period", + Usage: "CPU CFS period to be used for hardcapping (in usecs). 0 to use system default", + }, + cli.StringFlag{ + Name: "cpu-quota", + Usage: "CPU CFS hardcap limit (in usecs). Allowed cpu time in a given period", + }, + cli.StringFlag{ + Name: "cpu-share", + Usage: "CPU shares (relative weight vs. 
other containers)", + }, + cli.StringFlag{ + Name: "cpu-rt-period", + Usage: "CPU realtime period to be used for hardcapping (in usecs). 0 to use system default", + }, + cli.StringFlag{ + Name: "cpu-rt-runtime", + Usage: "CPU realtime hardcap limit (in usecs). Allowed cpu time in a given period", + }, + cli.StringFlag{ + Name: "cpuset-cpus", + Usage: "CPU(s) to use", + }, + cli.StringFlag{ + Name: "cpuset-mems", + Usage: "Memory node(s) to use", + }, + cli.StringFlag{ + Name: "kernel-memory", + Usage: "Kernel memory limit (in bytes)", + }, + cli.StringFlag{ + Name: "kernel-memory-tcp", + Usage: "Kernel memory limit (in bytes) for tcp buffer", + }, + cli.StringFlag{ + Name: "memory", + Usage: "Memory limit (in bytes)", + }, + cli.StringFlag{ + Name: "memory-reservation", + Usage: "Memory reservation or soft_limit (in bytes)", + }, + cli.StringFlag{ + Name: "memory-swap", + Usage: "Total memory usage (memory + swap); set '-1' to enable unlimited swap", + }, + cli.IntFlag{ + Name: "pids-limit", + Usage: "Maximum number of pids allowed in the container", + }, + cli.StringFlag{ + Name: "l3-cache-schema", + Usage: "The string of Intel RDT/CAT L3 cache schema", + }, + cli.StringFlag{ + Name: "mem-bw-schema", + Usage: "The string of Intel RDT/MBA memory bandwidth schema", + }, + }, + Action: func(context *cli.Context) error { + if err := checkArgs(context, 1, exactArgs); err != nil { + return err + } + container, err := getContainer(context) + if err != nil { + return err + } + + r := specs.LinuxResources{ + Memory: &specs.LinuxMemory{ + Limit: i64Ptr(0), + Reservation: i64Ptr(0), + Swap: i64Ptr(0), + Kernel: i64Ptr(0), + KernelTCP: i64Ptr(0), + }, + CPU: &specs.LinuxCPU{ + Shares: u64Ptr(0), + Quota: i64Ptr(0), + Period: u64Ptr(0), + RealtimeRuntime: i64Ptr(0), + RealtimePeriod: u64Ptr(0), + Cpus: "", + Mems: "", + }, + BlockIO: &specs.LinuxBlockIO{ + Weight: u16Ptr(0), + }, + Pids: &specs.LinuxPids{ + Limit: 0, + }, + } + + config := container.Config() + + if in := 
context.String("resources"); in != "" { + var ( + f *os.File + err error + ) + switch in { + case "-": + f = os.Stdin + default: + f, err = os.Open(in) + if err != nil { + return err + } + } + err = json.NewDecoder(f).Decode(&r) + if err != nil { + return err + } + } else { + if val := context.Int("blkio-weight"); val != 0 { + r.BlockIO.Weight = u16Ptr(uint16(val)) + } + if val := context.String("cpuset-cpus"); val != "" { + r.CPU.Cpus = val + } + if val := context.String("cpuset-mems"); val != "" { + r.CPU.Mems = val + } + + for _, pair := range []struct { + opt string + dest *uint64 + }{ + + {"cpu-period", r.CPU.Period}, + {"cpu-rt-period", r.CPU.RealtimePeriod}, + {"cpu-share", r.CPU.Shares}, + } { + if val := context.String(pair.opt); val != "" { + var err error + *pair.dest, err = strconv.ParseUint(val, 10, 64) + if err != nil { + return fmt.Errorf("invalid value for %s: %s", pair.opt, err) + } + } + } + for _, pair := range []struct { + opt string + dest *int64 + }{ + + {"cpu-quota", r.CPU.Quota}, + {"cpu-rt-runtime", r.CPU.RealtimeRuntime}, + } { + if val := context.String(pair.opt); val != "" { + var err error + *pair.dest, err = strconv.ParseInt(val, 10, 64) + if err != nil { + return fmt.Errorf("invalid value for %s: %s", pair.opt, err) + } + } + } + for _, pair := range []struct { + opt string + dest *int64 + }{ + {"memory", r.Memory.Limit}, + {"memory-swap", r.Memory.Swap}, + {"kernel-memory", r.Memory.Kernel}, + {"kernel-memory-tcp", r.Memory.KernelTCP}, + {"memory-reservation", r.Memory.Reservation}, + } { + if val := context.String(pair.opt); val != "" { + var v int64 + + if val != "-1" { + v, err = units.RAMInBytes(val) + if err != nil { + return fmt.Errorf("invalid value for %s: %s", pair.opt, err) + } + } else { + v = -1 + } + *pair.dest = v + } + } + r.Pids.Limit = int64(context.Int("pids-limit")) + } + + // Update the value + config.Cgroups.Resources.BlkioWeight = *r.BlockIO.Weight + config.Cgroups.Resources.CpuPeriod = *r.CPU.Period + 
config.Cgroups.Resources.CpuQuota = *r.CPU.Quota + config.Cgroups.Resources.CpuShares = *r.CPU.Shares + config.Cgroups.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod + config.Cgroups.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime + config.Cgroups.Resources.CpusetCpus = r.CPU.Cpus + config.Cgroups.Resources.CpusetMems = r.CPU.Mems + config.Cgroups.Resources.KernelMemory = *r.Memory.Kernel + config.Cgroups.Resources.KernelMemoryTCP = *r.Memory.KernelTCP + config.Cgroups.Resources.Memory = *r.Memory.Limit + config.Cgroups.Resources.MemoryReservation = *r.Memory.Reservation + config.Cgroups.Resources.MemorySwap = *r.Memory.Swap + config.Cgroups.Resources.PidsLimit = r.Pids.Limit + + // Update Intel RDT + l3CacheSchema := context.String("l3-cache-schema") + memBwSchema := context.String("mem-bw-schema") + if l3CacheSchema != "" && !intelrdt.IsCatEnabled() { + return fmt.Errorf("Intel RDT/CAT: l3 cache schema is not enabled") + } + + if memBwSchema != "" && !intelrdt.IsMbaEnabled() { + return fmt.Errorf("Intel RDT/MBA: memory bandwidth schema is not enabled") + } + + if l3CacheSchema != "" || memBwSchema != "" { + // If intelRdt is not specified in original configuration, we just don't + // Apply() to create intelRdt group or attach tasks for this container. + // In update command, we could re-enable through IntelRdtManager.Apply() + // and then update intelrdt constraint. 
+ if config.IntelRdt == nil { + state, err := container.State() + if err != nil { + return err + } + config.IntelRdt = &configs.IntelRdt{} + intelRdtManager := intelrdt.IntelRdtManager{ + Config: &config, + Id: container.ID(), + Path: state.IntelRdtPath, + } + if err := intelRdtManager.Apply(state.InitProcessPid); err != nil { + return err + } + } + config.IntelRdt.L3CacheSchema = l3CacheSchema + config.IntelRdt.MemBwSchema = memBwSchema + } + + return container.Set(config) + }, +} diff --git a/utils.go b/utils.go new file mode 100644 index 0000000..5165336 --- /dev/null +++ b/utils.go @@ -0,0 +1,94 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/sirupsen/logrus" + "github.com/urfave/cli" +) + +const ( + exactArgs = iota + minArgs + maxArgs +) + +func checkArgs(context *cli.Context, expected, checkType int) error { + var err error + cmdName := context.Command.Name + switch checkType { + case exactArgs: + if context.NArg() != expected { + err = fmt.Errorf("%s: %q requires exactly %d argument(s)", os.Args[0], cmdName, expected) + } + case minArgs: + if context.NArg() < expected { + err = fmt.Errorf("%s: %q requires a minimum of %d argument(s)", os.Args[0], cmdName, expected) + } + case maxArgs: + if context.NArg() > expected { + err = fmt.Errorf("%s: %q requires a maximum of %d argument(s)", os.Args[0], cmdName, expected) + } + } + + if err != nil { + fmt.Printf("Incorrect Usage.\n\n") + cli.ShowCommandHelp(context, cmdName) + return err + } + return nil +} + +// fatal prints the error's details if it is a libcontainer specific error type +// then exits the program with an exit status of 1. 
// parseBoolOrAuto parses a tri-state option value.
//
// It returns (nil, nil) if s is empty or "auto" (compared case-insensitively).
// Otherwise s is parsed with strconv.ParseBool and a pointer to the result is
// returned. If s is not a valid boolean the strconv error is returned
// unchanged and the pointer is nil, so a failed parse can never be mistaken
// for an explicit "false".
func parseBoolOrAuto(s string) (*bool, error) {
	if s == "" || strings.EqualFold(s, "auto") {
		return nil, nil
	}
	b, err := strconv.ParseBool(s)
	if err != nil {
		return nil, err
	}
	return &b, nil
}
id cannot be empty") + +// loadFactory returns the configured factory instance for execing containers. +func loadFactory(context *cli.Context) (libcontainer.Factory, error) { + root := context.GlobalString("root") + abs, err := filepath.Abs(root) + if err != nil { + return nil, err + } + + // We default to cgroupfs, and can only use systemd if the system is a + // systemd box. + cgroupManager := libcontainer.Cgroupfs + rootlessCg, err := shouldUseRootlessCgroupManager(context) + if err != nil { + return nil, err + } + if rootlessCg { + cgroupManager = libcontainer.RootlessCgroupfs + } + if context.GlobalBool("systemd-cgroup") { + if systemd.UseSystemd() { + cgroupManager = libcontainer.SystemdCgroups + } else { + return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available") + } + } + + intelRdtManager := libcontainer.IntelRdtFs + if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() { + intelRdtManager = nil + } + + // We resolve the paths for {newuidmap,newgidmap} from the context of runc, + // to avoid doing a path lookup in the nsexec context. TODO: The binary + // names are not currently configurable. + newuidmap, err := exec.LookPath("newuidmap") + if err != nil { + newuidmap = "" + } + newgidmap, err := exec.LookPath("newgidmap") + if err != nil { + newgidmap = "" + } + + return libcontainer.New(abs, cgroupManager, intelRdtManager, + libcontainer.CriuPath(context.GlobalString("criu")), + libcontainer.NewuidmapPath(newuidmap), + libcontainer.NewgidmapPath(newgidmap)) +} + +// getContainer returns the specified container instance by loading it from state +// with the default factory. 
+func getContainer(context *cli.Context) (libcontainer.Container, error) { + id := context.Args().First() + if id == "" { + return nil, errEmptyID + } + factory, err := loadFactory(context) + if err != nil { + return nil, err + } + return factory.Load(id) +} + +func fatalf(t string, v ...interface{}) { + fatal(fmt.Errorf(t, v...)) +} + +func getDefaultImagePath(context *cli.Context) string { + cwd, err := os.Getwd() + if err != nil { + panic(err) + } + return filepath.Join(cwd, "checkpoint") +} + +// newProcess returns a new libcontainer Process with the arguments from the +// spec and stdio from the current process. +func newProcess(p specs.Process, init bool, logLevel string) (*libcontainer.Process, error) { + lp := &libcontainer.Process{ + Args: p.Args, + Env: p.Env, + // TODO: fix libcontainer's API to better support uid/gid in a typesafe way. + User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID), + Cwd: p.Cwd, + Label: p.SelinuxLabel, + NoNewPrivileges: &p.NoNewPrivileges, + AppArmorProfile: p.ApparmorProfile, + Init: init, + LogLevel: logLevel, + } + + if p.ConsoleSize != nil { + lp.ConsoleWidth = uint16(p.ConsoleSize.Width) + lp.ConsoleHeight = uint16(p.ConsoleSize.Height) + } + + if p.Capabilities != nil { + lp.Capabilities = &configs.Capabilities{} + lp.Capabilities.Bounding = p.Capabilities.Bounding + lp.Capabilities.Effective = p.Capabilities.Effective + lp.Capabilities.Inheritable = p.Capabilities.Inheritable + lp.Capabilities.Permitted = p.Capabilities.Permitted + lp.Capabilities.Ambient = p.Capabilities.Ambient + } + for _, gid := range p.User.AdditionalGids { + lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10)) + } + for _, rlimit := range p.Rlimits { + rl, err := createLibContainerRlimit(rlimit) + if err != nil { + return nil, err + } + lp.Rlimits = append(lp.Rlimits, rl) + } + return lp, nil +} + +func destroy(container libcontainer.Container) { + if err := container.Destroy(); err != nil { + logrus.Error(err) + } 
+} + +// setupIO modifies the given process config according to the options. +// +// With createTTY set, the process stdio is cleared and a console socket is +// attached: when not detaching, a socket pair is created and the console is +// received in a background goroutine whose result is delivered on the tty's +// consoleC channel; when detaching, the unix socket at sockpath is dialed and +// handed to the process. Without createTTY, a detaching process inherits +// runc's stdio and a non-detaching one gets pipes from setupProcessPipes. +func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) { + if createTTY { + process.Stdin = nil + process.Stdout = nil + process.Stderr = nil + t := &tty{} + if !detach { + parent, child, err := utils.NewSockPair("console") + if err != nil { + return nil, err + } + process.ConsoleSocket = child + t.postStart = append(t.postStart, parent, child) + t.consoleC = make(chan error, 1) + go func() { + // Send exactly one result. consoleC has capacity 1, so sending the + // error and then nil could block this goroutine on the second send. + t.consoleC <- t.recvtty(process, parent) + }() + } else { + // the caller of runc will handle receiving the console master + conn, err := net.Dial("unix", sockpath) + if err != nil { + return nil, err + } + uc, ok := conn.(*net.UnixConn) + if !ok { + return nil, fmt.Errorf("casting to UnixConn failed") + } + t.postStart = append(t.postStart, uc) + socket, err := uc.File() + if err != nil { + return nil, err + } + t.postStart = append(t.postStart, socket) + process.ConsoleSocket = socket + } + return t, nil + } + // when runc will detach the caller provides the stdio to runc via runc's 0,1,2 + // and the container's process inherits runc's stdio. + if detach { + if err := inheritStdio(process); err != nil { + return nil, err + } + return &tty{}, nil + } + return setupProcessPipes(process, rootuid, rootgid) +} + +// createPidFile creates a file with the processes pid inside it atomically +// it creates a temp file with the paths filename + '.'
infront of it +// then renames the file +func createPidFile(path string, process *libcontainer.Process) error { + pid, err := process.Pid() + if err != nil { + return err + } + var ( + tmpDir = filepath.Dir(path) + tmpName = filepath.Join(tmpDir, fmt.Sprintf(".%s", filepath.Base(path))) + ) + f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666) + if err != nil { + return err + } + _, err = fmt.Fprintf(f, "%d", pid) + f.Close() + if err != nil { + return err + } + return os.Rename(tmpName, path) +} + +func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) { + rootlessCg, err := shouldUseRootlessCgroupManager(context) + if err != nil { + return nil, err + } + config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{ + CgroupName: id, + UseSystemdCgroup: context.GlobalBool("systemd-cgroup"), + NoPivotRoot: context.Bool("no-pivot"), + NoNewKeyring: context.Bool("no-new-keyring"), + Spec: spec, + RootlessEUID: os.Geteuid() != 0, + RootlessCgroups: rootlessCg, + }) + if err != nil { + return nil, err + } + + factory, err := loadFactory(context) + if err != nil { + return nil, err + } + return factory.Create(id, config) +} + +type runner struct { + init bool + enableSubreaper bool + shouldDestroy bool + detach bool + listenFDs []*os.File + preserveFDs int + pidFile string + consoleSocket string + container libcontainer.Container + action CtAct + notifySocket *notifySocket + criuOpts *libcontainer.CriuOpts + logLevel string +} + +func (r *runner) run(config *specs.Process) (int, error) { + var err error + defer func() { + if err != nil { + r.destroy() + } + }() + if err = r.checkTerminal(config); err != nil { + return -1, err + } + process, err := newProcess(*config, r.init, r.logLevel) + if err != nil { + return -1, err + } + if len(r.listenFDs) > 0 { + process.Env = append(process.Env, fmt.Sprintf("LISTEN_FDS=%d", len(r.listenFDs)), "LISTEN_PID=1") + process.ExtraFiles = 
append(process.ExtraFiles, r.listenFDs...) + } + // Preserved fds follow stdio (0-2) and any extra files already queued. + baseFd := 3 + len(process.ExtraFiles) + for i := baseFd; i < baseFd+r.preserveFDs; i++ { + _, err = os.Stat(fmt.Sprintf("/proc/self/fd/%d", i)) + if err != nil { + return -1, errors.Wrapf(err, "please check that preserved-fd %d (of %d) is present", i-baseFd, r.preserveFDs) + } + process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i))) + } + rootuid, err := r.container.Config().HostRootUID() + if err != nil { + return -1, err + } + rootgid, err := r.container.Config().HostRootGID() + if err != nil { + return -1, err + } + var ( + detach = r.detach || (r.action == CT_ACT_CREATE) + ) + // Setting up IO is a two stage process. We need to modify process to deal + // with detaching containers, and then we get a tty after the container has + // started. + handler := newSignalHandler(r.enableSubreaper, r.notifySocket) + tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach, r.consoleSocket) + if err != nil { + return -1, err + } + defer tty.Close() + + switch r.action { + case CT_ACT_CREATE: + err = r.container.Start(process) + case CT_ACT_RESTORE: + err = r.container.Restore(process, r.criuOpts) + case CT_ACT_RUN: + err = r.container.Run(process) + default: + panic("Unknown action") + } + if err != nil { + return -1, err + } + if err = tty.waitConsole(); err != nil { + r.terminate(process) + return -1, err + } + if err = tty.ClosePostStart(); err != nil { + r.terminate(process) + return -1, err + } + if r.pidFile != "" { + if err = createPidFile(r.pidFile, process); err != nil { + r.terminate(process) + return -1, err + } + } + status, err := handler.forward(process, tty, detach) + if err != nil { + r.terminate(process) + } + if detach { + return 0, nil + } + // On error the deferred function at the top of run already destroys the + // container; destroy here only on success so it is not destroyed twice. + if err == nil { + r.destroy() + } + return status, err +} + +// destroy destroys the runner's container when shouldDestroy is set. +func (r *runner) destroy() { + if r.shouldDestroy { + destroy(r.container) + } +} + +// terminate sends SIGKILL to p and waits for it, ignoring errors from both. +func (r *runner) terminate(p *libcontainer.Process) { + _ = p.Signal(unix.SIGKILL) 
+ _, _ = p.Wait() +} + +// checkTerminal rejects inconsistent terminal options: a detaching runner that +// allocates a terminal needs a console socket, and a console socket may only be +// given when the runner both detaches and allocates a terminal. +func (r *runner) checkTerminal(config *specs.Process) error { + detach := r.detach || (r.action == CT_ACT_CREATE) + // Check command-line for sanity. + if detach && config.Terminal && r.consoleSocket == "" { + return fmt.Errorf("cannot allocate tty if runc will detach without setting console socket") + } + if (!detach || !config.Terminal) && r.consoleSocket != "" { + return fmt.Errorf("cannot use console socket if runc will not detach or allocate tty") + } + return nil +} + +// validateProcessSpec checks that the process spec has a non-empty absolute +// Cwd and at least one argument, and that an SELinux label is only set when +// selinux.GetEnabled reports true. +func validateProcessSpec(spec *specs.Process) error { + if spec.Cwd == "" { + return fmt.Errorf("Cwd property must not be empty") + } + if !filepath.IsAbs(spec.Cwd) { + return fmt.Errorf("Cwd must be an absolute path") + } + if len(spec.Args) == 0 { + return fmt.Errorf("args must not be empty") + } + if spec.SelinuxLabel != "" && !selinux.GetEnabled() { + return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported") + } + return nil +} + +// CtAct selects which container operation runner.run performs. +type CtAct uint8 + +// Values start at 1, so the zero value of CtAct is not a valid action. +const ( + CT_ACT_CREATE CtAct = iota + 1 + CT_ACT_RUN + CT_ACT_RESTORE +) + +// startContainer creates the container named by the first CLI argument from +// spec and runs spec.Process in it with the given action, returning the status +// and error from runner.run. It returns -1 and an error if the id is empty or +// the container or notify socket cannot be set up. +func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOpts *libcontainer.CriuOpts) (int, error) { + id := context.Args().First() + if id == "" { + return -1, errEmptyID + } + + // newNotifySocket may return nil, in which case notify handling is skipped. + notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id) + if notifySocket != nil { + notifySocket.setupSpec(context, spec) + } + + container, err := createContainer(context, id, spec) + if err != nil { + return -1, err + } + + if notifySocket != nil { + err := notifySocket.setupSocket() + if err != nil { + return -1, err + } + } + + // Support on-demand socket activation by passing file descriptors into the container init process.
+ listenFDs := []*os.File{} + if os.Getenv("LISTEN_FDS") != "" { + listenFDs = activation.Files(false) + } + + logLevel := "info" + if context.GlobalBool("debug") { + logLevel = "debug" + } + + r := &runner{ + enableSubreaper: !context.Bool("no-subreaper"), + shouldDestroy: true, + container: container, + listenFDs: listenFDs, + notifySocket: notifySocket, + consoleSocket: context.String("console-socket"), + detach: context.Bool("detach"), + pidFile: context.String("pid-file"), + preserveFDs: context.Int("preserve-fds"), + action: action, + criuOpts: criuOpts, + init: true, + logLevel: logLevel, + } + return r.run(spec.Process) +} diff --git a/vendor.conf b/vendor.conf new file mode 100644 index 0000000..dd51785 --- /dev/null +++ b/vendor.conf @@ -0,0 +1,31 @@ +# OCI runtime-spec. When updating this, make sure you use a version tag rather +# than a commit ID so it's much more obvious what version of the spec we are +# using. +github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 # v1.0.1-59-g29686db + +# Core libcontainer functionality. +github.com/checkpoint-restore/go-criu 17b0214f6c48980c45dc47ecb0cfd6d9e02df723 # v3.11 +github.com/mrunalp/fileutils 7d4729fb36185a7c1719923406c9d40e54fb93c7 +github.com/opencontainers/selinux 5215b1806f52b1fcc2070a8826c542c9d33cd3cf # v1.3.0 (+ CVE-2019-16884) +github.com/seccomp/libseccomp-golang 689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1 +github.com/sirupsen/logrus 8bdbc7bcc01dcbb8ec23dc8a28e332258d25251f # v1.4.1 +github.com/syndtr/gocapability d98352740cb2c55f81556b63d4a1ec64c5a319c2 +github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270 + +# systemd integration. +github.com/coreos/go-systemd 95778dfbb74eb7e4dbaf43bf7d71809650ef8076 # v19 +github.com/godbus/dbus 2ff6f7ffd60f0f2410b3105864bdd12c7894f844 # v5.0.1 +github.com/golang/protobuf 925541529c1fa6821df4e44ce2723319eb2be768 # v1.0.0 + +# Command-line interface. 
+github.com/cyphar/filepath-securejoin a261ee33d7a517f054effbf451841abaafe3e0fd # v0.2.2 +github.com/docker/go-units 47565b4f722fb6ceae66b95f853feed578a4a51c # v0.3.3 +github.com/urfave/cli cfb38830724cc34fedffe9a2a29fb54fa9169cd1 # v1.20.0 +golang.org/x/sys 9eafafc0a87e0fd0aeeba439a4573537970c44c7 https://github.com/golang/sys + +# console dependencies +github.com/containerd/console 0650fd9eeb50bab4fc99dceb9f2e14cf58f36e7f +github.com/pkg/errors ba968bfe8b2f7e042a574c888954fccecfa385b4 # v0.8.1 + +# ebpf dependencies +github.com/cilium/ebpf 95b36a581eed7b0f127306ed1d16cc0ddc06cf67 diff --git a/vendor/github.com/checkpoint-restore/go-criu/LICENSE b/vendor/github.com/checkpoint-restore/go-criu/LICENSE new file mode 100644 index 0000000..8dada3e --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/checkpoint-restore/go-criu/README.md b/vendor/github.com/checkpoint-restore/go-criu/README.md new file mode 100644 index 0000000..610756d --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/README.md @@ -0,0 +1,58 @@ +[![master](https://travis-ci.org/checkpoint-restore/go-criu.svg?branch=master)](https://travis-ci.org/checkpoint-restore/go-criu) + +## go-criu -- Go bindings for [CRIU](https://criu.org/) + +This repository provides Go bindings for CRIU. The code is based on the Go based PHaul +implementation from the CRIU repository. For easier inclusion into other Go projects the +CRIU Go bindings have been moved to this repository. + +The Go bindings provide an easy way to use the CRIU RPC calls from Go without the need +to set up all the infrastructure to make the actual RPC connection to CRIU. + +The following example would print the version of CRIU: +``` + c := criu.MakeCriu() + version, err := c.GetCriuVersion() + fmt.Println(version) +``` +or to just check if at least a certain CRIU version is installed: +``` + c := criu.MakeCriu() + result, err := c.IsCriuAtLeast(31100) +``` + +## How to contribute + +While bug fixes can first be identified via an "issue", that is not required. +It's ok to just open up a PR with the fix, but make sure you include the same +information you would have included in an issue - like how to reproduce it. 
+ +PRs for new features should include some background on what use cases the +new code is trying to address. When possible and when it makes sense, try to +break-up larger PRs into smaller ones - it's easier to review smaller +code changes. But only if those smaller ones make sense as stand-alone PRs. + +Regardless of the type of PR, all PRs should include: +* well documented code changes +* additional testcases. Ideally, they should fail w/o your code change applied +* documentation changes + +Squash your commits into logical pieces of work that might want to be reviewed +separate from the rest of the PRs. Ideally, each commit should implement a +single idea, and the PR branch should pass the tests at every commit. GitHub +makes it easy to review the cumulative effect of many commits; so, when in +doubt, use smaller commits. + +PRs that fix issues should include a reference like `Closes #XXXX` in the +commit message so that github will automatically close the referenced issue +when the PR is merged. + +Contributors must assert that they are in compliance with the [Developer +Certificate of Origin 1.1](http://developercertificate.org/). This is achieved +by adding a "Signed-off-by" line containing the contributor's name and e-mail +to every commit message. Your signature certifies that you wrote the patch or +otherwise have the right to pass it on as an open-source patch. + +### License + +The license of go-criu is the Apache 2.0 license. diff --git a/vendor/github.com/checkpoint-restore/go-criu/rpc/rpc.pb.go b/vendor/github.com/checkpoint-restore/go-criu/rpc/rpc.pb.go new file mode 100644 index 0000000..230faac --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/rpc/rpc.pb.go @@ -0,0 +1,1211 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: rpc/rpc.proto + +/* +Package rpc is a generated protocol buffer package. 
+ +It is generated from these files: + rpc/rpc.proto + +It has these top-level messages: + CriuPageServerInfo + CriuVethPair + ExtMountMap + JoinNamespace + InheritFd + CgroupRoot + UnixSk + CriuOpts + CriuDumpResp + CriuRestoreResp + CriuNotify + CriuFeatures + CriuReq + CriuResp + CriuVersion +*/ +package rpc + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type CriuCgMode int32 + +const ( + CriuCgMode_IGNORE CriuCgMode = 0 + CriuCgMode_CG_NONE CriuCgMode = 1 + CriuCgMode_PROPS CriuCgMode = 2 + CriuCgMode_SOFT CriuCgMode = 3 + CriuCgMode_FULL CriuCgMode = 4 + CriuCgMode_STRICT CriuCgMode = 5 + CriuCgMode_DEFAULT CriuCgMode = 6 +) + +var CriuCgMode_name = map[int32]string{ + 0: "IGNORE", + 1: "CG_NONE", + 2: "PROPS", + 3: "SOFT", + 4: "FULL", + 5: "STRICT", + 6: "DEFAULT", +} +var CriuCgMode_value = map[string]int32{ + "IGNORE": 0, + "CG_NONE": 1, + "PROPS": 2, + "SOFT": 3, + "FULL": 4, + "STRICT": 5, + "DEFAULT": 6, +} + +func (x CriuCgMode) Enum() *CriuCgMode { + p := new(CriuCgMode) + *p = x + return p +} +func (x CriuCgMode) String() string { + return proto.EnumName(CriuCgMode_name, int32(x)) +} +func (x *CriuCgMode) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(CriuCgMode_value, data, "CriuCgMode") + if err != nil { + return err + } + *x = CriuCgMode(value) + return nil +} +func (CriuCgMode) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } + +type CriuReqType int32 + +const ( + CriuReqType_EMPTY CriuReqType = 0 + 
CriuReqType_DUMP CriuReqType = 1 + CriuReqType_RESTORE CriuReqType = 2 + CriuReqType_CHECK CriuReqType = 3 + CriuReqType_PRE_DUMP CriuReqType = 4 + CriuReqType_PAGE_SERVER CriuReqType = 5 + CriuReqType_NOTIFY CriuReqType = 6 + CriuReqType_CPUINFO_DUMP CriuReqType = 7 + CriuReqType_CPUINFO_CHECK CriuReqType = 8 + CriuReqType_FEATURE_CHECK CriuReqType = 9 + CriuReqType_VERSION CriuReqType = 10 + CriuReqType_WAIT_PID CriuReqType = 11 + CriuReqType_PAGE_SERVER_CHLD CriuReqType = 12 +) + +var CriuReqType_name = map[int32]string{ + 0: "EMPTY", + 1: "DUMP", + 2: "RESTORE", + 3: "CHECK", + 4: "PRE_DUMP", + 5: "PAGE_SERVER", + 6: "NOTIFY", + 7: "CPUINFO_DUMP", + 8: "CPUINFO_CHECK", + 9: "FEATURE_CHECK", + 10: "VERSION", + 11: "WAIT_PID", + 12: "PAGE_SERVER_CHLD", +} +var CriuReqType_value = map[string]int32{ + "EMPTY": 0, + "DUMP": 1, + "RESTORE": 2, + "CHECK": 3, + "PRE_DUMP": 4, + "PAGE_SERVER": 5, + "NOTIFY": 6, + "CPUINFO_DUMP": 7, + "CPUINFO_CHECK": 8, + "FEATURE_CHECK": 9, + "VERSION": 10, + "WAIT_PID": 11, + "PAGE_SERVER_CHLD": 12, +} + +func (x CriuReqType) Enum() *CriuReqType { + p := new(CriuReqType) + *p = x + return p +} +func (x CriuReqType) String() string { + return proto.EnumName(CriuReqType_name, int32(x)) +} +func (x *CriuReqType) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(CriuReqType_value, data, "CriuReqType") + if err != nil { + return err + } + *x = CriuReqType(value) + return nil +} +func (CriuReqType) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } + +type CriuPageServerInfo struct { + Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"` + Port *int32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` + Pid *int32 `protobuf:"varint,3,opt,name=pid" json:"pid,omitempty"` + Fd *int32 `protobuf:"varint,4,opt,name=fd" json:"fd,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} } +func (m 
*CriuPageServerInfo) String() string { return proto.CompactTextString(m) } +func (*CriuPageServerInfo) ProtoMessage() {} +func (*CriuPageServerInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } + +func (m *CriuPageServerInfo) GetAddress() string { + if m != nil && m.Address != nil { + return *m.Address + } + return "" +} + +func (m *CriuPageServerInfo) GetPort() int32 { + if m != nil && m.Port != nil { + return *m.Port + } + return 0 +} + +func (m *CriuPageServerInfo) GetPid() int32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +func (m *CriuPageServerInfo) GetFd() int32 { + if m != nil && m.Fd != nil { + return *m.Fd + } + return 0 +} + +type CriuVethPair struct { + IfIn *string `protobuf:"bytes,1,req,name=if_in,json=ifIn" json:"if_in,omitempty"` + IfOut *string `protobuf:"bytes,2,req,name=if_out,json=ifOut" json:"if_out,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuVethPair) Reset() { *m = CriuVethPair{} } +func (m *CriuVethPair) String() string { return proto.CompactTextString(m) } +func (*CriuVethPair) ProtoMessage() {} +func (*CriuVethPair) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } + +func (m *CriuVethPair) GetIfIn() string { + if m != nil && m.IfIn != nil { + return *m.IfIn + } + return "" +} + +func (m *CriuVethPair) GetIfOut() string { + if m != nil && m.IfOut != nil { + return *m.IfOut + } + return "" +} + +type ExtMountMap struct { + Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` + Val *string `protobuf:"bytes,2,req,name=val" json:"val,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *ExtMountMap) Reset() { *m = ExtMountMap{} } +func (m *ExtMountMap) String() string { return proto.CompactTextString(m) } +func (*ExtMountMap) ProtoMessage() {} +func (*ExtMountMap) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} } + +func (m *ExtMountMap) GetKey() string { + if m != nil && m.Key != nil { + return *m.Key + } + return "" 
+} + +func (m *ExtMountMap) GetVal() string { + if m != nil && m.Val != nil { + return *m.Val + } + return "" +} + +type JoinNamespace struct { + Ns *string `protobuf:"bytes,1,req,name=ns" json:"ns,omitempty"` + NsFile *string `protobuf:"bytes,2,req,name=ns_file,json=nsFile" json:"ns_file,omitempty"` + ExtraOpt *string `protobuf:"bytes,3,opt,name=extra_opt,json=extraOpt" json:"extra_opt,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *JoinNamespace) Reset() { *m = JoinNamespace{} } +func (m *JoinNamespace) String() string { return proto.CompactTextString(m) } +func (*JoinNamespace) ProtoMessage() {} +func (*JoinNamespace) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} } + +func (m *JoinNamespace) GetNs() string { + if m != nil && m.Ns != nil { + return *m.Ns + } + return "" +} + +func (m *JoinNamespace) GetNsFile() string { + if m != nil && m.NsFile != nil { + return *m.NsFile + } + return "" +} + +func (m *JoinNamespace) GetExtraOpt() string { + if m != nil && m.ExtraOpt != nil { + return *m.ExtraOpt + } + return "" +} + +type InheritFd struct { + Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` + Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *InheritFd) Reset() { *m = InheritFd{} } +func (m *InheritFd) String() string { return proto.CompactTextString(m) } +func (*InheritFd) ProtoMessage() {} +func (*InheritFd) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} } + +func (m *InheritFd) GetKey() string { + if m != nil && m.Key != nil { + return *m.Key + } + return "" +} + +func (m *InheritFd) GetFd() int32 { + if m != nil && m.Fd != nil { + return *m.Fd + } + return 0 +} + +type CgroupRoot struct { + Ctrl *string `protobuf:"bytes,1,opt,name=ctrl" json:"ctrl,omitempty"` + Path *string `protobuf:"bytes,2,req,name=path" json:"path,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CgroupRoot) Reset() { *m = CgroupRoot{} } 
+func (m *CgroupRoot) String() string { return proto.CompactTextString(m) } +func (*CgroupRoot) ProtoMessage() {} +func (*CgroupRoot) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } + +func (m *CgroupRoot) GetCtrl() string { + if m != nil && m.Ctrl != nil { + return *m.Ctrl + } + return "" +} + +func (m *CgroupRoot) GetPath() string { + if m != nil && m.Path != nil { + return *m.Path + } + return "" +} + +type UnixSk struct { + Inode *uint32 `protobuf:"varint,1,req,name=inode" json:"inode,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *UnixSk) Reset() { *m = UnixSk{} } +func (m *UnixSk) String() string { return proto.CompactTextString(m) } +func (*UnixSk) ProtoMessage() {} +func (*UnixSk) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } + +func (m *UnixSk) GetInode() uint32 { + if m != nil && m.Inode != nil { + return *m.Inode + } + return 0 +} + +type CriuOpts struct { + ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd,json=imagesDirFd" json:"images_dir_fd,omitempty"` + Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` + LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running,json=leaveRunning" json:"leave_running,omitempty"` + ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk,json=extUnixSk" json:"ext_unix_sk,omitempty"` + TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established,json=tcpEstablished" json:"tcp_established,omitempty"` + EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices,json=evasiveDevices" json:"evasive_devices,omitempty"` + ShellJob *bool `protobuf:"varint,7,opt,name=shell_job,json=shellJob" json:"shell_job,omitempty"` + FileLocks *bool `protobuf:"varint,8,opt,name=file_locks,json=fileLocks" json:"file_locks,omitempty"` + LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,json=logLevel,def=2" json:"log_level,omitempty"` + LogFile *string `protobuf:"bytes,10,opt,name=log_file,json=logFile" json:"log_file,omitempty"` + Ps 
*CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"` + NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts,json=notifyScripts" json:"notify_scripts,omitempty"` + Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"` + ParentImg *string `protobuf:"bytes,14,opt,name=parent_img,json=parentImg" json:"parent_img,omitempty"` + TrackMem *bool `protobuf:"varint,15,opt,name=track_mem,json=trackMem" json:"track_mem,omitempty"` + AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup,json=autoDedup" json:"auto_dedup,omitempty"` + WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd,json=workDirFd" json:"work_dir_fd,omitempty"` + LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap,json=linkRemap" json:"link_remap,omitempty"` + Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"` + CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,json=cpuCap,def=4294967295" json:"cpu_cap,omitempty"` + ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap,json=forceIrmap" json:"force_irmap,omitempty"` + ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd,json=execCmd" json:"exec_cmd,omitempty"` + ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt,json=extMnt" json:"ext_mnt,omitempty"` + ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups,json=manageCgroups" json:"manage_cgroups,omitempty"` + CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root,json=cgRoot" json:"cg_root,omitempty"` + RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling,json=rstSibling" json:"rst_sibling,omitempty"` + InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd,json=inheritFd" json:"inherit_fd,omitempty"` + AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt,json=autoExtMnt" json:"auto_ext_mnt,omitempty"` + ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing,json=extSharing" json:"ext_sharing,omitempty"` + ExtMasters *bool 
`protobuf:"varint,30,opt,name=ext_masters,json=extMasters" json:"ext_masters,omitempty"` + SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt,json=skipMnt" json:"skip_mnt,omitempty"` + EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs,json=enableFs" json:"enable_fs,omitempty"` + UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino,json=unixSkIno" json:"unix_sk_ino,omitempty"` + ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,json=manageCgroupsMode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"` + GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,json=ghostLimit,def=1048576" json:"ghost_limit,omitempty"` + IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths,json=irmapScanPaths" json:"irmap_scan_paths,omitempty"` + External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"` + EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns,json=emptyNs" json:"empty_ns,omitempty"` + JoinNs []*JoinNamespace `protobuf:"bytes,39,rep,name=join_ns,json=joinNs" json:"join_ns,omitempty"` + CgroupProps *string `protobuf:"bytes,41,opt,name=cgroup_props,json=cgroupProps" json:"cgroup_props,omitempty"` + CgroupPropsFile *string `protobuf:"bytes,42,opt,name=cgroup_props_file,json=cgroupPropsFile" json:"cgroup_props_file,omitempty"` + CgroupDumpController []string `protobuf:"bytes,43,rep,name=cgroup_dump_controller,json=cgroupDumpController" json:"cgroup_dump_controller,omitempty"` + FreezeCgroup *string `protobuf:"bytes,44,opt,name=freeze_cgroup,json=freezeCgroup" json:"freeze_cgroup,omitempty"` + Timeout *uint32 `protobuf:"varint,45,opt,name=timeout" json:"timeout,omitempty"` + TcpSkipInFlight *bool `protobuf:"varint,46,opt,name=tcp_skip_in_flight,json=tcpSkipInFlight" json:"tcp_skip_in_flight,omitempty"` + WeakSysctls *bool `protobuf:"varint,47,opt,name=weak_sysctls,json=weakSysctls" json:"weak_sysctls,omitempty"` + LazyPages *bool 
`protobuf:"varint,48,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"` + StatusFd *int32 `protobuf:"varint,49,opt,name=status_fd,json=statusFd" json:"status_fd,omitempty"` + OrphanPtsMaster *bool `protobuf:"varint,50,opt,name=orphan_pts_master,json=orphanPtsMaster" json:"orphan_pts_master,omitempty"` + ConfigFile *string `protobuf:"bytes,51,opt,name=config_file,json=configFile" json:"config_file,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuOpts) Reset() { *m = CriuOpts{} } +func (m *CriuOpts) String() string { return proto.CompactTextString(m) } +func (*CriuOpts) ProtoMessage() {} +func (*CriuOpts) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} } + +const Default_CriuOpts_LogLevel int32 = 2 +const Default_CriuOpts_CpuCap uint32 = 4294967295 +const Default_CriuOpts_GhostLimit uint32 = 1048576 + +func (m *CriuOpts) GetImagesDirFd() int32 { + if m != nil && m.ImagesDirFd != nil { + return *m.ImagesDirFd + } + return 0 +} + +func (m *CriuOpts) GetPid() int32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +func (m *CriuOpts) GetLeaveRunning() bool { + if m != nil && m.LeaveRunning != nil { + return *m.LeaveRunning + } + return false +} + +func (m *CriuOpts) GetExtUnixSk() bool { + if m != nil && m.ExtUnixSk != nil { + return *m.ExtUnixSk + } + return false +} + +func (m *CriuOpts) GetTcpEstablished() bool { + if m != nil && m.TcpEstablished != nil { + return *m.TcpEstablished + } + return false +} + +func (m *CriuOpts) GetEvasiveDevices() bool { + if m != nil && m.EvasiveDevices != nil { + return *m.EvasiveDevices + } + return false +} + +func (m *CriuOpts) GetShellJob() bool { + if m != nil && m.ShellJob != nil { + return *m.ShellJob + } + return false +} + +func (m *CriuOpts) GetFileLocks() bool { + if m != nil && m.FileLocks != nil { + return *m.FileLocks + } + return false +} + +func (m *CriuOpts) GetLogLevel() int32 { + if m != nil && m.LogLevel != nil { + return *m.LogLevel + } + return 
Default_CriuOpts_LogLevel +} + +func (m *CriuOpts) GetLogFile() string { + if m != nil && m.LogFile != nil { + return *m.LogFile + } + return "" +} + +func (m *CriuOpts) GetPs() *CriuPageServerInfo { + if m != nil { + return m.Ps + } + return nil +} + +func (m *CriuOpts) GetNotifyScripts() bool { + if m != nil && m.NotifyScripts != nil { + return *m.NotifyScripts + } + return false +} + +func (m *CriuOpts) GetRoot() string { + if m != nil && m.Root != nil { + return *m.Root + } + return "" +} + +func (m *CriuOpts) GetParentImg() string { + if m != nil && m.ParentImg != nil { + return *m.ParentImg + } + return "" +} + +func (m *CriuOpts) GetTrackMem() bool { + if m != nil && m.TrackMem != nil { + return *m.TrackMem + } + return false +} + +func (m *CriuOpts) GetAutoDedup() bool { + if m != nil && m.AutoDedup != nil { + return *m.AutoDedup + } + return false +} + +func (m *CriuOpts) GetWorkDirFd() int32 { + if m != nil && m.WorkDirFd != nil { + return *m.WorkDirFd + } + return 0 +} + +func (m *CriuOpts) GetLinkRemap() bool { + if m != nil && m.LinkRemap != nil { + return *m.LinkRemap + } + return false +} + +func (m *CriuOpts) GetVeths() []*CriuVethPair { + if m != nil { + return m.Veths + } + return nil +} + +func (m *CriuOpts) GetCpuCap() uint32 { + if m != nil && m.CpuCap != nil { + return *m.CpuCap + } + return Default_CriuOpts_CpuCap +} + +func (m *CriuOpts) GetForceIrmap() bool { + if m != nil && m.ForceIrmap != nil { + return *m.ForceIrmap + } + return false +} + +func (m *CriuOpts) GetExecCmd() []string { + if m != nil { + return m.ExecCmd + } + return nil +} + +func (m *CriuOpts) GetExtMnt() []*ExtMountMap { + if m != nil { + return m.ExtMnt + } + return nil +} + +func (m *CriuOpts) GetManageCgroups() bool { + if m != nil && m.ManageCgroups != nil { + return *m.ManageCgroups + } + return false +} + +func (m *CriuOpts) GetCgRoot() []*CgroupRoot { + if m != nil { + return m.CgRoot + } + return nil +} + +func (m *CriuOpts) GetRstSibling() bool { + if m != nil 
&& m.RstSibling != nil { + return *m.RstSibling + } + return false +} + +func (m *CriuOpts) GetInheritFd() []*InheritFd { + if m != nil { + return m.InheritFd + } + return nil +} + +func (m *CriuOpts) GetAutoExtMnt() bool { + if m != nil && m.AutoExtMnt != nil { + return *m.AutoExtMnt + } + return false +} + +func (m *CriuOpts) GetExtSharing() bool { + if m != nil && m.ExtSharing != nil { + return *m.ExtSharing + } + return false +} + +func (m *CriuOpts) GetExtMasters() bool { + if m != nil && m.ExtMasters != nil { + return *m.ExtMasters + } + return false +} + +func (m *CriuOpts) GetSkipMnt() []string { + if m != nil { + return m.SkipMnt + } + return nil +} + +func (m *CriuOpts) GetEnableFs() []string { + if m != nil { + return m.EnableFs + } + return nil +} + +func (m *CriuOpts) GetUnixSkIno() []*UnixSk { + if m != nil { + return m.UnixSkIno + } + return nil +} + +func (m *CriuOpts) GetManageCgroupsMode() CriuCgMode { + if m != nil && m.ManageCgroupsMode != nil { + return *m.ManageCgroupsMode + } + return CriuCgMode_IGNORE +} + +func (m *CriuOpts) GetGhostLimit() uint32 { + if m != nil && m.GhostLimit != nil { + return *m.GhostLimit + } + return Default_CriuOpts_GhostLimit +} + +func (m *CriuOpts) GetIrmapScanPaths() []string { + if m != nil { + return m.IrmapScanPaths + } + return nil +} + +func (m *CriuOpts) GetExternal() []string { + if m != nil { + return m.External + } + return nil +} + +func (m *CriuOpts) GetEmptyNs() uint32 { + if m != nil && m.EmptyNs != nil { + return *m.EmptyNs + } + return 0 +} + +func (m *CriuOpts) GetJoinNs() []*JoinNamespace { + if m != nil { + return m.JoinNs + } + return nil +} + +func (m *CriuOpts) GetCgroupProps() string { + if m != nil && m.CgroupProps != nil { + return *m.CgroupProps + } + return "" +} + +func (m *CriuOpts) GetCgroupPropsFile() string { + if m != nil && m.CgroupPropsFile != nil { + return *m.CgroupPropsFile + } + return "" +} + +func (m *CriuOpts) GetCgroupDumpController() []string { + if m != nil { + return 
m.CgroupDumpController + } + return nil +} + +func (m *CriuOpts) GetFreezeCgroup() string { + if m != nil && m.FreezeCgroup != nil { + return *m.FreezeCgroup + } + return "" +} + +func (m *CriuOpts) GetTimeout() uint32 { + if m != nil && m.Timeout != nil { + return *m.Timeout + } + return 0 +} + +func (m *CriuOpts) GetTcpSkipInFlight() bool { + if m != nil && m.TcpSkipInFlight != nil { + return *m.TcpSkipInFlight + } + return false +} + +func (m *CriuOpts) GetWeakSysctls() bool { + if m != nil && m.WeakSysctls != nil { + return *m.WeakSysctls + } + return false +} + +func (m *CriuOpts) GetLazyPages() bool { + if m != nil && m.LazyPages != nil { + return *m.LazyPages + } + return false +} + +func (m *CriuOpts) GetStatusFd() int32 { + if m != nil && m.StatusFd != nil { + return *m.StatusFd + } + return 0 +} + +func (m *CriuOpts) GetOrphanPtsMaster() bool { + if m != nil && m.OrphanPtsMaster != nil { + return *m.OrphanPtsMaster + } + return false +} + +func (m *CriuOpts) GetConfigFile() string { + if m != nil && m.ConfigFile != nil { + return *m.ConfigFile + } + return "" +} + +type CriuDumpResp struct { + Restored *bool `protobuf:"varint,1,opt,name=restored" json:"restored,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} } +func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) } +func (*CriuDumpResp) ProtoMessage() {} +func (*CriuDumpResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} } + +func (m *CriuDumpResp) GetRestored() bool { + if m != nil && m.Restored != nil { + return *m.Restored + } + return false +} + +type CriuRestoreResp struct { + Pid *int32 `protobuf:"varint,1,req,name=pid" json:"pid,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} } +func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) } +func (*CriuRestoreResp) ProtoMessage() {} +func (*CriuRestoreResp) 
Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} } + +func (m *CriuRestoreResp) GetPid() int32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +type CriuNotify struct { + Script *string `protobuf:"bytes,1,opt,name=script" json:"script,omitempty"` + Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuNotify) Reset() { *m = CriuNotify{} } +func (m *CriuNotify) String() string { return proto.CompactTextString(m) } +func (*CriuNotify) ProtoMessage() {} +func (*CriuNotify) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} } + +func (m *CriuNotify) GetScript() string { + if m != nil && m.Script != nil { + return *m.Script + } + return "" +} + +func (m *CriuNotify) GetPid() int32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +// +// List of features which can queried via +// CRIU_REQ_TYPE__FEATURE_CHECK +type CriuFeatures struct { + MemTrack *bool `protobuf:"varint,1,opt,name=mem_track,json=memTrack" json:"mem_track,omitempty"` + LazyPages *bool `protobuf:"varint,2,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuFeatures) Reset() { *m = CriuFeatures{} } +func (m *CriuFeatures) String() string { return proto.CompactTextString(m) } +func (*CriuFeatures) ProtoMessage() {} +func (*CriuFeatures) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} } + +func (m *CriuFeatures) GetMemTrack() bool { + if m != nil && m.MemTrack != nil { + return *m.MemTrack + } + return false +} + +func (m *CriuFeatures) GetLazyPages() bool { + if m != nil && m.LazyPages != nil { + return *m.LazyPages + } + return false +} + +type CriuReq struct { + Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"` + Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"` + NotifySuccess *bool 
`protobuf:"varint,3,opt,name=notify_success,json=notifySuccess" json:"notify_success,omitempty"` + // + // When set service won't close the connection but + // will wait for more req-s to appear. Works not + // for all request types. + KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open,json=keepOpen" json:"keep_open,omitempty"` + // + // 'features' can be used to query which features + // are supported by the installed criu/kernel + // via RPC. + Features *CriuFeatures `protobuf:"bytes,5,opt,name=features" json:"features,omitempty"` + // 'pid' is used for WAIT_PID + Pid *uint32 `protobuf:"varint,6,opt,name=pid" json:"pid,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuReq) Reset() { *m = CriuReq{} } +func (m *CriuReq) String() string { return proto.CompactTextString(m) } +func (*CriuReq) ProtoMessage() {} +func (*CriuReq) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} } + +func (m *CriuReq) GetType() CriuReqType { + if m != nil && m.Type != nil { + return *m.Type + } + return CriuReqType_EMPTY +} + +func (m *CriuReq) GetOpts() *CriuOpts { + if m != nil { + return m.Opts + } + return nil +} + +func (m *CriuReq) GetNotifySuccess() bool { + if m != nil && m.NotifySuccess != nil { + return *m.NotifySuccess + } + return false +} + +func (m *CriuReq) GetKeepOpen() bool { + if m != nil && m.KeepOpen != nil { + return *m.KeepOpen + } + return false +} + +func (m *CriuReq) GetFeatures() *CriuFeatures { + if m != nil { + return m.Features + } + return nil +} + +func (m *CriuReq) GetPid() uint32 { + if m != nil && m.Pid != nil { + return *m.Pid + } + return 0 +} + +type CriuResp struct { + Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"` + Success *bool `protobuf:"varint,2,req,name=success" json:"success,omitempty"` + Dump *CriuDumpResp `protobuf:"bytes,3,opt,name=dump" json:"dump,omitempty"` + Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"` + Notify 
*CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"` + Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"` + CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno,json=crErrno" json:"cr_errno,omitempty"` + Features *CriuFeatures `protobuf:"bytes,8,opt,name=features" json:"features,omitempty"` + CrErrmsg *string `protobuf:"bytes,9,opt,name=cr_errmsg,json=crErrmsg" json:"cr_errmsg,omitempty"` + Version *CriuVersion `protobuf:"bytes,10,opt,name=version" json:"version,omitempty"` + Status *int32 `protobuf:"varint,11,opt,name=status" json:"status,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuResp) Reset() { *m = CriuResp{} } +func (m *CriuResp) String() string { return proto.CompactTextString(m) } +func (*CriuResp) ProtoMessage() {} +func (*CriuResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} } + +func (m *CriuResp) GetType() CriuReqType { + if m != nil && m.Type != nil { + return *m.Type + } + return CriuReqType_EMPTY +} + +func (m *CriuResp) GetSuccess() bool { + if m != nil && m.Success != nil { + return *m.Success + } + return false +} + +func (m *CriuResp) GetDump() *CriuDumpResp { + if m != nil { + return m.Dump + } + return nil +} + +func (m *CriuResp) GetRestore() *CriuRestoreResp { + if m != nil { + return m.Restore + } + return nil +} + +func (m *CriuResp) GetNotify() *CriuNotify { + if m != nil { + return m.Notify + } + return nil +} + +func (m *CriuResp) GetPs() *CriuPageServerInfo { + if m != nil { + return m.Ps + } + return nil +} + +func (m *CriuResp) GetCrErrno() int32 { + if m != nil && m.CrErrno != nil { + return *m.CrErrno + } + return 0 +} + +func (m *CriuResp) GetFeatures() *CriuFeatures { + if m != nil { + return m.Features + } + return nil +} + +func (m *CriuResp) GetCrErrmsg() string { + if m != nil && m.CrErrmsg != nil { + return *m.CrErrmsg + } + return "" +} + +func (m *CriuResp) GetVersion() *CriuVersion { + if m != nil { + return m.Version + } + return nil 
+} + +func (m *CriuResp) GetStatus() int32 { + if m != nil && m.Status != nil { + return *m.Status + } + return 0 +} + +// Answer for criu_req_type.VERSION requests +type CriuVersion struct { + Major *int32 `protobuf:"varint,1,req,name=major" json:"major,omitempty"` + Minor *int32 `protobuf:"varint,2,req,name=minor" json:"minor,omitempty"` + Gitid *string `protobuf:"bytes,3,opt,name=gitid" json:"gitid,omitempty"` + Sublevel *int32 `protobuf:"varint,4,opt,name=sublevel" json:"sublevel,omitempty"` + Extra *int32 `protobuf:"varint,5,opt,name=extra" json:"extra,omitempty"` + Name *string `protobuf:"bytes,6,opt,name=name" json:"name,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *CriuVersion) Reset() { *m = CriuVersion{} } +func (m *CriuVersion) String() string { return proto.CompactTextString(m) } +func (*CriuVersion) ProtoMessage() {} +func (*CriuVersion) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} } + +func (m *CriuVersion) GetMajor() int32 { + if m != nil && m.Major != nil { + return *m.Major + } + return 0 +} + +func (m *CriuVersion) GetMinor() int32 { + if m != nil && m.Minor != nil { + return *m.Minor + } + return 0 +} + +func (m *CriuVersion) GetGitid() string { + if m != nil && m.Gitid != nil { + return *m.Gitid + } + return "" +} + +func (m *CriuVersion) GetSublevel() int32 { + if m != nil && m.Sublevel != nil { + return *m.Sublevel + } + return 0 +} + +func (m *CriuVersion) GetExtra() int32 { + if m != nil && m.Extra != nil { + return *m.Extra + } + return 0 +} + +func (m *CriuVersion) GetName() string { + if m != nil && m.Name != nil { + return *m.Name + } + return "" +} + +func init() { + proto.RegisterType((*CriuPageServerInfo)(nil), "criu_page_server_info") + proto.RegisterType((*CriuVethPair)(nil), "criu_veth_pair") + proto.RegisterType((*ExtMountMap)(nil), "ext_mount_map") + proto.RegisterType((*JoinNamespace)(nil), "join_namespace") + proto.RegisterType((*InheritFd)(nil), "inherit_fd") + 
proto.RegisterType((*CgroupRoot)(nil), "cgroup_root") + proto.RegisterType((*UnixSk)(nil), "unix_sk") + proto.RegisterType((*CriuOpts)(nil), "criu_opts") + proto.RegisterType((*CriuDumpResp)(nil), "criu_dump_resp") + proto.RegisterType((*CriuRestoreResp)(nil), "criu_restore_resp") + proto.RegisterType((*CriuNotify)(nil), "criu_notify") + proto.RegisterType((*CriuFeatures)(nil), "criu_features") + proto.RegisterType((*CriuReq)(nil), "criu_req") + proto.RegisterType((*CriuResp)(nil), "criu_resp") + proto.RegisterType((*CriuVersion)(nil), "criu_version") + proto.RegisterEnum("CriuCgMode", CriuCgMode_name, CriuCgMode_value) + proto.RegisterEnum("CriuReqType", CriuReqType_name, CriuReqType_value) +} + +func init() { proto.RegisterFile("rpc/rpc.proto", fileDescriptor0) } + +var fileDescriptor0 = []byte{ + // 1835 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x56, 0xeb, 0x72, 0x5b, 0xb7, + 0x11, 0x0e, 0x29, 0xf1, 0x06, 0x5e, 0x7c, 0x0c, 0x5f, 0x02, 0xc7, 0xb5, 0xad, 0xd0, 0x51, 0xa2, + 0x2a, 0x2e, 0x93, 0x30, 0x76, 0x5c, 0x67, 0xda, 0x1f, 0x1e, 0x8a, 0x74, 0xd8, 0x48, 0x22, 0x07, + 0xa4, 0xdc, 0xc9, 0x2f, 0xcc, 0xd1, 0x39, 0x20, 0x05, 0xf3, 0xdc, 0x0a, 0x80, 0x8a, 0xe4, 0x97, + 0xe8, 0xbf, 0x3e, 0x57, 0xde, 0xa4, 0xaf, 0xd0, 0xd9, 0x05, 0x28, 0x4b, 0x49, 0x66, 0xd2, 0x7f, + 0xd8, 0x0f, 0xbb, 0xc0, 0xde, 0x77, 0x49, 0x5b, 0x17, 0xd1, 0x57, 0xba, 0x88, 0x7a, 0x85, 0xce, + 0x6d, 0xde, 0x5d, 0x92, 0x7b, 0x91, 0x56, 0x6b, 0x51, 0x84, 0x4b, 0x29, 0x8c, 0xd4, 0xe7, 0x52, + 0x0b, 0x95, 0x2d, 0x72, 0xca, 0x48, 0x2d, 0x8c, 0x63, 0x2d, 0x8d, 0x61, 0xa5, 0x9d, 0xd2, 0x5e, + 0x83, 0x6f, 0x48, 0x4a, 0xc9, 0x76, 0x91, 0x6b, 0xcb, 0xca, 0x3b, 0xa5, 0xbd, 0x0a, 0xc7, 0x33, + 0x0d, 0xc8, 0x56, 0xa1, 0x62, 0xb6, 0x85, 0x10, 0x1c, 0x69, 0x87, 0x94, 0x17, 0x31, 0xdb, 0x46, + 0xa0, 0xbc, 0x88, 0xbb, 0x7f, 0x23, 0x1d, 0xfc, 0xe8, 0x5c, 0xda, 0x33, 0x51, 0x84, 0x4a, 0xd3, + 0x3b, 0xa4, 0xa2, 0x16, 0x42, 0x65, 0xac, 0xb4, 0x53, 0xde, 
0x6b, 0xf0, 0x6d, 0xb5, 0x18, 0x67, + 0xf4, 0x1e, 0xa9, 0xaa, 0x85, 0xc8, 0xd7, 0xf0, 0x3c, 0xa0, 0x15, 0xb5, 0x98, 0xac, 0x6d, 0xf7, + 0x5b, 0xd2, 0x96, 0x17, 0x56, 0xa4, 0xf9, 0x3a, 0xb3, 0x22, 0x0d, 0x0b, 0xf8, 0x70, 0x25, 0x2f, + 0xbd, 0x28, 0x1c, 0x01, 0x39, 0x0f, 0x13, 0x2f, 0x06, 0xc7, 0xee, 0x5b, 0xd2, 0x79, 0x97, 0xab, + 0x4c, 0x64, 0x61, 0x2a, 0x4d, 0x11, 0x46, 0x12, 0x94, 0xca, 0x8c, 0x17, 0x2a, 0x67, 0x86, 0x7e, + 0x4c, 0x6a, 0x99, 0x11, 0x0b, 0x95, 0x48, 0x2f, 0x57, 0xcd, 0xcc, 0x48, 0x25, 0x92, 0x3e, 0x24, + 0x0d, 0x79, 0x61, 0x75, 0x28, 0xf2, 0xc2, 0xa2, 0x55, 0x0d, 0x5e, 0x47, 0x60, 0x52, 0xd8, 0x6e, + 0x8f, 0x10, 0x95, 0x9d, 0x49, 0xad, 0xac, 0x58, 0xc4, 0xbf, 0xa3, 0x89, 0x33, 0x1d, 0x1e, 0x74, + 0xa6, 0xbf, 0x20, 0xcd, 0x68, 0xa9, 0xf3, 0x75, 0x21, 0x74, 0x9e, 0x5b, 0xf0, 0x5f, 0x64, 0x75, + 0xe2, 0xdd, 0x8a, 0x67, 0xf4, 0x69, 0x68, 0xcf, 0xbc, 0x16, 0x78, 0xee, 0x3e, 0x21, 0xb5, 0x75, + 0xa6, 0x2e, 0x84, 0x59, 0xd1, 0xbb, 0xa4, 0xa2, 0xb2, 0x3c, 0x96, 0xf8, 0x4b, 0x9b, 0x3b, 0xa2, + 0xfb, 0xdf, 0x36, 0x69, 0xa0, 0x4f, 0xf3, 0xc2, 0x1a, 0xda, 0x25, 0x6d, 0x95, 0x86, 0x4b, 0x69, + 0x44, 0xac, 0xb4, 0x58, 0xc4, 0xc8, 0x5b, 0xe1, 0x4d, 0x07, 0x1e, 0x28, 0x3d, 0x8a, 0x37, 0x61, + 0x2a, 0x7f, 0x08, 0xd3, 0x53, 0xd2, 0x4e, 0x64, 0x78, 0x2e, 0x85, 0x5e, 0x67, 0x99, 0xca, 0x96, + 0x68, 0x6c, 0x9d, 0xb7, 0x10, 0xe4, 0x0e, 0xa3, 0x8f, 0x49, 0x13, 0xbc, 0xef, 0xb5, 0xc1, 0xa0, + 0xd6, 0x39, 0x38, 0xe8, 0x24, 0x53, 0x17, 0xb3, 0x15, 0xfd, 0x82, 0xdc, 0xb2, 0x51, 0x21, 0xa4, + 0xb1, 0xe1, 0x69, 0xa2, 0xcc, 0x99, 0x8c, 0x59, 0x05, 0x79, 0x3a, 0x36, 0x2a, 0x86, 0x1f, 0x50, + 0x60, 0x94, 0xe7, 0xa1, 0x51, 0xe7, 0x52, 0xc4, 0xf2, 0x5c, 0x45, 0xd2, 0xb0, 0xaa, 0x63, 0xf4, + 0xf0, 0x81, 0x43, 0xc1, 0xff, 0xe6, 0x4c, 0x26, 0x89, 0x78, 0x97, 0x9f, 0xb2, 0x1a, 0xb2, 0xd4, + 0x11, 0xf8, 0x47, 0x7e, 0x4a, 0x1f, 0x11, 0x02, 0x21, 0x13, 0x49, 0x1e, 0xad, 0x0c, 0xab, 0x3b, + 0x6d, 0x00, 0x39, 0x04, 0x80, 0x3e, 0x26, 0x8d, 0x24, 0x5f, 0x8a, 0x44, 0x9e, 0xcb, 0x84, 0x35, + 
0xc0, 0xd4, 0xef, 0x4b, 0x7d, 0x5e, 0x4f, 0xf2, 0xe5, 0x21, 0x40, 0xf4, 0x01, 0x81, 0xb3, 0x8b, + 0x3a, 0x71, 0xa9, 0x9d, 0xe4, 0x4b, 0x0c, 0xfb, 0xe7, 0xa4, 0x5c, 0x18, 0xd6, 0xdc, 0x29, 0xed, + 0x35, 0xfb, 0xf7, 0x7b, 0xbf, 0x5b, 0x18, 0xbc, 0x5c, 0x18, 0xba, 0x4b, 0x3a, 0x59, 0x6e, 0xd5, + 0xe2, 0x52, 0x98, 0x48, 0xab, 0xc2, 0x1a, 0xd6, 0x42, 0x2d, 0xda, 0x0e, 0x9d, 0x39, 0x10, 0xa2, + 0x0a, 0x11, 0x67, 0x6d, 0x17, 0x69, 0x8c, 0xfe, 0x23, 0x42, 0x8a, 0x50, 0xcb, 0xcc, 0x0a, 0x95, + 0x2e, 0x59, 0x07, 0x6f, 0x1a, 0x0e, 0x19, 0xa7, 0x4b, 0x30, 0xdc, 0xea, 0x30, 0x5a, 0x89, 0x54, + 0xa6, 0xec, 0x96, 0x33, 0x1c, 0x81, 0x23, 0x99, 0x82, 0x6c, 0xb8, 0xb6, 0xb9, 0x88, 0x65, 0xbc, + 0x2e, 0x58, 0xe0, 0x0c, 0x07, 0xe4, 0x00, 0x00, 0x08, 0xd3, 0xcf, 0xb9, 0x5e, 0x6d, 0xe2, 0x7f, + 0x1b, 0xa3, 0xdc, 0x00, 0xc8, 0x45, 0xff, 0x11, 0x21, 0x89, 0xca, 0x56, 0x42, 0xcb, 0x34, 0x2c, + 0x18, 0x75, 0xe2, 0x80, 0x70, 0x00, 0xe8, 0x2e, 0xa9, 0x40, 0x71, 0x1a, 0x76, 0x67, 0x67, 0x6b, + 0xaf, 0xd9, 0xbf, 0xd5, 0xbb, 0x59, 0xaf, 0xdc, 0xdd, 0xd2, 0xa7, 0xa4, 0x16, 0x15, 0x6b, 0x11, + 0x85, 0x05, 0xbb, 0xbb, 0x53, 0xda, 0x6b, 0x7f, 0x4f, 0x9e, 0xf7, 0x5f, 0x3d, 0x7f, 0xf5, 0xdd, + 0xcb, 0xfe, 0xab, 0x17, 0xbc, 0x1a, 0x15, 0xeb, 0x41, 0x58, 0xd0, 0x27, 0xa4, 0xb9, 0xc8, 0x75, + 0x24, 0x85, 0xd2, 0xf0, 0xd7, 0x3d, 0xfc, 0x8b, 0x20, 0x34, 0x06, 0x04, 0x82, 0x20, 0x2f, 0x64, + 0x24, 0xa2, 0x34, 0x66, 0xf7, 0x77, 0xb6, 0x20, 0x08, 0x40, 0x0f, 0x52, 0x48, 0x92, 0x1a, 0xd6, + 0x7a, 0x66, 0xd9, 0xc7, 0xa8, 0x49, 0xa7, 0x77, 0xa3, 0xf6, 0x79, 0x55, 0x5e, 0xd8, 0xa3, 0xcc, + 0x42, 0x14, 0xd2, 0x30, 0x83, 0xf8, 0xb8, 0xf2, 0x32, 0x8c, 0xb9, 0x28, 0x38, 0x74, 0xe0, 0x40, + 0xba, 0x4b, 0x6a, 0xd1, 0x12, 0x4b, 0x8f, 0x3d, 0xc0, 0xf7, 0x5a, 0xbd, 0x6b, 0xe5, 0xc8, 0xab, + 0xd1, 0x92, 0x43, 0x60, 0x9e, 0x90, 0xa6, 0x36, 0x56, 0x18, 0x75, 0x9a, 0x40, 0x1d, 0x7c, 0xe2, + 0x54, 0xd6, 0xc6, 0xce, 0x1c, 0x42, 0xf7, 0xaf, 0x97, 0x3d, 0x7b, 0x88, 0x4f, 0x35, 0x7b, 0x1f, + 0x20, 0xde, 0xf0, 0xe7, 0x51, 0x4c, 
0x77, 0x48, 0x0b, 0x23, 0xb5, 0x31, 0xe4, 0x4f, 0xee, 0x35, + 0xc0, 0x86, 0x4e, 0xf9, 0x27, 0xae, 0xa6, 0xcc, 0x59, 0xa8, 0xe1, 0xbb, 0x47, 0x8e, 0x41, 0x5e, + 0xd8, 0x99, 0x43, 0x36, 0x0c, 0x69, 0x68, 0xac, 0xd4, 0x86, 0x3d, 0xbe, 0x62, 0x38, 0x72, 0x08, + 0xb8, 0xd0, 0xac, 0x54, 0x81, 0xef, 0x3f, 0x71, 0x2e, 0x04, 0x1a, 0x1e, 0x87, 0xf6, 0x95, 0x85, + 0xa7, 0x89, 0x14, 0x0b, 0xc3, 0x76, 0xf0, 0xae, 0xee, 0x80, 0x91, 0xa1, 0x7b, 0xa4, 0xe9, 0x2b, + 0x59, 0xa8, 0x2c, 0x67, 0x9f, 0xa2, 0x21, 0xf5, 0x9e, 0xc7, 0x78, 0x63, 0x8d, 0x45, 0x3d, 0xce, + 0x72, 0xfa, 0x77, 0x72, 0xe7, 0xa6, 0x83, 0x45, 0x0a, 0x4d, 0xa8, 0xbb, 0x53, 0xda, 0xeb, 0xf4, + 0xdb, 0x2e, 0x3f, 0xa2, 0x25, 0x82, 0xfc, 0xf6, 0x0d, 0xa7, 0x1f, 0xe5, 0xb1, 0x84, 0x8f, 0x96, + 0x67, 0xb9, 0xb1, 0x22, 0x51, 0xa9, 0xb2, 0xec, 0x29, 0x66, 0x4b, 0xed, 0x9b, 0xaf, 0x9f, 0xff, + 0xf5, 0xc5, 0xcb, 0xef, 0x38, 0xc1, 0xbb, 0x43, 0xb8, 0xa2, 0x7b, 0x24, 0xc0, 0x44, 0x11, 0x26, + 0x0a, 0x33, 0x01, 0xdd, 0xcf, 0xb0, 0xcf, 0x50, 0xed, 0x0e, 0xe2, 0xb3, 0x28, 0xcc, 0xa6, 0x80, + 0xd2, 0x4f, 0x20, 0x6f, 0xac, 0xd4, 0x59, 0x98, 0xb0, 0x5d, 0x6f, 0x98, 0xa7, 0x31, 0xa7, 0xd2, + 0xc2, 0x5e, 0x8a, 0xcc, 0xb0, 0xcf, 0xe1, 0x33, 0x5e, 0x43, 0xfa, 0x18, 0x6c, 0xae, 0xb9, 0x51, + 0x60, 0xd8, 0x17, 0x3e, 0xbb, 0x6f, 0x8e, 0x06, 0x5e, 0x05, 0xfa, 0xd8, 0xd0, 0x4f, 0x49, 0xcb, + 0x67, 0x47, 0xa1, 0xf3, 0xc2, 0xb0, 0x3f, 0x63, 0x85, 0xfa, 0x06, 0x3e, 0x05, 0x88, 0xee, 0x93, + 0xdb, 0xd7, 0x59, 0x5c, 0x27, 0xd9, 0x47, 0xbe, 0x5b, 0xd7, 0xf8, 0xb0, 0xa3, 0x3c, 0x27, 0xf7, + 0x3d, 0x6f, 0xbc, 0x4e, 0x0b, 0x11, 0xe5, 0x99, 0xd5, 0x79, 0x92, 0x48, 0xcd, 0xbe, 0x44, 0xed, + 0xef, 0xba, 0xdb, 0x83, 0x75, 0x5a, 0x0c, 0xae, 0xee, 0xa0, 0x2b, 0x2f, 0xb4, 0x94, 0xef, 0x37, + 0x8e, 0x67, 0xcf, 0xf0, 0xf5, 0x96, 0x03, 0x9d, 0x8f, 0x61, 0x42, 0x5b, 0x95, 0x4a, 0x98, 0x95, + 0x7f, 0x71, 0xd6, 0x7a, 0x92, 0x7e, 0x49, 0x28, 0xf4, 0x63, 0xcc, 0x0e, 0x95, 0x89, 0x45, 0xa2, + 0x96, 0x67, 0x96, 0xf5, 0x30, 0x83, 0xa0, 0x53, 0xcf, 0x56, 0xaa, 0x18, 
0x67, 0x23, 0x84, 0xc1, + 0xe0, 0x9f, 0x65, 0xb8, 0x12, 0xe6, 0xd2, 0x44, 0x36, 0x31, 0xec, 0x2b, 0x64, 0x6b, 0x02, 0x36, + 0x73, 0x10, 0x36, 0x8e, 0xf0, 0xfd, 0x25, 0xf6, 0x42, 0xc3, 0xbe, 0xf6, 0x8d, 0x23, 0x7c, 0x7f, + 0x39, 0x05, 0x00, 0x9b, 0xb5, 0x0d, 0xed, 0xda, 0x40, 0x5d, 0x7c, 0x83, 0x5d, 0xa7, 0xee, 0x80, + 0x51, 0x0c, 0xce, 0xca, 0x75, 0x71, 0x06, 0x61, 0xb5, 0xc6, 0x67, 0x33, 0xeb, 0x3b, 0x55, 0xdc, + 0xc5, 0xd4, 0x1a, 0x97, 0xd2, 0x90, 0xf2, 0x51, 0x9e, 0x2d, 0x94, 0x6f, 0xce, 0xdf, 0xa2, 0xd1, + 0xc4, 0x41, 0xe0, 0xcd, 0xee, 0x33, 0xbf, 0x44, 0xa0, 0x2f, 0xb5, 0x34, 0x05, 0xe4, 0x83, 0x96, + 0xc6, 0xe6, 0x5a, 0xc6, 0x38, 0x50, 0xeb, 0xfc, 0x8a, 0xee, 0xee, 0x92, 0xdb, 0xc8, 0xed, 0x01, + 0x27, 0xe0, 0x47, 0xa0, 0x1b, 0x8e, 0x70, 0xec, 0xbe, 0x24, 0x4d, 0x64, 0x73, 0xbd, 0x9b, 0xde, + 0x27, 0x55, 0xd7, 0xd4, 0xfd, 0x80, 0xf6, 0xd4, 0x6f, 0x67, 0x67, 0xf7, 0x47, 0xd2, 0x46, 0xc1, + 0x85, 0x0c, 0xed, 0x5a, 0x3b, 0x47, 0xa4, 0x32, 0x15, 0xd8, 0xaf, 0x37, 0xda, 0xa4, 0x32, 0x9d, + 0x03, 0xfd, 0x2b, 0x27, 0x96, 0x7f, 0xe5, 0xc4, 0xee, 0x2f, 0x25, 0x52, 0xf7, 0xda, 0xfe, 0x8b, + 0x76, 0xc9, 0xb6, 0xbd, 0x2c, 0xdc, 0xb8, 0xef, 0xf4, 0x3b, 0xbd, 0xcd, 0x85, 0x00, 0x94, 0xe3, + 0x1d, 0x7d, 0x4c, 0xb6, 0x61, 0xee, 0xe3, 0x4b, 0xcd, 0x3e, 0xe9, 0x5d, 0x6d, 0x02, 0x1c, 0xf1, + 0xeb, 0x33, 0x6a, 0x1d, 0x45, 0xb0, 0xc7, 0x6d, 0xdd, 0x98, 0x51, 0x0e, 0x04, 0x9d, 0x57, 0x52, + 0x16, 0x22, 0x2f, 0x64, 0xe6, 0x27, 0x7b, 0x1d, 0x80, 0x49, 0x21, 0x33, 0xba, 0x4f, 0xea, 0x1b, + 0xe3, 0x70, 0xa2, 0x37, 0x37, 0xba, 0x6c, 0x50, 0x7e, 0x75, 0xbf, 0xf1, 0x4f, 0x15, 0x53, 0x11, + 0xfd, 0xf3, 0xef, 0x2d, 0xbf, 0x9f, 0xa0, 0xe3, 0xff, 0x1f, 0x9b, 0x18, 0xa9, 0x6d, 0x94, 0x85, + 0x4d, 0xa8, 0xce, 0x37, 0x24, 0x7d, 0x4a, 0xb6, 0x21, 0xe8, 0x68, 0xc3, 0xd5, 0x6c, 0xba, 0x4a, + 0x03, 0x8e, 0x97, 0xf4, 0x19, 0xa9, 0xf9, 0x58, 0xa3, 0x25, 0xcd, 0x3e, 0xed, 0xfd, 0x26, 0x01, + 0xf8, 0x86, 0x85, 0x7e, 0x46, 0xaa, 0xce, 0x15, 0xde, 0xb4, 0x56, 0xef, 0x5a, 0x1a, 0x70, 0x7f, + 0xe7, 0x57, 
0x82, 0xea, 0x1f, 0xae, 0x04, 0x0f, 0x20, 0x7c, 0x42, 0x6a, 0x9d, 0xe5, 0xb8, 0xb0, + 0x54, 0x78, 0x2d, 0xd2, 0x43, 0x20, 0x6f, 0x78, 0xb1, 0xfe, 0x07, 0x5e, 0x7c, 0x08, 0x2e, 0x83, + 0x67, 0x52, 0xb3, 0xc4, 0xe5, 0xa5, 0xc1, 0xeb, 0xf8, 0x4e, 0x6a, 0x96, 0x30, 0x19, 0xcf, 0xa5, + 0x36, 0x2a, 0xcf, 0x70, 0x71, 0x69, 0x6e, 0x7a, 0xb0, 0x07, 0xf9, 0xe6, 0x16, 0x73, 0x18, 0x0b, + 0x10, 0x77, 0x99, 0x0a, 0xf7, 0x54, 0xf7, 0x3f, 0x25, 0xd2, 0xba, 0x2e, 0x01, 0x8b, 0x65, 0x1a, + 0xbe, 0xcb, 0xb5, 0xaf, 0x07, 0x47, 0x20, 0xaa, 0xb2, 0x5c, 0xfb, 0x1d, 0xd6, 0x11, 0x80, 0x2e, + 0x95, 0xf5, 0x5b, 0x7e, 0x83, 0x3b, 0x02, 0x0a, 0xd0, 0xac, 0x4f, 0xdd, 0xb2, 0xb5, 0xed, 0x6b, + 0xdf, 0xd3, 0x20, 0x81, 0x4b, 0x33, 0x3a, 0xb8, 0xc2, 0x1d, 0x01, 0x5b, 0x11, 0xb4, 0x5d, 0xf4, + 0x69, 0x83, 0xe3, 0x79, 0x5f, 0x78, 0xbd, 0xfc, 0x34, 0xa1, 0x84, 0x54, 0xc7, 0x6f, 0x8e, 0x27, + 0x7c, 0x18, 0x7c, 0x44, 0x9b, 0xa4, 0x36, 0x78, 0x23, 0x8e, 0x27, 0xc7, 0xc3, 0xa0, 0x44, 0x1b, + 0xa4, 0x32, 0xe5, 0x93, 0xe9, 0x2c, 0x28, 0xd3, 0x3a, 0xd9, 0x9e, 0x4d, 0x46, 0xf3, 0x60, 0x0b, + 0x4e, 0xa3, 0x93, 0xc3, 0xc3, 0x60, 0x1b, 0xe4, 0x66, 0x73, 0x3e, 0x1e, 0xcc, 0x83, 0x0a, 0xc8, + 0x1d, 0x0c, 0x47, 0xaf, 0x4f, 0x0e, 0xe7, 0x41, 0x75, 0xff, 0x97, 0x92, 0x2f, 0xd6, 0x4d, 0xc6, + 0xc1, 0x4b, 0xc3, 0xa3, 0xe9, 0xfc, 0xa7, 0xe0, 0x23, 0x90, 0x3f, 0x38, 0x39, 0x9a, 0x06, 0x25, + 0x90, 0xe1, 0xc3, 0xd9, 0x1c, 0x3e, 0x2e, 0x03, 0xc7, 0xe0, 0x87, 0xe1, 0xe0, 0xc7, 0x60, 0x8b, + 0xb6, 0x48, 0x7d, 0xca, 0x87, 0x02, 0xb9, 0xb6, 0xe9, 0x2d, 0xd2, 0x9c, 0xbe, 0x7e, 0x33, 0x14, + 0xb3, 0x21, 0x7f, 0x3b, 0xe4, 0x41, 0x05, 0xbe, 0x3d, 0x9e, 0xcc, 0xc7, 0xa3, 0x9f, 0x82, 0x2a, + 0x0d, 0x48, 0x6b, 0x30, 0x3d, 0x19, 0x1f, 0x8f, 0x26, 0x8e, 0xbd, 0x46, 0x6f, 0x93, 0xf6, 0x06, + 0x71, 0xef, 0xd5, 0x01, 0x1a, 0x0d, 0x5f, 0xcf, 0x4f, 0xf8, 0xd0, 0x43, 0x0d, 0xf8, 0xfa, 0xed, + 0x90, 0xcf, 0xc6, 0x93, 0xe3, 0x80, 0xc0, 0x7f, 0xff, 0x7c, 0x3d, 0x9e, 0x8b, 0xe9, 0xf8, 0x20, + 0x68, 0xd2, 0xbb, 0x24, 0xb8, 0xf6, 0x9f, 0x18, 
0xfc, 0x70, 0x78, 0x10, 0xb4, 0xfe, 0x17, 0x00, + 0x00, 0xff, 0xff, 0xf8, 0x9f, 0x0e, 0x7d, 0xca, 0x0d, 0x00, 0x00, +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/test/main.go b/vendor/github.com/checkpoint-restore/go-criu/test/main.go new file mode 100644 index 0000000..418ebb8 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/test/main.go @@ -0,0 +1,133 @@ +package main + +import ( + "fmt" + "github.com/checkpoint-restore/go-criu" + "github.com/checkpoint-restore/go-criu/rpc" + "github.com/golang/protobuf/proto" + "os" + "strconv" +) + +// TestNfy struct +type TestNfy struct { + criu.NoNotify +} + +// PreDump test function +func (c TestNfy) PreDump() error { + fmt.Printf("TEST PRE DUMP\n") + return nil +} + +func doDump(c *criu.Criu, pidS string, imgDir string, pre bool, prevImg string) error { + fmt.Printf("Dumping\n") + pid, _ := strconv.Atoi(pidS) + img, err := os.Open(imgDir) + if err != nil { + return fmt.Errorf("can't open image dir (%s)", err) + } + defer img.Close() + + opts := rpc.CriuOpts{ + Pid: proto.Int32(int32(pid)), + ImagesDirFd: proto.Int32(int32(img.Fd())), + LogLevel: proto.Int32(4), + LogFile: proto.String("dump.log"), + } + + if prevImg != "" { + opts.ParentImg = proto.String(prevImg) + opts.TrackMem = proto.Bool(true) + } + + if pre { + err = c.PreDump(opts, TestNfy{}) + } else { + err = c.Dump(opts, TestNfy{}) + } + if err != nil { + return fmt.Errorf("dump fail (%s)", err) + } + + return nil +} + +// Usage: test $act $pid $images_dir +func main() { + c := criu.MakeCriu() + // Read out CRIU version + version, err := c.GetCriuVersion() + if err != nil { + fmt.Println(err) + os.Exit(1) + } + fmt.Println("CRIU version", version) + // Check if version at least 3.2 + result, err := c.IsCriuAtLeast(30200) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + if !result { + fmt.Println("CRIU too old") + os.Exit(1) + } + act := os.Args[1] + switch act { + case "dump": + err := doDump(c, os.Args[2], os.Args[3], false, 
"") + if err != nil { + fmt.Print(err) + os.Exit(1) + } + case "dump2": + err := c.Prepare() + if err != nil { + fmt.Print(err) + os.Exit(1) + } + + err = doDump(c, os.Args[2], os.Args[3]+"/pre", true, "") + if err != nil { + fmt.Printf("pre-dump failed") + fmt.Print(err) + os.Exit(1) + } + err = doDump(c, os.Args[2], os.Args[3], false, "./pre") + if err != nil { + fmt.Printf("dump failed") + fmt.Print(err) + os.Exit(1) + } + + c.Cleanup() + case "restore": + fmt.Printf("Restoring\n") + img, err := os.Open(os.Args[2]) + if err != nil { + fmt.Printf("can't open image dir") + os.Exit(1) + } + defer img.Close() + + opts := rpc.CriuOpts{ + ImagesDirFd: proto.Int32(int32(img.Fd())), + LogLevel: proto.Int32(4), + LogFile: proto.String("restore.log"), + } + + err = c.Restore(opts, nil) + if err != nil { + fmt.Printf("Error:") + fmt.Print(err) + fmt.Printf("\n") + os.Exit(1) + } + default: + fmt.Printf("unknown action\n") + os.Exit(1) + } + + fmt.Printf("Success\n") +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/test/phaul-main.go b/vendor/github.com/checkpoint-restore/go-criu/test/phaul-main.go new file mode 100644 index 0000000..f1bec2c --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/test/phaul-main.go @@ -0,0 +1,192 @@ +package main + +import ( + "fmt" + "os" + "strconv" + "strings" + "syscall" + + "github.com/checkpoint-restore/go-criu" + "github.com/checkpoint-restore/go-criu/phaul" + "github.com/checkpoint-restore/go-criu/rpc" + "github.com/golang/protobuf/proto" +) + +type testLocal struct { + criu.NoNotify + r *testRemote +} + +type testRemote struct { + srv *phaul.Server +} + +/* Dir where test will put dump images */ +const imagesDir = "image" + +func prepareImages() error { + err := os.Mkdir(imagesDir, 0700) + if err != nil { + return err + } + + /* Work dir for PhaulClient */ + err = os.Mkdir(imagesDir+"/local", 0700) + if err != nil { + return err + } + + /* Work dir for PhaulServer */ + err = os.Mkdir(imagesDir+"/remote", 0700) 
+ if err != nil { + return err + } + + /* Work dir for DumpCopyRestore */ + err = os.Mkdir(imagesDir+"/test", 0700) + if err != nil { + return err + } + + return nil +} + +func mergeImages(dumpDir, lastPreDumpDir string) error { + idir, err := os.Open(dumpDir) + if err != nil { + return err + } + + defer idir.Close() + + imgs, err := idir.Readdirnames(0) + if err != nil { + return err + } + + for _, fname := range imgs { + if !strings.HasSuffix(fname, ".img") { + continue + } + + fmt.Printf("\t%s -> %s/\n", fname, lastPreDumpDir) + err = syscall.Link(dumpDir+"/"+fname, lastPreDumpDir+"/"+fname) + if err != nil { + return err + } + } + + return nil +} + +func (r *testRemote) doRestore() error { + lastSrvImagesDir := r.srv.LastImagesDir() + /* + * In imagesDir we have images from dump, in the + * lastSrvImagesDir -- where server-side images + * (from page server, with pages and pagemaps) are. + * Need to put former into latter and restore from + * them. + */ + err := mergeImages(imagesDir+"/test", lastSrvImagesDir) + if err != nil { + return err + } + + imgDir, err := os.Open(lastSrvImagesDir) + if err != nil { + return err + } + defer imgDir.Close() + + opts := rpc.CriuOpts{ + LogLevel: proto.Int32(4), + LogFile: proto.String("restore.log"), + ImagesDirFd: proto.Int32(int32(imgDir.Fd())), + } + + cr := r.srv.GetCriu() + fmt.Printf("Do restore\n") + return cr.Restore(opts, nil) +} + +func (l *testLocal) PostDump() error { + return l.r.doRestore() +} + +func (l *testLocal) DumpCopyRestore(cr *criu.Criu, cfg phaul.Config, lastClnImagesDir string) error { + fmt.Printf("Final stage\n") + + imgDir, err := os.Open(imagesDir + "/test") + if err != nil { + return err + } + defer imgDir.Close() + + psi := rpc.CriuPageServerInfo{ + Fd: proto.Int32(int32(cfg.Memfd)), + } + + opts := rpc.CriuOpts{ + Pid: proto.Int32(int32(cfg.Pid)), + LogLevel: proto.Int32(4), + LogFile: proto.String("dump.log"), + ImagesDirFd: proto.Int32(int32(imgDir.Fd())), + TrackMem: proto.Bool(true), + 
ParentImg: proto.String(lastClnImagesDir), + Ps: &psi, + } + + fmt.Printf("Do dump\n") + return cr.Dump(opts, l) +} + +func main() { + pid, _ := strconv.Atoi(os.Args[1]) + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM, 0) + if err != nil { + fmt.Printf("Can't make socketpair: %v\n", err) + os.Exit(1) + } + + err = prepareImages() + if err != nil { + fmt.Printf("Can't prepare dirs for images: %v\n", err) + os.Exit(1) + return + } + + fmt.Printf("Make server part (socket %d)\n", fds[1]) + srv, err := phaul.MakePhaulServer(phaul.Config{ + Pid: pid, + Memfd: fds[1], + Wdir: imagesDir + "/remote"}) + if err != nil { + fmt.Printf("Unable to run a server: %v", err) + os.Exit(1) + return + } + + r := &testRemote{srv} + + fmt.Printf("Make client part (socket %d)\n", fds[0]) + cln, err := phaul.MakePhaulClient(&testLocal{r: r}, srv, + phaul.Config{ + Pid: pid, + Memfd: fds[0], + Wdir: imagesDir + "/local"}) + if err != nil { + fmt.Printf("Unable to run a client: %v\n", err) + os.Exit(1) + } + + fmt.Printf("Migrate\n") + err = cln.Migrate() + if err != nil { + fmt.Printf("Failed: %v\n", err) + os.Exit(1) + } + + fmt.Printf("SUCCESS!\n") +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/test/piggie.c b/vendor/github.com/checkpoint-restore/go-criu/test/piggie.c new file mode 100644 index 0000000..1dc0801 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/test/piggie.c @@ -0,0 +1,57 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#define STKS (4*4096) + +#ifndef CLONE_NEWPID +#define CLONE_NEWPID 0x20000000 +#endif + +static int do_test(void *logf) +{ + int fd, i = 0; + + setsid(); + + close(0); + close(1); + close(2); + + fd = open("/dev/null", O_RDONLY); + if (fd != 0) { + dup2(fd, 0); + close(fd); + } + + fd = open(logf, O_WRONLY | O_TRUNC | O_CREAT, 0600); + dup2(fd, 1); + dup2(fd, 2); + if (fd != 1 && fd != 2) + close(fd); + + while (1) { + sleep(1); + printf("%d\n", i++); + 
fflush(stdout); + } + + return 0; +} + +int main(int argc, char **argv) +{ + int pid; + void *stk; + + stk = mmap(NULL, STKS, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0); + pid = clone(do_test, stk + STKS, SIGCHLD | CLONE_NEWPID, argv[1]); + printf("Child forked, pid %d\n", pid); + + return 0; +} diff --git a/vendor/github.com/cilium/ebpf/LICENSE b/vendor/github.com/cilium/ebpf/LICENSE new file mode 100644 index 0000000..c637ae9 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/LICENSE @@ -0,0 +1,23 @@ +MIT License + +Copyright (c) 2017 Nathan Sweet +Copyright (c) 2018, 2019 Cloudflare +Copyright (c) 2019 Authors of Cilium + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vendor/github.com/cilium/ebpf/abi.go b/vendor/github.com/cilium/ebpf/abi.go new file mode 100644 index 0000000..999b8cc --- /dev/null +++ b/vendor/github.com/cilium/ebpf/abi.go @@ -0,0 +1,183 @@ +package ebpf + +import ( + "github.com/pkg/errors" +) + +// CollectionABI describes the interface of an eBPF collection. +type CollectionABI struct { + Maps map[string]*MapABI + Programs map[string]*ProgramABI +} + +// CheckSpec verifies that all maps and programs mentioned +// in the ABI are present in the spec. +func (abi *CollectionABI) CheckSpec(cs *CollectionSpec) error { + for name := range abi.Maps { + if cs.Maps[name] == nil { + return errors.Errorf("missing map %s", name) + } + } + + for name := range abi.Programs { + if cs.Programs[name] == nil { + return errors.Errorf("missing program %s", name) + } + } + + return nil +} + +// Check verifies that all items in a collection conform to this ABI. +func (abi *CollectionABI) Check(coll *Collection) error { + for name, mapABI := range abi.Maps { + m := coll.Maps[name] + if m == nil { + return errors.Errorf("missing map %s", name) + } + if err := mapABI.Check(m); err != nil { + return errors.Wrapf(err, "map %s", name) + } + } + + for name, progABI := range abi.Programs { + p := coll.Programs[name] + if p == nil { + return errors.Errorf("missing program %s", name) + } + if err := progABI.Check(p); err != nil { + return errors.Wrapf(err, "program %s", name) + } + } + + return nil +} + +// MapABI describes a Map. +// +// Use it to assert that a Map matches what your code expects. 
+type MapABI struct { + Type MapType + KeySize uint32 + ValueSize uint32 + MaxEntries uint32 + InnerMap *MapABI +} + +func newMapABIFromSpec(spec *MapSpec) *MapABI { + var inner *MapABI + if spec.InnerMap != nil { + inner = newMapABIFromSpec(spec.InnerMap) + } + + return &MapABI{ + spec.Type, + spec.KeySize, + spec.ValueSize, + spec.MaxEntries, + inner, + } +} + +func newMapABIFromFd(fd *bpfFD) (*MapABI, error) { + info, err := bpfGetMapInfoByFD(fd) + if err != nil { + return nil, err + } + + mapType := MapType(info.mapType) + if mapType == ArrayOfMaps || mapType == HashOfMaps { + return nil, errors.New("can't get map info for nested maps") + } + + return &MapABI{ + mapType, + info.keySize, + info.valueSize, + info.maxEntries, + nil, + }, nil +} + +// Check verifies that a Map conforms to the ABI. +// +// Members of ABI which have the zero value of their type are not checked. +func (abi *MapABI) Check(m *Map) error { + return abi.check(&m.abi) +} + +func (abi *MapABI) check(other *MapABI) error { + if abi.Type != UnspecifiedMap && other.Type != abi.Type { + return errors.Errorf("expected map type %s, have %s", abi.Type, other.Type) + } + if err := checkUint32("key size", abi.KeySize, other.KeySize); err != nil { + return err + } + if err := checkUint32("value size", abi.ValueSize, other.ValueSize); err != nil { + return err + } + if err := checkUint32("max entries", abi.MaxEntries, other.MaxEntries); err != nil { + return err + } + + if abi.InnerMap == nil { + if abi.Type == ArrayOfMaps || abi.Type == HashOfMaps { + return errors.New("missing inner map ABI") + } + + return nil + } + + if other.InnerMap == nil { + return errors.New("missing inner map") + } + + return errors.Wrap(abi.InnerMap.check(other.InnerMap), "inner map") +} + +// ProgramABI describes a Program. +// +// Use it to assert that a Program matches what your code expects. 
+type ProgramABI struct { + Type ProgramType +} + +func newProgramABIFromSpec(spec *ProgramSpec) *ProgramABI { + return &ProgramABI{ + spec.Type, + } +} + +func newProgramABIFromFd(fd *bpfFD) (*ProgramABI, error) { + info, err := bpfGetProgInfoByFD(fd) + if err != nil { + return nil, err + } + + return newProgramABIFromInfo(info), nil +} + +func newProgramABIFromInfo(info *bpfProgInfo) *ProgramABI { + return &ProgramABI{ + Type: ProgramType(info.progType), + } +} + +// Check verifies that a Program conforms to the ABI. +// +// Members which have the zero value of their type +// are not checked. +func (abi *ProgramABI) Check(prog *Program) error { + if abi.Type != UnspecifiedProgram && prog.abi.Type != abi.Type { + return errors.Errorf("expected program type %s, have %s", abi.Type, prog.abi.Type) + } + + return nil +} + +func checkUint32(name string, want, have uint32) error { + if want != 0 && have != want { + return errors.Errorf("expected %s to be %d, have %d", name, want, have) + } + return nil +} diff --git a/vendor/github.com/cilium/ebpf/asm/alu.go b/vendor/github.com/cilium/ebpf/asm/alu.go new file mode 100644 index 0000000..70ccc4d --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/alu.go @@ -0,0 +1,149 @@ +package asm + +//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp + +// Source of ALU / ALU64 / Branch operations +// +// msb lsb +// +----+-+---+ +// |op |S|cls| +// +----+-+---+ +type Source uint8 + +const sourceMask OpCode = 0x08 + +// Source bitmask +const ( + // InvalidSource is returned by getters when invoked + // on non ALU / branch OpCodes. + InvalidSource Source = 0xff + // ImmSource src is from constant + ImmSource Source = 0x00 + // RegSource src is from register + RegSource Source = 0x08 +) + +// The Endianness of a byte swap instruction. 
+type Endianness uint8 + +const endianMask = sourceMask + +// Endian flags +const ( + InvalidEndian Endianness = 0xff + // Convert to little endian + LE Endianness = 0x00 + // Convert to big endian + BE Endianness = 0x08 +) + +// ALUOp are ALU / ALU64 operations +// +// msb lsb +// +----+-+---+ +// |OP |s|cls| +// +----+-+---+ +type ALUOp uint8 + +const aluMask OpCode = 0xf0 + +const ( + // InvalidALUOp is returned by getters when invoked + // on non ALU OpCodes + InvalidALUOp ALUOp = 0xff + // Add - addition + Add ALUOp = 0x00 + // Sub - subtraction + Sub ALUOp = 0x10 + // Mul - multiplication + Mul ALUOp = 0x20 + // Div - division + Div ALUOp = 0x30 + // Or - bitwise or + Or ALUOp = 0x40 + // And - bitwise and + And ALUOp = 0x50 + // LSh - bitwise shift left + LSh ALUOp = 0x60 + // RSh - bitwise shift right + RSh ALUOp = 0x70 + // Neg - sign/unsign signing bit + Neg ALUOp = 0x80 + // Mod - modulo + Mod ALUOp = 0x90 + // Xor - bitwise xor + Xor ALUOp = 0xa0 + // Mov - move value from one place to another + Mov ALUOp = 0xb0 + // ArSh - arithmatic shift + ArSh ALUOp = 0xc0 + // Swap - endian conversions + Swap ALUOp = 0xd0 +) + +// HostTo converts from host to another endianness. +func HostTo(endian Endianness, dst Register, size Size) Instruction { + var imm int64 + switch size { + case Half: + imm = 16 + case Word: + imm = 32 + case DWord: + imm = 64 + default: + return Instruction{OpCode: InvalidOpCode} + } + + return Instruction{ + OpCode: OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian)), + Dst: dst, + Constant: imm, + } +} + +// Op returns the OpCode for an ALU operation with a given source. +func (op ALUOp) Op(source Source) OpCode { + return OpCode(ALU64Class).SetALUOp(op).SetSource(source) +} + +// Reg emits `dst (op) src`. +func (op ALUOp) Reg(dst, src Register) Instruction { + return Instruction{ + OpCode: op.Op(RegSource), + Dst: dst, + Src: src, + } +} + +// Imm emits `dst (op) value`. 
+func (op ALUOp) Imm(dst Register, value int32) Instruction { + return Instruction{ + OpCode: op.Op(ImmSource), + Dst: dst, + Constant: int64(value), + } +} + +// Op32 returns the OpCode for a 32-bit ALU operation with a given source. +func (op ALUOp) Op32(source Source) OpCode { + return OpCode(ALUClass).SetALUOp(op).SetSource(source) +} + +// Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst. +func (op ALUOp) Reg32(dst, src Register) Instruction { + return Instruction{ + OpCode: op.Op32(RegSource), + Dst: dst, + Src: src, + } +} + +// Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst. +func (op ALUOp) Imm32(dst Register, value int32) Instruction { + return Instruction{ + OpCode: op.Op32(ImmSource), + Dst: dst, + Constant: int64(value), + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/alu_string.go b/vendor/github.com/cilium/ebpf/asm/alu_string.go new file mode 100644 index 0000000..72d3fe6 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/alu_string.go @@ -0,0 +1,107 @@ +// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidSource-255] + _ = x[ImmSource-0] + _ = x[RegSource-8] +} + +const ( + _Source_name_0 = "ImmSource" + _Source_name_1 = "RegSource" + _Source_name_2 = "InvalidSource" +) + +func (i Source) String() string { + switch { + case i == 0: + return _Source_name_0 + case i == 8: + return _Source_name_1 + case i == 255: + return _Source_name_2 + default: + return "Source(" + strconv.FormatInt(int64(i), 10) + ")" + } +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[InvalidEndian-255] + _ = x[LE-0] + _ = x[BE-8] +} + +const ( + _Endianness_name_0 = "LE" + _Endianness_name_1 = "BE" + _Endianness_name_2 = "InvalidEndian" +) + +func (i Endianness) String() string { + switch { + case i == 0: + return _Endianness_name_0 + case i == 8: + return _Endianness_name_1 + case i == 255: + return _Endianness_name_2 + default: + return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")" + } +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidALUOp-255] + _ = x[Add-0] + _ = x[Sub-16] + _ = x[Mul-32] + _ = x[Div-48] + _ = x[Or-64] + _ = x[And-80] + _ = x[LSh-96] + _ = x[RSh-112] + _ = x[Neg-128] + _ = x[Mod-144] + _ = x[Xor-160] + _ = x[Mov-176] + _ = x[ArSh-192] + _ = x[Swap-208] +} + +const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp" + +var _ALUOp_map = map[ALUOp]string{ + 0: _ALUOp_name[0:3], + 16: _ALUOp_name[3:6], + 32: _ALUOp_name[6:9], + 48: _ALUOp_name[9:12], + 64: _ALUOp_name[12:14], + 80: _ALUOp_name[14:17], + 96: _ALUOp_name[17:20], + 112: _ALUOp_name[20:23], + 128: _ALUOp_name[23:26], + 144: _ALUOp_name[26:29], + 160: _ALUOp_name[29:32], + 176: _ALUOp_name[32:35], + 192: _ALUOp_name[35:39], + 208: _ALUOp_name[39:43], + 255: _ALUOp_name[43:55], +} + +func (i ALUOp) String() string { + if str, ok := _ALUOp_map[i]; ok { + return str + } + return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")" +} diff --git a/vendor/github.com/cilium/ebpf/asm/doc.go b/vendor/github.com/cilium/ebpf/asm/doc.go new file mode 100644 index 0000000..7031bdc --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/doc.go @@ -0,0 +1,2 @@ +// Package asm is an assembler for eBPF bytecode. 
+package asm diff --git a/vendor/github.com/cilium/ebpf/asm/func.go b/vendor/github.com/cilium/ebpf/asm/func.go new file mode 100644 index 0000000..97f794c --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/func.go @@ -0,0 +1,143 @@ +package asm + +//go:generate stringer -output func_string.go -type=BuiltinFunc + +// BuiltinFunc is a built-in eBPF function. +type BuiltinFunc int32 + +// eBPF built-in functions +// +// You can regenerate this list using the following gawk script: +// +// /FN\(.+\),/ { +// match($1, /\((.+)\)/, r) +// split(r[1], p, "_") +// printf "Fn" +// for (i in p) { +// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2) +// } +// print "" +// } +// +// The script expects include/uapi/linux/bpf.h as its input. +const ( + FnUnspec BuiltinFunc = iota + FnMapLookupElem + FnMapUpdateElem + FnMapDeleteElem + FnProbeRead + FnKtimeGetNs + FnTracePrintk + FnGetPrandomU32 + FnGetSmpProcessorId + FnSkbStoreBytes + FnL3CsumReplace + FnL4CsumReplace + FnTailCall + FnCloneRedirect + FnGetCurrentPidTgid + FnGetCurrentUidGid + FnGetCurrentComm + FnGetCgroupClassid + FnSkbVlanPush + FnSkbVlanPop + FnSkbGetTunnelKey + FnSkbSetTunnelKey + FnPerfEventRead + FnRedirect + FnGetRouteRealm + FnPerfEventOutput + FnSkbLoadBytes + FnGetStackid + FnCsumDiff + FnSkbGetTunnelOpt + FnSkbSetTunnelOpt + FnSkbChangeProto + FnSkbChangeType + FnSkbUnderCgroup + FnGetHashRecalc + FnGetCurrentTask + FnProbeWriteUser + FnCurrentTaskUnderCgroup + FnSkbChangeTail + FnSkbPullData + FnCsumUpdate + FnSetHashInvalid + FnGetNumaNodeId + FnSkbChangeHead + FnXdpAdjustHead + FnProbeReadStr + FnGetSocketCookie + FnGetSocketUid + FnSetHash + FnSetsockopt + FnSkbAdjustRoom + FnRedirectMap + FnSkRedirectMap + FnSockMapUpdate + FnXdpAdjustMeta + FnPerfEventReadValue + FnPerfProgReadValue + FnGetsockopt + FnOverrideReturn + FnSockOpsCbFlagsSet + FnMsgRedirectMap + FnMsgApplyBytes + FnMsgCorkBytes + FnMsgPullData + FnBind + FnXdpAdjustTail + FnSkbGetXfrmState + FnGetStack + 
FnSkbLoadBytesRelative + FnFibLookup + FnSockHashUpdate + FnMsgRedirectHash + FnSkRedirectHash + FnLwtPushEncap + FnLwtSeg6StoreBytes + FnLwtSeg6AdjustSrh + FnLwtSeg6Action + FnRcRepeat + FnRcKeydown + FnSkbCgroupId + FnGetCurrentCgroupId + FnGetLocalStorage + FnSkSelectReuseport + FnSkbAncestorCgroupId + FnSkLookupTcp + FnSkLookupUdp + FnSkRelease + FnMapPushElem + FnMapPopElem + FnMapPeekElem + FnMsgPushData + FnMsgPopData + FnRcPointerRel + FnSpinLock + FnSpinUnlock + FnSkFullsock + FnTcpSock + FnSkbEcnSetCe + FnGetListenerSock + FnSkcLookupTcp + FnTcpCheckSyncookie + FnSysctlGetName + FnSysctlGetCurrentValue + FnSysctlGetNewValue + FnSysctlSetNewValue + FnStrtol + FnStrtoul + FnSkStorageGet + FnSkStorageDelete + FnSendSignal + FnTcpGenSyncookie +) + +// Call emits a function call. +func (fn BuiltinFunc) Call() Instruction { + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(Call), + Constant: int64(fn), + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/func_string.go b/vendor/github.com/cilium/ebpf/asm/func_string.go new file mode 100644 index 0000000..8860b9f --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/func_string.go @@ -0,0 +1,133 @@ +// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[FnUnspec-0] + _ = x[FnMapLookupElem-1] + _ = x[FnMapUpdateElem-2] + _ = x[FnMapDeleteElem-3] + _ = x[FnProbeRead-4] + _ = x[FnKtimeGetNs-5] + _ = x[FnTracePrintk-6] + _ = x[FnGetPrandomU32-7] + _ = x[FnGetSmpProcessorId-8] + _ = x[FnSkbStoreBytes-9] + _ = x[FnL3CsumReplace-10] + _ = x[FnL4CsumReplace-11] + _ = x[FnTailCall-12] + _ = x[FnCloneRedirect-13] + _ = x[FnGetCurrentPidTgid-14] + _ = x[FnGetCurrentUidGid-15] + _ = x[FnGetCurrentComm-16] + _ = x[FnGetCgroupClassid-17] + _ = x[FnSkbVlanPush-18] + _ = x[FnSkbVlanPop-19] + _ = x[FnSkbGetTunnelKey-20] + _ = x[FnSkbSetTunnelKey-21] + _ = x[FnPerfEventRead-22] + _ = x[FnRedirect-23] + _ = x[FnGetRouteRealm-24] + _ = x[FnPerfEventOutput-25] + _ = x[FnSkbLoadBytes-26] + _ = x[FnGetStackid-27] + _ = x[FnCsumDiff-28] + _ = x[FnSkbGetTunnelOpt-29] + _ = x[FnSkbSetTunnelOpt-30] + _ = x[FnSkbChangeProto-31] + _ = x[FnSkbChangeType-32] + _ = x[FnSkbUnderCgroup-33] + _ = x[FnGetHashRecalc-34] + _ = x[FnGetCurrentTask-35] + _ = x[FnProbeWriteUser-36] + _ = x[FnCurrentTaskUnderCgroup-37] + _ = x[FnSkbChangeTail-38] + _ = x[FnSkbPullData-39] + _ = x[FnCsumUpdate-40] + _ = x[FnSetHashInvalid-41] + _ = x[FnGetNumaNodeId-42] + _ = x[FnSkbChangeHead-43] + _ = x[FnXdpAdjustHead-44] + _ = x[FnProbeReadStr-45] + _ = x[FnGetSocketCookie-46] + _ = x[FnGetSocketUid-47] + _ = x[FnSetHash-48] + _ = x[FnSetsockopt-49] + _ = x[FnSkbAdjustRoom-50] + _ = x[FnRedirectMap-51] + _ = x[FnSkRedirectMap-52] + _ = x[FnSockMapUpdate-53] + _ = x[FnXdpAdjustMeta-54] + _ = x[FnPerfEventReadValue-55] + _ = x[FnPerfProgReadValue-56] + _ = x[FnGetsockopt-57] + _ = x[FnOverrideReturn-58] + _ = x[FnSockOpsCbFlagsSet-59] + _ = x[FnMsgRedirectMap-60] + _ = x[FnMsgApplyBytes-61] + _ = x[FnMsgCorkBytes-62] + _ = x[FnMsgPullData-63] + _ = x[FnBind-64] + _ = x[FnXdpAdjustTail-65] + _ = x[FnSkbGetXfrmState-66] + _ = x[FnGetStack-67] + _ = x[FnSkbLoadBytesRelative-68] + _ = x[FnFibLookup-69] + _ = x[FnSockHashUpdate-70] + _ = 
x[FnMsgRedirectHash-71] + _ = x[FnSkRedirectHash-72] + _ = x[FnLwtPushEncap-73] + _ = x[FnLwtSeg6StoreBytes-74] + _ = x[FnLwtSeg6AdjustSrh-75] + _ = x[FnLwtSeg6Action-76] + _ = x[FnRcRepeat-77] + _ = x[FnRcKeydown-78] + _ = x[FnSkbCgroupId-79] + _ = x[FnGetCurrentCgroupId-80] + _ = x[FnGetLocalStorage-81] + _ = x[FnSkSelectReuseport-82] + _ = x[FnSkbAncestorCgroupId-83] + _ = x[FnSkLookupTcp-84] + _ = x[FnSkLookupUdp-85] + _ = x[FnSkRelease-86] + _ = x[FnMapPushElem-87] + _ = x[FnMapPopElem-88] + _ = x[FnMapPeekElem-89] + _ = x[FnMsgPushData-90] + _ = x[FnMsgPopData-91] + _ = x[FnRcPointerRel-92] + _ = x[FnSpinLock-93] + _ = x[FnSpinUnlock-94] + _ = x[FnSkFullsock-95] + _ = x[FnTcpSock-96] + _ = x[FnSkbEcnSetCe-97] + _ = x[FnGetListenerSock-98] + _ = x[FnSkcLookupTcp-99] + _ = x[FnTcpCheckSyncookie-100] + _ = x[FnSysctlGetName-101] + _ = x[FnSysctlGetCurrentValue-102] + _ = x[FnSysctlGetNewValue-103] + _ = x[FnSysctlSetNewValue-104] + _ = x[FnStrtol-105] + _ = x[FnStrtoul-106] + _ = x[FnSkStorageGet-107] + _ = x[FnSkStorageDelete-108] + _ = x[FnSendSignal-109] + _ = x[FnTcpGenSyncookie-110] +} + +const _BuiltinFunc_name = 
"FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookie" + +var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 
990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632} + +func (i BuiltinFunc) String() string { + if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) { + return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]] +} diff --git a/vendor/github.com/cilium/ebpf/asm/instruction.go b/vendor/github.com/cilium/ebpf/asm/instruction.go new file mode 100644 index 0000000..c8ed6cf --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/instruction.go @@ -0,0 +1,416 @@ +package asm + +import ( + "encoding/binary" + "fmt" + "io" + "math" + "strings" + + "github.com/pkg/errors" +) + +// InstructionSize is the size of a BPF instruction in bytes +const InstructionSize = 8 + +// Instruction is a single eBPF instruction. +type Instruction struct { + OpCode OpCode + Dst Register + Src Register + Offset int16 + Constant int64 + Reference string + Symbol string +} + +// Sym creates a symbol. +func (ins Instruction) Sym(name string) Instruction { + ins.Symbol = name + return ins +} + +// Unmarshal decodes a BPF instruction. 
+func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) { + var bi bpfInstruction + err := binary.Read(r, bo, &bi) + if err != nil { + return 0, err + } + + ins.OpCode = bi.OpCode + ins.Dst = bi.Registers.Dst() + ins.Src = bi.Registers.Src() + ins.Offset = bi.Offset + ins.Constant = int64(bi.Constant) + + if !bi.OpCode.isDWordLoad() { + return InstructionSize, nil + } + + var bi2 bpfInstruction + if err := binary.Read(r, bo, &bi2); err != nil { + // No Wrap, to avoid io.EOF clash + return 0, errors.New("64bit immediate is missing second half") + } + if bi2.OpCode != 0 || bi2.Offset != 0 || bi2.Registers != 0 { + return 0, errors.New("64bit immediate has non-zero fields") + } + ins.Constant = int64(uint64(uint32(bi2.Constant))<<32 | uint64(uint32(bi.Constant))) + + return 2 * InstructionSize, nil +} + +// Marshal encodes a BPF instruction. +func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) { + if ins.OpCode == InvalidOpCode { + return 0, errors.New("invalid opcode") + } + + isDWordLoad := ins.OpCode.isDWordLoad() + + cons := int32(ins.Constant) + if isDWordLoad { + // Encode least significant 32bit first for 64bit operations. + cons = int32(uint32(ins.Constant)) + } + + bpfi := bpfInstruction{ + ins.OpCode, + newBPFRegisters(ins.Dst, ins.Src), + ins.Offset, + cons, + } + + if err := binary.Write(w, bo, &bpfi); err != nil { + return 0, err + } + + if !isDWordLoad { + return InstructionSize, nil + } + + bpfi = bpfInstruction{ + Constant: int32(ins.Constant >> 32), + } + + if err := binary.Write(w, bo, &bpfi); err != nil { + return 0, err + } + + return 2 * InstructionSize, nil +} + +// RewriteMapPtr changes an instruction to use a new map fd. +// +// Returns an error if the fd is invalid, or the instruction +// is incorrect. 
+func (ins *Instruction) RewriteMapPtr(fd int) error { + if !ins.OpCode.isDWordLoad() { + return errors.Errorf("%s is not a 64 bit load", ins.OpCode) + } + + if fd < 0 { + return errors.New("invalid fd") + } + + ins.Src = R1 + ins.Constant = int64(fd) + return nil +} + +// Format implements fmt.Formatter. +func (ins Instruction) Format(f fmt.State, c rune) { + if c != 'v' { + fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c) + return + } + + op := ins.OpCode + + if op == InvalidOpCode { + fmt.Fprint(f, "INVALID") + return + } + + // Omit trailing space for Exit + if op.JumpOp() == Exit { + fmt.Fprint(f, op) + return + } + + fmt.Fprintf(f, "%v ", op) + switch cls := op.Class(); cls { + case LdClass, LdXClass, StClass, StXClass: + switch op.Mode() { + case ImmMode: + fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant) + case AbsMode: + fmt.Fprintf(f, "imm: %d", ins.Constant) + case IndMode: + fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant) + case MemMode: + fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant) + case XAddMode: + fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src) + } + + case ALU64Class, ALUClass: + fmt.Fprintf(f, "dst: %s ", ins.Dst) + if op.ALUOp() == Swap || op.Source() == ImmSource { + fmt.Fprintf(f, "imm: %d", ins.Constant) + } else { + fmt.Fprintf(f, "src: %s", ins.Src) + } + + case JumpClass: + switch jop := op.JumpOp(); jop { + case Call: + if ins.Src == R1 { + // bpf-to-bpf call + fmt.Fprint(f, ins.Constant) + } else { + fmt.Fprint(f, BuiltinFunc(ins.Constant)) + } + + default: + fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset) + if op.Source() == ImmSource { + fmt.Fprintf(f, "imm: %d", ins.Constant) + } else { + fmt.Fprintf(f, "src: %s", ins.Src) + } + } + } + + if ins.Reference != "" { + fmt.Fprintf(f, " <%s>", ins.Reference) + } +} + +// Instructions is an eBPF program. 
+type Instructions []Instruction + +func (insns Instructions) String() string { + return fmt.Sprint(insns) +} + +// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd. +// +// Returns an error if the symbol isn't used, see IsUnreferencedSymbol. +func (insns Instructions) RewriteMapPtr(symbol string, fd int) error { + if symbol == "" { + return errors.New("empty symbol") + } + + found := false + for i := range insns { + ins := &insns[i] + if ins.Reference != symbol { + continue + } + + if err := ins.RewriteMapPtr(fd); err != nil { + return err + } + + found = true + } + + if !found { + return &unreferencedSymbolError{symbol} + } + + return nil +} + +// SymbolOffsets returns the set of symbols and their offset in +// the instructions. +func (insns Instructions) SymbolOffsets() (map[string]int, error) { + offsets := make(map[string]int) + + for i, ins := range insns { + if ins.Symbol == "" { + continue + } + + if _, ok := offsets[ins.Symbol]; ok { + return nil, errors.Errorf("duplicate symbol %s", ins.Symbol) + } + + offsets[ins.Symbol] = i + } + + return offsets, nil +} + +// ReferenceOffsets returns the set of references and their offset in +// the instructions. +func (insns Instructions) ReferenceOffsets() map[string][]int { + offsets := make(map[string][]int) + + for i, ins := range insns { + if ins.Reference == "" { + continue + } + + offsets[ins.Reference] = append(offsets[ins.Reference], i) + } + + return offsets +} + +func (insns Instructions) marshalledOffsets() (map[string]int, error) { + symbols := make(map[string]int) + + marshalledPos := 0 + for _, ins := range insns { + currentPos := marshalledPos + marshalledPos += ins.OpCode.marshalledInstructions() + + if ins.Symbol == "" { + continue + } + + if _, ok := symbols[ins.Symbol]; ok { + return nil, errors.Errorf("duplicate symbol %s", ins.Symbol) + } + + symbols[ins.Symbol] = currentPos + } + + return symbols, nil +} + +// Format implements fmt.Formatter. 
+// +// You can control indentation of symbols by +// specifying a width. Setting a precision controls the indentation of +// instructions. +// The default character is a tab, which can be overriden by specifying +// the ' ' space flag. +func (insns Instructions) Format(f fmt.State, c rune) { + if c != 's' && c != 'v' { + fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c) + return + } + + // Precision is better in this case, because it allows + // specifying 0 padding easily. + padding, ok := f.Precision() + if !ok { + padding = 1 + } + + indent := strings.Repeat("\t", padding) + if f.Flag(' ') { + indent = strings.Repeat(" ", padding) + } + + symPadding, ok := f.Width() + if !ok { + symPadding = padding - 1 + } + if symPadding < 0 { + symPadding = 0 + } + + symIndent := strings.Repeat("\t", symPadding) + if f.Flag(' ') { + symIndent = strings.Repeat(" ", symPadding) + } + + // Figure out how many digits we need to represent the highest + // offset. + highestOffset := 0 + for _, ins := range insns { + highestOffset += ins.OpCode.marshalledInstructions() + } + offsetWidth := int(math.Ceil(math.Log10(float64(highestOffset)))) + + offset := 0 + for _, ins := range insns { + if ins.Symbol != "" { + fmt.Fprintf(f, "%s%s:\n", symIndent, ins.Symbol) + } + fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, offset, ins) + offset += ins.OpCode.marshalledInstructions() + } + + return +} + +// Marshal encodes a BPF program into the kernel format. 
+func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error { + absoluteOffsets, err := insns.marshalledOffsets() + if err != nil { + return err + } + + num := 0 + for i, ins := range insns { + switch { + case ins.OpCode.JumpOp() == Call && ins.Constant == -1: + // Rewrite bpf to bpf call + offset, ok := absoluteOffsets[ins.Reference] + if !ok { + return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference) + } + + ins.Constant = int64(offset - num - 1) + + case ins.OpCode.Class() == JumpClass && ins.Offset == -1: + // Rewrite jump to label + offset, ok := absoluteOffsets[ins.Reference] + if !ok { + return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference) + } + + ins.Offset = int16(offset - num - 1) + } + + n, err := ins.Marshal(w, bo) + if err != nil { + return errors.Wrapf(err, "instruction %d", i) + } + + num += int(n / InstructionSize) + } + return nil +} + +type bpfInstruction struct { + OpCode OpCode + Registers bpfRegisters + Offset int16 + Constant int32 +} + +type bpfRegisters uint8 + +func newBPFRegisters(dst, src Register) bpfRegisters { + return bpfRegisters((src << 4) | (dst & 0xF)) +} + +func (r bpfRegisters) Dst() Register { + return Register(r & 0xF) +} + +func (r bpfRegisters) Src() Register { + return Register(r >> 4) +} + +type unreferencedSymbolError struct { + symbol string +} + +func (use *unreferencedSymbolError) Error() string { + return fmt.Sprintf("unreferenced symbol %s", use.symbol) +} + +// IsUnreferencedSymbol returns true if err was caused by +// an unreferenced symbol. 
+func IsUnreferencedSymbol(err error) bool { + _, ok := err.(*unreferencedSymbolError) + return ok +} diff --git a/vendor/github.com/cilium/ebpf/asm/jump.go b/vendor/github.com/cilium/ebpf/asm/jump.go new file mode 100644 index 0000000..33c9b56 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/jump.go @@ -0,0 +1,109 @@ +package asm + +//go:generate stringer -output jump_string.go -type=JumpOp + +// JumpOp affect control flow. +// +// msb lsb +// +----+-+---+ +// |OP |s|cls| +// +----+-+---+ +type JumpOp uint8 + +const jumpMask OpCode = aluMask + +const ( + // InvalidJumpOp is returned by getters when invoked + // on non branch OpCodes + InvalidJumpOp JumpOp = 0xff + // Ja jumps by offset unconditionally + Ja JumpOp = 0x00 + // JEq jumps by offset if r == imm + JEq JumpOp = 0x10 + // JGT jumps by offset if r > imm + JGT JumpOp = 0x20 + // JGE jumps by offset if r >= imm + JGE JumpOp = 0x30 + // JSet jumps by offset if r & imm + JSet JumpOp = 0x40 + // JNE jumps by offset if r != imm + JNE JumpOp = 0x50 + // JSGT jumps by offset if signed r > signed imm + JSGT JumpOp = 0x60 + // JSGE jumps by offset if signed r >= signed imm + JSGE JumpOp = 0x70 + // Call builtin or user defined function from imm + Call JumpOp = 0x80 + // Exit ends execution, with value in r0 + Exit JumpOp = 0x90 + // JLT jumps by offset if r < imm + JLT JumpOp = 0xa0 + // JLE jumps by offset if r <= imm + JLE JumpOp = 0xb0 + // JSLT jumps by offset if signed r < signed imm + JSLT JumpOp = 0xc0 + // JSLE jumps by offset if signed r <= signed imm + JSLE JumpOp = 0xd0 +) + +// Return emits an exit instruction. +// +// Requires a return value in R0. +func Return() Instruction { + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(Exit), + } +} + +// Op returns the OpCode for a given jump source. +func (op JumpOp) Op(source Source) OpCode { + return OpCode(JumpClass).SetJumpOp(op).SetSource(source) +} + +// Imm compares dst to value, and adjusts PC by offset if the condition is fulfilled. 
+func (op JumpOp) Imm(dst Register, value int32, label string) Instruction { + if op == Exit || op == Call || op == Ja { + return Instruction{OpCode: InvalidOpCode} + } + + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(ImmSource), + Dst: dst, + Offset: -1, + Constant: int64(value), + Reference: label, + } +} + +// Reg compares dst to src, and adjusts PC by offset if the condition is fulfilled. +func (op JumpOp) Reg(dst, src Register, label string) Instruction { + if op == Exit || op == Call || op == Ja { + return Instruction{OpCode: InvalidOpCode} + } + + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(RegSource), + Dst: dst, + Src: src, + Offset: -1, + Reference: label, + } +} + +// Label adjusts PC to the address of the label. +func (op JumpOp) Label(label string) Instruction { + if op == Call { + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(Call), + Src: R1, + Constant: -1, + Reference: label, + } + } + + return Instruction{ + OpCode: OpCode(JumpClass).SetJumpOp(op), + Offset: -1, + Reference: label, + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/jump_string.go b/vendor/github.com/cilium/ebpf/asm/jump_string.go new file mode 100644 index 0000000..85a4aaf --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/jump_string.go @@ -0,0 +1,53 @@ +// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[InvalidJumpOp-255] + _ = x[Ja-0] + _ = x[JEq-16] + _ = x[JGT-32] + _ = x[JGE-48] + _ = x[JSet-64] + _ = x[JNE-80] + _ = x[JSGT-96] + _ = x[JSGE-112] + _ = x[Call-128] + _ = x[Exit-144] + _ = x[JLT-160] + _ = x[JLE-176] + _ = x[JSLT-192] + _ = x[JSLE-208] +} + +const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp" + +var _JumpOp_map = map[JumpOp]string{ + 0: _JumpOp_name[0:2], + 16: _JumpOp_name[2:5], + 32: _JumpOp_name[5:8], + 48: _JumpOp_name[8:11], + 64: _JumpOp_name[11:15], + 80: _JumpOp_name[15:18], + 96: _JumpOp_name[18:22], + 112: _JumpOp_name[22:26], + 128: _JumpOp_name[26:30], + 144: _JumpOp_name[30:34], + 160: _JumpOp_name[34:37], + 176: _JumpOp_name[37:40], + 192: _JumpOp_name[40:44], + 208: _JumpOp_name[44:48], + 255: _JumpOp_name[48:61], +} + +func (i JumpOp) String() string { + if str, ok := _JumpOp_map[i]; ok { + return str + } + return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")" +} diff --git a/vendor/github.com/cilium/ebpf/asm/load_store.go b/vendor/github.com/cilium/ebpf/asm/load_store.go new file mode 100644 index 0000000..ab0e92f --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/load_store.go @@ -0,0 +1,189 @@ +package asm + +//go:generate stringer -output load_store_string.go -type=Mode,Size + +// Mode for load and store operations +// +// msb lsb +// +---+--+---+ +// |MDE|sz|cls| +// +---+--+---+ +type Mode uint8 + +const modeMask OpCode = 0xe0 + +const ( + // InvalidMode is returned by getters when invoked + // on non load / store OpCodes + InvalidMode Mode = 0xff + // ImmMode - immediate value + ImmMode Mode = 0x00 + // AbsMode - immediate value + offset + AbsMode Mode = 0x20 + // IndMode - indirect (imm+src) + IndMode Mode = 0x40 + // MemMode - load from memory + MemMode Mode = 0x60 + // XAddMode - add atomically across processors. 
+ XAddMode Mode = 0xc0 +) + +// Size of load and store operations +// +// msb lsb +// +---+--+---+ +// |mde|SZ|cls| +// +---+--+---+ +type Size uint8 + +const sizeMask OpCode = 0x18 + +const ( + // InvalidSize is returned by getters when invoked + // on non load / store OpCodes + InvalidSize Size = 0xff + // DWord - double word; 64 bits + DWord Size = 0x18 + // Word - word; 32 bits + Word Size = 0x00 + // Half - half-word; 16 bits + Half Size = 0x08 + // Byte - byte; 8 bits + Byte Size = 0x10 +) + +// Sizeof returns the size in bytes. +func (s Size) Sizeof() int { + switch s { + case DWord: + return 8 + case Word: + return 4 + case Half: + return 2 + case Byte: + return 1 + default: + return -1 + } +} + +// LoadMemOp returns the OpCode to load a value of given size from memory. +func LoadMemOp(size Size) OpCode { + return OpCode(LdXClass).SetMode(MemMode).SetSize(size) +} + +// LoadMem emits `dst = *(size *)(src + offset)`. +func LoadMem(dst, src Register, offset int16, size Size) Instruction { + return Instruction{ + OpCode: LoadMemOp(size), + Dst: dst, + Src: src, + Offset: offset, + } +} + +// LoadImmOp returns the OpCode to load an immediate of given size. +// +// As of kernel 4.20, only DWord size is accepted. +func LoadImmOp(size Size) OpCode { + return OpCode(LdClass).SetMode(ImmMode).SetSize(size) +} + +// LoadImm emits `dst = (size)value`. +// +// As of kernel 4.20, only DWord size is accepted. +func LoadImm(dst Register, value int64, size Size) Instruction { + return Instruction{ + OpCode: LoadImmOp(size), + Dst: dst, + Constant: value, + } +} + +// LoadMapPtr stores a pointer to a map in dst. +func LoadMapPtr(dst Register, fd int) Instruction { + if fd < 0 { + return Instruction{OpCode: InvalidOpCode} + } + + return Instruction{ + OpCode: LoadImmOp(DWord), + Dst: dst, + Src: R1, + Constant: int64(fd), + } +} + +// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff. 
+func LoadIndOp(size Size) OpCode { + return OpCode(LdClass).SetMode(IndMode).SetSize(size) +} + +// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`. +func LoadInd(dst, src Register, offset int32, size Size) Instruction { + return Instruction{ + OpCode: LoadIndOp(size), + Dst: dst, + Src: src, + Constant: int64(offset), + } +} + +// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff. +func LoadAbsOp(size Size) OpCode { + return OpCode(LdClass).SetMode(AbsMode).SetSize(size) +} + +// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`. +func LoadAbs(offset int32, size Size) Instruction { + return Instruction{ + OpCode: LoadAbsOp(size), + Dst: R0, + Constant: int64(offset), + } +} + +// StoreMemOp returns the OpCode for storing a register of given size in memory. +func StoreMemOp(size Size) OpCode { + return OpCode(StXClass).SetMode(MemMode).SetSize(size) +} + +// StoreMem emits `*(size *)(dst + offset) = src` +func StoreMem(dst Register, offset int16, src Register, size Size) Instruction { + return Instruction{ + OpCode: StoreMemOp(size), + Dst: dst, + Src: src, + Offset: offset, + } +} + +// StoreImmOp returns the OpCode for storing an immediate of given size in memory. +func StoreImmOp(size Size) OpCode { + return OpCode(StClass).SetMode(MemMode).SetSize(size) +} + +// StoreImm emits `*(size *)(dst + offset) = value`. +func StoreImm(dst Register, offset int16, value int64, size Size) Instruction { + return Instruction{ + OpCode: StoreImmOp(size), + Dst: dst, + Offset: offset, + Constant: value, + } +} + +// StoreXAddOp returns the OpCode to atomically add a register to a value in memory. +func StoreXAddOp(size Size) OpCode { + return OpCode(StXClass).SetMode(XAddMode).SetSize(size) +} + +// StoreXAdd atomically adds src to *dst. 
+func StoreXAdd(dst, src Register, size Size) Instruction { + return Instruction{ + OpCode: StoreXAddOp(size), + Dst: dst, + Src: src, + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/load_store_string.go b/vendor/github.com/cilium/ebpf/asm/load_store_string.go new file mode 100644 index 0000000..76d29a0 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/load_store_string.go @@ -0,0 +1,80 @@ +// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[InvalidMode-255] + _ = x[ImmMode-0] + _ = x[AbsMode-32] + _ = x[IndMode-64] + _ = x[MemMode-96] + _ = x[XAddMode-192] +} + +const ( + _Mode_name_0 = "ImmMode" + _Mode_name_1 = "AbsMode" + _Mode_name_2 = "IndMode" + _Mode_name_3 = "MemMode" + _Mode_name_4 = "XAddMode" + _Mode_name_5 = "InvalidMode" +) + +func (i Mode) String() string { + switch { + case i == 0: + return _Mode_name_0 + case i == 32: + return _Mode_name_1 + case i == 64: + return _Mode_name_2 + case i == 96: + return _Mode_name_3 + case i == 192: + return _Mode_name_4 + case i == 255: + return _Mode_name_5 + default: + return "Mode(" + strconv.FormatInt(int64(i), 10) + ")" + } +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[InvalidSize-255] + _ = x[DWord-24] + _ = x[Word-0] + _ = x[Half-8] + _ = x[Byte-16] +} + +const ( + _Size_name_0 = "Word" + _Size_name_1 = "Half" + _Size_name_2 = "Byte" + _Size_name_3 = "DWord" + _Size_name_4 = "InvalidSize" +) + +func (i Size) String() string { + switch { + case i == 0: + return _Size_name_0 + case i == 8: + return _Size_name_1 + case i == 16: + return _Size_name_2 + case i == 24: + return _Size_name_3 + case i == 255: + return _Size_name_4 + default: + return "Size(" + strconv.FormatInt(int64(i), 10) + ")" + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/opcode.go b/vendor/github.com/cilium/ebpf/asm/opcode.go new file mode 100644 index 0000000..d796de3 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/opcode.go @@ -0,0 +1,237 @@ +package asm + +import ( + "fmt" + "strings" +) + +//go:generate stringer -output opcode_string.go -type=Class + +type encoding int + +const ( + unknownEncoding encoding = iota + loadOrStore + jumpOrALU +) + +// Class of operations +// +// msb lsb +// +---+--+---+ +// | ?? |CLS| +// +---+--+---+ +type Class uint8 + +const classMask OpCode = 0x07 + +const ( + // LdClass load memory + LdClass Class = 0x00 + // LdXClass load memory from constant + LdXClass Class = 0x01 + // StClass load register from memory + StClass Class = 0x02 + // StXClass load register from constant + StXClass Class = 0x03 + // ALUClass arithmetic operators + ALUClass Class = 0x04 + // JumpClass jump operators + JumpClass Class = 0x05 + // ALU64Class arithmetic in 64 bit mode + ALU64Class Class = 0x07 +) + +func (cls Class) encoding() encoding { + switch cls { + case LdClass, LdXClass, StClass, StXClass: + return loadOrStore + case ALU64Class, ALUClass, JumpClass: + return jumpOrALU + default: + return unknownEncoding + } +} + +// OpCode is a packed eBPF opcode. +// +// Its encoding is defined by a Class value: +// +// msb lsb +// +----+-+---+ +// | ???? 
|CLS| +// +----+-+---+ +type OpCode uint8 + +// InvalidOpCode is returned by setters on OpCode +const InvalidOpCode OpCode = 0xff + +// marshalledInstructions returns the number of BPF instructions required +// to encode this opcode. +func (op OpCode) marshalledInstructions() int { + if op == LoadImmOp(DWord) { + return 2 + } + return 1 +} + +func (op OpCode) isDWordLoad() bool { + return op == LoadImmOp(DWord) +} + +// Class returns the class of operation. +func (op OpCode) Class() Class { + return Class(op & classMask) +} + +// Mode returns the mode for load and store operations. +func (op OpCode) Mode() Mode { + if op.Class().encoding() != loadOrStore { + return InvalidMode + } + return Mode(op & modeMask) +} + +// Size returns the size for load and store operations. +func (op OpCode) Size() Size { + if op.Class().encoding() != loadOrStore { + return InvalidSize + } + return Size(op & sizeMask) +} + +// Source returns the source for branch and ALU operations. +func (op OpCode) Source() Source { + if op.Class().encoding() != jumpOrALU || op.ALUOp() == Swap { + return InvalidSource + } + return Source(op & sourceMask) +} + +// ALUOp returns the ALUOp. +func (op OpCode) ALUOp() ALUOp { + if op.Class().encoding() != jumpOrALU { + return InvalidALUOp + } + return ALUOp(op & aluMask) +} + +// Endianness returns the Endianness for a byte swap instruction. +func (op OpCode) Endianness() Endianness { + if op.ALUOp() != Swap { + return InvalidEndian + } + return Endianness(op & endianMask) +} + +// JumpOp returns the JumpOp. +func (op OpCode) JumpOp() JumpOp { + if op.Class().encoding() != jumpOrALU { + return InvalidJumpOp + } + return JumpOp(op & jumpMask) +} + +// SetMode sets the mode on load and store operations. +// +// Returns InvalidOpCode if op is of the wrong class. 
+func (op OpCode) SetMode(mode Mode) OpCode { + if op.Class().encoding() != loadOrStore || !valid(OpCode(mode), modeMask) { + return InvalidOpCode + } + return (op & ^modeMask) | OpCode(mode) +} + +// SetSize sets the size on load and store operations. +// +// Returns InvalidOpCode if op is of the wrong class. +func (op OpCode) SetSize(size Size) OpCode { + if op.Class().encoding() != loadOrStore || !valid(OpCode(size), sizeMask) { + return InvalidOpCode + } + return (op & ^sizeMask) | OpCode(size) +} + +// SetSource sets the source on jump and ALU operations. +// +// Returns InvalidOpCode if op is of the wrong class. +func (op OpCode) SetSource(source Source) OpCode { + if op.Class().encoding() != jumpOrALU || !valid(OpCode(source), sourceMask) { + return InvalidOpCode + } + return (op & ^sourceMask) | OpCode(source) +} + +// SetALUOp sets the ALUOp on ALU operations. +// +// Returns InvalidOpCode if op is of the wrong class. +func (op OpCode) SetALUOp(alu ALUOp) OpCode { + class := op.Class() + if (class != ALUClass && class != ALU64Class) || !valid(OpCode(alu), aluMask) { + return InvalidOpCode + } + return (op & ^aluMask) | OpCode(alu) +} + +// SetJumpOp sets the JumpOp on jump operations. +// +// Returns InvalidOpCode if op is of the wrong class. 
+func (op OpCode) SetJumpOp(jump JumpOp) OpCode { + if op.Class() != JumpClass || !valid(OpCode(jump), jumpMask) { + return InvalidOpCode + } + return (op & ^jumpMask) | OpCode(jump) +} + +func (op OpCode) String() string { + var f strings.Builder + + switch class := op.Class(); class { + case LdClass, LdXClass, StClass, StXClass: + f.WriteString(strings.TrimSuffix(class.String(), "Class")) + + mode := op.Mode() + f.WriteString(strings.TrimSuffix(mode.String(), "Mode")) + + switch op.Size() { + case DWord: + f.WriteString("DW") + case Word: + f.WriteString("W") + case Half: + f.WriteString("H") + case Byte: + f.WriteString("B") + } + + case ALU64Class, ALUClass: + f.WriteString(op.ALUOp().String()) + + if op.ALUOp() == Swap { + // Width for Endian is controlled by Constant + f.WriteString(op.Endianness().String()) + } else { + if class == ALUClass { + f.WriteString("32") + } + + f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) + } + + case JumpClass: + f.WriteString(op.JumpOp().String()) + if jop := op.JumpOp(); jop != Exit && jop != Call { + f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) + } + + default: + fmt.Fprintf(&f, "%#x", op) + } + + return f.String() +} + +// valid returns true if all bits in value are covered by mask. +func valid(value, mask OpCode) bool { + return value & ^mask == 0 +} diff --git a/vendor/github.com/cilium/ebpf/asm/opcode_string.go b/vendor/github.com/cilium/ebpf/asm/opcode_string.go new file mode 100644 index 0000000..079ce1d --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/opcode_string.go @@ -0,0 +1,38 @@ +// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT. + +package asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[LdClass-0] + _ = x[LdXClass-1] + _ = x[StClass-2] + _ = x[StXClass-3] + _ = x[ALUClass-4] + _ = x[JumpClass-5] + _ = x[ALU64Class-7] +} + +const ( + _Class_name_0 = "LdClassLdXClassStClassStXClassALUClassJumpClass" + _Class_name_1 = "ALU64Class" +) + +var ( + _Class_index_0 = [...]uint8{0, 7, 15, 22, 30, 38, 47} +) + +func (i Class) String() string { + switch { + case 0 <= i && i <= 5: + return _Class_name_0[_Class_index_0[i]:_Class_index_0[i+1]] + case i == 7: + return _Class_name_1 + default: + return "Class(" + strconv.FormatInt(int64(i), 10) + ")" + } +} diff --git a/vendor/github.com/cilium/ebpf/asm/register.go b/vendor/github.com/cilium/ebpf/asm/register.go new file mode 100644 index 0000000..4f284fb --- /dev/null +++ b/vendor/github.com/cilium/ebpf/asm/register.go @@ -0,0 +1,42 @@ +package asm + +import ( + "fmt" +) + +// Register is the source or destination of most operations. +type Register uint8 + +// R0 contains return values. +const R0 Register = 0 + +// Registers for function arguments. +const ( + R1 Register = R0 + 1 + iota + R2 + R3 + R4 + R5 +) + +// Callee saved registers preserved by function calls. +const ( + R6 Register = R5 + 1 + iota + R7 + R8 + R9 +) + +// Read-only frame pointer to access stack. +const ( + R10 Register = R9 + 1 + RFP = R10 +) + +func (r Register) String() string { + v := uint8(r) + if v == 10 { + return "rfp" + } + return fmt.Sprintf("r%d", v) +} diff --git a/vendor/github.com/cilium/ebpf/collection.go b/vendor/github.com/cilium/ebpf/collection.go new file mode 100644 index 0000000..5ad1a5e --- /dev/null +++ b/vendor/github.com/cilium/ebpf/collection.go @@ -0,0 +1,148 @@ +package ebpf + +import ( + "github.com/cilium/ebpf/asm" + "github.com/pkg/errors" +) + +// CollectionOptions control loading a collection into the kernel. +type CollectionOptions struct { + Programs ProgramOptions +} + +// CollectionSpec describes a collection. 
+type CollectionSpec struct { + Maps map[string]*MapSpec + Programs map[string]*ProgramSpec +} + +// Copy returns a recursive copy of the spec. +func (cs *CollectionSpec) Copy() *CollectionSpec { + if cs == nil { + return nil + } + + cpy := CollectionSpec{ + Maps: make(map[string]*MapSpec, len(cs.Maps)), + Programs: make(map[string]*ProgramSpec, len(cs.Programs)), + } + + for name, spec := range cs.Maps { + cpy.Maps[name] = spec.Copy() + } + + for name, spec := range cs.Programs { + cpy.Programs[name] = spec.Copy() + } + + return &cpy +} + +// Collection is a collection of Programs and Maps associated +// with their symbols +type Collection struct { + Programs map[string]*Program + Maps map[string]*Map +} + +// NewCollection creates a Collection from a specification. +// +// Only maps referenced by at least one of the programs are initialized. +func NewCollection(spec *CollectionSpec) (*Collection, error) { + return NewCollectionWithOptions(spec, CollectionOptions{}) +} + +// NewCollectionWithOptions creates a Collection from a specification. +// +// Only maps referenced by at least one of the programs are initialized. +func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) { + maps := make(map[string]*Map) + for mapName, mapSpec := range spec.Maps { + m, err := NewMap(mapSpec) + if err != nil { + return nil, errors.Wrapf(err, "map %s", mapName) + } + maps[mapName] = m + } + + progs := make(map[string]*Program) + for progName, origProgSpec := range spec.Programs { + progSpec := origProgSpec.Copy() + + // Rewrite any reference to a valid map. 
+ for i := range progSpec.Instructions { + var ( + ins = &progSpec.Instructions[i] + m = maps[ins.Reference] + ) + + if ins.Reference == "" || m == nil { + continue + } + + if ins.Src == asm.R1 { + // Don't overwrite maps already rewritten, users can + // rewrite programs in the spec themselves + continue + } + + if err := ins.RewriteMapPtr(m.FD()); err != nil { + return nil, errors.Wrapf(err, "progam %s: map %s", progName, ins.Reference) + } + } + + prog, err := NewProgramWithOptions(progSpec, opts.Programs) + if err != nil { + return nil, errors.Wrapf(err, "program %s", progName) + } + progs[progName] = prog + } + + return &Collection{ + progs, + maps, + }, nil +} + +// LoadCollection parses an object file and converts it to a collection. +func LoadCollection(file string) (*Collection, error) { + spec, err := LoadCollectionSpec(file) + if err != nil { + return nil, err + } + return NewCollection(spec) +} + +// Close frees all maps and programs associated with the collection. +// +// The collection mustn't be used afterwards. +func (coll *Collection) Close() { + for _, prog := range coll.Programs { + prog.Close() + } + for _, m := range coll.Maps { + m.Close() + } +} + +// DetachMap removes the named map from the Collection. +// +// This means that a later call to Close() will not affect this map. +// +// Returns nil if no map of that name exists. +func (coll *Collection) DetachMap(name string) *Map { + m := coll.Maps[name] + delete(coll.Maps, name) + return m +} + +// DetachProgram removes the named program from the Collection. +// +// This means that a later call to Close() will not affect this program. +// +// Returns nil if no program of that name exists. 
+func (coll *Collection) DetachProgram(name string) *Program { + p := coll.Programs[name] + delete(coll.Programs, name) + return p +} diff --git a/vendor/github.com/cilium/ebpf/doc.go b/vendor/github.com/cilium/ebpf/doc.go new file mode 100644 index 0000000..d96e6b1 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/doc.go @@ -0,0 +1,17 @@ +// Package ebpf is a toolkit for working with eBPF programs. +// +// eBPF programs are small snippets of code which are executed directly +// in a VM in the Linux kernel, which makes them very fast and flexible. +// Many Linux subsystems now accept eBPF programs. This makes it possible +// to implement highly application specific logic inside the kernel, +// without having to modify the actual kernel itself. +// +// This package is designed for long-running processes which +// want to use eBPF to implement part of their application logic. It has no +// run-time dependencies outside of the library and the Linux kernel itself. +// eBPF code should be compiled ahead of time using clang, and shipped with +// your application as any other resource. +// +// This package doesn't include code required to attach eBPF to Linux +// subsystems, since this varies per subsystem. +package ebpf diff --git a/vendor/github.com/cilium/ebpf/elf_reader.go b/vendor/github.com/cilium/ebpf/elf_reader.go new file mode 100644 index 0000000..3bdc084 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/elf_reader.go @@ -0,0 +1,392 @@ +package ebpf + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "fmt" + "io" + "os" + "strings" + + "github.com/cilium/ebpf/asm" + + "github.com/pkg/errors" +) + +type elfCode struct { + *elf.File + symbols []elf.Symbol + symbolsPerSection map[elf.SectionIndex]map[uint64]string +} + +// LoadCollectionSpec parses an ELF file into a CollectionSpec. 
+func LoadCollectionSpec(file string) (*CollectionSpec, error) { + f, err := os.Open(file) + if err != nil { + return nil, err + } + defer f.Close() + + spec, err := LoadCollectionSpecFromReader(f) + return spec, errors.Wrapf(err, "file %s", file) +} + +// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec. +func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) { + f, err := elf.NewFile(code) + if err != nil { + return nil, err + } + defer f.Close() + + symbols, err := f.Symbols() + if err != nil { + return nil, errors.Wrap(err, "load symbols") + } + + ec := &elfCode{f, symbols, symbolsPerSection(symbols)} + + var licenseSection, versionSection *elf.Section + progSections := make(map[elf.SectionIndex]*elf.Section) + relSections := make(map[elf.SectionIndex]*elf.Section) + mapSections := make(map[elf.SectionIndex]*elf.Section) + for i, sec := range ec.Sections { + switch { + case strings.HasPrefix(sec.Name, "license"): + licenseSection = sec + case strings.HasPrefix(sec.Name, "version"): + versionSection = sec + case strings.HasPrefix(sec.Name, "maps"): + mapSections[elf.SectionIndex(i)] = sec + case sec.Type == elf.SHT_REL: + if int(sec.Info) >= len(ec.Sections) { + return nil, errors.Errorf("found relocation section %v for missing section %v", i, sec.Info) + } + + // Store relocations under the section index of the target + idx := elf.SectionIndex(sec.Info) + if relSections[idx] != nil { + return nil, errors.Errorf("section %d has multiple relocation sections", idx) + } + relSections[idx] = sec + case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0: + progSections[elf.SectionIndex(i)] = sec + } + } + + license, err := loadLicense(licenseSection) + if err != nil { + return nil, errors.Wrap(err, "load license") + } + + version, err := loadVersion(versionSection, ec.ByteOrder) + if err != nil { + return nil, errors.Wrap(err, "load version") + } + + maps, err := ec.loadMaps(mapSections) + if 
err != nil { + return nil, errors.Wrap(err, "load maps") + } + + progs, libs, err := ec.loadPrograms(progSections, relSections, license, version) + if err != nil { + return nil, errors.Wrap(err, "load programs") + } + + if len(libs) > 0 { + for name, prog := range progs { + prog.Instructions, err = link(prog.Instructions, libs...) + if err != nil { + return nil, errors.Wrapf(err, "program %s", name) + } + } + } + + return &CollectionSpec{maps, progs}, nil +} + +func loadLicense(sec *elf.Section) (string, error) { + if sec == nil { + return "", errors.Errorf("missing license section") + } + data, err := sec.Data() + if err != nil { + return "", errors.Wrapf(err, "section %s", sec.Name) + } + return string(bytes.TrimRight(data, "\000")), nil +} + +func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) { + if sec == nil { + return 0, nil + } + + var version uint32 + err := binary.Read(sec.Open(), bo, &version) + return version, errors.Wrapf(err, "section %s", sec.Name) +} + +func (ec *elfCode) loadPrograms(progSections, relSections map[elf.SectionIndex]*elf.Section, license string, version uint32) (map[string]*ProgramSpec, []asm.Instructions, error) { + var ( + progs = make(map[string]*ProgramSpec) + libs []asm.Instructions + ) + for idx, prog := range progSections { + syms := ec.symbolsPerSection[idx] + if len(syms) == 0 { + return nil, nil, errors.Errorf("section %v: missing symbols", prog.Name) + } + + funcSym := syms[0] + if funcSym == "" { + return nil, nil, errors.Errorf("section %v: no label at start", prog.Name) + } + + rels, err := ec.loadRelocations(relSections[idx]) + if err != nil { + return nil, nil, errors.Wrapf(err, "program %s: can't load relocations", funcSym) + } + + insns, err := ec.loadInstructions(prog, syms, rels) + if err != nil { + return nil, nil, errors.Wrapf(err, "program %s: can't unmarshal instructions", funcSym) + } + + if progType, attachType := getProgType(prog.Name); progType == UnspecifiedProgram { + // There is no 
single name we can use for "library" sections, + // since they may contain multiple functions. We'll decode the + // labels they contain later on, and then link sections that way. + libs = append(libs, insns) + } else { + progs[funcSym] = &ProgramSpec{ + Name: funcSym, + Type: progType, + AttachType: attachType, + License: license, + KernelVersion: version, + Instructions: insns, + } + } + } + return progs, libs, nil +} + +func (ec *elfCode) loadInstructions(section *elf.Section, symbols, relocations map[uint64]string) (asm.Instructions, error) { + var ( + r = section.Open() + insns asm.Instructions + ins asm.Instruction + offset uint64 + ) + for { + n, err := ins.Unmarshal(r, ec.ByteOrder) + if err == io.EOF { + return insns, nil + } + if err != nil { + return nil, errors.Wrapf(err, "offset %d", offset) + } + + ins.Symbol = symbols[offset] + ins.Reference = relocations[offset] + + insns = append(insns, ins) + offset += n + } +} + +func (ec *elfCode) loadMaps(mapSections map[elf.SectionIndex]*elf.Section) (map[string]*MapSpec, error) { + var ( + maps = make(map[string]*MapSpec) + b = make([]byte, 1) + ) + for idx, sec := range mapSections { + syms := ec.symbolsPerSection[idx] + if len(syms) == 0 { + return nil, errors.Errorf("section %v: no symbols", sec.Name) + } + + if sec.Size%uint64(len(syms)) != 0 { + return nil, errors.Errorf("section %v: map descriptors are not of equal size", sec.Name) + } + + var ( + r = sec.Open() + size = sec.Size / uint64(len(syms)) + ) + for i, offset := 0, uint64(0); i < len(syms); i, offset = i+1, offset+size { + mapSym := syms[offset] + if mapSym == "" { + fmt.Println(syms) + return nil, errors.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset) + } + + if maps[mapSym] != nil { + return nil, errors.Errorf("section %v: map %v already exists", sec.Name, mapSym) + } + + lr := io.LimitReader(r, int64(size)) + + var spec MapSpec + switch { + case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil: + return nil, 
errors.Errorf("map %v: missing type", mapSym) + case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil: + return nil, errors.Errorf("map %v: missing key size", mapSym) + case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil: + return nil, errors.Errorf("map %v: missing value size", mapSym) + case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil: + return nil, errors.Errorf("map %v: missing max entries", mapSym) + case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil: + return nil, errors.Errorf("map %v: missing flags", mapSym) + } + + for { + _, err := lr.Read(b) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + if b[0] != 0 { + return nil, errors.Errorf("map %v: unknown and non-zero fields in definition", mapSym) + } + } + + maps[mapSym] = &spec + } + } + return maps, nil +} + +func getProgType(v string) (ProgramType, AttachType) { + types := map[string]ProgramType{ + // From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c#n3568 + "socket": SocketFilter, + "seccomp": SocketFilter, + "kprobe/": Kprobe, + "kretprobe/": Kprobe, + "tracepoint/": TracePoint, + "xdp": XDP, + "perf_event": PerfEvent, + "sockops": SockOps, + "sk_skb": SkSKB, + "sk_msg": SkMsg, + "lirc_mode2": LircMode2, + "flow_dissector": FlowDissector, + + "cgroup_skb/": CGroupSKB, + "cgroup/dev": CGroupDevice, + "cgroup/skb": CGroupSKB, + "cgroup/sock": CGroupSock, + "cgroup/post_bind": CGroupSock, + "cgroup/bind": CGroupSockAddr, + "cgroup/connect": CGroupSockAddr, + "cgroup/sendmsg": CGroupSockAddr, + "cgroup/recvmsg": CGroupSockAddr, + "cgroup/sysctl": CGroupSysctl, + "cgroup/getsockopt": CGroupSockopt, + "cgroup/setsockopt": CGroupSockopt, + "classifier": SchedCLS, + "action": SchedACT, + } + attachTypes := map[string]AttachType{ + "cgroup_skb/ingress": AttachCGroupInetIngress, + "cgroup_skb/egress": AttachCGroupInetEgress, + "cgroup/sock": AttachCGroupInetSockCreate, + "cgroup/post_bind4": 
// symbolsPerSection indexes symbols first by the section they belong to and
// then by their value, yielding the symbol name.
//
// Only named symbols of type STT_NOTYPE, STT_OBJECT or STT_FUNC are kept.
// STT_NOTYPE is accepted because older versions of LLVM don't tag symbols
// correctly. If several symbols share a section and value, the one that
// appears last in symbols wins.
func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]string {
	bySection := make(map[elf.SectionIndex]map[uint64]string)
	for _, sym := range symbols {
		if sym.Name == "" {
			continue
		}

		switch elf.ST_TYPE(sym.Info) {
		case elf.STT_NOTYPE, elf.STT_OBJECT, elf.STT_FUNC:
			// Symbol types that can label a program or a map.
		default:
			continue
		}

		names, ok := bySection[sym.Section]
		if !ok {
			names = make(map[uint64]string)
			bySection[sym.Section] = names
		}
		names[sym.Value] = sym.Name
	}
	return bySection
}
// parseCPUs reads a CPU range from the sysfs file at path, for example
// /sys/devices/system/cpu/possible or /sys/devices/system/cpu/online, and
// returns the number of CPUs it describes.
//
// The content must be either a single "0" or a range of the form "0-n".
// Logical CPU numbers must start at zero; anything else is rejected with an
// error that names the file.
func parseCPUs(path string) (int, error) {
	file, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer file.Close()

	var low, high int
	n, err := fmt.Fscanf(file, "%d-%d", &low, &high)
	if n < 1 || low != 0 {
		// err is nil when the range parsed but doesn't start at zero, so an
		// error has to be created here rather than wrapping err: wrapping a
		// nil error would report success for malformed input.
		msg := path + " has unknown format"
		if err != nil {
			msg += ": " + err.Error()
		}
		return 0, errors.New(msg)
	}
	if n == 1 {
		// Only a single CPU number was present ("0").
		high = low
	}

	// cpus is 0 indexed
	return high + 1, nil
}
// isBigEndian reports whether the host stores the most significant byte of
// an integer first.
//
// It writes the value 1 into an integer and inspects the byte at the lowest
// address: that byte is zero only when the least significant byte is stored
// last.
func isBigEndian() (ret bool) {
	probe := uint16(0x1)
	lowest := *(*byte)(unsafe.Pointer(&probe))
	ret = lowest == 0
	return ret
}
*Statfs_t) (err error) { + return linux.Statfs(path, buf) +} + +// Close is a wrapper +func Close(fd int) (err error) { + return linux.Close(fd) +} + +// EpollEvent is a wrapper +type EpollEvent = linux.EpollEvent + +// EpollWait is a wrapper +func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { + return linux.EpollWait(epfd, events, msec) +} + +// EpollCtl is a wrapper +func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { + return linux.EpollCtl(epfd, op, fd, event) +} + +// Eventfd is a wrapper +func Eventfd(initval uint, flags int) (fd int, err error) { + return linux.Eventfd(initval, flags) +} + +// Write is a wrapper +func Write(fd int, p []byte) (n int, err error) { + return linux.Write(fd, p) +} + +// EpollCreate1 is a wrapper +func EpollCreate1(flag int) (fd int, err error) { + return linux.EpollCreate1(flag) +} + +// PerfEventMmapPage is a wrapper +type PerfEventMmapPage linux.PerfEventMmapPage + +// SetNonblock is a wrapper +func SetNonblock(fd int, nonblocking bool) (err error) { + return linux.SetNonblock(fd, nonblocking) +} + +// Mmap is a wrapper +func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { + return linux.Mmap(fd, offset, length, prot, flags) +} + +// Munmap is a wrapper +func Munmap(b []byte) (err error) { + return linux.Munmap(b) +} + +// PerfEventAttr is a wrapper +type PerfEventAttr = linux.PerfEventAttr + +// PerfEventOpen is a wrapper +func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags) +} diff --git a/vendor/github.com/cilium/ebpf/internal/unix/types_other.go b/vendor/github.com/cilium/ebpf/internal/unix/types_other.go new file mode 100644 index 0000000..a327f2a --- /dev/null +++ b/vendor/github.com/cilium/ebpf/internal/unix/types_other.go @@ -0,0 +1,183 @@ +// +build !linux + +package unix + +import ( + "fmt" + "runtime" + "syscall" +) + +var 
errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) + +const ( + ENOENT = syscall.ENOENT + EAGAIN = syscall.EAGAIN + ENOSPC = syscall.ENOSPC + EINVAL = syscall.EINVAL + BPF_OBJ_NAME_LEN = 0x10 + BPF_TAG_SIZE = 0x8 + SYS_BPF = 321 + F_DUPFD_CLOEXEC = 0x406 + EPOLLIN = 0x1 + EPOLL_CTL_ADD = 0x1 + EPOLL_CLOEXEC = 0x80000 + O_CLOEXEC = 0x80000 + O_NONBLOCK = 0x800 + PROT_READ = 0x1 + PROT_WRITE = 0x2 + MAP_SHARED = 0x1 + PERF_TYPE_SOFTWARE = 0x1 + PERF_COUNT_SW_BPF_OUTPUT = 0xa + PerfBitWatermark = 0x4000 + PERF_SAMPLE_RAW = 0x400 + PERF_FLAG_FD_CLOEXEC = 0x8 +) + +// Statfs_t is a wrapper +type Statfs_t struct { + Type int64 + Bsize int64 + Blocks uint64 + Bfree uint64 + Bavail uint64 + Files uint64 + Ffree uint64 + Fsid [2]int32 + Namelen int64 + Frsize int64 + Flags int64 + Spare [4]int64 +} + +// Rlimit is a wrapper +type Rlimit struct { + Cur uint64 + Max uint64 +} + +// Setrlimit is a wrapper +func Setrlimit(resource int, rlim *Rlimit) (err error) { + return errNonLinux +} + +// Syscall is a wrapper +func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { + return 0, 0, syscall.Errno(1) +} + +// FcntlInt is a wrapper +func FcntlInt(fd uintptr, cmd, arg int) (int, error) { + return -1, errNonLinux +} + +// Statfs is a wrapper +func Statfs(path string, buf *Statfs_t) error { + return errNonLinux +} + +// Close is a wrapper +func Close(fd int) (err error) { + return errNonLinux +} + +// EpollEvent is a wrapper +type EpollEvent struct { + Events uint32 + Fd int32 + Pad int32 +} + +// EpollWait is a wrapper +func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { + return 0, errNonLinux +} + +// EpollCtl is a wrapper +func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { + return errNonLinux +} + +// Eventfd is a wrapper +func Eventfd(initval uint, flags int) (fd int, err error) { + return 0, errNonLinux +} + +// Write is a wrapper +func Write(fd int, p []byte) (n int, err 
error) { + return 0, errNonLinux +} + +// EpollCreate1 is a wrapper +func EpollCreate1(flag int) (fd int, err error) { + return 0, errNonLinux +} + +// PerfEventMmapPage is a wrapper +type PerfEventMmapPage struct { + Version uint32 + Compat_version uint32 + Lock uint32 + Index uint32 + Offset int64 + Time_enabled uint64 + Time_running uint64 + Capabilities uint64 + Pmc_width uint16 + Time_shift uint16 + Time_mult uint32 + Time_offset uint64 + Time_zero uint64 + Size uint32 + + Data_head uint64 + Data_tail uint64 + Data_offset uint64 + Data_size uint64 + Aux_head uint64 + Aux_tail uint64 + Aux_offset uint64 + Aux_size uint64 +} + +// SetNonblock is a wrapper +func SetNonblock(fd int, nonblocking bool) (err error) { + return errNonLinux +} + +// Mmap is a wrapper +func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { + return []byte{}, errNonLinux +} + +// Munmap is a wrapper +func Munmap(b []byte) (err error) { + return errNonLinux +} + +// PerfEventAttr is a wrapper +type PerfEventAttr struct { + Type uint32 + Size uint32 + Config uint64 + Sample uint64 + Sample_type uint64 + Read_format uint64 + Bits uint64 + Wakeup uint32 + Bp_type uint32 + Ext1 uint64 + Ext2 uint64 + Branch_sample_type uint64 + Sample_regs_user uint64 + Sample_stack_user uint32 + Clockid int32 + Sample_regs_intr uint64 + Aux_watermark uint32 + Sample_max_stack uint16 +} + +// PerfEventOpen is a wrapper +func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return 0, errNonLinux +} diff --git a/vendor/github.com/cilium/ebpf/linker.go b/vendor/github.com/cilium/ebpf/linker.go new file mode 100644 index 0000000..da556c2 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/linker.go @@ -0,0 +1,58 @@ +package ebpf + +import ( + "github.com/cilium/ebpf/asm" +) + +// link resolves bpf-to-bpf calls. 
+// +// Each section may contain multiple functions / labels, and is only linked +// if the program being edited references one of these functions. +// +// Sections must not require linking themselves. +func link(insns asm.Instructions, sections ...asm.Instructions) (asm.Instructions, error) { + for _, section := range sections { + var err error + insns, err = linkSection(insns, section) + if err != nil { + return nil, err + } + } + return insns, nil +} + +func linkSection(insns, section asm.Instructions) (asm.Instructions, error) { + // A map of symbols to the libraries which contain them. + symbols, err := section.SymbolOffsets() + if err != nil { + return nil, err + } + + for _, ins := range insns { + if ins.Reference == "" { + continue + } + + if ins.OpCode.JumpOp() != asm.Call || ins.Src != asm.R1 { + continue + } + + if ins.Constant != -1 { + // This is already a valid call, no need to link again. + continue + } + + if _, ok := symbols[ins.Reference]; !ok { + // Symbol isn't available in this section + continue + } + + // At this point we know that at least one function in the + // library is called from insns. Merge the two sections. + // The rewrite of ins.Constant happens in asm.Instruction.Marshal. + return append(insns, section...), nil + } + + // None of the functions in the section are called. Do nothing. + return insns, nil +} diff --git a/vendor/github.com/cilium/ebpf/map.go b/vendor/github.com/cilium/ebpf/map.go new file mode 100644 index 0000000..028a913 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/map.go @@ -0,0 +1,595 @@ +package ebpf + +import ( + "fmt" + "unsafe" + + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/unix" + + "github.com/pkg/errors" +) + +// MapSpec defines a Map. +type MapSpec struct { + // Name is passed to the kernel as a debug aid. Must only contain + // alpha numeric and '_' characters. 
+ Name string + Type MapType + KeySize uint32 + ValueSize uint32 + MaxEntries uint32 + Flags uint32 + // InnerMap is used as a template for ArrayOfMaps and HashOfMaps + InnerMap *MapSpec +} + +func (ms *MapSpec) String() string { + return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags) +} + +// Copy returns a copy of the spec. +func (ms *MapSpec) Copy() *MapSpec { + if ms == nil { + return nil + } + + cpy := *ms + cpy.InnerMap = ms.InnerMap.Copy() + return &cpy +} + +// Map represents a Map file descriptor. +// +// It is not safe to close a map which is used by other goroutines. +// +// Methods which take interface{} arguments by default encode +// them using binary.Read/Write in the machine's native endianness. +// +// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler +// if you require custom encoding. +type Map struct { + fd *bpfFD + abi MapABI + // Per CPU maps return values larger than the size in the spec + fullValueSize int +} + +// NewMapFromFD creates a map from a raw fd. +// +// You should not use fd after calling this function. +func NewMapFromFD(fd int) (*Map, error) { + if fd < 0 { + return nil, errors.New("invalid fd") + } + bpfFd := newBPFFD(uint32(fd)) + + abi, err := newMapABIFromFd(bpfFd) + if err != nil { + bpfFd.forget() + return nil, err + } + return newMap(bpfFd, abi) +} + +// NewMap creates a new Map. +// +// Creating a map for the first time will perform feature detection +// by creating small, temporary maps. 
+func NewMap(spec *MapSpec) (*Map, error) { + if spec.Type != ArrayOfMaps && spec.Type != HashOfMaps { + return createMap(spec, nil) + } + + if spec.InnerMap == nil { + return nil, errors.Errorf("%s requires InnerMap", spec.Type) + } + + template, err := createMap(spec.InnerMap, nil) + if err != nil { + return nil, err + } + defer template.Close() + + return createMap(spec, template.fd) +} + +func createMap(spec *MapSpec, inner *bpfFD) (*Map, error) { + spec = spec.Copy() + + switch spec.Type { + case ArrayOfMaps: + fallthrough + case HashOfMaps: + if spec.ValueSize != 0 && spec.ValueSize != 4 { + return nil, errors.Errorf("ValueSize must be zero or four for map of map") + } + spec.ValueSize = 4 + + case PerfEventArray: + if spec.KeySize != 0 { + return nil, errors.Errorf("KeySize must be zero for perf event array") + } + if spec.ValueSize != 0 { + return nil, errors.Errorf("ValueSize must be zero for perf event array") + } + if spec.MaxEntries == 0 { + n, err := internal.OnlineCPUs() + if err != nil { + return nil, errors.Wrap(err, "perf event array") + } + spec.MaxEntries = uint32(n) + } + + spec.KeySize = 4 + spec.ValueSize = 4 + } + + attr := bpfMapCreateAttr{ + mapType: spec.Type, + keySize: spec.KeySize, + valueSize: spec.ValueSize, + maxEntries: spec.MaxEntries, + flags: spec.Flags, + } + + if inner != nil { + var err error + attr.innerMapFd, err = inner.value() + if err != nil { + return nil, errors.Wrap(err, "map create") + } + } + + name, err := newBPFObjName(spec.Name) + if err != nil { + return nil, errors.Wrap(err, "map create") + } + + if haveObjName.Result() { + attr.mapName = name + } + + fd, err := bpfMapCreate(&attr) + if err != nil { + return nil, errors.Wrap(err, "map create") + } + + return newMap(fd, newMapABIFromSpec(spec)) +} + +func newMap(fd *bpfFD, abi *MapABI) (*Map, error) { + m := &Map{ + fd, + *abi, + int(abi.ValueSize), + } + + if !abi.Type.hasPerCPUValue() { + return m, nil + } + + possibleCPUs, err := internal.PossibleCPUs() + if 
err != nil { + return nil, err + } + + m.fullValueSize = align(int(abi.ValueSize), 8) * possibleCPUs + return m, nil +} + +func (m *Map) String() string { + return fmt.Sprintf("%s#%d", m.abi.Type, m.fd) +} + +// ABI gets the ABI of the Map +func (m *Map) ABI() MapABI { + return m.abi +} + +// Lookup retrieves a value from a Map. +// +// Calls Close() on valueOut if it is of type **Map or **Program, +// and *valueOut is not nil. +// +// Returns an error if the key doesn't exist, see IsNotExist. +func (m *Map) Lookup(key, valueOut interface{}) error { + valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) + + if err := m.lookup(key, valuePtr); err != nil { + return err + } + + if valueBytes == nil { + return nil + } + + if m.abi.Type.hasPerCPUValue() { + return unmarshalPerCPUValue(valueOut, int(m.abi.ValueSize), valueBytes) + } + + switch value := valueOut.(type) { + case **Map: + m, err := unmarshalMap(valueBytes) + if err != nil { + return err + } + + (*value).Close() + *value = m + return nil + case *Map: + return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil)) + case Map: + return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil)) + + case **Program: + p, err := unmarshalProgram(valueBytes) + if err != nil { + return err + } + + (*value).Close() + *value = p + return nil + case *Program: + return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil)) + case Program: + return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil)) + + default: + return unmarshalBytes(valueOut, valueBytes) + } +} + +// LookupBytes gets a value from Map. +// +// Returns a nil value if a key doesn't exist. 
+func (m *Map) LookupBytes(key interface{}) ([]byte, error) { + valueBytes := make([]byte, m.fullValueSize) + valuePtr := newPtr(unsafe.Pointer(&valueBytes[0])) + + err := m.lookup(key, valuePtr) + if IsNotExist(err) { + return nil, nil + } + + return valueBytes, err +} + +func (m *Map) lookup(key interface{}, valueOut syscallPtr) error { + keyPtr, err := marshalPtr(key, int(m.abi.KeySize)) + if err != nil { + return errors.WithMessage(err, "can't marshal key") + } + + err = bpfMapLookupElem(m.fd, keyPtr, valueOut) + return errors.WithMessage(err, "lookup failed") +} + +// MapUpdateFlags controls the behaviour of the Map.Update call. +// +// The exact semantics depend on the specific MapType. +type MapUpdateFlags uint64 + +const ( + // UpdateAny creates a new element or update an existing one. + UpdateAny MapUpdateFlags = iota + // UpdateNoExist creates a new element. + UpdateNoExist MapUpdateFlags = 1 << (iota - 1) + // UpdateExist updates an existing element. + UpdateExist +) + +// Put replaces or creates a value in map. +// +// It is equivalent to calling Update with UpdateAny. +func (m *Map) Put(key, value interface{}) error { + return m.Update(key, value, UpdateAny) +} + +// Update changes the value of a key. +func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error { + keyPtr, err := marshalPtr(key, int(m.abi.KeySize)) + if err != nil { + return errors.WithMessage(err, "can't marshal key") + } + + var valuePtr syscallPtr + if m.abi.Type.hasPerCPUValue() { + valuePtr, err = marshalPerCPUValue(value, int(m.abi.ValueSize)) + } else { + valuePtr, err = marshalPtr(value, int(m.abi.ValueSize)) + } + if err != nil { + return errors.WithMessage(err, "can't marshal value") + } + + return bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags)) +} + +// Delete removes a value. +// +// Returns an error if the key does not exist, see IsNotExist. 
+func (m *Map) Delete(key interface{}) error { + keyPtr, err := marshalPtr(key, int(m.abi.KeySize)) + if err != nil { + return errors.WithMessage(err, "can't marshal key") + } + + err = bpfMapDeleteElem(m.fd, keyPtr) + return errors.WithMessage(err, "can't delete key") +} + +// NextKey finds the key following an initial key. +// +// See NextKeyBytes for details. +func (m *Map) NextKey(key, nextKeyOut interface{}) error { + nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.abi.KeySize)) + + if err := m.nextKey(key, nextKeyPtr); err != nil { + return err + } + + if nextKeyBytes == nil { + return nil + } + + err := unmarshalBytes(nextKeyOut, nextKeyBytes) + return errors.WithMessage(err, "can't unmarshal next key") +} + +// NextKeyBytes returns the key following an initial key as a byte slice. +// +// Passing nil will return the first key. +// +// Use Iterate if you want to traverse all entries in the map. +func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) { + nextKey := make([]byte, m.abi.KeySize) + nextKeyPtr := newPtr(unsafe.Pointer(&nextKey[0])) + + err := m.nextKey(key, nextKeyPtr) + if IsNotExist(err) { + return nil, nil + } + + return nextKey, err +} + +func (m *Map) nextKey(key interface{}, nextKeyOut syscallPtr) error { + var ( + keyPtr syscallPtr + err error + ) + + if key != nil { + keyPtr, err = marshalPtr(key, int(m.abi.KeySize)) + if err != nil { + return errors.WithMessage(err, "can't marshal key") + } + } + + err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut) + return errors.WithMessage(err, "can't get next key") +} + +// Iterate traverses a map. +// +// It's safe to create multiple iterators at the same time. +// +// It's not possible to guarantee that all keys in a map will be +// returned if there are concurrent modifications to the map. 
+func (m *Map) Iterate() *MapIterator { + return newMapIterator(m) +} + +// Close removes a Map +func (m *Map) Close() error { + if m == nil { + // This makes it easier to clean up when iterating maps + // of maps / programs. + return nil + } + + return m.fd.close() +} + +// FD gets the file descriptor of the Map. +// +// Calling this function is invalid after Close has been called. +func (m *Map) FD() int { + fd, err := m.fd.value() + if err != nil { + // Best effort: -1 is the number most likely to be an + // invalid file descriptor. + return -1 + } + + return int(fd) +} + +// Clone creates a duplicate of the Map. +// +// Closing the duplicate does not affect the original, and vice versa. +// Changes made to the map are reflected by both instances however. +// +// Cloning a nil Map returns nil. +func (m *Map) Clone() (*Map, error) { + if m == nil { + return nil, nil + } + + dup, err := m.fd.dup() + if err != nil { + return nil, errors.Wrap(err, "can't clone map") + } + + return newMap(dup, &m.abi) +} + +// Pin persists the map past the lifetime of the process that created it. +// +// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional +func (m *Map) Pin(fileName string) error { + return bpfPinObject(fileName, m.fd) +} + +// LoadPinnedMap load a Map from a BPF file. +// +// Requires at least Linux 4.13, and is not compatible with +// nested maps. Use LoadPinnedMapExplicit in these situations. +func LoadPinnedMap(fileName string) (*Map, error) { + fd, err := bpfGetObject(fileName) + if err != nil { + return nil, err + } + abi, err := newMapABIFromFd(fd) + if err != nil { + _ = fd.close() + return nil, err + } + return newMap(fd, abi) +} + +// LoadPinnedMapExplicit loads a map with explicit parameters. 
+func LoadPinnedMapExplicit(fileName string, abi *MapABI) (*Map, error) { + fd, err := bpfGetObject(fileName) + if err != nil { + return nil, err + } + return newMap(fd, abi) +} + +func unmarshalMap(buf []byte) (*Map, error) { + if len(buf) != 4 { + return nil, errors.New("map id requires 4 byte value") + } + + // Looking up an entry in a nested map or prog array returns an id, + // not an fd. + id := internal.NativeEndian.Uint32(buf) + fd, err := bpfGetMapFDByID(id) + if err != nil { + return nil, err + } + + abi, err := newMapABIFromFd(fd) + if err != nil { + _ = fd.close() + return nil, err + } + + return newMap(fd, abi) +} + +// MarshalBinary implements BinaryMarshaler. +func (m *Map) MarshalBinary() ([]byte, error) { + fd, err := m.fd.value() + if err != nil { + return nil, err + } + + buf := make([]byte, 4) + internal.NativeEndian.PutUint32(buf, fd) + return buf, nil +} + +// MapIterator iterates a Map. +// +// See Map.Iterate. +type MapIterator struct { + target *Map + prevKey interface{} + prevBytes []byte + count, maxEntries uint32 + done bool + err error +} + +func newMapIterator(target *Map) *MapIterator { + return &MapIterator{ + target: target, + maxEntries: target.abi.MaxEntries, + prevBytes: make([]byte, int(target.abi.KeySize)), + } +} + +var errIterationAborted = errors.New("iteration aborted") + +// Next decodes the next key and value. +// +// Iterating a hash map from which keys are being deleted is not +// safe. You may see the same key multiple times. Iteration may +// also abort with an error, see IsIterationAborted. +// +// Returns false if there are no more entries. You must check +// the result of Err afterwards. +// +// See Map.Get for further caveats around valueOut. 
+func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { + if mi.err != nil || mi.done { + return false + } + + for ; mi.count < mi.maxEntries; mi.count++ { + var nextBytes []byte + nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey) + if mi.err != nil { + return false + } + + if nextBytes == nil { + mi.done = true + return false + } + + // The user can get access to nextBytes since unmarshalBytes + // does not copy when unmarshaling into a []byte. + // Make a copy to prevent accidental corruption of + // iterator state. + copy(mi.prevBytes, nextBytes) + mi.prevKey = mi.prevBytes + + mi.err = mi.target.Lookup(nextBytes, valueOut) + if IsNotExist(mi.err) { + // Even though the key should be valid, we couldn't look up + // its value. If we're iterating a hash map this is probably + // because a concurrent delete removed the value before we + // could get it. This means that the next call to NextKeyBytes + // is very likely to restart iteration. + // If we're iterating one of the fd maps like + // ProgramArray it means that a given slot doesn't have + // a valid fd associated. It's OK to continue to the next slot. + continue + } + if mi.err != nil { + return false + } + + mi.err = unmarshalBytes(keyOut, nextBytes) + return mi.err == nil + } + + mi.err = errIterationAborted + return false +} + +// Err returns any encountered error. +// +// The method must be called after Next returns nil. +func (mi *MapIterator) Err() error { + return mi.err +} + +// IsNotExist returns true if the error indicates that a +// key doesn't exist. +func IsNotExist(err error) bool { + return errors.Cause(err) == unix.ENOENT +} + +// IsIterationAborted returns true if the iteration was aborted. +// +// This occurs when keys are deleted from a hash map during iteration. 
+func IsIterationAborted(err error) bool { + return errors.Cause(err) == errIterationAborted +} diff --git a/vendor/github.com/cilium/ebpf/marshalers.go b/vendor/github.com/cilium/ebpf/marshalers.go new file mode 100644 index 0000000..44ba273 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/marshalers.go @@ -0,0 +1,192 @@ +package ebpf + +import ( + "bytes" + "encoding" + "encoding/binary" + "reflect" + "runtime" + "unsafe" + + "github.com/cilium/ebpf/internal" + + "github.com/pkg/errors" +) + +func marshalPtr(data interface{}, length int) (syscallPtr, error) { + if ptr, ok := data.(unsafe.Pointer); ok { + return newPtr(ptr), nil + } + + buf, err := marshalBytes(data, length) + if err != nil { + return syscallPtr{}, err + } + + return newPtr(unsafe.Pointer(&buf[0])), nil +} + +func marshalBytes(data interface{}, length int) (buf []byte, err error) { + switch value := data.(type) { + case encoding.BinaryMarshaler: + buf, err = value.MarshalBinary() + case string: + buf = []byte(value) + case []byte: + buf = value + case unsafe.Pointer: + err = errors.New("can't marshal from unsafe.Pointer") + default: + var wr bytes.Buffer + err = binary.Write(&wr, internal.NativeEndian, value) + err = errors.Wrapf(err, "encoding %T", value) + buf = wr.Bytes() + } + if err != nil { + return nil, err + } + + if len(buf) != length { + return nil, errors.Errorf("%T doesn't marshal to %d bytes", data, length) + } + return buf, nil +} + +func makeBuffer(dst interface{}, length int) (syscallPtr, []byte) { + if ptr, ok := dst.(unsafe.Pointer); ok { + return newPtr(ptr), nil + } + + buf := make([]byte, length) + return newPtr(unsafe.Pointer(&buf[0])), buf +} + +func unmarshalBytes(data interface{}, buf []byte) error { + switch value := data.(type) { + case unsafe.Pointer: + sh := &reflect.SliceHeader{ + Data: uintptr(value), + Len: len(buf), + Cap: len(buf), + } + + dst := *(*[]byte)(unsafe.Pointer(sh)) + copy(dst, buf) + runtime.KeepAlive(value) + return nil + case 
encoding.BinaryUnmarshaler: + return value.UnmarshalBinary(buf) + case *string: + *value = string(buf) + return nil + case *[]byte: + *value = buf + return nil + case string: + return errors.New("require pointer to string") + case []byte: + return errors.New("require pointer to []byte") + default: + rd := bytes.NewReader(buf) + err := binary.Read(rd, internal.NativeEndian, value) + return errors.Wrapf(err, "decoding %T", value) + } +} + +// marshalPerCPUValue encodes a slice containing one value per +// possible CPU into a buffer of bytes. +// +// Values are initialized to zero if the slice has less elements than CPUs. +// +// slice must have a type like []elementType. +func marshalPerCPUValue(slice interface{}, elemLength int) (syscallPtr, error) { + sliceType := reflect.TypeOf(slice) + if sliceType.Kind() != reflect.Slice { + return syscallPtr{}, errors.New("per-CPU value requires slice") + } + + possibleCPUs, err := internal.PossibleCPUs() + if err != nil { + return syscallPtr{}, err + } + + sliceValue := reflect.ValueOf(slice) + sliceLen := sliceValue.Len() + if sliceLen > possibleCPUs { + return syscallPtr{}, errors.Errorf("per-CPU value exceeds number of CPUs") + } + + alignedElemLength := align(elemLength, 8) + buf := make([]byte, alignedElemLength*possibleCPUs) + + for i := 0; i < sliceLen; i++ { + elem := sliceValue.Index(i).Interface() + elemBytes, err := marshalBytes(elem, elemLength) + if err != nil { + return syscallPtr{}, err + } + + offset := i * alignedElemLength + copy(buf[offset:offset+elemLength], elemBytes) + } + + return newPtr(unsafe.Pointer(&buf[0])), nil +} + +// unmarshalPerCPUValue decodes a buffer into a slice containing one value per +// possible CPU. 
+// +// valueOut must have a type like *[]elementType +func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error { + slicePtrType := reflect.TypeOf(slicePtr) + if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice { + return errors.Errorf("per-cpu value requires pointer to slice") + } + + possibleCPUs, err := internal.PossibleCPUs() + if err != nil { + return err + } + + sliceType := slicePtrType.Elem() + slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs) + + sliceElemType := sliceType.Elem() + sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr + if sliceElemIsPointer { + sliceElemType = sliceElemType.Elem() + } + + step := len(buf) / possibleCPUs + if step < elemLength { + return errors.Errorf("per-cpu element length is larger than available data") + } + for i := 0; i < possibleCPUs; i++ { + var elem interface{} + if sliceElemIsPointer { + newElem := reflect.New(sliceElemType) + slice.Index(i).Set(newElem) + elem = newElem.Interface() + } else { + elem = slice.Index(i).Addr().Interface() + } + + // Make a copy, since unmarshal can hold on to itemBytes + elemBytes := make([]byte, elemLength) + copy(elemBytes, buf[:elemLength]) + + err := unmarshalBytes(elem, elemBytes) + if err != nil { + return errors.Wrapf(err, "cpu %d", i) + } + + buf = buf[step:] + } + + reflect.ValueOf(slicePtr).Elem().Set(slice) + return nil +} + +func align(n, alignment int) int { + return (int(n) + alignment - 1) / alignment * alignment +} diff --git a/vendor/github.com/cilium/ebpf/prog.go b/vendor/github.com/cilium/ebpf/prog.go new file mode 100644 index 0000000..03b24fb --- /dev/null +++ b/vendor/github.com/cilium/ebpf/prog.go @@ -0,0 +1,523 @@ +package ebpf + +import ( + "bytes" + "fmt" + "math" + "path/filepath" + "strings" + "time" + "unsafe" + + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/unix" + + "github.com/pkg/errors" +) + +var ( + errNotSupported = 
errors.New("ebpf: not supported by kernel") +) + +const ( + // Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN. + // This is currently the maximum of spare space allocated for SKB + // and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN. + outputPad = 256 + 2 +) + +// DefaultVerifierLogSize is the default number of bytes allocated for the +// verifier log. +const DefaultVerifierLogSize = 64 * 1024 + +// ProgramOptions control loading a program into the kernel. +type ProgramOptions struct { + // Controls the detail emitted by the kernel verifier. Set to non-zero + // to enable logging. + LogLevel uint32 + // Controls the output buffer size for the verifier. Defaults to + // DefaultVerifierLogSize. + LogSize int +} + +// ProgramSpec defines a Program +type ProgramSpec struct { + // Name is passed to the kernel as a debug aid. Must only contain + // alpha numeric and '_' characters. + Name string + Type ProgramType + AttachType AttachType + Instructions asm.Instructions + License string + KernelVersion uint32 +} + +// Copy returns a copy of the spec. +func (ps *ProgramSpec) Copy() *ProgramSpec { + if ps == nil { + return nil + } + + cpy := *ps + cpy.Instructions = make(asm.Instructions, len(ps.Instructions)) + copy(cpy.Instructions, ps.Instructions) + return &cpy +} + +// Program represents BPF program loaded into the kernel. +// +// It is not safe to close a Program which is used by other goroutines. +type Program struct { + // Contains the output of the kernel verifier if enabled, + // otherwise it is empty. + VerifierLog string + + fd *bpfFD + name string + abi ProgramABI +} + +// NewProgram creates a new Program. +// +// Loading a program for the first time will perform +// feature detection by loading small, temporary programs. +func NewProgram(spec *ProgramSpec) (*Program, error) { + return NewProgramWithOptions(spec, ProgramOptions{}) +} + +// NewProgramWithOptions creates a new Program. 
+// +// Loading a program for the first time will perform +// feature detection by loading small, temporary programs. +func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) { + attr, err := convertProgramSpec(spec, haveObjName.Result()) + if err != nil { + return nil, err + } + + logSize := DefaultVerifierLogSize + if opts.LogSize > 0 { + logSize = opts.LogSize + } + + var logBuf []byte + if opts.LogLevel > 0 { + logBuf = make([]byte, logSize) + attr.logLevel = opts.LogLevel + attr.logSize = uint32(len(logBuf)) + attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0])) + } + + fd, err := bpfProgLoad(attr) + if err == nil { + prog := newProgram(fd, spec.Name, &ProgramABI{spec.Type}) + prog.VerifierLog = convertCString(logBuf) + return prog, nil + } + + truncated := errors.Cause(err) == unix.ENOSPC + if opts.LogLevel == 0 { + // Re-run with the verifier enabled to get better error messages. + logBuf = make([]byte, logSize) + attr.logLevel = 1 + attr.logSize = uint32(len(logBuf)) + attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0])) + + _, nerr := bpfProgLoad(attr) + truncated = errors.Cause(nerr) == unix.ENOSPC + } + + logs := convertCString(logBuf) + if truncated { + logs += "\n(truncated...)" + } + + return nil, &loadError{err, logs} +} + +// NewProgramFromFD creates a program from a raw fd. +// +// You should not use fd after calling this function. 
+func NewProgramFromFD(fd int) (*Program, error) { + if fd < 0 { + return nil, errors.New("invalid fd") + } + bpfFd := newBPFFD(uint32(fd)) + + info, err := bpfGetProgInfoByFD(bpfFd) + if err != nil { + bpfFd.forget() + return nil, err + } + + var name string + if bpfName := convertCString(info.name[:]); bpfName != "" { + name = bpfName + } else { + name = convertCString(info.tag[:]) + } + + return newProgram(bpfFd, name, newProgramABIFromInfo(info)), nil +} + +func newProgram(fd *bpfFD, name string, abi *ProgramABI) *Program { + return &Program{ + name: name, + fd: fd, + abi: *abi, + } +} + +func convertProgramSpec(spec *ProgramSpec, includeName bool) (*bpfProgLoadAttr, error) { + if len(spec.Instructions) == 0 { + return nil, errors.New("Instructions cannot be empty") + } + + if len(spec.License) == 0 { + return nil, errors.New("License cannot be empty") + } + + buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize)) + err := spec.Instructions.Marshal(buf, internal.NativeEndian) + if err != nil { + return nil, err + } + + bytecode := buf.Bytes() + insCount := uint32(len(bytecode) / asm.InstructionSize) + lic := []byte(spec.License) + attr := &bpfProgLoadAttr{ + progType: spec.Type, + expectedAttachType: spec.AttachType, + insCount: insCount, + instructions: newPtr(unsafe.Pointer(&bytecode[0])), + license: newPtr(unsafe.Pointer(&lic[0])), + } + + name, err := newBPFObjName(spec.Name) + if err != nil { + return nil, err + } + + if includeName { + attr.progName = name + } + + return attr, nil +} + +func (p *Program) String() string { + if p.name != "" { + return fmt.Sprintf("%s(%s)#%s", p.abi.Type, p.name, p.fd) + } + return fmt.Sprintf("%s#%s", p.abi.Type, p.fd) +} + +// ABI gets the ABI of the Program +func (p *Program) ABI() ProgramABI { + return p.abi +} + +// FD gets the file descriptor of the Program. +// +// It is invalid to call this function after Close has been called. 
+func (p *Program) FD() int { + fd, err := p.fd.value() + if err != nil { + // Best effort: -1 is the number most likely to be an + // invalid file descriptor. + return -1 + } + + return int(fd) +} + +// Clone creates a duplicate of the Program. +// +// Closing the duplicate does not affect the original, and vice versa. +// +// Cloning a nil Program returns nil. +func (p *Program) Clone() (*Program, error) { + if p == nil { + return nil, nil + } + + dup, err := p.fd.dup() + if err != nil { + return nil, errors.Wrap(err, "can't clone program") + } + + return newProgram(dup, p.name, &p.abi), nil +} + +// Pin persists the Program past the lifetime of the process that created it +// +// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional +func (p *Program) Pin(fileName string) error { + return errors.Wrap(bpfPinObject(fileName, p.fd), "can't pin program") +} + +// Close unloads the program from the kernel. +func (p *Program) Close() error { + if p == nil { + return nil + } + + return p.fd.close() +} + +// Test runs the Program in the kernel with the given input and returns the +// value returned by the eBPF program. outLen may be zero. +// +// Note: the kernel expects at least 14 bytes input for an ethernet header for +// XDP and SKB programs. +// +// This function requires at least Linux 4.12. +func (p *Program) Test(in []byte) (uint32, []byte, error) { + ret, out, _, err := p.testRun(in, 1) + return ret, out, err +} + +// Benchmark runs the Program with the given input for a number of times +// and returns the time taken per iteration. +// +// The returned value is the return value of the last execution of +// the program. +// +// This function requires at least Linux 4.12. 
+func (p *Program) Benchmark(in []byte, repeat int) (uint32, time.Duration, error) { + ret, _, total, err := p.testRun(in, repeat) + return ret, total, err +} + +var noProgTestRun = featureTest{ + Fn: func() bool { + prog, err := NewProgram(&ProgramSpec{ + Type: SocketFilter, + Instructions: asm.Instructions{ + asm.LoadImm(asm.R0, 0, asm.DWord), + asm.Return(), + }, + License: "MIT", + }) + if err != nil { + // This may be because we lack sufficient permissions, etc. + return false + } + defer prog.Close() + + fd, err := prog.fd.value() + if err != nil { + return false + } + + // Programs require at least 14 bytes input + in := make([]byte, 14) + attr := bpfProgTestRunAttr{ + fd: fd, + dataSizeIn: uint32(len(in)), + dataIn: newPtr(unsafe.Pointer(&in[0])), + } + + _, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + return errors.Cause(err) == unix.EINVAL + }, +} + +func (p *Program) testRun(in []byte, repeat int) (uint32, []byte, time.Duration, error) { + if uint(repeat) > math.MaxUint32 { + return 0, nil, 0, fmt.Errorf("repeat is too high") + } + + if len(in) == 0 { + return 0, nil, 0, fmt.Errorf("missing input") + } + + if uint(len(in)) > math.MaxUint32 { + return 0, nil, 0, fmt.Errorf("input is too long") + } + + if noProgTestRun.Result() { + return 0, nil, 0, errNotSupported + } + + // Older kernels ignore the dataSizeOut argument when copying to user space. + // Combined with things like bpf_xdp_adjust_head() we don't really know what the final + // size will be. Hence we allocate an output buffer which we hope will always be large + // enough, and panic if the kernel wrote past the end of the allocation. 
+ // See https://patchwork.ozlabs.org/cover/1006822/ + out := make([]byte, len(in)+outputPad) + + fd, err := p.fd.value() + if err != nil { + return 0, nil, 0, err + } + + attr := bpfProgTestRunAttr{ + fd: fd, + dataSizeIn: uint32(len(in)), + dataSizeOut: uint32(len(out)), + dataIn: newPtr(unsafe.Pointer(&in[0])), + dataOut: newPtr(unsafe.Pointer(&out[0])), + repeat: uint32(repeat), + } + + _, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + if err != nil { + return 0, nil, 0, errors.Wrap(err, "can't run test") + } + + if int(attr.dataSizeOut) > cap(out) { + // Houston, we have a problem. The program created more data than we allocated, + // and the kernel wrote past the end of our buffer. + panic("kernel wrote past end of output buffer") + } + out = out[:int(attr.dataSizeOut)] + + total := time.Duration(attr.duration) * time.Nanosecond + return attr.retval, out, total, nil +} + +func unmarshalProgram(buf []byte) (*Program, error) { + if len(buf) != 4 { + return nil, errors.New("program id requires 4 byte value") + } + + // Looking up an entry in a nested map or prog array returns an id, + // not an fd. + id := internal.NativeEndian.Uint32(buf) + fd, err := bpfGetProgramFDByID(id) + if err != nil { + return nil, err + } + + abi, err := newProgramABIFromFd(fd) + if err != nil { + _ = fd.close() + return nil, err + } + + return newProgram(fd, "", abi), nil +} + +// MarshalBinary implements BinaryMarshaler. 
+func (p *Program) MarshalBinary() ([]byte, error) { + value, err := p.fd.value() + if err != nil { + return nil, err + } + + buf := make([]byte, 4) + internal.NativeEndian.PutUint32(buf, value) + return buf, nil +} + +// Attach a Program to a container object fd +func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error { + if fd < 0 { + return errors.New("invalid fd") + } + + pfd, err := p.fd.value() + if err != nil { + return err + } + + attr := bpfProgAlterAttr{ + targetFd: uint32(fd), + attachBpfFd: pfd, + attachType: uint32(typ), + attachFlags: uint32(flags), + } + + return bpfProgAlter(_ProgAttach, &attr) +} + +// Detach a Program from a container object fd +func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error { + if fd < 0 { + return errors.New("invalid fd") + } + + pfd, err := p.fd.value() + if err != nil { + return err + } + + attr := bpfProgAlterAttr{ + targetFd: uint32(fd), + attachBpfFd: pfd, + attachType: uint32(typ), + attachFlags: uint32(flags), + } + + return bpfProgAlter(_ProgDetach, &attr) +} + +// LoadPinnedProgram loads a Program from a BPF file. +// +// Requires at least Linux 4.13, use LoadPinnedProgramExplicit on +// earlier versions. +func LoadPinnedProgram(fileName string) (*Program, error) { + fd, err := bpfGetObject(fileName) + if err != nil { + return nil, err + } + + abi, err := newProgramABIFromFd(fd) + if err != nil { + _ = fd.close() + return nil, err + } + + return newProgram(fd, filepath.Base(fileName), abi), nil +} + +// LoadPinnedProgramExplicit loads a program with explicit parameters. +func LoadPinnedProgramExplicit(fileName string, abi *ProgramABI) (*Program, error) { + fd, err := bpfGetObject(fileName) + if err != nil { + return nil, err + } + + return newProgram(fd, filepath.Base(fileName), abi), nil +} + +// SanitizeName replaces all invalid characters in name. +// +// Use this to automatically generate valid names for maps and +// programs at run time. 
+// +// Passing a negative value for replacement will delete characters +// instead of replacing them. +func SanitizeName(name string, replacement rune) string { + return strings.Map(func(char rune) rune { + if invalidBPFObjNameChar(char) { + return replacement + } + return char + }, name) +} + +type loadError struct { + cause error + verifierLog string +} + +func (le *loadError) Error() string { + if le.verifierLog == "" { + return fmt.Sprintf("failed to load program: %s", le.cause) + } + return fmt.Sprintf("failed to load program: %s: %s", le.cause, le.verifierLog) +} + +func (le *loadError) Cause() error { + return le.cause +} + +// IsNotSupported returns true if an error occurred because +// the kernel does not have support for a specific feature. +func IsNotSupported(err error) bool { + return errors.Cause(err) == errNotSupported +} diff --git a/vendor/github.com/cilium/ebpf/ptr_32_be.go b/vendor/github.com/cilium/ebpf/ptr_32_be.go new file mode 100644 index 0000000..7757744 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/ptr_32_be.go @@ -0,0 +1,14 @@ +// +build armbe mips mips64p32 + +package ebpf + +import ( + "unsafe" +) + +// ptr wraps an unsafe.Pointer to be 64bit to +// conform to the syscall specification. +type syscallPtr struct { + pad uint32 + ptr unsafe.Pointer +} diff --git a/vendor/github.com/cilium/ebpf/ptr_32_le.go b/vendor/github.com/cilium/ebpf/ptr_32_le.go new file mode 100644 index 0000000..14b805e --- /dev/null +++ b/vendor/github.com/cilium/ebpf/ptr_32_le.go @@ -0,0 +1,14 @@ +// +build 386 amd64p32 arm mipsle mips64p32le + +package ebpf + +import ( + "unsafe" +) + +// ptr wraps an unsafe.Pointer to be 64bit to +// conform to the syscall specification. 
+type syscallPtr struct { + ptr unsafe.Pointer + pad uint32 +} diff --git a/vendor/github.com/cilium/ebpf/ptr_64.go b/vendor/github.com/cilium/ebpf/ptr_64.go new file mode 100644 index 0000000..c897d72 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/ptr_64.go @@ -0,0 +1,14 @@ +// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le +// +build !armbe,!mips,!mips64p32 + +package ebpf + +import ( + "unsafe" +) + +// ptr wraps an unsafe.Pointer to be 64bit to +// conform to the syscall specification. +type syscallPtr struct { + ptr unsafe.Pointer +} diff --git a/vendor/github.com/cilium/ebpf/readme.md b/vendor/github.com/cilium/ebpf/readme.md new file mode 100644 index 0000000..26ab2b9 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/readme.md @@ -0,0 +1,20 @@ +eBPF +------- +[![](https://godoc.org/github.com/cilium/ebpf?status.svg)](https://godoc.org/github.com/cilium/ebpf) + +eBPF is a pure Go library that provides utilities for loading, compiling, and debugging eBPF programs. It has minimal external dependencies and is intended to be used in long running processes. + +[ebpf/asm](https://godoc.org/github.com/cilium/ebpf/asm) contains a basic assembler. + +The library is maintained by [Cloudflare](https://www.cloudflare.com) and [Cilium](https://www.cilium.io). Feel free to [join](https://cilium.herokuapp.com/) the [libbpf-go](https://cilium.slack.com/messages/libbpf-go) channel on Slack. + +## Current status + +The package is production ready, but **the API is explicitly unstable +right now**. Expect to update your code if you want to follow along. 
+ +## Useful resources + +* [Cilium eBPF documentation](https://cilium.readthedocs.io/en/latest/bpf/#bpf-guide) (recommended) +* [Linux documentation on BPF](http://elixir.free-electrons.com/linux/latest/source/Documentation/networking/filter.txt) +* [eBPF features by Linux version](https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md) diff --git a/vendor/github.com/cilium/ebpf/syscalls.go b/vendor/github.com/cilium/ebpf/syscalls.go new file mode 100644 index 0000000..68abd3b --- /dev/null +++ b/vendor/github.com/cilium/ebpf/syscalls.go @@ -0,0 +1,420 @@ +package ebpf + +import ( + "bytes" + "path/filepath" + "runtime" + "strconv" + "strings" + "unsafe" + + "github.com/cilium/ebpf/internal/unix" + + "github.com/pkg/errors" +) + +var errClosedFd = errors.New("use of closed file descriptor") + +type bpfFD struct { + raw int64 +} + +func newBPFFD(value uint32) *bpfFD { + fd := &bpfFD{int64(value)} + runtime.SetFinalizer(fd, (*bpfFD).close) + return fd +} + +func (fd *bpfFD) String() string { + return strconv.FormatInt(fd.raw, 10) +} + +func (fd *bpfFD) value() (uint32, error) { + if fd.raw < 0 { + return 0, errClosedFd + } + + return uint32(fd.raw), nil +} + +func (fd *bpfFD) close() error { + if fd.raw < 0 { + return nil + } + + value := int(fd.raw) + fd.raw = -1 + + fd.forget() + return unix.Close(value) +} + +func (fd *bpfFD) forget() { + runtime.SetFinalizer(fd, nil) +} + +func (fd *bpfFD) dup() (*bpfFD, error) { + if fd.raw < 0 { + return nil, errClosedFd + } + + dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0) + if err != nil { + return nil, errors.Wrap(err, "can't dup fd") + } + + return newBPFFD(uint32(dup)), nil +} + +// bpfObjName is a null-terminated string made up of +// 'A-Za-z0-9_' characters. +type bpfObjName [unix.BPF_OBJ_NAME_LEN]byte + +// newBPFObjName truncates the result if it is too long. 
+func newBPFObjName(name string) (bpfObjName, error) { + idx := strings.IndexFunc(name, invalidBPFObjNameChar) + if idx != -1 { + return bpfObjName{}, errors.Errorf("invalid character '%c' in name '%s'", name[idx], name) + } + + var result bpfObjName + copy(result[:unix.BPF_OBJ_NAME_LEN-1], name) + return result, nil +} + +func invalidBPFObjNameChar(char rune) bool { + switch { + case char >= 'A' && char <= 'Z': + fallthrough + case char >= 'a' && char <= 'z': + fallthrough + case char >= '0' && char <= '9': + fallthrough + case char == '_': + return false + default: + return true + } +} + +type bpfMapCreateAttr struct { + mapType MapType + keySize uint32 + valueSize uint32 + maxEntries uint32 + flags uint32 + innerMapFd uint32 // since 4.12 56f668dfe00d + numaNode uint32 // since 4.14 96eabe7a40aa + mapName bpfObjName // since 4.15 ad5b177bd73f +} + +type bpfMapOpAttr struct { + mapFd uint32 + padding uint32 + key syscallPtr + value syscallPtr + flags uint64 +} + +type bpfMapInfo struct { + mapType uint32 + id uint32 + keySize uint32 + valueSize uint32 + maxEntries uint32 + flags uint32 + mapName bpfObjName // since 4.15 ad5b177bd73f +} + +type bpfPinObjAttr struct { + fileName syscallPtr + fd uint32 + padding uint32 +} + +type bpfProgLoadAttr struct { + progType ProgramType + insCount uint32 + instructions syscallPtr + license syscallPtr + logLevel uint32 + logSize uint32 + logBuf syscallPtr + kernelVersion uint32 // since 4.1 2541517c32be + progFlags uint32 // since 4.11 e07b98d9bffe + progName bpfObjName // since 4.15 067cae47771c + progIfIndex uint32 // since 4.15 1f6f4cb7ba21 + expectedAttachType AttachType // since 4.17 5e43f899b03a +} + +type bpfProgInfo struct { + progType uint32 + id uint32 + tag [unix.BPF_TAG_SIZE]byte + jitedLen uint32 + xlatedLen uint32 + jited syscallPtr + xlated syscallPtr + loadTime uint64 // since 4.15 cb4d2b3f03d8 + createdByUID uint32 + nrMapIDs uint32 + mapIds syscallPtr + name bpfObjName +} + +type bpfProgTestRunAttr struct { + 
fd uint32 + retval uint32 + dataSizeIn uint32 + dataSizeOut uint32 + dataIn syscallPtr + dataOut syscallPtr + repeat uint32 + duration uint32 +} + +type bpfProgAlterAttr struct { + targetFd uint32 + attachBpfFd uint32 + attachType uint32 + attachFlags uint32 +} + +type bpfObjGetInfoByFDAttr struct { + fd uint32 + infoLen uint32 + info syscallPtr // May be either bpfMapInfo or bpfProgInfo +} + +type bpfGetFDByIDAttr struct { + id uint32 + next uint32 +} + +func newPtr(ptr unsafe.Pointer) syscallPtr { + return syscallPtr{ptr: ptr} +} + +func bpfProgLoad(attr *bpfProgLoadAttr) (*bpfFD, error) { + for { + fd, err := bpfCall(_ProgLoad, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + // As of ~4.20 the verifier can be interrupted by a signal, + // and returns EAGAIN in that case. + if err == unix.EAGAIN { + continue + } + + if err != nil { + return nil, err + } + + return newBPFFD(uint32(fd)), nil + } +} + +func bpfProgAlter(cmd int, attr *bpfProgAlterAttr) error { + _, err := bpfCall(cmd, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + return err +} + +func bpfMapCreate(attr *bpfMapCreateAttr) (*bpfFD, error) { + fd, err := bpfCall(_MapCreate, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) + if err != nil { + return nil, err + } + + return newBPFFD(uint32(fd)), nil +} + +func bpfMapLookupElem(m *bpfFD, key, valueOut syscallPtr) error { + fd, err := m.value() + if err != nil { + return err + } + + attr := bpfMapOpAttr{ + mapFd: fd, + key: key, + value: valueOut, + } + _, err = bpfCall(_MapLookupElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + return err +} + +func bpfMapUpdateElem(m *bpfFD, key, valueOut syscallPtr, flags uint64) error { + fd, err := m.value() + if err != nil { + return err + } + + attr := bpfMapOpAttr{ + mapFd: fd, + key: key, + value: valueOut, + flags: flags, + } + _, err = bpfCall(_MapUpdateElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + return err +} + +func bpfMapDeleteElem(m *bpfFD, key syscallPtr) error { + fd, err := m.value() + if err != nil { 
+ return err + } + + attr := bpfMapOpAttr{ + mapFd: fd, + key: key, + } + _, err = bpfCall(_MapDeleteElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + return err +} + +func bpfMapGetNextKey(m *bpfFD, key, nextKeyOut syscallPtr) error { + fd, err := m.value() + if err != nil { + return err + } + + attr := bpfMapOpAttr{ + mapFd: fd, + key: key, + value: nextKeyOut, + } + _, err = bpfCall(_MapGetNextKey, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + return err +} + +const bpfFSType = 0xcafe4a11 + +func bpfPinObject(fileName string, fd *bpfFD) error { + dirName := filepath.Dir(fileName) + var statfs unix.Statfs_t + if err := unix.Statfs(dirName, &statfs); err != nil { + return err + } + if uint64(statfs.Type) != bpfFSType { + return errors.Errorf("%s is not on a bpf filesystem", fileName) + } + + value, err := fd.value() + if err != nil { + return err + } + + _, err = bpfCall(_ObjPin, unsafe.Pointer(&bpfPinObjAttr{ + fileName: newPtr(unsafe.Pointer(&[]byte(fileName)[0])), + fd: value, + }), 16) + return errors.Wrapf(err, "pin object %s", fileName) +} + +func bpfGetObject(fileName string) (*bpfFD, error) { + ptr, err := bpfCall(_ObjGet, unsafe.Pointer(&bpfPinObjAttr{ + fileName: newPtr(unsafe.Pointer(&[]byte(fileName)[0])), + }), 16) + if err != nil { + return nil, errors.Wrapf(err, "get object %s", fileName) + } + return newBPFFD(uint32(ptr)), nil +} + +func bpfGetObjectInfoByFD(fd *bpfFD, info unsafe.Pointer, size uintptr) error { + value, err := fd.value() + if err != nil { + return err + } + + // available from 4.13 + attr := bpfObjGetInfoByFDAttr{ + fd: value, + infoLen: uint32(size), + info: newPtr(info), + } + _, err = bpfCall(_ObjGetInfoByFD, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + return errors.Wrapf(err, "fd %d", value) +} + +func bpfGetProgInfoByFD(fd *bpfFD) (*bpfProgInfo, error) { + var info bpfProgInfo + err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)) + return &info, errors.Wrap(err, "can't get program info") +} + +func 
bpfGetMapInfoByFD(fd *bpfFD) (*bpfMapInfo, error) { + var info bpfMapInfo + err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)) + return &info, errors.Wrap(err, "can't get map info:") +} + +var haveObjName = featureTest{ + Fn: func() bool { + name, err := newBPFObjName("feature_test") + if err != nil { + // This really is a fatal error, but it should be caught + // by the unit tests not working. + return false + } + + attr := bpfMapCreateAttr{ + mapType: Array, + keySize: 4, + valueSize: 4, + maxEntries: 1, + mapName: name, + } + + fd, err := bpfMapCreate(&attr) + if err != nil { + return false + } + + _ = fd.close() + return true + }, +} + +func bpfGetMapFDByID(id uint32) (*bpfFD, error) { + // available from 4.13 + attr := bpfGetFDByIDAttr{ + id: id, + } + ptr, err := bpfCall(_MapGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + if err != nil { + return nil, errors.Wrapf(err, "can't get fd for map id %d", id) + } + return newBPFFD(uint32(ptr)), nil +} + +func bpfGetProgramFDByID(id uint32) (*bpfFD, error) { + // available from 4.13 + attr := bpfGetFDByIDAttr{ + id: id, + } + ptr, err := bpfCall(_ProgGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) + if err != nil { + return nil, errors.Wrapf(err, "can't get fd for program id %d", id) + } + return newBPFFD(uint32(ptr)), nil +} + +func bpfCall(cmd int, attr unsafe.Pointer, size uintptr) (uintptr, error) { + r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size) + runtime.KeepAlive(attr) + + var err error + if errNo != 0 { + err = errNo + } + + return r1, err +} + +func convertCString(in []byte) string { + inLen := bytes.IndexByte(in, 0) + if inLen == -1 { + return "" + } + return string(in[:inLen]) +} diff --git a/vendor/github.com/cilium/ebpf/types.go b/vendor/github.com/cilium/ebpf/types.go new file mode 100644 index 0000000..0daf9a7 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/types.go @@ -0,0 +1,189 @@ +package ebpf + +//go:generate stringer 
-output types_string.go -type=MapType,ProgramType + +// MapType indicates the type map structure +// that will be initialized in the kernel. +type MapType uint32 + +// All the various map types that can be created +const ( + UnspecifiedMap MapType = iota + // Hash is a hash map + Hash + // Array is an array map + Array + // ProgramArray - A program array map is a special kind of array map whose map + // values contain only file descriptors referring to other eBPF + // programs. Thus, both the key_size and value_size must be + // exactly four bytes. This map is used in conjunction with the + // TailCall helper. + ProgramArray + // PerfEventArray - A perf event array is used in conjunction with PerfEventRead + // and PerfEventOutput calls, to read the raw bpf_perf_data from the registers. + PerfEventArray + // PerCPUHash - This data structure is useful for people who have high performance + // network needs and can reconcile adds at the end of some cycle, so that + // hashes can be lock free without the use of XAdd, which can be costly. + PerCPUHash + // PerCPUArray - This data structure is useful for people who have high performance + // network needs and can reconcile adds at the end of some cycle, so that + // hashes can be lock free without the use of XAdd, which can be costly. + // Each CPU gets a copy of this hash, the contents of all of which can be reconciled + // later. 
+ PerCPUArray + // StackTrace - This holds whole user and kernel stack traces, it can be retrieved with + // GetStackID + StackTrace + // CGroupArray - This is a very niche structure used to help SKBInCGroup determine + // if an skb is from a socket belonging to a specific cgroup + CGroupArray + // LRUHash - This allows you to create a small hash structure that will purge the + // least recently used items rather than throw an error when you run out of memory + LRUHash + // LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs, + // it has more to do with including the CPU id with the LRU calculation so that if a + // particular CPU is using a value over-and-over again, then it will be saved, but if + // a value is being retrieved a lot but sparsely across CPUs it is not as important, basically + // giving weight to CPU locality over overall usage. + LRUCPUHash + // LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful, + // for storing things like IP addresses which can be bit masked allowing for keys of differing + // values to refer to the same reference based on their masks. See wikipedia for more details. + LPMTrie + // ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps + // itself. + ArrayOfMaps + // HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps + // itself. + HashOfMaps +) + +// hasPerCPUValue returns true if the Map stores a value per CPU. 
+func (mt MapType) hasPerCPUValue() bool { + if mt == PerCPUHash || mt == PerCPUArray { + return true + } + return false +} + +const ( + _MapCreate = iota + _MapLookupElem + _MapUpdateElem + _MapDeleteElem + _MapGetNextKey + _ProgLoad + _ObjPin + _ObjGet + _ProgAttach + _ProgDetach + _ProgTestRun + _ProgGetNextID + _MapGetNextID + _ProgGetFDByID + _MapGetFDByID + _ObjGetInfoByFD +) + +const ( + _Any = iota + _NoExist + _Exist +) + +// ProgramType of the eBPF program +type ProgramType uint32 + +// eBPF program types +const ( + // Unrecognized program type + UnspecifiedProgram ProgramType = iota + // SocketFilter socket or seccomp filter + SocketFilter + // Kprobe program + Kprobe + // SchedCLS traffic control shaper + SchedCLS + // SchedACT routing control shaper + SchedACT + // TracePoint program + TracePoint + // XDP program + XDP + // PerfEvent program + PerfEvent + // CGroupSKB program + CGroupSKB + // CGroupSock program + CGroupSock + // LWTIn program + LWTIn + // LWTOut program + LWTOut + // LWTXmit program + LWTXmit + // SockOps program + SockOps + // SkSKB program + SkSKB + // CGroupDevice program + CGroupDevice + // SkMsg program + SkMsg + // RawTracepoint program + RawTracepoint + // CGroupSockAddr program + CGroupSockAddr + // LWTSeg6Local program + LWTSeg6Local + // LircMode2 program + LircMode2 + // SkReuseport program + SkReuseport + // FlowDissector program + FlowDissector + // CGroupSysctl program + CGroupSysctl + // RawTracepointWritable program + RawTracepointWritable + // CGroupSockopt program + CGroupSockopt +) + +// AttachType of the eBPF program, needed to differentiate allowed context accesses in +// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required. +// Will cause invalid argument (EINVAL) at program load time if set incorrectly. 
+type AttachType uint32 + +// AttachNone is an alias for AttachCGroupInetIngress for readability reasons +const AttachNone AttachType = 0 + +const ( + AttachCGroupInetIngress AttachType = iota + AttachCGroupInetEgress + AttachCGroupInetSockCreate + AttachCGroupSockOps + AttachSkSKBStreamParser + AttachSkSKBStreamVerdict + AttachCGroupDevice + AttachSkMsgVerdict + AttachCGroupInet4Bind + AttachCGroupInet6Bind + AttachCGroupInet4Connect + AttachCGroupInet6Connect + AttachCGroupInet4PostBind + AttachCGroupInet6PostBind + AttachCGroupUDP4Sendmsg + AttachCGroupUDP6Sendmsg + AttachLircMode2 + AttachFlowDissector + AttachCGroupSysctl + AttachCGroupUDP4Recvmsg + AttachCGroupUDP6Recvmsg + AttachCGroupGetsockopt + AttachCGroupSetsockopt +) + +// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command +type AttachFlags uint32 diff --git a/vendor/github.com/cilium/ebpf/types_string.go b/vendor/github.com/cilium/ebpf/types_string.go new file mode 100644 index 0000000..4813437 --- /dev/null +++ b/vendor/github.com/cilium/ebpf/types_string.go @@ -0,0 +1,78 @@ +// Code generated by "stringer -output types_string.go -type=MapType,ProgramType"; DO NOT EDIT. + +package ebpf + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[UnspecifiedMap-0] + _ = x[Hash-1] + _ = x[Array-2] + _ = x[ProgramArray-3] + _ = x[PerfEventArray-4] + _ = x[PerCPUHash-5] + _ = x[PerCPUArray-6] + _ = x[StackTrace-7] + _ = x[CGroupArray-8] + _ = x[LRUHash-9] + _ = x[LRUCPUHash-10] + _ = x[LPMTrie-11] + _ = x[ArrayOfMaps-12] + _ = x[HashOfMaps-13] +} + +const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMaps" + +var _MapType_index = [...]uint8{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136} + +func (i MapType) String() string { + if i >= MapType(len(_MapType_index)-1) { + return "MapType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _MapType_name[_MapType_index[i]:_MapType_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[UnspecifiedProgram-0] + _ = x[SocketFilter-1] + _ = x[Kprobe-2] + _ = x[SchedCLS-3] + _ = x[SchedACT-4] + _ = x[TracePoint-5] + _ = x[XDP-6] + _ = x[PerfEvent-7] + _ = x[CGroupSKB-8] + _ = x[CGroupSock-9] + _ = x[LWTIn-10] + _ = x[LWTOut-11] + _ = x[LWTXmit-12] + _ = x[SockOps-13] + _ = x[SkSKB-14] + _ = x[CGroupDevice-15] + _ = x[SkMsg-16] + _ = x[RawTracepoint-17] + _ = x[CGroupSockAddr-18] + _ = x[LWTSeg6Local-19] + _ = x[LircMode2-20] + _ = x[SkReuseport-21] + _ = x[FlowDissector-22] + _ = x[CGroupSysctl-23] + _ = x[RawTracepointWritable-24] + _ = x[CGroupSockopt-25] +} + +const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockopt" + +var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 
188, 199, 212, 224, 245, 258} + +func (i ProgramType) String() string { + if i >= ProgramType(len(_ProgramType_index)-1) { + return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]] +} diff --git a/vendor/github.com/containerd/console/LICENSE b/vendor/github.com/containerd/console/LICENSE new file mode 100644 index 0000000..584149b --- /dev/null +++ b/vendor/github.com/containerd/console/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright The containerd Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/vendor/github.com/containerd/console/README.md b/vendor/github.com/containerd/console/README.md new file mode 100644 index 0000000..5392fda --- /dev/null +++ b/vendor/github.com/containerd/console/README.md @@ -0,0 +1,27 @@ +# console + +[![Build Status](https://travis-ci.org/containerd/console.svg?branch=master)](https://travis-ci.org/containerd/console) + +Golang package for dealing with consoles. Light on deps and a simple API. + +## Modifying the current process + +```go +current := console.Current() +defer current.Reset() + +if err := current.SetRaw(); err != nil { +} +ws, err := current.Size() +current.Resize(ws) +``` + +## Project details + +console is a containerd sub-project, licensed under the [Apache 2.0 license](./LICENSE). +As a containerd sub-project, you will find the: + * [Project governance](https://github.com/containerd/project/blob/master/GOVERNANCE.md), + * [Maintainers](https://github.com/containerd/project/blob/master/MAINTAINERS), + * and [Contributing guidelines](https://github.com/containerd/project/blob/master/CONTRIBUTING.md) + +information in our [`containerd/project`](https://github.com/containerd/project) repository. diff --git a/vendor/github.com/containerd/console/console.go b/vendor/github.com/containerd/console/console.go new file mode 100644 index 0000000..c187a9b --- /dev/null +++ b/vendor/github.com/containerd/console/console.go @@ -0,0 +1,78 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package console + +import ( + "errors" + "io" + "os" +) + +var ErrNotAConsole = errors.New("provided file is not a console") + +type Console interface { + io.Reader + io.Writer + io.Closer + + // Resize resizes the console to the provided window size + Resize(WinSize) error + // ResizeFrom resizes the calling console to the size of the + // provided console + ResizeFrom(Console) error + // SetRaw sets the console in raw mode + SetRaw() error + // DisableEcho disables echo on the console + DisableEcho() error + // Reset restores the console to its original state + Reset() error + // Size returns the window size of the console + Size() (WinSize, error) + // Fd returns the console's file descriptor + Fd() uintptr + // Name returns the console's file name + Name() string +} + +// WinSize specifies the window size of the console +type WinSize struct { + // Height of the console + Height uint16 + // Width of the console + Width uint16 + x uint16 + y uint16 +} + +// Current returns the current process's console +func Current() Console { + c, err := ConsoleFromFile(os.Stdin) + if err != nil { + // stdin should always be a console for the design + // of this function + panic(err) + } + return c +} + +// ConsoleFromFile returns a console using the provided file +func ConsoleFromFile(f *os.File) (Console, error) { + if err := checkConsole(f); err != nil { + return nil, err + } + return newMaster(f) +} diff --git a/vendor/github.com/containerd/console/console_linux.go b/vendor/github.com/containerd/console/console_linux.go new file mode 100644 index 0000000..42274e1 --- /dev/null +++ b/vendor/github.com/containerd/console/console_linux.go @@ -0,0 +1,275 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package console + +import ( + "io" + "os" + "sync" + + "golang.org/x/sys/unix" +) + +const ( + maxEvents = 128 +) + +// Epoller manages multiple epoll consoles using edge-triggered epoll api so we +// don't have to deal with repeated wake-up of EPOLLERR or EPOLLHUP. +// For more details, see: +// - https://github.com/systemd/systemd/pull/4262 +// - https://github.com/moby/moby/issues/27202 +// +// Example usage of Epoller and EpollConsole is as follows: +// +// epoller, _ := NewEpoller() +// epollConsole, _ := epoller.Add(console) +// go epoller.Wait() +// var ( +// b bytes.Buffer +// wg sync.WaitGroup +// ) +// wg.Add(1) +// go func() { +// io.Copy(&b, epollConsole) +// wg.Done() +// }() +// // perform I/O on the console +// epollConsole.Shutdown(epoller.CloseConsole) +// wg.Wait() +// epollConsole.Close() +type Epoller struct { + efd int + mu sync.Mutex + fdMapping map[int]*EpollConsole +} + +// NewEpoller returns an instance of epoller with a valid epoll fd. +func NewEpoller() (*Epoller, error) { + efd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) + if err != nil { + return nil, err + } + return &Epoller{ + efd: efd, + fdMapping: make(map[int]*EpollConsole), + }, nil +} + +// Add creates an epoll console based on the provided console. The console will +// be registered with EPOLLET (i.e. using edge-triggered notification) and its +// file descriptor will be set to non-blocking mode. After this, user should use +// the returned console to perform I/O. 
+func (e *Epoller) Add(console Console) (*EpollConsole, error) { + sysfd := int(console.Fd()) + // Set sysfd to non-blocking mode + if err := unix.SetNonblock(sysfd, true); err != nil { + return nil, err + } + + ev := unix.EpollEvent{ + Events: unix.EPOLLIN | unix.EPOLLOUT | unix.EPOLLRDHUP | unix.EPOLLET, + Fd: int32(sysfd), + } + if err := unix.EpollCtl(e.efd, unix.EPOLL_CTL_ADD, sysfd, &ev); err != nil { + return nil, err + } + ef := &EpollConsole{ + Console: console, + sysfd: sysfd, + readc: sync.NewCond(&sync.Mutex{}), + writec: sync.NewCond(&sync.Mutex{}), + } + e.mu.Lock() + e.fdMapping[sysfd] = ef + e.mu.Unlock() + return ef, nil +} + +// Wait starts the loop to wait for its consoles' notifications and signal +// appropriate console that it can perform I/O. +func (e *Epoller) Wait() error { + events := make([]unix.EpollEvent, maxEvents) + for { + n, err := unix.EpollWait(e.efd, events, -1) + if err != nil { + // EINTR: The call was interrupted by a signal handler before either + // any of the requested events occurred or the timeout expired + if err == unix.EINTR { + continue + } + return err + } + for i := 0; i < n; i++ { + ev := &events[i] + // the console is ready to be read from + if ev.Events&(unix.EPOLLIN|unix.EPOLLHUP|unix.EPOLLERR) != 0 { + if epfile := e.getConsole(int(ev.Fd)); epfile != nil { + epfile.signalRead() + } + } + // the console is ready to be written to + if ev.Events&(unix.EPOLLOUT|unix.EPOLLHUP|unix.EPOLLERR) != 0 { + if epfile := e.getConsole(int(ev.Fd)); epfile != nil { + epfile.signalWrite() + } + } + } + } +} + +// CloseConsole unregisters the console's file descriptor from epoll interface +func (e *Epoller) CloseConsole(fd int) error { + e.mu.Lock() + defer e.mu.Unlock() + delete(e.fdMapping, fd) + return unix.EpollCtl(e.efd, unix.EPOLL_CTL_DEL, fd, &unix.EpollEvent{}) +} + +func (e *Epoller) getConsole(sysfd int) *EpollConsole { + e.mu.Lock() + f := e.fdMapping[sysfd] + e.mu.Unlock() + return f +} + +// Close closes the epoll fd 
+func (e *Epoller) Close() error { + return unix.Close(e.efd) +} + +// EpollConsole acts like a console but registers its file descriptor with an +// epoll fd and uses epoll API to perform I/O. +type EpollConsole struct { + Console + readc *sync.Cond + writec *sync.Cond + sysfd int + closed bool +} + +// Read reads up to len(p) bytes into p. It returns the number of bytes read +// (0 <= n <= len(p)) and any error encountered. +// +// If the console's read returns EAGAIN or EIO, we assume that it's a +// temporary error because the other side went away and wait for the signal +// generated by epoll event to continue. +func (ec *EpollConsole) Read(p []byte) (n int, err error) { + var read int + ec.readc.L.Lock() + defer ec.readc.L.Unlock() + for { + read, err = ec.Console.Read(p[n:]) + n += read + if err != nil { + var hangup bool + if perr, ok := err.(*os.PathError); ok { + hangup = (perr.Err == unix.EAGAIN || perr.Err == unix.EIO) + } else { + hangup = (err == unix.EAGAIN || err == unix.EIO) + } + // if the other end disappears, assume this is temporary and wait for the + // signal to continue again. Unless we didn't read anything and the + // console is already marked as closed then we should exit + if hangup && !(n == 0 && len(p) > 0 && ec.closed) { + ec.readc.Wait() + continue + } + } + break + } + // if we didn't read anything then return io.EOF to end gracefully + if n == 0 && len(p) > 0 && err == nil { + err = io.EOF + } + // signal for others that we finished the read + ec.readc.Signal() + return n, err +} + +// Write writes len(p) bytes from p to the console. It returns the number of bytes +// written from p (0 <= n <= len(p)) and any error encountered that caused +// the write to stop early. +// +// If a write to the console returns EAGAIN or EIO, we assume that it's a +// temporary error because the other side went away and wait for the signal +// generated by epoll event to continue. 
+func (ec *EpollConsole) Write(p []byte) (n int, err error) { + var written int + ec.writec.L.Lock() + defer ec.writec.L.Unlock() + for { + written, err = ec.Console.Write(p[n:]) + n += written + if err != nil { + var hangup bool + if perr, ok := err.(*os.PathError); ok { + hangup = (perr.Err == unix.EAGAIN || perr.Err == unix.EIO) + } else { + hangup = (err == unix.EAGAIN || err == unix.EIO) + } + // if the other end disappears, assume this is temporary and wait for the + // signal to continue again. + if hangup { + ec.writec.Wait() + continue + } + } + // unrecoverable error, break the loop and return the error + break + } + if n < len(p) && err == nil { + err = io.ErrShortWrite + } + // signal for others that we finished the write + ec.writec.Signal() + return n, err +} + +// Shutdown closes the file descriptor and signals call waiters for this fd. +// It accepts a callback which will be called with the console's fd. The +// callback typically will be used to do further cleanup such as unregister the +// console's fd from the epoll interface. +// User should call Shutdown and wait for all I/O operation to be finished +// before closing the console. +func (ec *EpollConsole) Shutdown(close func(int) error) error { + ec.readc.L.Lock() + defer ec.readc.L.Unlock() + ec.writec.L.Lock() + defer ec.writec.L.Unlock() + + ec.readc.Broadcast() + ec.writec.Broadcast() + ec.closed = true + return close(ec.sysfd) +} + +// signalRead signals that the console is readable. +func (ec *EpollConsole) signalRead() { + ec.readc.L.Lock() + ec.readc.Signal() + ec.readc.L.Unlock() +} + +// signalWrite signals that the console is writable. 
+func (ec *EpollConsole) signalWrite() { + ec.writec.L.Lock() + ec.writec.Signal() + ec.writec.L.Unlock() +} diff --git a/vendor/github.com/containerd/console/console_unix.go b/vendor/github.com/containerd/console/console_unix.go new file mode 100644 index 0000000..a4a8d12 --- /dev/null +++ b/vendor/github.com/containerd/console/console_unix.go @@ -0,0 +1,158 @@ +// +build darwin freebsd linux openbsd solaris + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package console + +import ( + "os" + + "golang.org/x/sys/unix" +) + +// NewPty creates a new pty pair +// The master is returned as the first console and a string +// with the path to the pty slave is returned as the second +func NewPty() (Console, string, error) { + f, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, "", err + } + slave, err := ptsname(f) + if err != nil { + return nil, "", err + } + if err := unlockpt(f); err != nil { + return nil, "", err + } + m, err := newMaster(f) + if err != nil { + return nil, "", err + } + return m, slave, nil +} + +type master struct { + f *os.File + original *unix.Termios +} + +func (m *master) Read(b []byte) (int, error) { + return m.f.Read(b) +} + +func (m *master) Write(b []byte) (int, error) { + return m.f.Write(b) +} + +func (m *master) Close() error { + return m.f.Close() +} + +func (m *master) Resize(ws WinSize) error { + return tcswinsz(m.f.Fd(), ws) +} + +func (m 
*master) ResizeFrom(c Console) error { + ws, err := c.Size() + if err != nil { + return err + } + return m.Resize(ws) +} + +func (m *master) Reset() error { + if m.original == nil { + return nil + } + return tcset(m.f.Fd(), m.original) +} + +func (m *master) getCurrent() (unix.Termios, error) { + var termios unix.Termios + if err := tcget(m.f.Fd(), &termios); err != nil { + return unix.Termios{}, err + } + return termios, nil +} + +func (m *master) SetRaw() error { + rawState, err := m.getCurrent() + if err != nil { + return err + } + rawState = cfmakeraw(rawState) + rawState.Oflag = rawState.Oflag | unix.OPOST + return tcset(m.f.Fd(), &rawState) +} + +func (m *master) DisableEcho() error { + rawState, err := m.getCurrent() + if err != nil { + return err + } + rawState.Lflag = rawState.Lflag &^ unix.ECHO + return tcset(m.f.Fd(), &rawState) +} + +func (m *master) Size() (WinSize, error) { + return tcgwinsz(m.f.Fd()) +} + +func (m *master) Fd() uintptr { + return m.f.Fd() +} + +func (m *master) Name() string { + return m.f.Name() +} + +// checkConsole checks if the provided file is a console +func checkConsole(f *os.File) error { + var termios unix.Termios + if tcget(f.Fd(), &termios) != nil { + return ErrNotAConsole + } + return nil +} + +func newMaster(f *os.File) (Console, error) { + m := &master{ + f: f, + } + t, err := m.getCurrent() + if err != nil { + return nil, err + } + m.original = &t + return m, nil +} + +// ClearONLCR sets the necessary tty_ioctl(4)s to ensure that a pty pair +// created by us acts normally. In particular, a not-very-well-known default of +// Linux unix98 ptys is that they have +onlcr by default. While this isn't a +// problem for terminal emulators, because we relay data from the terminal we +// also relay that funky line discipline. +func ClearONLCR(fd uintptr) error { + return setONLCR(fd, false) +} + +// SetONLCR sets the necessary tty_ioctl(4)s to ensure that a pty pair +// created by us acts as intended for a terminal emulator. 
+func SetONLCR(fd uintptr) error { + return setONLCR(fd, true) +} diff --git a/vendor/github.com/containerd/console/console_windows.go b/vendor/github.com/containerd/console/console_windows.go new file mode 100644 index 0000000..62dbe1c --- /dev/null +++ b/vendor/github.com/containerd/console/console_windows.go @@ -0,0 +1,216 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package console + +import ( + "fmt" + "os" + + "github.com/pkg/errors" + "golang.org/x/sys/windows" +) + +var ( + vtInputSupported bool + ErrNotImplemented = errors.New("not implemented") +) + +func (m *master) initStdios() { + m.in = windows.Handle(os.Stdin.Fd()) + if err := windows.GetConsoleMode(m.in, &m.inMode); err == nil { + // Validate that windows.ENABLE_VIRTUAL_TERMINAL_INPUT is supported, but do not set it. + if err = windows.SetConsoleMode(m.in, m.inMode|windows.ENABLE_VIRTUAL_TERMINAL_INPUT); err == nil { + vtInputSupported = true + } + // Unconditionally set the console mode back even on failure because SetConsoleMode + // remembers invalid bits on input handles. 
+ windows.SetConsoleMode(m.in, m.inMode) + } else { + fmt.Printf("failed to get console mode for stdin: %v\n", err) + } + + m.out = windows.Handle(os.Stdout.Fd()) + if err := windows.GetConsoleMode(m.out, &m.outMode); err == nil { + if err := windows.SetConsoleMode(m.out, m.outMode|windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING); err == nil { + m.outMode |= windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING + } else { + windows.SetConsoleMode(m.out, m.outMode) + } + } else { + fmt.Printf("failed to get console mode for stdout: %v\n", err) + } + + m.err = windows.Handle(os.Stderr.Fd()) + if err := windows.GetConsoleMode(m.err, &m.errMode); err == nil { + if err := windows.SetConsoleMode(m.err, m.errMode|windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING); err == nil { + m.errMode |= windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING + } else { + windows.SetConsoleMode(m.err, m.errMode) + } + } else { + fmt.Printf("failed to get console mode for stderr: %v\n", err) + } +} + +type master struct { + in windows.Handle + inMode uint32 + + out windows.Handle + outMode uint32 + + err windows.Handle + errMode uint32 +} + +func (m *master) SetRaw() error { + if err := makeInputRaw(m.in, m.inMode); err != nil { + return err + } + + // Set StdOut and StdErr to raw mode, we ignore failures since + // windows.DISABLE_NEWLINE_AUTO_RETURN might not be supported on this version of + // Windows. 
+ + windows.SetConsoleMode(m.out, m.outMode|windows.DISABLE_NEWLINE_AUTO_RETURN) + + windows.SetConsoleMode(m.err, m.errMode|windows.DISABLE_NEWLINE_AUTO_RETURN) + + return nil +} + +func (m *master) Reset() error { + for _, s := range []struct { + fd windows.Handle + mode uint32 + }{ + {m.in, m.inMode}, + {m.out, m.outMode}, + {m.err, m.errMode}, + } { + if err := windows.SetConsoleMode(s.fd, s.mode); err != nil { + return errors.Wrap(err, "unable to restore console mode") + } + } + + return nil +} + +func (m *master) Size() (WinSize, error) { + var info windows.ConsoleScreenBufferInfo + err := windows.GetConsoleScreenBufferInfo(m.out, &info) + if err != nil { + return WinSize{}, errors.Wrap(err, "unable to get console info") + } + + winsize := WinSize{ + Width: uint16(info.Window.Right - info.Window.Left + 1), + Height: uint16(info.Window.Bottom - info.Window.Top + 1), + } + + return winsize, nil +} + +func (m *master) Resize(ws WinSize) error { + return ErrNotImplemented +} + +func (m *master) ResizeFrom(c Console) error { + return ErrNotImplemented +} + +func (m *master) DisableEcho() error { + mode := m.inMode &^ windows.ENABLE_ECHO_INPUT + mode |= windows.ENABLE_PROCESSED_INPUT + mode |= windows.ENABLE_LINE_INPUT + + if err := windows.SetConsoleMode(m.in, mode); err != nil { + return errors.Wrap(err, "unable to set console to disable echo") + } + + return nil +} + +func (m *master) Close() error { + return nil +} + +func (m *master) Read(b []byte) (int, error) { + return os.Stdin.Read(b) +} + +func (m *master) Write(b []byte) (int, error) { + return os.Stdout.Write(b) +} + +func (m *master) Fd() uintptr { + return uintptr(m.in) +} + +// on windows, console can only be made from os.Std{in,out,err}, hence there +// isnt a single name here we can use. Return a dummy "console" value in this +// case should be sufficient. 
+func (m *master) Name() string { + return "console" +} + +// makeInputRaw puts the terminal (Windows Console) connected to the given +// file descriptor into raw mode +func makeInputRaw(fd windows.Handle, mode uint32) error { + // See + // -- https://msdn.microsoft.com/en-us/library/windows/desktop/ms686033(v=vs.85).aspx + // -- https://msdn.microsoft.com/en-us/library/windows/desktop/ms683462(v=vs.85).aspx + + // Disable these modes + mode &^= windows.ENABLE_ECHO_INPUT + mode &^= windows.ENABLE_LINE_INPUT + mode &^= windows.ENABLE_MOUSE_INPUT + mode &^= windows.ENABLE_WINDOW_INPUT + mode &^= windows.ENABLE_PROCESSED_INPUT + + // Enable these modes + mode |= windows.ENABLE_EXTENDED_FLAGS + mode |= windows.ENABLE_INSERT_MODE + mode |= windows.ENABLE_QUICK_EDIT_MODE + + if vtInputSupported { + mode |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT + } + + if err := windows.SetConsoleMode(fd, mode); err != nil { + return errors.Wrap(err, "unable to set console to raw mode") + } + + return nil +} + +func checkConsole(f *os.File) error { + var mode uint32 + if err := windows.GetConsoleMode(windows.Handle(f.Fd()), &mode); err != nil { + return err + } + return nil +} + +func newMaster(f *os.File) (Console, error) { + if f != os.Stdin && f != os.Stdout && f != os.Stderr { + return nil, errors.New("creating a console from a file is not supported on windows") + } + m := &master{} + m.initStdios() + return m, nil +} diff --git a/vendor/github.com/containerd/console/tc_darwin.go b/vendor/github.com/containerd/console/tc_darwin.go new file mode 100644 index 0000000..b0128ab --- /dev/null +++ b/vendor/github.com/containerd/console/tc_darwin.go @@ -0,0 +1,53 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package console + +import ( + "fmt" + "os" + "unsafe" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TIOCGETA + cmdTcSet = unix.TIOCSETA +) + +func ioctl(fd, flag, data uintptr) error { + if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 { + return err + } + return nil +} + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +func unlockpt(f *os.File) error { + var u int32 + return ioctl(f.Fd(), unix.TIOCPTYUNLK, uintptr(unsafe.Pointer(&u))) +} + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCPTYGNAME) + if err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} diff --git a/vendor/github.com/containerd/console/tc_freebsd.go b/vendor/github.com/containerd/console/tc_freebsd.go new file mode 100644 index 0000000..04583a6 --- /dev/null +++ b/vendor/github.com/containerd/console/tc_freebsd.go @@ -0,0 +1,45 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package console + +import ( + "fmt" + "os" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TIOCGETA + cmdTcSet = unix.TIOCSETA +) + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +// This does not exist on FreeBSD, it does not allocate controlling terminals on open +func unlockpt(f *os.File) error { + return nil +} + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) + if err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} diff --git a/vendor/github.com/containerd/console/tc_linux.go b/vendor/github.com/containerd/console/tc_linux.go new file mode 100644 index 0000000..1bdd68e --- /dev/null +++ b/vendor/github.com/containerd/console/tc_linux.go @@ -0,0 +1,49 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package console + +import ( + "fmt" + "os" + "unsafe" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TCGETS + cmdTcSet = unix.TCSETS +) + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +func unlockpt(f *os.File) error { + var u int32 + if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))); err != 0 { + return err + } + return nil +} + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + var u uint32 + if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCGPTN, uintptr(unsafe.Pointer(&u))); err != 0 { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", u), nil +} diff --git a/vendor/github.com/containerd/console/tc_openbsd_cgo.go b/vendor/github.com/containerd/console/tc_openbsd_cgo.go new file mode 100644 index 0000000..f0cec06 --- /dev/null +++ b/vendor/github.com/containerd/console/tc_openbsd_cgo.go @@ -0,0 +1,51 @@ +// +build openbsd,cgo + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package console + +import ( + "os" + + "golang.org/x/sys/unix" +) + +//#include <stdlib.h> +import "C" + +const ( + cmdTcGet = unix.TIOCGETA + cmdTcSet = unix.TIOCSETA +) + +// ptsname retrieves the name of the first available pts for the given master.
+func ptsname(f *os.File) (string, error) { + ptspath, err := C.ptsname(C.int(f.Fd())) + if err != nil { + return "", err + } + return C.GoString(ptspath), nil +} + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +func unlockpt(f *os.File) error { + if _, err := C.grantpt(C.int(f.Fd())); err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/containerd/console/tc_openbsd_nocgo.go b/vendor/github.com/containerd/console/tc_openbsd_nocgo.go new file mode 100644 index 0000000..daccce2 --- /dev/null +++ b/vendor/github.com/containerd/console/tc_openbsd_nocgo.go @@ -0,0 +1,47 @@ +// +build openbsd,!cgo + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// +// Implementing the functions below requires cgo support. Non-cgo stubs +// versions are defined below to enable cross-compilation of source code +// that depends on these functions, but the resultant cross-compiled +// binaries cannot actually be used. If the stub function(s) below are +// actually invoked they will display an error message and cause the +// calling process to exit. 
+// + +package console + +import ( + "os" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TIOCGETA + cmdTcSet = unix.TIOCSETA +) + +func ptsname(f *os.File) (string, error) { + panic("ptsname() support requires cgo.") +} + +func unlockpt(f *os.File) error { + panic("unlockpt() support requires cgo.") +} diff --git a/vendor/github.com/containerd/console/tc_solaris_cgo.go b/vendor/github.com/containerd/console/tc_solaris_cgo.go new file mode 100644 index 0000000..e36a68e --- /dev/null +++ b/vendor/github.com/containerd/console/tc_solaris_cgo.go @@ -0,0 +1,51 @@ +// +build solaris,cgo + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package console + +import ( + "os" + + "golang.org/x/sys/unix" +) + +//#include <stdlib.h> +import "C" + +const ( + cmdTcGet = unix.TCGETS + cmdTcSet = unix.TCSETS +) + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + ptspath, err := C.ptsname(C.int(f.Fd())) + if err != nil { + return "", err + } + return C.GoString(ptspath), nil +} + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty.
+func unlockpt(f *os.File) error { + if _, err := C.grantpt(C.int(f.Fd())); err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/containerd/console/tc_solaris_nocgo.go b/vendor/github.com/containerd/console/tc_solaris_nocgo.go new file mode 100644 index 0000000..eb0bd2c --- /dev/null +++ b/vendor/github.com/containerd/console/tc_solaris_nocgo.go @@ -0,0 +1,47 @@ +// +build solaris,!cgo + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// +// Implementing the functions below requires cgo support. Non-cgo stubs +// versions are defined below to enable cross-compilation of source code +// that depends on these functions, but the resultant cross-compiled +// binaries cannot actually be used. If the stub function(s) below are +// actually invoked they will display an error message and cause the +// calling process to exit. 
+// + +package console + +import ( + "os" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TCGETS + cmdTcSet = unix.TCSETS +) + +func ptsname(f *os.File) (string, error) { + panic("ptsname() support requires cgo.") +} + +func unlockpt(f *os.File) error { + panic("unlockpt() support requires cgo.") +} diff --git a/vendor/github.com/containerd/console/tc_unix.go b/vendor/github.com/containerd/console/tc_unix.go new file mode 100644 index 0000000..7ae773c --- /dev/null +++ b/vendor/github.com/containerd/console/tc_unix.go @@ -0,0 +1,91 @@ +// +build darwin freebsd linux openbsd solaris + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package console + +import ( + "golang.org/x/sys/unix" +) + +func tcget(fd uintptr, p *unix.Termios) error { + termios, err := unix.IoctlGetTermios(int(fd), cmdTcGet) + if err != nil { + return err + } + *p = *termios + return nil +} + +func tcset(fd uintptr, p *unix.Termios) error { + return unix.IoctlSetTermios(int(fd), cmdTcSet, p) +} + +func tcgwinsz(fd uintptr) (WinSize, error) { + var ws WinSize + + uws, err := unix.IoctlGetWinsize(int(fd), unix.TIOCGWINSZ) + if err != nil { + return ws, err + } + + // Translate from unix.Winsize to console.WinSize + ws.Height = uws.Row + ws.Width = uws.Col + ws.x = uws.Xpixel + ws.y = uws.Ypixel + return ws, nil +} + +func tcswinsz(fd uintptr, ws WinSize) error { + // Translate from console.WinSize to unix.Winsize + + var uws unix.Winsize + uws.Row = ws.Height + uws.Col = ws.Width + uws.Xpixel = ws.x + uws.Ypixel = ws.y + + return unix.IoctlSetWinsize(int(fd), unix.TIOCSWINSZ, &uws) +} + +func setONLCR(fd uintptr, enable bool) error { + var termios unix.Termios + if err := tcget(fd, &termios); err != nil { + return err + } + if enable { + // Set +onlcr so we can act like a real terminal + termios.Oflag |= unix.ONLCR + } else { + // Set -onlcr so we don't have to deal with \r. 
+ termios.Oflag &^= unix.ONLCR + } + return tcset(fd, &termios) +} + +func cfmakeraw(t unix.Termios) unix.Termios { + t.Iflag &^= (unix.IGNBRK | unix.BRKINT | unix.PARMRK | unix.ISTRIP | unix.INLCR | unix.IGNCR | unix.ICRNL | unix.IXON) + t.Oflag &^= unix.OPOST + t.Lflag &^= (unix.ECHO | unix.ECHONL | unix.ICANON | unix.ISIG | unix.IEXTEN) + t.Cflag &^= (unix.CSIZE | unix.PARENB) + t.Cflag &^= unix.CS8 + t.Cc[unix.VMIN] = 1 + t.Cc[unix.VTIME] = 0 + + return t +} diff --git a/vendor/github.com/coreos/go-systemd/LICENSE b/vendor/github.com/coreos/go-systemd/LICENSE new file mode 100644 index 0000000..37ec93a --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. 
+ +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." 
+ +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
+ +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. 
+ +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/coreos/go-systemd/NOTICE b/vendor/github.com/coreos/go-systemd/NOTICE new file mode 100644 index 0000000..23a0ada --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/NOTICE @@ -0,0 +1,5 @@ +CoreOS Project +Copyright 2018 CoreOS, Inc + +This product includes software developed at CoreOS, Inc. +(http://www.coreos.com/). diff --git a/vendor/github.com/coreos/go-systemd/README.md b/vendor/github.com/coreos/go-systemd/README.md new file mode 100644 index 0000000..cde3a8f --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/README.md @@ -0,0 +1,71 @@ +# go-systemd + +[![Build Status](https://travis-ci.org/coreos/go-systemd.png?branch=master)](https://travis-ci.org/coreos/go-systemd) +[![godoc](https://godoc.org/github.com/coreos/go-systemd?status.svg)](http://godoc.org/github.com/coreos/go-systemd) +![minimum golang 1.10](https://img.shields.io/badge/golang-1.10%2B-orange.svg) + + +Go bindings to systemd. 
The project has several packages: + +- `activation` - for writing and using socket activation from Go +- `daemon` - for notifying systemd of service status changes +- `dbus` - for starting/stopping/inspecting running services and units +- `journal` - for writing to systemd's logging service, journald +- `sdjournal` - for reading from journald by wrapping its C API +- `login1` - for integration with the systemd logind API +- `machine1` - for registering machines/containers with systemd +- `unit` - for (de)serialization and comparison of unit files + +## Socket Activation + +An example HTTP server using socket activation can be quickly set up by following this README on a Linux machine running systemd: + +https://github.com/coreos/go-systemd/tree/master/examples/activation/httpserver + +## systemd Service Notification + +The `daemon` package is an implementation of the [sd_notify protocol](https://www.freedesktop.org/software/systemd/man/sd_notify.html#Description). It can be used to inform systemd of service start-up completion, watchdog events, and other status changes. + +## D-Bus + +The `dbus` package connects to the [systemd D-Bus API](http://www.freedesktop.org/wiki/Software/systemd/dbus/) and lets you start, stop and introspect systemd units. The API docs are here: + +http://godoc.org/github.com/coreos/go-systemd/dbus + +### Debugging + +Create `/etc/dbus-1/system-local.conf` that looks like this: + +``` + + + + + + + +``` + +## Journal + +### Writing to the Journal + +Using the pure-Go `journal` package you can submit journal entries directly to systemd's journal, taking advantage of features like indexed key/value pairs for each log entry. + +### Reading from the Journal + +The `sdjournal` package provides read access to the journal by wrapping around journald's native C API; consequently it requires cgo and the journal headers to be available. 
+ +## logind + +The `login1` package provides functions to integrate with the [systemd logind API](http://www.freedesktop.org/wiki/Software/systemd/logind/). + +## machined + +The `machine1` package allows interaction with the [systemd machined D-Bus API](http://www.freedesktop.org/wiki/Software/systemd/machined/). + +## Units + +The `unit` package provides various functions for working with [systemd unit files](http://www.freedesktop.org/software/systemd/man/systemd.unit.html). diff --git a/vendor/github.com/coreos/go-systemd/activation/files.go b/vendor/github.com/coreos/go-systemd/activation/files.go new file mode 100644 index 0000000..29dd18d --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/activation/files.go @@ -0,0 +1,67 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package activation implements primitives for systemd socket activation. +package activation + +import ( + "os" + "strconv" + "strings" + "syscall" +) + +const ( + // listenFdsStart corresponds to `SD_LISTEN_FDS_START`. + listenFdsStart = 3 +) + +// Files returns a slice containing a `os.File` object for each +// file descriptor passed to this process via systemd fd-passing protocol. +// +// The order of the file descriptors is preserved in the returned slice. +// `unsetEnv` is typically set to `true` in order to avoid clashes in +// fd usage and to avoid leaking environment flags to child processes. 
+func Files(unsetEnv bool) []*os.File {
+	if unsetEnv {
+		// Deferred so the variables are cleared on every return path,
+		// including the early exits below.
+		defer os.Unsetenv("LISTEN_PID")
+		defer os.Unsetenv("LISTEN_FDS")
+		defer os.Unsetenv("LISTEN_FDNAMES")
+	}
+
+	// Descriptors are only handed out when LISTEN_PID names this process.
+	pid, err := strconv.Atoi(os.Getenv("LISTEN_PID"))
+	if err != nil || pid != os.Getpid() {
+		return nil
+	}
+
+	// Zero means nothing was passed. A negative count is malformed and must
+	// be rejected here: it would otherwise be used as the capacity argument
+	// of make() below, which panics on negative values.
+	nfds, err := strconv.Atoi(os.Getenv("LISTEN_FDS"))
+	if err != nil || nfds <= 0 {
+		return nil
+	}
+
+	// Optional colon-separated names, matched to descriptors by position.
+	names := strings.Split(os.Getenv("LISTEN_FDNAMES"), ":")
+
+	files := make([]*os.File, 0, nfds)
+	for fd := listenFdsStart; fd < listenFdsStart+nfds; fd++ {
+		// Mark close-on-exec so the descriptor is not inherited across exec.
+		syscall.CloseOnExec(fd)
+		// Synthetic fallback name, used when no (or an empty) name was given.
+		name := "LISTEN_FD_" + strconv.Itoa(fd)
+		offset := fd - listenFdsStart
+		if offset < len(names) && len(names[offset]) > 0 {
+			name = names[offset]
+		}
+		files = append(files, os.NewFile(uintptr(fd), name))
+	}
+
+	return files
+}
diff --git a/vendor/github.com/coreos/go-systemd/activation/listeners.go b/vendor/github.com/coreos/go-systemd/activation/listeners.go
new file mode 100644
index 0000000..bb5cc23
--- /dev/null
+++ b/vendor/github.com/coreos/go-systemd/activation/listeners.go
@@ -0,0 +1,103 @@
+// Copyright 2015 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package activation
+
+import (
+	"crypto/tls"
+	"net"
+)
+
+// Listeners returns a slice containing a net.Listener for each matching socket type
+// passed to this process.
+//
+// The order of the file descriptors is preserved in the returned slice.
+// Nil values are used to fill any gaps.
For example if systemd were to return file descriptors
+// corresponding with "udp, tcp, tcp", then the slice would contain {nil, net.Listener, net.Listener}
+func Listeners() ([]net.Listener, error) {
+	files := Files(true)
+	listeners := make([]net.Listener, len(files))
+
+	for i, f := range files {
+		// A descriptor that net.FileListener rejects keeps its nil slot and
+		// is not closed here.
+		if pc, err := net.FileListener(f); err == nil {
+			listeners[i] = pc
+			f.Close()
+		}
+	}
+	return listeners, nil
+}
+
+// ListenersWithNames maps a listener name to a set of net.Listener instances.
+// Descriptors that net.FileListener rejects are omitted from the map.
+func ListenersWithNames() (map[string][]net.Listener, error) {
+	files := Files(true)
+	listeners := map[string][]net.Listener{}
+
+	for _, f := range files {
+		if pc, err := net.FileListener(f); err == nil {
+			// Appending to the (possibly nil) existing entry covers both the
+			// first and subsequent listeners for a name.
+			listeners[f.Name()] = append(listeners[f.Name()], pc)
+			f.Close()
+		}
+	}
+	return listeners, nil
+}
+
+// TLSListeners returns a slice containing a net.listener for each matching TCP socket type
+// passed to this process.
+// It uses default Listeners func and forces TCP sockets handlers to use TLS based on tlsConfig.
+//
+// Slots that Listeners left nil are passed through unchanged. If tlsConfig is
+// nil the listeners are returned unwrapped.
+func TLSListeners(tlsConfig *tls.Config) ([]net.Listener, error) {
+	listeners, err := Listeners()
+
+	if listeners == nil || err != nil {
+		return nil, err
+	}
+
+	if tlsConfig != nil && err == nil {
+		for i, l := range listeners {
+			// Listeners fills gaps with nil; calling Addr on a nil
+			// net.Listener would panic, so those slots are skipped.
+			// Activate TLS only for TCP sockets
+			if l != nil && l.Addr().Network() == "tcp" {
+				listeners[i] = tls.NewListener(l, tlsConfig)
+			}
+		}
+	}
+
+	return listeners, err
+}
+
+// TLSListenersWithNames maps a listener name to a net.Listener with
+// the associated TLS configuration.
+func TLSListenersWithNames(tlsConfig *tls.Config) (map[string][]net.Listener, error) { + listeners, err := ListenersWithNames() + + if listeners == nil || err != nil { + return nil, err + } + + if tlsConfig != nil && err == nil { + for _, ll := range listeners { + // Activate TLS only for TCP sockets + for i, l := range ll { + if l.Addr().Network() == "tcp" { + ll[i] = tls.NewListener(l, tlsConfig) + } + } + } + } + + return listeners, err +} diff --git a/vendor/github.com/coreos/go-systemd/activation/packetconns.go b/vendor/github.com/coreos/go-systemd/activation/packetconns.go new file mode 100644 index 0000000..a972067 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/activation/packetconns.go @@ -0,0 +1,38 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package activation + +import ( + "net" +) + +// PacketConns returns a slice containing a net.PacketConn for each matching socket type +// passed to this process. +// +// The order of the file descriptors is preserved in the returned slice. +// Nil values are used to fill any gaps. 
For example if systemd were to return file descriptors +// corresponding with "udp, tcp, udp", then the slice would contain {net.PacketConn, nil, net.PacketConn} +func PacketConns() ([]net.PacketConn, error) { + files := Files(true) + conns := make([]net.PacketConn, len(files)) + + for i, f := range files { + if pc, err := net.FilePacketConn(f); err == nil { + conns[i] = pc + f.Close() + } + } + return conns, nil +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/dbus.go b/vendor/github.com/coreos/go-systemd/dbus/dbus.go new file mode 100644 index 0000000..f652582 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/dbus.go @@ -0,0 +1,240 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Integration with the systemd D-Bus API. 
See http://www.freedesktop.org/wiki/Software/systemd/dbus/ +package dbus + +import ( + "encoding/hex" + "fmt" + "os" + "strconv" + "strings" + "sync" + + "github.com/godbus/dbus" +) + +const ( + alpha = `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ` + num = `0123456789` + alphanum = alpha + num + signalBuffer = 100 +) + +// needsEscape checks whether a byte in a potential dbus ObjectPath needs to be escaped +func needsEscape(i int, b byte) bool { + // Escape everything that is not a-z-A-Z-0-9 + // Also escape 0-9 if it's the first character + return strings.IndexByte(alphanum, b) == -1 || + (i == 0 && strings.IndexByte(num, b) != -1) +} + +// PathBusEscape sanitizes a constituent string of a dbus ObjectPath using the +// rules that systemd uses for serializing special characters. +func PathBusEscape(path string) string { + // Special case the empty string + if len(path) == 0 { + return "_" + } + n := []byte{} + for i := 0; i < len(path); i++ { + c := path[i] + if needsEscape(i, c) { + e := fmt.Sprintf("_%x", c) + n = append(n, []byte(e)...) + } else { + n = append(n, c) + } + } + return string(n) +} + +// pathBusUnescape is the inverse of PathBusEscape. +func pathBusUnescape(path string) string { + if path == "_" { + return "" + } + n := []byte{} + for i := 0; i < len(path); i++ { + c := path[i] + if c == '_' && i+2 < len(path) { + res, err := hex.DecodeString(path[i+1 : i+3]) + if err == nil { + n = append(n, res...) + } + i += 2 + } else { + n = append(n, c) + } + } + return string(n) +} + +// Conn is a connection to systemd's dbus endpoint. 
+type Conn struct { + // sysconn/sysobj are only used to call dbus methods + sysconn *dbus.Conn + sysobj dbus.BusObject + + // sigconn/sigobj are only used to receive dbus signals + sigconn *dbus.Conn + sigobj dbus.BusObject + + jobListener struct { + jobs map[dbus.ObjectPath]chan<- string + sync.Mutex + } + subStateSubscriber struct { + updateCh chan<- *SubStateUpdate + errCh chan<- error + sync.Mutex + ignore map[dbus.ObjectPath]int64 + cleanIgnore int64 + } + propertiesSubscriber struct { + updateCh chan<- *PropertiesUpdate + errCh chan<- error + sync.Mutex + } +} + +// New establishes a connection to any available bus and authenticates. +// Callers should call Close() when done with the connection. +func New() (*Conn, error) { + conn, err := NewSystemConnection() + if err != nil && os.Geteuid() == 0 { + return NewSystemdConnection() + } + return conn, err +} + +// NewSystemConnection establishes a connection to the system bus and authenticates. +// Callers should call Close() when done with the connection +func NewSystemConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + return dbusAuthHelloConnection(dbus.SystemBusPrivate) + }) +} + +// NewUserConnection establishes a connection to the session bus and +// authenticates. This can be used to connect to systemd user instances. +// Callers should call Close() when done with the connection. +func NewUserConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + return dbusAuthHelloConnection(dbus.SessionBusPrivate) + }) +} + +// NewSystemdConnection establishes a private, direct connection to systemd. +// This can be used for communicating with systemd without a dbus daemon. +// Callers should call Close() when done with the connection. +func NewSystemdConnection() (*Conn, error) { + return NewConnection(func() (*dbus.Conn, error) { + // We skip Hello when talking directly to systemd. 
+ return dbusAuthConnection(func(opts ...dbus.ConnOption) (*dbus.Conn, error) { + return dbus.Dial("unix:path=/run/systemd/private") + }) + }) +} + +// Close closes an established connection +func (c *Conn) Close() { + c.sysconn.Close() + c.sigconn.Close() +} + +// NewConnection establishes a connection to a bus using a caller-supplied function. +// This allows connecting to remote buses through a user-supplied mechanism. +// The supplied function may be called multiple times, and should return independent connections. +// The returned connection must be fully initialised: the org.freedesktop.DBus.Hello call must have succeeded, +// and any authentication should be handled by the function. +func NewConnection(dialBus func() (*dbus.Conn, error)) (*Conn, error) { + sysconn, err := dialBus() + if err != nil { + return nil, err + } + + sigconn, err := dialBus() + if err != nil { + sysconn.Close() + return nil, err + } + + c := &Conn{ + sysconn: sysconn, + sysobj: systemdObject(sysconn), + sigconn: sigconn, + sigobj: systemdObject(sigconn), + } + + c.subStateSubscriber.ignore = make(map[dbus.ObjectPath]int64) + c.jobListener.jobs = make(map[dbus.ObjectPath]chan<- string) + + // Setup the listeners on jobs so that we can get completions + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal', interface='org.freedesktop.systemd1.Manager', member='JobRemoved'") + + c.dispatch() + return c, nil +} + +// GetManagerProperty returns the value of a property on the org.freedesktop.systemd1.Manager +// interface. The value is returned in its string representation, as defined at +// https://developer.gnome.org/glib/unstable/gvariant-text.html +func (c *Conn) GetManagerProperty(prop string) (string, error) { + variant, err := c.sysobj.GetProperty("org.freedesktop.systemd1.Manager." 
+ prop) + if err != nil { + return "", err + } + return variant.String(), nil +} + +func dbusAuthConnection(createBus func(opts ...dbus.ConnOption) (*dbus.Conn, error)) (*dbus.Conn, error) { + conn, err := createBus() + if err != nil { + return nil, err + } + + // Only use EXTERNAL method, and hardcode the uid (not username) + // to avoid a username lookup (which requires a dynamically linked + // libc) + methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(os.Getuid()))} + + err = conn.Auth(methods) + if err != nil { + conn.Close() + return nil, err + } + + return conn, nil +} + +func dbusAuthHelloConnection(createBus func(opts ...dbus.ConnOption) (*dbus.Conn, error)) (*dbus.Conn, error) { + conn, err := dbusAuthConnection(createBus) + if err != nil { + return nil, err + } + + if err = conn.Hello(); err != nil { + conn.Close() + return nil, err + } + + return conn, nil +} + +func systemdObject(conn *dbus.Conn) dbus.BusObject { + return conn.Object("org.freedesktop.systemd1", dbus.ObjectPath("/org/freedesktop/systemd1")) +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/methods.go b/vendor/github.com/coreos/go-systemd/dbus/methods.go new file mode 100644 index 0000000..5f2790a --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/methods.go @@ -0,0 +1,594 @@ +// Copyright 2015, 2018 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package dbus + +import ( + "errors" + "fmt" + "path" + "strconv" + + "github.com/godbus/dbus" +) + +func (c *Conn) jobComplete(signal *dbus.Signal) { + var id uint32 + var job dbus.ObjectPath + var unit string + var result string + dbus.Store(signal.Body, &id, &job, &unit, &result) + c.jobListener.Lock() + out, ok := c.jobListener.jobs[job] + if ok { + out <- result + delete(c.jobListener.jobs, job) + } + c.jobListener.Unlock() +} + +func (c *Conn) startJob(ch chan<- string, job string, args ...interface{}) (int, error) { + if ch != nil { + c.jobListener.Lock() + defer c.jobListener.Unlock() + } + + var p dbus.ObjectPath + err := c.sysobj.Call(job, 0, args...).Store(&p) + if err != nil { + return 0, err + } + + if ch != nil { + c.jobListener.jobs[p] = ch + } + + // ignore error since 0 is fine if conversion fails + jobID, _ := strconv.Atoi(path.Base(string(p))) + + return jobID, nil +} + +// StartUnit enqueues a start job and depending jobs, if any (unless otherwise +// specified by the mode string). +// +// Takes the unit to activate, plus a mode string. The mode needs to be one of +// replace, fail, isolate, ignore-dependencies, ignore-requirements. If +// "replace" the call will start the unit and its dependencies, possibly +// replacing already queued jobs that conflict with this. If "fail" the call +// will start the unit and its dependencies, but will fail if this would change +// an already queued job. If "isolate" the call will start the unit in question +// and terminate all units that aren't dependencies of it. If +// "ignore-dependencies" it will start a unit but ignore all its dependencies. +// If "ignore-requirements" it will start a unit but only ignore the +// requirement dependencies. It is not recommended to make use of the latter +// two options. +// +// If the provided channel is non-nil, a result string will be sent to it upon +// job completion: one of done, canceled, timeout, failed, dependency, skipped. 
+// done indicates successful execution of a job. canceled indicates that a job
+// has been canceled before it finished execution. timeout indicates that the
+// job timeout was reached. failed indicates that the job failed. dependency
+// indicates that a job this job has been depending on failed and the job hence
+// has been removed too. skipped indicates that a job was skipped because it
+// didn't apply to the unit's current state.
+//
+// If no error occurs, the ID of the underlying systemd job will be returned. There
+// does exist the possibility for no error to be returned, but for the returned job
+// ID to be 0. In this case, the actual underlying ID is not 0 and this datapoint
+// should not be considered authoritative.
+//
+// If an error does occur, it will be returned to the user alongside a job ID of 0.
+func (c *Conn) StartUnit(name string, mode string, ch chan<- string) (int, error) {
+	return c.startJob(ch, "org.freedesktop.systemd1.Manager.StartUnit", name, mode)
+}
+
+// StopUnit is similar to StartUnit but stops the specified unit rather
+// than starting it. It goes through the same startJob helper as StartUnit,
+// with the same name, mode and ch arguments.
+func (c *Conn) StopUnit(name string, mode string, ch chan<- string) (int, error) {
+	return c.startJob(ch, "org.freedesktop.systemd1.Manager.StopUnit", name, mode)
+}
+
+// ReloadUnit reloads a unit. Reloading is done only if the unit is already running and fails otherwise.
+// It goes through the same startJob helper as StartUnit.
+func (c *Conn) ReloadUnit(name string, mode string, ch chan<- string) (int, error) {
+	return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadUnit", name, mode)
+}
+
+// RestartUnit restarts a service. If a service is restarted that isn't
+// running it will be started. It goes through the same startJob helper as
+// StartUnit.
+func (c *Conn) RestartUnit(name string, mode string, ch chan<- string) (int, error) {
+	return c.startJob(ch, "org.freedesktop.systemd1.Manager.RestartUnit", name, mode)
+}
+
+// TryRestartUnit is like RestartUnit, except that a service that isn't running
+// is not affected by the restart.
+func (c *Conn) TryRestartUnit(name string, mode string, ch chan<- string) (int, error) {
+	// Delegates to startJob with the TryRestartUnit manager method.
+	return c.startJob(ch, "org.freedesktop.systemd1.Manager.TryRestartUnit", name, mode)
+}
+
+// ReloadOrRestartUnit attempts a reload if the unit supports it and uses a restart
+// otherwise.
+func (c *Conn) ReloadOrRestartUnit(name string, mode string, ch chan<- string) (int, error) {
+	return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadOrRestartUnit", name, mode)
+}
+
+// ReloadOrTryRestartUnit attempts a reload if the unit supports it and uses a "Try"
+// flavored restart otherwise.
+func (c *Conn) ReloadOrTryRestartUnit(name string, mode string, ch chan<- string) (int, error) {
+	return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadOrTryRestartUnit", name, mode)
+}
+
+// StartTransientUnit() may be used to create and start a transient unit, which
+// will be released as soon as it is not running or referenced anymore or the
+// system is rebooted. name is the unit name including suffix, and must be
+// unique. mode is the same as in StartUnit(), properties contains properties
+// of the unit.
+//
+// The job ID, error and ch semantics are those of startJob, as for StartUnit.
+func (c *Conn) StartTransientUnit(name string, mode string, properties []Property, ch chan<- string) (int, error) {
+	// The final argument is always an empty PropertyCollection slice.
+	// NOTE(review): presumably the method's auxiliary-units parameter — confirm
+	// against the systemd D-Bus API.
+	return c.startJob(ch, "org.freedesktop.systemd1.Manager.StartTransientUnit", name, mode, properties, make([]PropertyCollection, 0))
+}
+
+// KillUnit takes the unit name and a UNIX signal number to send. All of the unit's
+// processes are killed.
+//
+// KillUnit has no return value: the error from the D-Bus call is discarded,
+// so callers get no indication of failure.
+func (c *Conn) KillUnit(name string, signal int32) {
+	// "all" is passed as the method's second argument.
+	c.sysobj.Call("org.freedesktop.systemd1.Manager.KillUnit", 0, name, "all", signal).Store()
+}
+
+// ResetFailedUnit resets the "failed" state of a specific unit.
+// It returns the error, if any, from the D-Bus call.
+func (c *Conn) ResetFailedUnit(name string) error {
+	return c.sysobj.Call("org.freedesktop.systemd1.Manager.ResetFailedUnit", 0, name).Store()
+}
+
+// SystemState returns the systemd state. Equivalent to `systemctl is-system-running`.
+func (c *Conn) SystemState() (*Property, error) { + var err error + var prop dbus.Variant + + obj := c.sysconn.Object("org.freedesktop.systemd1", "/org/freedesktop/systemd1") + err = obj.Call("org.freedesktop.DBus.Properties.Get", 0, "org.freedesktop.systemd1.Manager", "SystemState").Store(&prop) + if err != nil { + return nil, err + } + + return &Property{Name: "SystemState", Value: prop}, nil +} + +// getProperties takes the unit path and returns all of its dbus object properties, for the given dbus interface +func (c *Conn) getProperties(path dbus.ObjectPath, dbusInterface string) (map[string]interface{}, error) { + var err error + var props map[string]dbus.Variant + + if !path.IsValid() { + return nil, fmt.Errorf("invalid unit name: %v", path) + } + + obj := c.sysconn.Object("org.freedesktop.systemd1", path) + err = obj.Call("org.freedesktop.DBus.Properties.GetAll", 0, dbusInterface).Store(&props) + if err != nil { + return nil, err + } + + out := make(map[string]interface{}, len(props)) + for k, v := range props { + out[k] = v.Value() + } + + return out, nil +} + +// GetUnitProperties takes the (unescaped) unit name and returns all of its dbus object properties. +func (c *Conn) GetUnitProperties(unit string) (map[string]interface{}, error) { + path := unitPath(unit) + return c.getProperties(path, "org.freedesktop.systemd1.Unit") +} + +// GetUnitPathProperties takes the (escaped) unit path and returns all of its dbus object properties. 
+func (c *Conn) GetUnitPathProperties(path dbus.ObjectPath) (map[string]interface{}, error) {
+	// The path is used as given; properties are read from the generic Unit interface.
+	return c.getProperties(path, "org.freedesktop.systemd1.Unit")
+}
+
+// getProperty fetches the single property propertyName of the named unit from
+// the D-Bus interface dbusInterface.
+//
+// It returns an error if the object path derived from unit is not valid, or
+// if the org.freedesktop.DBus.Properties.Get call fails. On success the
+// returned Property carries propertyName and the variant that was received.
+func (c *Conn) getProperty(unit string, dbusInterface string, propertyName string) (*Property, error) {
+	var err error
+	var prop dbus.Variant
+
+	// unitPath turns the unit name into an object path; reject it before
+	// making the call if the result is not a valid path.
+	path := unitPath(unit)
+	if !path.IsValid() {
+		return nil, errors.New("invalid unit name: " + unit)
+	}
+
+	obj := c.sysconn.Object("org.freedesktop.systemd1", path)
+	err = obj.Call("org.freedesktop.DBus.Properties.Get", 0, dbusInterface, propertyName).Store(&prop)
+	if err != nil {
+		return nil, err
+	}
+
+	return &Property{Name: propertyName, Value: prop}, nil
+}
+
+// GetUnitProperty returns the property propertyName of the given unit, read
+// from the org.freedesktop.systemd1.Unit interface.
+func (c *Conn) GetUnitProperty(unit string, propertyName string) (*Property, error) {
+	return c.getProperty(unit, "org.freedesktop.systemd1.Unit", propertyName)
+}
+
+// GetServiceProperty returns property for given service name and property name
+func (c *Conn) GetServiceProperty(service string, propertyName string) (*Property, error) {
+	return c.getProperty(service, "org.freedesktop.systemd1.Service", propertyName)
+}
+
+// GetUnitTypeProperties returns the extra properties for a unit, specific to the unit type.
+// Valid values for unitType: Service, Socket, Target, Device, Mount, Automount, Snapshot, Timer, Swap, Path, Slice, Scope
+// return "dbus.Error: Unknown interface" if the unitType is not the correct type of the unit
+func (c *Conn) GetUnitTypeProperties(unit string, unitType string) (map[string]interface{}, error) {
+	path := unitPath(unit)
+	// unitType is appended verbatim to form the interface name.
+	return c.getProperties(path, "org.freedesktop.systemd1."+unitType)
+}
+
+// SetUnitProperties() may be used to modify certain unit properties at runtime.
+// Not all properties may be changed at runtime, but many resource management
+// settings (primarily those in systemd.cgroup(5)) may.
The changes are applied +// instantly, and stored on disk for future boots, unless runtime is true, in which +// case the settings only apply until the next reboot. name is the name of the unit +// to modify. properties are the settings to set, encoded as an array of property +// name and value pairs. +func (c *Conn) SetUnitProperties(name string, runtime bool, properties ...Property) error { + return c.sysobj.Call("org.freedesktop.systemd1.Manager.SetUnitProperties", 0, name, runtime, properties).Store() +} + +func (c *Conn) GetUnitTypeProperty(unit string, unitType string, propertyName string) (*Property, error) { + return c.getProperty(unit, "org.freedesktop.systemd1."+unitType, propertyName) +} + +type UnitStatus struct { + Name string // The primary unit name as string + Description string // The human readable description string + LoadState string // The load state (i.e. whether the unit file has been loaded successfully) + ActiveState string // The active state (i.e. whether the unit is currently started or not) + SubState string // The sub state (a more fine-grained version of the active state that is specific to the unit type, which the active state is not) + Followed string // A unit that is being followed in its state by this unit, if there is any, otherwise the empty string. 
+ Path dbus.ObjectPath // The unit object path + JobId uint32 // If there is a job queued for the job unit the numeric job id, 0 otherwise + JobType string // The job type as string + JobPath dbus.ObjectPath // The job object path +} + +type storeFunc func(retvalues ...interface{}) error + +func (c *Conn) listUnitsInternal(f storeFunc) ([]UnitStatus, error) { + result := make([][]interface{}, 0) + err := f(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + status := make([]UnitStatus, len(result)) + statusInterface := make([]interface{}, len(status)) + for i := range status { + statusInterface[i] = &status[i] + } + + err = dbus.Store(resultInterface, statusInterface...) + if err != nil { + return nil, err + } + + return status, nil +} + +// ListUnits returns an array with all currently loaded units. Note that +// units may be known by multiple names at the same time, and hence there might +// be more unit names loaded than actual units behind them. +// Also note that a unit is only loaded if it is active and/or enabled. +// Units that are both disabled and inactive will thus not be returned. +func (c *Conn) ListUnits() ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnits", 0).Store) +} + +// ListUnitsFiltered returns an array with units filtered by state. +// It takes a list of units' statuses to filter. +func (c *Conn) ListUnitsFiltered(states []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsFiltered", 0, states).Store) +} + +// ListUnitsByPatterns returns an array with units. +// It takes a list of units' statuses and names to filter. +// Note that units may be known by multiple names at the same time, +// and hence there might be more unit names loaded than actual units behind them. 
+func (c *Conn) ListUnitsByPatterns(states []string, patterns []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsByPatterns", 0, states, patterns).Store) +} + +// ListUnitsByNames returns an array with units. It takes a list of units' +// names and returns an UnitStatus array. Comparing to ListUnitsByPatterns +// method, this method returns statuses even for inactive or non-existing +// units. Input array should contain exact unit names, but not patterns. +// Note: Requires systemd v230 or higher +func (c *Conn) ListUnitsByNames(units []string) ([]UnitStatus, error) { + return c.listUnitsInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitsByNames", 0, units).Store) +} + +type UnitFile struct { + Path string + Type string +} + +func (c *Conn) listUnitFilesInternal(f storeFunc) ([]UnitFile, error) { + result := make([][]interface{}, 0) + err := f(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + files := make([]UnitFile, len(result)) + fileInterface := make([]interface{}, len(files)) + for i := range files { + fileInterface[i] = &files[i] + } + + err = dbus.Store(resultInterface, fileInterface...) + if err != nil { + return nil, err + } + + return files, nil +} + +// ListUnitFiles returns an array of all available units on disk. +func (c *Conn) ListUnitFiles() ([]UnitFile, error) { + return c.listUnitFilesInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitFiles", 0).Store) +} + +// ListUnitFilesByPatterns returns an array of all available units on disk matched the patterns. 
+func (c *Conn) ListUnitFilesByPatterns(states []string, patterns []string) ([]UnitFile, error) { + return c.listUnitFilesInternal(c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnitFilesByPatterns", 0, states, patterns).Store) +} + +type LinkUnitFileChange EnableUnitFileChange + +// LinkUnitFiles() links unit files (that are located outside of the +// usual unit search paths) into the unit search path. +// +// It takes a list of absolute paths to unit files to link and two +// booleans. The first boolean controls whether the unit shall be +// enabled for runtime only (true, /run), or persistently (false, +// /etc). +// The second controls whether symlinks pointing to other units shall +// be replaced if necessary. +// +// This call returns a list of the changes made. The list consists of +// structures with three strings: the type of the change (one of symlink +// or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) LinkUnitFiles(files []string, runtime bool, force bool) ([]LinkUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.LinkUnitFiles", 0, files, runtime, force).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]LinkUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +// EnableUnitFiles() may be used to enable one or more units in the system (by +// creating symlinks to them in /etc or /run). 
+// +// It takes a list of unit files to enable (either just file names or full +// absolute paths if the unit files are residing outside the usual unit +// search paths), and two booleans: the first controls whether the unit shall +// be enabled for runtime only (true, /run), or persistently (false, /etc). +// The second one controls whether symlinks pointing to other units shall +// be replaced if necessary. +// +// This call returns one boolean and an array with the changes made. The +// boolean signals whether the unit files contained any enablement +// information (i.e. an [Install]) section. The changes list consists of +// structures with three strings: the type of the change (one of symlink +// or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) EnableUnitFiles(files []string, runtime bool, force bool) (bool, []EnableUnitFileChange, error) { + var carries_install_info bool + + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.EnableUnitFiles", 0, files, runtime, force).Store(&carries_install_info, &result) + if err != nil { + return false, nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]EnableUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return false, nil, err + } + + return carries_install_info, changes, nil +} + +type EnableUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// DisableUnitFiles() may be used to disable one or more units in the system (by +// removing symlinks to them from /etc or /run). 
+// +// It takes a list of unit files to disable (either just file names or full +// absolute paths if the unit files are residing outside the usual unit +// search paths), and one boolean: whether the unit was enabled for runtime +// only (true, /run), or persistently (false, /etc). +// +// This call returns an array with the changes made. The changes list +// consists of structures with three strings: the type of the change (one of +// symlink or unlink), the file name of the symlink and the destination of the +// symlink. +func (c *Conn) DisableUnitFiles(files []string, runtime bool) ([]DisableUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.DisableUnitFiles", 0, files, runtime).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]DisableUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +type DisableUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// MaskUnitFiles masks one or more units in the system +// +// It takes three arguments: +// * list of units to mask (either just file names or full +// absolute paths if the unit files are residing outside +// the usual unit search paths) +// * runtime to specify whether the unit was enabled for runtime +// only (true, /run/systemd/..), or persistently (false, /etc/systemd/..) 
+// * force flag +func (c *Conn) MaskUnitFiles(files []string, runtime bool, force bool) ([]MaskUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.MaskUnitFiles", 0, files, runtime, force).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]MaskUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...) + if err != nil { + return nil, err + } + + return changes, nil +} + +type MaskUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// UnmaskUnitFiles unmasks one or more units in the system +// +// It takes two arguments: +// * list of unit files to unmask (either just file names or full +// absolute paths if the unit files are residing outside +// the usual unit search paths) +// * runtime to specify whether the unit was enabled for runtime +// only (true, /run/systemd/..), or persistently (false, /etc/systemd/..) +func (c *Conn) UnmaskUnitFiles(files []string, runtime bool) ([]UnmaskUnitFileChange, error) { + result := make([][]interface{}, 0) + err := c.sysobj.Call("org.freedesktop.systemd1.Manager.UnmaskUnitFiles", 0, files, runtime).Store(&result) + if err != nil { + return nil, err + } + + resultInterface := make([]interface{}, len(result)) + for i := range result { + resultInterface[i] = result[i] + } + + changes := make([]UnmaskUnitFileChange, len(result)) + changesInterface := make([]interface{}, len(changes)) + for i := range changes { + changesInterface[i] = &changes[i] + } + + err = dbus.Store(resultInterface, changesInterface...)
+ if err != nil { + return nil, err + } + + return changes, nil +} + +type UnmaskUnitFileChange struct { + Type string // Type of the change (one of symlink or unlink) + Filename string // File name of the symlink + Destination string // Destination of the symlink +} + +// Reload instructs systemd to scan for and reload unit files. This is +// equivalent to a 'systemctl daemon-reload'. +func (c *Conn) Reload() error { + return c.sysobj.Call("org.freedesktop.systemd1.Manager.Reload", 0).Store() +} + +func unitPath(name string) dbus.ObjectPath { + return dbus.ObjectPath("/org/freedesktop/systemd1/unit/" + PathBusEscape(name)) +} + +// unitName returns the unescaped base element of the supplied escaped path +func unitName(dpath dbus.ObjectPath) string { + return pathBusUnescape(path.Base(string(dpath))) +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/properties.go b/vendor/github.com/coreos/go-systemd/dbus/properties.go new file mode 100644 index 0000000..6c81895 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/properties.go @@ -0,0 +1,237 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "github.com/godbus/dbus" +) + +// From the systemd docs: +// +// The properties array of StartTransientUnit() may take many of the settings +// that may also be configured in unit files. 
Not all parameters are currently +// accepted though, but we plan to cover more properties with future release. +// Currently you may set the Description, Slice and all dependency types of +// units, as well as RemainAfterExit, ExecStart for service units, +// TimeoutStopUSec and PIDs for scope units, and CPUAccounting, CPUShares, +// BlockIOAccounting, BlockIOWeight, BlockIOReadBandwidth, +// BlockIOWriteBandwidth, BlockIODeviceWeight, MemoryAccounting, MemoryLimit, +// DevicePolicy, DeviceAllow for services/scopes/slices. These fields map +// directly to their counterparts in unit files and as normal D-Bus object +// properties. The exception here is the PIDs field of scope units which is +// used for construction of the scope only and specifies the initial PIDs to +// add to the scope object. + +type Property struct { + Name string + Value dbus.Variant +} + +type PropertyCollection struct { + Name string + Properties []Property +} + +type execStart struct { + Path string // the binary path to execute + Args []string // an array with all arguments to pass to the executed command, starting with argument 0 + UncleanIsFailure bool // a boolean whether it should be considered a failure if the process exits uncleanly +} + +// PropExecStart sets the ExecStart service property. The first argument is a +// slice with the binary path to execute followed by the arguments to pass to +// the executed command. See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecStart= +func PropExecStart(command []string, uncleanIsFailure bool) Property { + execStarts := []execStart{ + execStart{ + Path: command[0], + Args: command, + UncleanIsFailure: uncleanIsFailure, + }, + } + + return Property{ + Name: "ExecStart", + Value: dbus.MakeVariant(execStarts), + } +} + +// PropRemainAfterExit sets the RemainAfterExit service property. 
See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#RemainAfterExit= +func PropRemainAfterExit(b bool) Property { + return Property{ + Name: "RemainAfterExit", + Value: dbus.MakeVariant(b), + } +} + +// PropType sets the Type service property. See +// http://www.freedesktop.org/software/systemd/man/systemd.service.html#Type= +func PropType(t string) Property { + return Property{ + Name: "Type", + Value: dbus.MakeVariant(t), + } +} + +// PropDescription sets the Description unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit#Description= +func PropDescription(desc string) Property { + return Property{ + Name: "Description", + Value: dbus.MakeVariant(desc), + } +} + +func propDependency(name string, units []string) Property { + return Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +// PropRequires sets the Requires unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requires= +func PropRequires(units ...string) Property { + return propDependency("Requires", units) +} + +// PropRequiresOverridable sets the RequiresOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiresOverridable= +func PropRequiresOverridable(units ...string) Property { + return propDependency("RequiresOverridable", units) +} + +// PropRequisite sets the Requisite unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requisite= +func PropRequisite(units ...string) Property { + return propDependency("Requisite", units) +} + +// PropRequisiteOverridable sets the RequisiteOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequisiteOverridable= +func PropRequisiteOverridable(units ...string) Property { + return propDependency("RequisiteOverridable", units) +} + +// PropWants sets the Wants unit property. 
See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Wants= +func PropWants(units ...string) Property { + return propDependency("Wants", units) +} + +// PropBindsTo sets the BindsTo unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#BindsTo= +func PropBindsTo(units ...string) Property { + return propDependency("BindsTo", units) +} + +// PropRequiredBy sets the RequiredBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiredBy= +func PropRequiredBy(units ...string) Property { + return propDependency("RequiredBy", units) +} + +// PropRequiredByOverridable sets the RequiredByOverridable unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiredByOverridable= +func PropRequiredByOverridable(units ...string) Property { + return propDependency("RequiredByOverridable", units) +} + +// PropWantedBy sets the WantedBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#WantedBy= +func PropWantedBy(units ...string) Property { + return propDependency("WantedBy", units) +} + +// PropBoundBy sets the BoundBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#BoundBy= +func PropBoundBy(units ...string) Property { + return propDependency("BoundBy", units) +} + +// PropConflicts sets the Conflicts unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Conflicts= +func PropConflicts(units ...string) Property { + return propDependency("Conflicts", units) +} + +// PropConflictedBy sets the ConflictedBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#ConflictedBy= +func PropConflictedBy(units ...string) Property { + return propDependency("ConflictedBy", units) +} + +// PropBefore sets the Before unit property.
See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Before= +func PropBefore(units ...string) Property { + return propDependency("Before", units) +} + +// PropAfter sets the After unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#After= +func PropAfter(units ...string) Property { + return propDependency("After", units) +} + +// PropOnFailure sets the OnFailure unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#OnFailure= +func PropOnFailure(units ...string) Property { + return propDependency("OnFailure", units) +} + +// PropTriggers sets the Triggers unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Triggers= +func PropTriggers(units ...string) Property { + return propDependency("Triggers", units) +} + +// PropTriggeredBy sets the TriggeredBy unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#TriggeredBy= +func PropTriggeredBy(units ...string) Property { + return propDependency("TriggeredBy", units) +} + +// PropPropagatesReloadTo sets the PropagatesReloadTo unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#PropagatesReloadTo= +func PropPropagatesReloadTo(units ...string) Property { + return propDependency("PropagatesReloadTo", units) +} + +// PropRequiresMountsFor sets the RequiresMountsFor unit property. See +// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiresMountsFor= +func PropRequiresMountsFor(units ...string) Property { + return propDependency("RequiresMountsFor", units) +} + +// PropSlice sets the Slice unit property. 
See +// http://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#Slice= +func PropSlice(slice string) Property { + return Property{ + Name: "Slice", + Value: dbus.MakeVariant(slice), + } +} + +// PropPids sets the PIDs field of scope units used in the initial construction +// of the scope only and specifies the initial PIDs to add to the scope object. +// See https://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/#properties +func PropPids(pids ...uint32) Property { + return Property{ + Name: "PIDs", + Value: dbus.MakeVariant(pids), + } +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/set.go b/vendor/github.com/coreos/go-systemd/dbus/set.go new file mode 100644 index 0000000..17c5d48 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/set.go @@ -0,0 +1,47 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package dbus + +type set struct { + data map[string]bool +} + +func (s *set) Add(value string) { + s.data[value] = true +} + +func (s *set) Remove(value string) { + delete(s.data, value) +} + +func (s *set) Contains(value string) (exists bool) { + _, exists = s.data[value] + return +} + +func (s *set) Length() int { + return len(s.data) +} + +func (s *set) Values() (values []string) { + for val := range s.data { + values = append(values, val) + } + return +} + +func newSet() *set { + return &set{make(map[string]bool)} +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/subscription.go b/vendor/github.com/coreos/go-systemd/dbus/subscription.go new file mode 100644 index 0000000..f6d7a08 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/subscription.go @@ -0,0 +1,333 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "errors" + "log" + "time" + + "github.com/godbus/dbus" +) + +const ( + cleanIgnoreInterval = int64(10 * time.Second) + ignoreInterval = int64(30 * time.Millisecond) +) + +// Subscribe sets up this connection to subscribe to all systemd dbus events. +// This is required before calling SubscribeUnits. When the connection closes +// systemd will automatically stop sending signals so there is no need to +// explicitly call Unsubscribe(). 
+func (c *Conn) Subscribe() error { + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal',interface='org.freedesktop.systemd1.Manager',member='UnitNew'") + c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0, + "type='signal',interface='org.freedesktop.DBus.Properties',member='PropertiesChanged'") + + return c.sigobj.Call("org.freedesktop.systemd1.Manager.Subscribe", 0).Store() +} + +// Unsubscribe this connection from systemd dbus events. +func (c *Conn) Unsubscribe() error { + return c.sigobj.Call("org.freedesktop.systemd1.Manager.Unsubscribe", 0).Store() +} + +func (c *Conn) dispatch() { + ch := make(chan *dbus.Signal, signalBuffer) + + c.sigconn.Signal(ch) + + go func() { + for { + signal, ok := <-ch + if !ok { + return + } + + if signal.Name == "org.freedesktop.systemd1.Manager.JobRemoved" { + c.jobComplete(signal) + } + + if c.subStateSubscriber.updateCh == nil && + c.propertiesSubscriber.updateCh == nil { + continue + } + + var unitPath dbus.ObjectPath + switch signal.Name { + case "org.freedesktop.systemd1.Manager.JobRemoved": + unitName := signal.Body[2].(string) + c.sysobj.Call("org.freedesktop.systemd1.Manager.GetUnit", 0, unitName).Store(&unitPath) + case "org.freedesktop.systemd1.Manager.UnitNew": + unitPath = signal.Body[1].(dbus.ObjectPath) + case "org.freedesktop.DBus.Properties.PropertiesChanged": + if signal.Body[0].(string) == "org.freedesktop.systemd1.Unit" { + unitPath = signal.Path + + if len(signal.Body) >= 2 { + if changed, ok := signal.Body[1].(map[string]dbus.Variant); ok { + c.sendPropertiesUpdate(unitPath, changed) + } + } + } + } + + if unitPath == dbus.ObjectPath("") { + continue + } + + c.sendSubStateUpdate(unitPath) + } + }() +} + +// SubscribeUnits returns two unbuffered channels which will receive all changed units every +// interval. Deleted units are sent as nil. 
+func (c *Conn) SubscribeUnits(interval time.Duration) (<-chan map[string]*UnitStatus, <-chan error) { + return c.SubscribeUnitsCustom(interval, 0, func(u1, u2 *UnitStatus) bool { return *u1 != *u2 }, nil) +} + +// SubscribeUnitsCustom is like SubscribeUnits but lets you specify the buffer +// size of the channels, the comparison function for detecting changes and a filter +// function for cutting down on the noise that your channel receives. +func (c *Conn) SubscribeUnitsCustom(interval time.Duration, buffer int, isChanged func(*UnitStatus, *UnitStatus) bool, filterUnit func(string) bool) (<-chan map[string]*UnitStatus, <-chan error) { + old := make(map[string]*UnitStatus) + statusChan := make(chan map[string]*UnitStatus, buffer) + errChan := make(chan error, buffer) + + go func() { + for { + timerChan := time.After(interval) + + units, err := c.ListUnits() + if err == nil { + cur := make(map[string]*UnitStatus) + for i := range units { + if filterUnit != nil && filterUnit(units[i].Name) { + continue + } + cur[units[i].Name] = &units[i] + } + + // add all new or changed units + changed := make(map[string]*UnitStatus) + for n, u := range cur { + if oldU, ok := old[n]; !ok || isChanged(oldU, u) { + changed[n] = u + } + delete(old, n) + } + + // add all deleted units + for oldN := range old { + changed[oldN] = nil + } + + old = cur + + if len(changed) != 0 { + statusChan <- changed + } + } else { + errChan <- err + } + + <-timerChan + } + }() + + return statusChan, errChan +} + +type SubStateUpdate struct { + UnitName string + SubState string +} + +// SetSubStateSubscriber writes to updateCh when any unit's substate changes. +// Although this writes to updateCh on every state change, the reported state +// may be more recent than the change that generated it (due to an unavoidable +// race in the systemd dbus interface). 
That is, this method provides a good +// way to keep a current view of all units' states, but is not guaranteed to +// show every state transition they go through. Furthermore, state changes +// will only be written to the channel with non-blocking writes. If updateCh +// is full, it attempts to write an error to errCh; if errCh is full, the error +// passes silently. +func (c *Conn) SetSubStateSubscriber(updateCh chan<- *SubStateUpdate, errCh chan<- error) { + if c == nil { + msg := "nil receiver" + select { + case errCh <- errors.New(msg): + default: + log.Printf("full error channel while reporting: %s\n", msg) + } + return + } + + c.subStateSubscriber.Lock() + defer c.subStateSubscriber.Unlock() + c.subStateSubscriber.updateCh = updateCh + c.subStateSubscriber.errCh = errCh +} + +func (c *Conn) sendSubStateUpdate(unitPath dbus.ObjectPath) { + c.subStateSubscriber.Lock() + defer c.subStateSubscriber.Unlock() + + if c.subStateSubscriber.updateCh == nil { + return + } + + isIgnored := c.shouldIgnore(unitPath) + defer c.cleanIgnore() + if isIgnored { + return + } + + info, err := c.GetUnitPathProperties(unitPath) + if err != nil { + select { + case c.subStateSubscriber.errCh <- err: + default: + log.Printf("full error channel while reporting: %s\n", err) + } + return + } + defer c.updateIgnore(unitPath, info) + + name, ok := info["Id"].(string) + if !ok { + msg := "failed to cast info.Id" + select { + case c.subStateSubscriber.errCh <- errors.New(msg): + default: + log.Printf("full error channel while reporting: %s\n", msg) + } + return + } + substate, ok := info["SubState"].(string) + if !ok { + msg := "failed to cast info.SubState" + select { + case c.subStateSubscriber.errCh <- errors.New(msg): + default: + log.Printf("full error channel while reporting: %s\n", msg) + } + return + } + + update := &SubStateUpdate{name, substate} + select { + case c.subStateSubscriber.updateCh <- update: + default: + msg := "update channel is full" + select { + case
c.subStateSubscriber.errCh <- errors.New(msg): + default: + log.Printf("full error channel while reporting: %s\n", msg) + } + return + } +} + +// The ignore functions work around a wart in the systemd dbus interface. +// Requesting the properties of an unloaded unit will cause systemd to send a +// pair of UnitNew/UnitRemoved signals. Because we need to get a unit's +// properties on UnitNew (as that's the only indication of a new unit coming up +// for the first time), we would enter an infinite loop if we did not attempt +// to detect and ignore these spurious signals. The signal themselves are +// indistinguishable from relevant ones, so we (somewhat hackishly) ignore an +// unloaded unit's signals for a short time after requesting its properties. +// This means that we will miss e.g. a transient unit being restarted +// *immediately* upon failure and also a transient unit being started +// immediately after requesting its status (with systemctl status, for example, +// because this causes a UnitNew signal to be sent which then causes us to fetch +// the properties). 
+ +func (c *Conn) shouldIgnore(path dbus.ObjectPath) bool { + t, ok := c.subStateSubscriber.ignore[path] + return ok && t >= time.Now().UnixNano() +} + +func (c *Conn) updateIgnore(path dbus.ObjectPath, info map[string]interface{}) { + loadState, ok := info["LoadState"].(string) + if !ok { + return + } + + // unit is unloaded - it will trigger bad systemd dbus behavior + if loadState == "not-found" { + c.subStateSubscriber.ignore[path] = time.Now().UnixNano() + ignoreInterval + } +} + +// without this, ignore would grow unboundedly over time +func (c *Conn) cleanIgnore() { + now := time.Now().UnixNano() + if c.subStateSubscriber.cleanIgnore < now { + c.subStateSubscriber.cleanIgnore = now + cleanIgnoreInterval + + for p, t := range c.subStateSubscriber.ignore { + if t < now { + delete(c.subStateSubscriber.ignore, p) + } + } + } +} + +// PropertiesUpdate holds a map of a unit's changed properties +type PropertiesUpdate struct { + UnitName string + Changed map[string]dbus.Variant +} + +// SetPropertiesSubscriber writes to updateCh when any unit's properties +// change. Every property change reported by systemd will be sent; that is, no +// transitions will be "missed" (as they might be with SetSubStateSubscriber). +// However, state changes will only be written to the channel with non-blocking +// writes. If updateCh is full, it attempts to write an error to errCh; if +// errCh is full, the error passes silently. 
+func (c *Conn) SetPropertiesSubscriber(updateCh chan<- *PropertiesUpdate, errCh chan<- error) { + c.propertiesSubscriber.Lock() + defer c.propertiesSubscriber.Unlock() + c.propertiesSubscriber.updateCh = updateCh + c.propertiesSubscriber.errCh = errCh +} + +// we don't need to worry about shouldIgnore() here because +// sendPropertiesUpdate doesn't call GetProperties() +func (c *Conn) sendPropertiesUpdate(unitPath dbus.ObjectPath, changedProps map[string]dbus.Variant) { + c.propertiesSubscriber.Lock() + defer c.propertiesSubscriber.Unlock() + + if c.propertiesSubscriber.updateCh == nil { + return + } + + update := &PropertiesUpdate{unitName(unitPath), changedProps} + + select { + case c.propertiesSubscriber.updateCh <- update: + default: + msg := "update channel is full" + select { + case c.propertiesSubscriber.errCh <- errors.New(msg): + default: + log.Printf("full error channel while reporting: %s\n", msg) + } + return + } +} diff --git a/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go b/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go new file mode 100644 index 0000000..5b408d5 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/dbus/subscription_set.go @@ -0,0 +1,57 @@ +// Copyright 2015 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbus + +import ( + "time" +) + +// SubscriptionSet returns a subscription set which is like conn.Subscribe but +// can filter to only return events for a set of units. 
+type SubscriptionSet struct { + *set + conn *Conn +} + +func (s *SubscriptionSet) filter(unit string) bool { + return !s.Contains(unit) +} + +// Subscribe starts listening for dbus events for all of the units in the set. +// Returns channels identical to conn.SubscribeUnits. +func (s *SubscriptionSet) Subscribe() (<-chan map[string]*UnitStatus, <-chan error) { + // TODO: Make fully evented by using systemd 209 with properties changed values + return s.conn.SubscribeUnitsCustom(time.Second, 0, + mismatchUnitStatus, + func(unit string) bool { return s.filter(unit) }, + ) +} + +// NewSubscriptionSet returns a new subscription set. +func (conn *Conn) NewSubscriptionSet() *SubscriptionSet { + return &SubscriptionSet{newSet(), conn} +} + +// mismatchUnitStatus returns true if the provided UnitStatus objects +// are not equivalent. false is returned if the objects are equivalent. +// Only the Name, Description and state-related fields are used in +// the comparison. +func mismatchUnitStatus(u1, u2 *UnitStatus) bool { + return u1.Name != u2.Name || + u1.Description != u2.Description || + u1.LoadState != u2.LoadState || + u1.ActiveState != u2.ActiveState || + u1.SubState != u2.SubState +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE b/vendor/github.com/cyphar/filepath-securejoin/LICENSE new file mode 100644 index 0000000..bec842f --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE @@ -0,0 +1,28 @@ +Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +Copyright (C) 2017 SUSE LLC. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/cyphar/filepath-securejoin/README.md b/vendor/github.com/cyphar/filepath-securejoin/README.md new file mode 100644 index 0000000..49b2baa --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/README.md @@ -0,0 +1,65 @@ +## `filepath-securejoin` ## + +[![Build Status](https://travis-ci.org/cyphar/filepath-securejoin.svg?branch=master)](https://travis-ci.org/cyphar/filepath-securejoin) + +An implementation of `SecureJoin`, a [candidate for inclusion in the Go +standard library][go#20126]. The purpose of this function is to be a "secure" +alternative to `filepath.Join`, and in particular it provides certain +guarantees that are not provided by `filepath.Join`. 
+ +This is the function prototype: + +```go +func SecureJoin(root, unsafePath string) (string, error) +``` + +This library **guarantees** the following: + +* If no error is set, the resulting string **must** be a child path of + `root` and will not contain any symlink path components (they will all + be expanded). + +* When expanding symlinks, all symlink path components **must** be resolved + relative to the provided root. In particular, this can be considered a + userspace implementation of how `chroot(2)` operates on file paths. Note that + these symlinks will **not** be expanded lexically (`filepath.Clean` is not + called on the input before processing). + +* Non-existent path components are unaffected by `SecureJoin` (similar to + `filepath.EvalSymlinks`'s semantics). + +* The returned path will always be `filepath.Clean`ed and thus not contain any + `..` components. + +A (trivial) implementation of this function on GNU/Linux systems could be done +with the following (note that this requires root privileges and is far more +opaque than the implementation in this library, and also requires that +`readlink` is inside the `root` path): + +```go +package securejoin + +import ( + "os/exec" + "path/filepath" +) + +func SecureJoin(root, unsafePath string) (string, error) { + unsafePath = string(filepath.Separator) + unsafePath + cmd := exec.Command("chroot", root, + "readlink", "--canonicalize-missing", "--no-newline", unsafePath) + output, err := cmd.CombinedOutput() + if err != nil { + return "", err + } + expanded := string(output) + return filepath.Join(root, expanded), nil +} +``` + +[go#20126]: https://github.com/golang/go/issues/20126 + +### License ### + +The license of this project is the same as Go, which is a BSD 3-clause license +available in the `LICENSE` file.
diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go new file mode 100644 index 0000000..c4ca3d7 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/join.go @@ -0,0 +1,134 @@ +// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +// Copyright (C) 2017 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package securejoin is an implementation of the hopefully-soon-to-be-included +// SecureJoin helper that is meant to be part of the "path/filepath" package. +// The purpose of this project is to provide a PoC implementation to make the +// SecureJoin proposal (https://github.com/golang/go/issues/20126) more +// tangible. +package securejoin + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "syscall" + + "github.com/pkg/errors" +) + +// ErrSymlinkLoop is returned by SecureJoinVFS when too many symlinks have been +// evaluated in attempting to securely join the two given paths. +var ErrSymlinkLoop = errors.Wrap(syscall.ELOOP, "secure join") + +// IsNotExist tells you if err is an error that implies that either the path +// accessed does not exist (or path components don't exist). This is +// effectively a more broad version of os.IsNotExist. +func IsNotExist(err error) bool { + // If it's a bone-fide ENOENT just bail. + if os.IsNotExist(errors.Cause(err)) { + return true + } + + // Check that it's not actually an ENOTDIR, which in some cases is a more + // convoluted case of ENOENT (usually involving weird paths). 
+ var errno error + switch err := errors.Cause(err).(type) { + case *os.PathError: + errno = err.Err + case *os.LinkError: + errno = err.Err + case *os.SyscallError: + errno = err.Err + } + return errno == syscall.ENOTDIR || errno == syscall.ENOENT +} + +// SecureJoinVFS joins the two given path components (similar to Join) except +// that the returned path is guaranteed to be scoped inside the provided root +// path (when evaluated). Any symbolic links in the path are evaluated with the +// given root treated as the root of the filesystem, similar to a chroot. The +// filesystem state is evaluated through the given VFS interface (if nil, the +// standard os.* family of functions are used). +// +// Note that the guarantees provided by this function only apply if the path +// components in the returned string are not modified (in other words are not +// replaced with symlinks on the filesystem) after this function has returned. +// Such a symlink race is necessarily out-of-scope of SecureJoin. +func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { + // Use the os.* VFS implementation if none was specified. + if vfs == nil { + vfs = osVFS{} + } + + var path bytes.Buffer + n := 0 + for unsafePath != "" { + if n > 255 { + return "", ErrSymlinkLoop + } + + // Next path component, p. + i := strings.IndexRune(unsafePath, filepath.Separator) + var p string + if i == -1 { + p, unsafePath = unsafePath, "" + } else { + p, unsafePath = unsafePath[:i], unsafePath[i+1:] + } + + // Create a cleaned path, using the lexical semantics of /../a, to + // create a "scoped" path component which can safely be joined to fullP + // for evaluation. At this point, path.String() doesn't contain any + // symlink components. + cleanP := filepath.Clean(string(filepath.Separator) + path.String() + p) + if cleanP == string(filepath.Separator) { + path.Reset() + continue + } + fullP := filepath.Clean(root + cleanP) + + // Figure out whether the path is a symlink. 
+ fi, err := vfs.Lstat(fullP) + if err != nil && !IsNotExist(err) { + return "", err + } + // Treat non-existent path components the same as non-symlinks (we + // can't do any better here). + if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 { + path.WriteString(p) + path.WriteRune(filepath.Separator) + continue + } + + // Only increment when we actually dereference a link. + n++ + + // It's a symlink, expand it by prepending it to the yet-unparsed path. + dest, err := vfs.Readlink(fullP) + if err != nil { + return "", err + } + // Absolute symlinks reset any work we've already done. + if filepath.IsAbs(dest) { + path.Reset() + } + unsafePath = dest + string(filepath.Separator) + unsafePath + } + + // We have to clean path.String() here because it may contain '..' + // components that are entirely lexical, but would be misleading otherwise. + // And finally do a final clean to ensure that root is also lexically + // clean. + fullP := filepath.Clean(string(filepath.Separator) + path.String()) + return filepath.Clean(root + fullP), nil +} + +// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library +// of functions as the VFS. If in doubt, use this function over SecureJoinVFS. +func SecureJoin(root, unsafePath string) (string, error) { + return SecureJoinVFS(root, unsafePath, nil) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/vendor.conf b/vendor/github.com/cyphar/filepath-securejoin/vendor.conf new file mode 100644 index 0000000..66bb574 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/vendor.conf @@ -0,0 +1 @@ +github.com/pkg/errors v0.8.0 diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go new file mode 100644 index 0000000..a82a5ea --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go @@ -0,0 +1,41 @@ +// Copyright (C) 2017 SUSE LLC. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package securejoin + +import "os" + +// In future this should be moved into a separate package, because now there +// are several projects (umoci and go-mtree) that are using this sort of +// interface. + +// VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is +// equivalent to using the standard os.* family of functions. This is mainly +// used for the purposes of mock testing, but also can be used to otherwise use +// SecureJoin with VFS-like system. +type VFS interface { + // Lstat returns a FileInfo describing the named file. If the file is a + // symbolic link, the returned FileInfo describes the symbolic link. Lstat + // makes no attempt to follow the link. These semantics are identical to + // os.Lstat. + Lstat(name string) (os.FileInfo, error) + + // Readlink returns the destination of the named symbolic link. These + // semantics are identical to os.Readlink. + Readlink(name string) (string, error) +} + +// osVFS is the "nil" VFS, in that it just passes everything through to the os +// module. +type osVFS struct{} + +// Lstat returns a FileInfo describing the named file. If the file is a +// symbolic link, the returned FileInfo describes the symbolic link. Lstat +// makes no attempt to follow the link. These semantics are identical to +// os.Lstat. +func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } + +// Readlink returns the destination of the named symbolic link. These +// semantics are identical to os.Readlink. 
+func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) } diff --git a/vendor/github.com/docker/go-units/LICENSE b/vendor/github.com/docker/go-units/LICENSE new file mode 100644 index 0000000..b55b37b --- /dev/null +++ b/vendor/github.com/docker/go-units/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2015 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/docker/go-units/README.md b/vendor/github.com/docker/go-units/README.md new file mode 100644 index 0000000..4f70a4e --- /dev/null +++ b/vendor/github.com/docker/go-units/README.md @@ -0,0 +1,16 @@ +[![GoDoc](https://godoc.org/github.com/docker/go-units?status.svg)](https://godoc.org/github.com/docker/go-units) + +# Introduction + +go-units is a library to transform human friendly measurements into machine friendly values. + +## Usage + +See the [docs in godoc](https://godoc.org/github.com/docker/go-units) for examples and documentation. + +## Copyright and license + +Copyright © 2015 Docker, Inc. + +go-units is licensed under the Apache License, Version 2.0. +See [LICENSE](LICENSE) for the full text of the license. diff --git a/vendor/github.com/docker/go-units/duration.go b/vendor/github.com/docker/go-units/duration.go new file mode 100644 index 0000000..ba02af2 --- /dev/null +++ b/vendor/github.com/docker/go-units/duration.go @@ -0,0 +1,35 @@ +// Package units provides helper function to parse and print size and time units +// in human-readable format. +package units + +import ( + "fmt" + "time" +) + +// HumanDuration returns a human-readable approximation of a duration +// (eg. "About a minute", "4 hours ago", etc.). 
+func HumanDuration(d time.Duration) string { + if seconds := int(d.Seconds()); seconds < 1 { + return "Less than a second" + } else if seconds == 1 { + return "1 second" + } else if seconds < 60 { + return fmt.Sprintf("%d seconds", seconds) + } else if minutes := int(d.Minutes()); minutes == 1 { + return "About a minute" + } else if minutes < 46 { + return fmt.Sprintf("%d minutes", minutes) + } else if hours := int(d.Hours() + 0.5); hours == 1 { + return "About an hour" + } else if hours < 48 { + return fmt.Sprintf("%d hours", hours) + } else if hours < 24*7*2 { + return fmt.Sprintf("%d days", hours/24) + } else if hours < 24*30*2 { + return fmt.Sprintf("%d weeks", hours/24/7) + } else if hours < 24*365*2 { + return fmt.Sprintf("%d months", hours/24/30) + } + return fmt.Sprintf("%d years", int(d.Hours())/24/365) +} diff --git a/vendor/github.com/docker/go-units/size.go b/vendor/github.com/docker/go-units/size.go new file mode 100644 index 0000000..85f6ab0 --- /dev/null +++ b/vendor/github.com/docker/go-units/size.go @@ -0,0 +1,108 @@ +package units + +import ( + "fmt" + "regexp" + "strconv" + "strings" +) + +// See: http://en.wikipedia.org/wiki/Binary_prefix +const ( + // Decimal + + KB = 1000 + MB = 1000 * KB + GB = 1000 * MB + TB = 1000 * GB + PB = 1000 * TB + + // Binary + + KiB = 1024 + MiB = 1024 * KiB + GiB = 1024 * MiB + TiB = 1024 * GiB + PiB = 1024 * TiB +) + +type unitMap map[string]int64 + +var ( + decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB} + binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB} + sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`) +) + +var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"} +var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} + +func getSizeAndUnit(size float64, base float64, _map []string) (float64, string) { + i := 0 + unitsLimit := len(_map) - 1 + for size >= base && i < unitsLimit { + size = 
size / base + i++ + } + return size, _map[i] +} + +// CustomSize returns a human-readable approximation of a size +// using custom format. +func CustomSize(format string, size float64, base float64, _map []string) string { + size, unit := getSizeAndUnit(size, base, _map) + return fmt.Sprintf(format, size, unit) +} + +// HumanSizeWithPrecision allows the size to be in any precision, +// instead of 4 digit precision used in units.HumanSize. +func HumanSizeWithPrecision(size float64, precision int) string { + size, unit := getSizeAndUnit(size, 1000.0, decimapAbbrs) + return fmt.Sprintf("%.*g%s", precision, size, unit) +} + +// HumanSize returns a human-readable approximation of a size +// capped at 4 valid numbers (eg. "2.746 MB", "796 KB"). +func HumanSize(size float64) string { + return HumanSizeWithPrecision(size, 4) +} + +// BytesSize returns a human-readable size in bytes, kibibytes, +// mebibytes, gibibytes, or tebibytes (eg. "44kiB", "17MiB"). +func BytesSize(size float64) string { + return CustomSize("%.4g%s", size, 1024.0, binaryAbbrs) +} + +// FromHumanSize returns an integer from a human-readable specification of a +// size using SI standard (eg. "44kB", "17MB"). +func FromHumanSize(size string) (int64, error) { + return parseSize(size, decimalMap) +} + +// RAMInBytes parses a human-readable string representing an amount of RAM +// in bytes, kibibytes, mebibytes, gibibytes, or tebibytes and +// returns the number of bytes, or -1 if the string is unparseable. +// Units are case-insensitive, and the 'b' suffix is optional. +func RAMInBytes(size string) (int64, error) { + return parseSize(size, binaryMap) +} + +// Parses the human-readable size string into the amount it represents. 
+func parseSize(sizeStr string, uMap unitMap) (int64, error) { + matches := sizeRegex.FindStringSubmatch(sizeStr) + if len(matches) != 4 { + return -1, fmt.Errorf("invalid size: '%s'", sizeStr) + } + + size, err := strconv.ParseFloat(matches[1], 64) + if err != nil { + return -1, err + } + + unitPrefix := strings.ToLower(matches[3]) + if mul, ok := uMap[unitPrefix]; ok { + size *= float64(mul) + } + + return int64(size), nil +} diff --git a/vendor/github.com/docker/go-units/ulimit.go b/vendor/github.com/docker/go-units/ulimit.go new file mode 100644 index 0000000..5ac7fd8 --- /dev/null +++ b/vendor/github.com/docker/go-units/ulimit.go @@ -0,0 +1,118 @@ +package units + +import ( + "fmt" + "strconv" + "strings" +) + +// Ulimit is a human friendly version of Rlimit. +type Ulimit struct { + Name string + Hard int64 + Soft int64 +} + +// Rlimit specifies the resource limits, such as max open files. +type Rlimit struct { + Type int `json:"type,omitempty"` + Hard uint64 `json:"hard,omitempty"` + Soft uint64 `json:"soft,omitempty"` +} + +const ( + // magic numbers for making the syscall + // some of these are defined in the syscall package, but not all. + // Also since Windows client doesn't get access to the syscall package, need to + // define these here + rlimitAs = 9 + rlimitCore = 4 + rlimitCPU = 0 + rlimitData = 2 + rlimitFsize = 1 + rlimitLocks = 10 + rlimitMemlock = 8 + rlimitMsgqueue = 12 + rlimitNice = 13 + rlimitNofile = 7 + rlimitNproc = 6 + rlimitRss = 5 + rlimitRtprio = 14 + rlimitRttime = 15 + rlimitSigpending = 11 + rlimitStack = 3 +) + +var ulimitNameMapping = map[string]int{ + //"as": rlimitAs, // Disabled since this doesn't seem usable with the way Docker inits a container. 
+ "core": rlimitCore, + "cpu": rlimitCPU, + "data": rlimitData, + "fsize": rlimitFsize, + "locks": rlimitLocks, + "memlock": rlimitMemlock, + "msgqueue": rlimitMsgqueue, + "nice": rlimitNice, + "nofile": rlimitNofile, + "nproc": rlimitNproc, + "rss": rlimitRss, + "rtprio": rlimitRtprio, + "rttime": rlimitRttime, + "sigpending": rlimitSigpending, + "stack": rlimitStack, +} + +// ParseUlimit parses and returns a Ulimit from the specified string. +func ParseUlimit(val string) (*Ulimit, error) { + parts := strings.SplitN(val, "=", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid ulimit argument: %s", val) + } + + if _, exists := ulimitNameMapping[parts[0]]; !exists { + return nil, fmt.Errorf("invalid ulimit type: %s", parts[0]) + } + + var ( + soft int64 + hard = &soft // default to soft in case no hard was set + temp int64 + err error + ) + switch limitVals := strings.Split(parts[1], ":"); len(limitVals) { + case 2: + temp, err = strconv.ParseInt(limitVals[1], 10, 64) + if err != nil { + return nil, err + } + hard = &temp + fallthrough + case 1: + soft, err = strconv.ParseInt(limitVals[0], 10, 64) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("too many limit value arguments - %s, can only have up to two, `soft[:hard]`", parts[1]) + } + + if soft > *hard { + return nil, fmt.Errorf("ulimit soft limit must be less than or equal to hard limit: %d > %d", soft, *hard) + } + + return &Ulimit{Name: parts[0], Soft: soft, Hard: *hard}, nil +} + +// GetRlimit returns the RLimit corresponding to Ulimit. 
+func (u *Ulimit) GetRlimit() (*Rlimit, error) { + t, exists := ulimitNameMapping[u.Name] + if !exists { + return nil, fmt.Errorf("invalid ulimit name %s", u.Name) + } + + return &Rlimit{Type: t, Soft: uint64(u.Soft), Hard: uint64(u.Hard)}, nil +} + +func (u *Ulimit) String() string { + return fmt.Sprintf("%s=%d:%d", u.Name, u.Soft, u.Hard) +} diff --git a/vendor/github.com/godbus/dbus/LICENSE b/vendor/github.com/godbus/dbus/LICENSE new file mode 100644 index 0000000..670d88f --- /dev/null +++ b/vendor/github.com/godbus/dbus/LICENSE @@ -0,0 +1,25 @@ +Copyright (c) 2013, Georg Reinke (), Google +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/godbus/dbus/README.markdown b/vendor/github.com/godbus/dbus/README.markdown new file mode 100644 index 0000000..fd29648 --- /dev/null +++ b/vendor/github.com/godbus/dbus/README.markdown @@ -0,0 +1,44 @@ +[![Build Status](https://travis-ci.org/godbus/dbus.svg?branch=master)](https://travis-ci.org/godbus/dbus) + +dbus +---- + +dbus is a simple library that implements native Go client bindings for the +D-Bus message bus system. + +### Features + +* Complete native implementation of the D-Bus message protocol +* Go-like API (channels for signals / asynchronous method calls, Goroutine-safe connections) +* Subpackages that help with the introspection / property interfaces + +### Installation + +This package requires Go 1.7. If you installed it and set up your GOPATH, just run: + +``` +go get github.com/godbus/dbus +``` + +If you want to use the subpackages, you can install them the same way. + +### Usage + +The complete package documentation and some simple examples are available at +[godoc.org](http://godoc.org/github.com/godbus/dbus). Also, the +[_examples](https://github.com/godbus/dbus/tree/master/_examples) directory +gives a short overview of the basic usage. + +#### Projects using godbus +- [notify](https://github.com/esiqveland/notify) provides desktop notifications over dbus into a library. +- [go-bluetooth](https://github.com/muka/go-bluetooth) provides a bluetooth client over bluez dbus API. + +Please note that the API is considered unstable for now and may change without +further notice. + +### License + +go.dbus is available under the Simplified BSD License; see LICENSE for the full +text. + +Nearly all of the credit for this library goes to github.com/guelfey/go.dbus.
diff --git a/vendor/github.com/godbus/dbus/auth.go b/vendor/github.com/godbus/dbus/auth.go new file mode 100644 index 0000000..b0dcb54 --- /dev/null +++ b/vendor/github.com/godbus/dbus/auth.go @@ -0,0 +1,252 @@ +package dbus + +import ( + "bufio" + "bytes" + "errors" + "io" + "os" + "strconv" +) + +// AuthStatus represents the Status of an authentication mechanism. +type AuthStatus byte + +const ( + // AuthOk signals that authentication is finished; the next command + // from the server should be an OK. + AuthOk AuthStatus = iota + + // AuthContinue signals that additional data is needed; the next command + // from the server should be a DATA. + AuthContinue + + // AuthError signals an error; the server sent invalid data or some + // other unexpected thing happened and the current authentication + // process should be aborted. + AuthError +) + +type authState byte + +const ( + waitingForData authState = iota + waitingForOk + waitingForReject +) + +// Auth defines the behaviour of an authentication mechanism. +type Auth interface { + // Return the name of the mechnism, the argument to the first AUTH command + // and the next status. + FirstData() (name, resp []byte, status AuthStatus) + + // Process the given DATA command, and return the argument to the DATA + // command and the next status. If len(resp) == 0, no DATA command is sent. + HandleData(data []byte) (resp []byte, status AuthStatus) +} + +// Auth authenticates the connection, trying the given list of authentication +// mechanisms (in that order). If nil is passed, the EXTERNAL and +// DBUS_COOKIE_SHA1 mechanisms are tried for the current user. For private +// connections, this method must be called before sending any messages to the +// bus. Auth must not be called on shared connections. 
+func (conn *Conn) Auth(methods []Auth) error { + if methods == nil { + uid := strconv.Itoa(os.Getuid()) + methods = []Auth{AuthExternal(uid), AuthCookieSha1(uid, getHomeDir())} + } + in := bufio.NewReader(conn.transport) + err := conn.transport.SendNullByte() + if err != nil { + return err + } + err = authWriteLine(conn.transport, []byte("AUTH")) + if err != nil { + return err + } + s, err := authReadLine(in) + if err != nil { + return err + } + if len(s) < 2 || !bytes.Equal(s[0], []byte("REJECTED")) { + return errors.New("dbus: authentication protocol error") + } + s = s[1:] + for _, v := range s { + for _, m := range methods { + if name, data, status := m.FirstData(); bytes.Equal(v, name) { + var ok bool + err = authWriteLine(conn.transport, []byte("AUTH"), []byte(v), data) + if err != nil { + return err + } + switch status { + case AuthOk: + err, ok = conn.tryAuth(m, waitingForOk, in) + case AuthContinue: + err, ok = conn.tryAuth(m, waitingForData, in) + default: + panic("dbus: invalid authentication status") + } + if err != nil { + return err + } + if ok { + if conn.transport.SupportsUnixFDs() { + err = authWriteLine(conn, []byte("NEGOTIATE_UNIX_FD")) + if err != nil { + return err + } + line, err := authReadLine(in) + if err != nil { + return err + } + switch { + case bytes.Equal(line[0], []byte("AGREE_UNIX_FD")): + conn.EnableUnixFDs() + conn.unixFD = true + case bytes.Equal(line[0], []byte("ERROR")): + default: + return errors.New("dbus: authentication protocol error") + } + } + err = authWriteLine(conn.transport, []byte("BEGIN")) + if err != nil { + return err + } + go conn.inWorker() + return nil + } + } + } + } + return errors.New("dbus: authentication failed") +} + +// tryAuth tries to authenticate with m as the mechanism, using state as the +// initial authState and in for reading input. It returns (nil, true) on +// success, (nil, false) on a REJECTED and (someErr, false) if some other +// error occured. 
+func (conn *Conn) tryAuth(m Auth, state authState, in *bufio.Reader) (error, bool) { + for { + s, err := authReadLine(in) + if err != nil { + return err, false + } + switch { + case state == waitingForData && string(s[0]) == "DATA": + if len(s) != 2 { + err = authWriteLine(conn.transport, []byte("ERROR")) + if err != nil { + return err, false + } + continue + } + data, status := m.HandleData(s[1]) + switch status { + case AuthOk, AuthContinue: + if len(data) != 0 { + err = authWriteLine(conn.transport, []byte("DATA"), data) + if err != nil { + return err, false + } + } + if status == AuthOk { + state = waitingForOk + } + case AuthError: + err = authWriteLine(conn.transport, []byte("ERROR")) + if err != nil { + return err, false + } + } + case state == waitingForData && string(s[0]) == "REJECTED": + return nil, false + case state == waitingForData && string(s[0]) == "ERROR": + err = authWriteLine(conn.transport, []byte("CANCEL")) + if err != nil { + return err, false + } + state = waitingForReject + case state == waitingForData && string(s[0]) == "OK": + if len(s) != 2 { + err = authWriteLine(conn.transport, []byte("CANCEL")) + if err != nil { + return err, false + } + state = waitingForReject + } + conn.uuid = string(s[1]) + return nil, true + case state == waitingForData: + err = authWriteLine(conn.transport, []byte("ERROR")) + if err != nil { + return err, false + } + case state == waitingForOk && string(s[0]) == "OK": + if len(s) != 2 { + err = authWriteLine(conn.transport, []byte("CANCEL")) + if err != nil { + return err, false + } + state = waitingForReject + } + conn.uuid = string(s[1]) + return nil, true + case state == waitingForOk && string(s[0]) == "REJECTED": + return nil, false + case state == waitingForOk && (string(s[0]) == "DATA" || + string(s[0]) == "ERROR"): + + err = authWriteLine(conn.transport, []byte("CANCEL")) + if err != nil { + return err, false + } + state = waitingForReject + case state == waitingForOk: + err = 
authWriteLine(conn.transport, []byte("ERROR")) + if err != nil { + return err, false + } + case state == waitingForReject && string(s[0]) == "REJECTED": + return nil, false + case state == waitingForReject: + return errors.New("dbus: authentication protocol error"), false + default: + panic("dbus: invalid auth state") + } + } +} + +// authReadLine reads a line and separates it into its fields. +func authReadLine(in *bufio.Reader) ([][]byte, error) { + data, err := in.ReadBytes('\n') + if err != nil { + return nil, err + } + data = bytes.TrimSuffix(data, []byte("\r\n")) + return bytes.Split(data, []byte{' '}), nil +} + +// authWriteLine writes the given line in the authentication protocol format +// (elements of data separated by a " " and terminated by "\r\n"). +func authWriteLine(out io.Writer, data ...[]byte) error { + buf := make([]byte, 0) + for i, v := range data { + buf = append(buf, v...) + if i != len(data)-1 { + buf = append(buf, ' ') + } + } + buf = append(buf, '\r') + buf = append(buf, '\n') + n, err := out.Write(buf) + if err != nil { + return err + } + if n != len(buf) { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/vendor/github.com/godbus/dbus/auth_anonymous.go b/vendor/github.com/godbus/dbus/auth_anonymous.go new file mode 100644 index 0000000..75f3ad3 --- /dev/null +++ b/vendor/github.com/godbus/dbus/auth_anonymous.go @@ -0,0 +1,16 @@ +package dbus + +// AuthAnonymous returns an Auth that uses the ANONYMOUS mechanism. 
+func AuthAnonymous() Auth { + return &authAnonymous{} +} + +// authAnonymous implements the ANONYMOUS authentication mechanism. +type authAnonymous struct{} + +// FirstData returns the mechanism name with no initial response; the AuthOk +// status means the next server command should be an OK. +func (a *authAnonymous) FirstData() (name, resp []byte, status AuthStatus) { + return []byte("ANONYMOUS"), nil, AuthOk +} + +// HandleData always returns AuthError: this mechanism does not process DATA +// commands. +func (a *authAnonymous) HandleData(data []byte) (resp []byte, status AuthStatus) { + return nil, AuthError +} diff --git a/vendor/github.com/godbus/dbus/auth_external.go b/vendor/github.com/godbus/dbus/auth_external.go new file mode 100644 index 0000000..7e376d3 --- /dev/null +++ b/vendor/github.com/godbus/dbus/auth_external.go @@ -0,0 +1,26 @@ +package dbus + +import ( + "encoding/hex" +) + +// AuthExternal returns an Auth that authenticates as the given user with the +// EXTERNAL mechanism. +func AuthExternal(user string) Auth { + return authExternal{user} +} + +// authExternal implements the EXTERNAL authentication mechanism. +type authExternal struct { + user string +} + +// FirstData returns the mechanism name and the hex-encoded user as the initial +// response; the AuthOk status means the next server command should be an OK. +func (a authExternal) FirstData() ([]byte, []byte, AuthStatus) { + b := make([]byte, 2*len(a.user)) + hex.Encode(b, []byte(a.user)) + return []byte("EXTERNAL"), b, AuthOk +} + +// HandleData always returns AuthError: this mechanism does not process DATA +// commands. +func (a authExternal) HandleData(b []byte) ([]byte, AuthStatus) { + return nil, AuthError +} diff --git a/vendor/github.com/godbus/dbus/auth_sha1.go b/vendor/github.com/godbus/dbus/auth_sha1.go new file mode 100644 index 0000000..df15b46 --- /dev/null +++ b/vendor/github.com/godbus/dbus/auth_sha1.go @@ -0,0 +1,102 @@ +package dbus + +import ( + "bufio" + "bytes" + "crypto/rand" + "crypto/sha1" + "encoding/hex" + "os" +) + +// AuthCookieSha1 returns an Auth that authenticates as the given user with the +// DBUS_COOKIE_SHA1 mechanism. The home parameter should specify the home +// directory of the user. 
+func AuthCookieSha1(user, home string) Auth { + return authCookieSha1{user, home} +} + +// authCookieSha1 implements the DBUS_COOKIE_SHA1 authentication mechanism for +// user, reading cookies from below the home directory. +type authCookieSha1 struct { + user, home string +} + +// FirstData returns the mechanism name and the hex-encoded user as the initial +// response; the AuthContinue status means a DATA challenge is expected next. +func (a authCookieSha1) FirstData() ([]byte, []byte, AuthStatus) { + b := make([]byte, 2*len(a.user)) + hex.Encode(b, []byte(a.user)) + return []byte("DBUS_COOKIE_SHA1"), b, AuthContinue +} + +// HandleData answers the server's challenge. data is the hex encoding of +// "context id server-challenge"; the response is the hex encoding of +// "client-challenge hexhash", where hexhash is the hex-encoded SHA-1 of +// "server-challenge:client-challenge:cookie". Any failure yields AuthError. +func (a authCookieSha1) HandleData(data []byte) ([]byte, AuthStatus) { + challenge := make([]byte, len(data)/2) + _, err := hex.Decode(challenge, data) + if err != nil { + return nil, AuthError + } + b := bytes.Split(challenge, []byte{' '}) + if len(b) != 3 { + return nil, AuthError + } + context := b[0] + id := b[1] + svchallenge := b[2] + cookie := a.getCookie(context, id) + if cookie == nil { + return nil, AuthError + } + clchallenge := a.generateChallenge() + if clchallenge == nil { + return nil, AuthError + } + hash := sha1.New() + hash.Write(bytes.Join([][]byte{svchallenge, clchallenge, cookie}, []byte{':'})) + hexhash := make([]byte, 2*hash.Size()) + hex.Encode(hexhash, hash.Sum(nil)) + data = append(clchallenge, ' ') + data = append(data, hexhash...) + resp := make([]byte, 2*len(data)) + hex.Encode(resp, data) + return resp, AuthOk +} + +// getCookie searches for the cookie identified by id in context and returns +// the cookie content or nil. (Since HandleData can't return a specific error, +// but only whether an error occurred, this function also doesn't bother to +// return an error.) 
+func (a authCookieSha1) getCookie(context, id []byte) []byte { + file, err := os.Open(a.home + "/.dbus-keyrings/" + string(context)) + if err != nil { + return nil + } + defer file.Close() + rd := bufio.NewReader(file) + for { + line, err := rd.ReadBytes('\n') + if err != nil { + return nil + } + line = line[:len(line)-1] + b := bytes.Split(line, []byte{' '}) + if len(b) != 3 { + return nil + } + if bytes.Equal(b[0], id) { + return b[2] + } + } +} + +// generateChallenge returns a random, hex-encoded challenge, or nil on error +// (see above). +func (a authCookieSha1) generateChallenge() []byte { + b := make([]byte, 16) + n, err := rand.Read(b) + if err != nil { + return nil + } + if n != 16 { + return nil + } + enc := make([]byte, 32) + hex.Encode(enc, b) + return enc +} diff --git a/vendor/github.com/godbus/dbus/call.go b/vendor/github.com/godbus/dbus/call.go new file mode 100644 index 0000000..2cb1890 --- /dev/null +++ b/vendor/github.com/godbus/dbus/call.go @@ -0,0 +1,60 @@ +package dbus + +import ( + "context" + "errors" +) + +var errSignature = errors.New("dbus: mismatched signature") + +// Call represents a pending or completed method call. +type Call struct { + Destination string + Path ObjectPath + Method string + Args []interface{} + + // Strobes when the call is complete. + Done chan *Call + + // After completion, the error status. If this is non-nil, it may be an + // error message from the peer (with Error as its type) or some other error. + Err error + + // Holds the response once the call is done. + Body []interface{} + + // tracks context and canceler + ctx context.Context + ctxCanceler context.CancelFunc +} + +func (c *Call) Context() context.Context { + if c.ctx == nil { + return context.Background() + } + + return c.ctx +} + +func (c *Call) ContextCancel() { + if c.ctxCanceler != nil { + c.ctxCanceler() + } +} + +// Store stores the body of the reply into the provided pointers. 
It returns +// an error if the signatures of the body and retvalues don't match, or if +// the error status is not nil. +func (c *Call) Store(retvalues ...interface{}) error { + if c.Err != nil { + return c.Err + } + + return Store(c.Body, retvalues...) +} + +// done sends c on its Done channel and then cancels the context of the call. +func (c *Call) done() { + c.Done <- c + c.ContextCancel() +} diff --git a/vendor/github.com/godbus/dbus/conn.go b/vendor/github.com/godbus/dbus/conn.go new file mode 100644 index 0000000..b38920b --- /dev/null +++ b/vendor/github.com/godbus/dbus/conn.go @@ -0,0 +1,847 @@ +package dbus + +import ( + "context" + "errors" + "io" + "os" + "reflect" + "strings" + "sync" +) + +// The shared connections handed out by SystemBus and SessionBus, each guarded +// by its own mutex. +var ( + systemBus *Conn + systemBusLck sync.Mutex + sessionBus *Conn + sessionBusLck sync.Mutex +) + +// ErrClosed is the error returned by calls on a closed connection. +var ErrClosed = errors.New("dbus: connection closed by user") + +// Conn represents a connection to a message bus (usually, the system or +// session bus). +// +// Connections are either shared or private. Shared connections +// are shared between calls to the functions that return them. As a result, +// the methods Close, Auth and Hello must not be called on them. +// +// Multiple goroutines may invoke methods on a connection simultaneously. +type Conn struct { + transport + + busObj BusObject + unixFD bool + uuid string + + handler Handler + signalHandler SignalHandler + serialGen SerialGenerator + + names *nameTracker + calls *callTracker + outHandler *outputHandler + + eavesdropped chan<- *Message + eavesdroppedLck sync.Mutex +} + +// SessionBus returns a shared connection to the session bus, connecting to it +// if not already done. 
+func SessionBus() (conn *Conn, err error) { + sessionBusLck.Lock() + defer sessionBusLck.Unlock() + if sessionBus != nil { + return sessionBus, nil + } + // Remember the connection for later calls, but only if every step below + // succeeded (conn is reset to nil on failure). + defer func() { + if conn != nil { + sessionBus = conn + } + }() + conn, err = SessionBusPrivate() + if err != nil { + return + } + if err = conn.Auth(nil); err != nil { + conn.Close() + conn = nil + return + } + if err = conn.Hello(); err != nil { + conn.Close() + conn = nil + } + return +} + +// getSessionBusAddress determines the address of the session bus. It uses +// DBUS_SESSION_BUS_ADDRESS if that is set to something other than +// "autolaunch:", then an address discovered by +// tryDiscoverDbusSessionBusAddress (which is also exported to the +// environment), and finally the platform-specific fallback. +func getSessionBusAddress() (string, error) { + if address := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); address != "" && address != "autolaunch:" { + return address, nil + + } else if address := tryDiscoverDbusSessionBusAddress(); address != "" { + os.Setenv("DBUS_SESSION_BUS_ADDRESS", address) + return address, nil + } + return getSessionBusPlatformAddress() +} + +// SessionBusPrivate returns a new private connection to the session bus. +func SessionBusPrivate(opts ...ConnOption) (*Conn, error) { + address, err := getSessionBusAddress() + if err != nil { + return nil, err + } + + return Dial(address, opts...) +} + +// SessionBusPrivateHandler returns a new private connection to the session +// bus, using the provided handlers. +// +// Deprecated: use SessionBusPrivate with options instead. +func SessionBusPrivateHandler(handler Handler, signalHandler SignalHandler) (*Conn, error) { + return SessionBusPrivate(WithHandler(handler), WithSignalHandler(signalHandler)) +} + +// SystemBus returns a shared connection to the system bus, connecting to it if +// not already done. 
+func SystemBus() (conn *Conn, err error) { + systemBusLck.Lock() + defer systemBusLck.Unlock() + if systemBus != nil { + return systemBus, nil + } + defer func() { + if conn != nil { + systemBus = conn + } + }() + conn, err = SystemBusPrivate() + if err != nil { + return + } + if err = conn.Auth(nil); err != nil { + conn.Close() + conn = nil + return + } + if err = conn.Hello(); err != nil { + conn.Close() + conn = nil + } + return +} + +// SystemBusPrivate returns a new private connection to the system bus. +func SystemBusPrivate(opts ...ConnOption) (*Conn, error) { + return Dial(getSystemBusPlatformAddress(), opts...) +} + +// SystemBusPrivateHandler returns a new private connection to the system bus, using the provided handlers. +// +// Deprecated: use SystemBusPrivate with options instead. +func SystemBusPrivateHandler(handler Handler, signalHandler SignalHandler) (*Conn, error) { + return SystemBusPrivate(WithHandler(handler), WithSignalHandler(signalHandler)) +} + +// Dial establishes a new private connection to the message bus specified by address. +func Dial(address string, opts ...ConnOption) (*Conn, error) { + tr, err := getTransport(address) + if err != nil { + return nil, err + } + return newConn(tr, opts...) +} + +// DialHandler establishes a new private connection to the message bus specified by address, using the supplied handlers. +// +// Deprecated: use Dial with options instead. +func DialHandler(address string, handler Handler, signalHandler SignalHandler) (*Conn, error) { + return Dial(address, WithSignalHandler(signalHandler)) +} + +// ConnOption is a connection option. +type ConnOption func(conn *Conn) error + +// WithHandler overrides the default handler. +func WithHandler(handler Handler) ConnOption { + return func(conn *Conn) error { + conn.handler = handler + return nil + } +} + +// WithSignalHandler overrides the default signal handler. 
+func WithSignalHandler(handler SignalHandler) ConnOption { + return func(conn *Conn) error { + conn.signalHandler = handler + return nil + } +} + +// WithSerialGenerator overrides the default serial generator. +func WithSerialGenerator(gen SerialGenerator) ConnOption { + return func(conn *Conn) error { + conn.serialGen = gen + return nil + } +} + +// NewConn creates a new private *Conn from an already established connection. +func NewConn(conn io.ReadWriteCloser, opts ...ConnOption) (*Conn, error) { + return newConn(genericTransport{conn}, opts...) +} + +// NewConnHandler creates a new private *Conn from an already established connection, using the supplied handlers. +// +// Deprecated: use NewConn with options instead. +func NewConnHandler(conn io.ReadWriteCloser, handler Handler, signalHandler SignalHandler) (*Conn, error) { + // NOTE(review): conn is wrapped in genericTransport here and again inside + // NewConn; the second wrap looks redundant - confirm against + // genericTransport before changing it. + return NewConn(genericTransport{conn}, WithHandler(handler), WithSignalHandler(signalHandler)) +} + +// newConn creates a new *Conn from a transport. +func newConn(tr transport, opts ...ConnOption) (*Conn, error) { + conn := new(Conn) + conn.transport = tr + for _, opt := range opts { + if err := opt(conn); err != nil { + return nil, err + } + } + conn.calls = newCallTracker() + // Fall back to the defaults for everything the options left unset. + if conn.handler == nil { + conn.handler = NewDefaultHandler() + } + if conn.signalHandler == nil { + conn.signalHandler = NewDefaultSignalHandler() + } + if conn.serialGen == nil { + conn.serialGen = newSerialGenerator() + } + conn.outHandler = &outputHandler{conn: conn} + conn.names = newNameTracker() + conn.busObj = conn.Object("org.freedesktop.DBus", "/org/freedesktop/DBus") + return conn, nil +} + +// BusObject returns the object owned by the bus daemon which handles +// administrative requests. +func (conn *Conn) BusObject() BusObject { + return conn.busObj +} + +// Close closes the connection. Any blocked operations will return with errors +// and the channels passed to Eavesdrop and Signal are closed. 
This method must +// not be called on shared connections. +func (conn *Conn) Close() error { + conn.outHandler.close() + if term, ok := conn.signalHandler.(Terminator); ok { + term.Terminate() + } + + if term, ok := conn.handler.(Terminator); ok { + term.Terminate() + } + + conn.eavesdroppedLck.Lock() + if conn.eavesdropped != nil { + close(conn.eavesdropped) + } + conn.eavesdroppedLck.Unlock() + + return conn.transport.Close() +} + +// Eavesdrop causes conn to send all incoming messages to the given channel +// without further processing. Method replies, errors and signals will not be +// sent to the appropiate channels and method calls will not be handled. If nil +// is passed, the normal behaviour is restored. +// +// The caller has to make sure that ch is sufficiently buffered; +// if a message arrives when a write to ch is not possible, the message is +// discarded. +func (conn *Conn) Eavesdrop(ch chan<- *Message) { + conn.eavesdroppedLck.Lock() + conn.eavesdropped = ch + conn.eavesdroppedLck.Unlock() +} + +// GetSerial returns an unused serial. +func (conn *Conn) getSerial() uint32 { + return conn.serialGen.GetSerial() +} + +// Hello sends the initial org.freedesktop.DBus.Hello call. This method must be +// called after authentication, but before sending any other messages to the +// bus. Hello must not be called for shared connections. +func (conn *Conn) Hello() error { + var s string + err := conn.busObj.Call("org.freedesktop.DBus.Hello", 0).Store(&s) + if err != nil { + return err + } + conn.names.acquireUniqueConnectionName(s) + return nil +} + +// inWorker runs in an own goroutine, reading incoming messages from the +// transport and dispatching them appropiately. +func (conn *Conn) inWorker() { + for { + msg, err := conn.ReadMessage() + if err != nil { + if _, ok := err.(InvalidMessageError); !ok { + // Some read error occured (usually EOF); we can't really do + // anything but to shut down all stuff and returns errors to all + // pending replies. 
+ conn.Close() + conn.calls.finalizeAllWithError(err) + return + } + // invalid messages are ignored + continue + } + conn.eavesdroppedLck.Lock() + if conn.eavesdropped != nil { + // Non-blocking send: the message is dropped if the eavesdrop channel + // cannot take it right now. + select { + case conn.eavesdropped <- msg: + default: + } + conn.eavesdroppedLck.Unlock() + continue + } + conn.eavesdroppedLck.Unlock() + dest, _ := msg.Headers[FieldDestination].value.(string) + // A message is handled if it has no destination, if our unique name is + // not known yet, or if the destination is one of our names. + found := dest == "" || + !conn.names.uniqueNameIsKnown() || + conn.names.isKnownName(dest) + if !found { + // Eavesdropped a message, but no channel for it is registered. + // Ignore it. + continue + } + switch msg.Type { + case TypeError: + conn.serialGen.RetireSerial(conn.calls.handleDBusError(msg)) + case TypeMethodReply: + conn.serialGen.RetireSerial(conn.calls.handleReply(msg)) + case TypeSignal: + conn.handleSignal(msg) + case TypeMethodCall: + go conn.handleCall(msg) + } + + } +} + +// handleSignal updates the name tracker for NameLost / NameAcquired signals +// sent by org.freedesktop.DBus and then passes the signal to the signal +// handler. +// +// NOTE(review): the interface, member and path type assertions and the +// msg.Body[0] accesses are unchecked and panic on unexpected input; presumably +// messages are validated before they get here - confirm against ReadMessage. +func (conn *Conn) handleSignal(msg *Message) { + iface := msg.Headers[FieldInterface].value.(string) + member := msg.Headers[FieldMember].value.(string) + // as per http://dbus.freedesktop.org/doc/dbus-specification.html , + // sender is optional for signals. + sender, _ := msg.Headers[FieldSender].value.(string) + if iface == "org.freedesktop.DBus" && sender == "org.freedesktop.DBus" { + if member == "NameLost" { + // If we lost the name on the bus, remove it from our + // tracking list. + name, ok := msg.Body[0].(string) + if !ok { + panic("Unable to read the lost name") + } + conn.names.loseName(name) + } else if member == "NameAcquired" { + // If we acquired the name on the bus, add it to our + // tracking list. + name, ok := msg.Body[0].(string) + if !ok { + panic("Unable to read the acquired name") + } + conn.names.acquireName(name) + } + } + signal := &Signal{ + Sender: sender, + Path: msg.Headers[FieldPath].value.(ObjectPath), + Name: iface + "." 
+ member, + Body: msg.Body, + } + conn.signalHandler.DeliverSignal(iface, member, signal) +} + +// Names returns the list of all names that are currently owned by this +// connection. The slice is always at least one element long, the first element +// being the unique name of the connection. +func (conn *Conn) Names() []string { + return conn.names.listKnownNames() +} + +// Object returns the object identified by the given destination name and path. +func (conn *Conn) Object(dest string, path ObjectPath) BusObject { + return &Object{conn, dest, path} +} + +// sendMessage sends msg through the output handler; nothing extra is done if +// the connection has already been closed. +func (conn *Conn) sendMessage(msg *Message) { + conn.sendMessageAndIfClosed(msg, func() {}) +} + +// sendMessageAndIfClosed sends msg through the output handler, which calls +// ifClosed instead of sending if the connection has already been closed. A +// send error is reported to the tracked call for msg, if there is one. The +// serial of msg is retired unless msg is a method call that was sent +// successfully. +func (conn *Conn) sendMessageAndIfClosed(msg *Message, ifClosed func()) { + err := conn.outHandler.sendAndIfClosed(msg, ifClosed) + conn.calls.handleSendError(msg, err) + if err != nil { + conn.serialGen.RetireSerial(msg.serial) + } else if msg.Type != TypeMethodCall { + conn.serialGen.RetireSerial(msg.serial) + } +} + +// Send sends the given message to the message bus. You usually don't need to +// use this; use the higher-level equivalents (Call / Go, Emit and Export) +// instead. If msg is a method call and NoReplyExpected is not set, a non-nil +// call is returned and the same value is sent to ch (which must be buffered) +// once the call is complete. Otherwise, ch is ignored and a Call structure is +// returned of which only the Err member is valid. 
+func (conn *Conn) Send(msg *Message, ch chan *Call) *Call { + return conn.send(context.Background(), msg, ch) +} + +// SendWithContext acts like Send but takes a context. +func (conn *Conn) SendWithContext(ctx context.Context, msg *Message, ch chan *Call) *Call { + return conn.send(ctx, msg, ch) +} + +// send implements Send and SendWithContext. It assigns a fresh serial to msg. +// For a method call that expects a reply it tracks a new Call under that +// serial before sending; for every other message it only sends and returns a +// Call whose Err is ErrClosed if the connection was already closed. It panics +// on a nil ctx and on an unbuffered ch. +func (conn *Conn) send(ctx context.Context, msg *Message, ch chan *Call) *Call { + if ctx == nil { + panic("nil context") + } + + var call *Call + ctx, canceler := context.WithCancel(ctx) + msg.serial = conn.getSerial() + if msg.Type == TypeMethodCall && msg.Flags&FlagNoReplyExpected == 0 { + if ch == nil { + ch = make(chan *Call, 5) + } else if cap(ch) == 0 { + panic("dbus: unbuffered channel passed to (*Conn).Send") + } + call = new(Call) + call.Destination, _ = msg.Headers[FieldDestination].value.(string) + call.Path, _ = msg.Headers[FieldPath].value.(ObjectPath) + iface, _ := msg.Headers[FieldInterface].value.(string) + member, _ := msg.Headers[FieldMember].value.(string) + call.Method = iface + "." + member + call.Args = msg.Body + call.Done = ch + call.ctx = ctx + call.ctxCanceler = canceler + conn.calls.track(msg.serial, call) + // Once the context is done, fail the call with the context's error if + // it is still being tracked. + go func() { + <-ctx.Done() + conn.calls.handleSendError(msg, ctx.Err()) + }() + conn.sendMessageAndIfClosed(msg, func() { + conn.calls.handleSendError(msg, ErrClosed) + canceler() + }) + } else { + // No reply is tracked, so the derived context is not needed. + canceler() + call = &Call{Err: nil} + conn.sendMessageAndIfClosed(msg, func() { + call = &Call{Err: ErrClosed} + }) + } + return call +} + +// sendError creates an error message corresponding to the parameters and sends +// it. 
+func (conn *Conn) sendError(err error, dest string, serial uint32) { + var e *Error + // Convert err into an *Error: Error values and DBusError implementations + // keep their own name and body, anything else goes through + // MakeFailedError. + switch em := err.(type) { + case Error: + e = &em + case *Error: + e = em + case DBusError: + name, body := em.DBusError() + e = NewError(name, body) + default: + e = MakeFailedError(err) + } + msg := new(Message) + msg.Type = TypeError + msg.serial = conn.getSerial() + msg.Headers = make(map[HeaderField]Variant) + if dest != "" { + msg.Headers[FieldDestination] = MakeVariant(dest) + } + msg.Headers[FieldErrorName] = MakeVariant(e.Name) + msg.Headers[FieldReplySerial] = MakeVariant(serial) + msg.Body = e.Body + if len(e.Body) > 0 { + msg.Headers[FieldSignature] = MakeVariant(SignatureOf(e.Body...)) + } + conn.sendMessage(msg) +} + +// sendReply creates a method reply message corresponding to the parameters and +// sends it. +func (conn *Conn) sendReply(dest string, serial uint32, values ...interface{}) { + msg := new(Message) + msg.Type = TypeMethodReply + msg.serial = conn.getSerial() + msg.Headers = make(map[HeaderField]Variant) + if dest != "" { + msg.Headers[FieldDestination] = MakeVariant(dest) + } + msg.Headers[FieldReplySerial] = MakeVariant(serial) + msg.Body = values + if len(values) > 0 { + msg.Headers[FieldSignature] = MakeVariant(SignatureOf(values...)) + } + conn.sendMessage(msg) +} + +// defaultSignalAction calls fn with the connection's signal handler and ch, +// but only if that handler is the default signal handler; otherwise it does +// nothing. +func (conn *Conn) defaultSignalAction(fn func(h *defaultSignalHandler, ch chan<- *Signal), ch chan<- *Signal) { + if !isDefaultSignalHandler(conn.signalHandler) { + return + } + handler := conn.signalHandler.(*defaultSignalHandler) + fn(handler, ch) +} + +// Signal registers the given channel to be passed all received signal messages. +// The caller has to make sure that ch is sufficiently buffered; if a message +// arrives when a write to ch is not possible, it is discarded. +// +// Multiple of these channels can be registered at the same time. 
+// +// These channels are "overwritten" by Eavesdrop; i.e., if there currently is a +// channel for eavesdropped messages, this channel receives all signals, and +// none of the channels passed to Signal will receive any signals. +func (conn *Conn) Signal(ch chan<- *Signal) { + conn.defaultSignalAction((*defaultSignalHandler).addSignal, ch) +} + +// RemoveSignal removes the given channel from the list of the registered channels. +func (conn *Conn) RemoveSignal(ch chan<- *Signal) { + conn.defaultSignalAction((*defaultSignalHandler).removeSignal, ch) +} + +// SupportsUnixFDs returns whether the underlying transport supports passing of +// unix file descriptors. If this is false, method calls containing unix file +// descriptors will return an error and emitted signals containing them will +// not be sent. +func (conn *Conn) SupportsUnixFDs() bool { + return conn.unixFD +} + +// Error represents a D-Bus message of type Error. +type Error struct { + Name string + Body []interface{} +} + +// NewError returns a new *Error with the given name and body. +func NewError(name string, body []interface{}) *Error { + return &Error{name, body} +} + +// Error returns the first element of the body if it is a string, and the name +// of the error otherwise. +func (e Error) Error() string { + if len(e.Body) >= 1 { + s, ok := e.Body[0].(string) + if ok { + return s + } + } + return e.Name +} + +// Signal represents a D-Bus message of type Signal. The name member is given in +// "interface.member" notation, e.g. org.freedesktop.DBus.NameLost. +type Signal struct { + Sender string + Path ObjectPath + Name string + Body []interface{} +} + +// transport is a D-Bus transport. +type transport interface { + // Read and Write raw data (for example, for the authentication protocol). + io.ReadWriteCloser + + // Send the initial null byte used for the EXTERNAL mechanism. + SendNullByte() error + + // Returns whether this transport supports passing Unix FDs. + SupportsUnixFDs() bool + + // Signal the transport that Unix FD passing is enabled for this connection. + EnableUnixFDs() + + // Read / send a message, handling things like Unix FDs. 
+ ReadMessage() (*Message, error) + SendMessage(*Message) error +} + +// transports maps a transport name (the part of a bus address before the first +// ':') to the function that creates a transport from the rest of the address. +var ( + transports = make(map[string]func(string) (transport, error)) +) + +// getTransport tries the ';'-separated entries of address in order and returns +// the first transport that could be created. If none succeeds, the error of +// the last entry is returned. +func getTransport(address string) (transport, error) { + var err error + var t transport + + addresses := strings.Split(address, ";") + for _, v := range addresses { + i := strings.IndexRune(v, ':') + if i == -1 { + err = errors.New("dbus: invalid bus address (no transport)") + continue + } + f := transports[v[:i]] + if f == nil { + err = errors.New("dbus: invalid bus address (invalid or unsupported transport)") + continue + } + t, err = f(v[i+1:]) + if err == nil { + return t, nil + } + } + return nil, err +} + +// dereferenceAll returns a slice that, assuming that vs is a slice of pointers +// of arbitrary types, contains the values that are obtained from dereferencing +// all elements in vs. The elements of vs are replaced in place and vs itself +// is returned. +func dereferenceAll(vs []interface{}) []interface{} { + for i := range vs { + v := reflect.ValueOf(vs[i]) + v = v.Elem() + vs[i] = v.Interface() + } + return vs +} + +// getKey gets the value of key from a comma-separated list of key=value +// pairs. Returns "" on error / not found... 
+func getKey(s, key string) string { + for _, keyEqualsValue := range strings.Split(s, ",") { + keyValue := strings.SplitN(keyEqualsValue, "=", 2) + if len(keyValue) == 2 && keyValue[0] == key { + return keyValue[1] + } + } + return "" +} + +// outputHandler serializes the sending of messages on a connection and refuses +// to send once it has been closed. +type outputHandler struct { + conn *Conn + sendLck sync.Mutex + closed struct { + isClosed bool + lck sync.RWMutex + } +} + +// sendAndIfClosed sends msg on the connection while holding the send lock. If +// the handler has been closed, ifClosed is called instead and nil is returned. +func (h *outputHandler) sendAndIfClosed(msg *Message, ifClosed func()) error { + h.closed.lck.RLock() + defer h.closed.lck.RUnlock() + if h.closed.isClosed { + ifClosed() + return nil + } + h.sendLck.Lock() + defer h.sendLck.Unlock() + return h.conn.SendMessage(msg) +} + +// close marks the handler as closed; later calls to sendAndIfClosed do not +// send anymore. +func (h *outputHandler) close() { + h.closed.lck.Lock() + defer h.closed.lck.Unlock() + h.closed.isClosed = true +} + +// serialGenerator hands out message serials and remembers which ones are in +// use. +type serialGenerator struct { + lck sync.Mutex + nextSerial uint32 + serialUsed map[uint32]bool +} + +// newSerialGenerator returns a generator that starts at serial 1; serial 0 is +// marked as used so that it is not handed out. +func newSerialGenerator() *serialGenerator { + return &serialGenerator{ + serialUsed: map[uint32]bool{0: true}, + nextSerial: 1, + } +} + +// GetSerial returns the next serial that is not marked as used and marks it as +// used. +func (gen *serialGenerator) GetSerial() uint32 { + gen.lck.Lock() + defer gen.lck.Unlock() + n := gen.nextSerial + for gen.serialUsed[n] { + n++ + } + gen.serialUsed[n] = true + gen.nextSerial = n + 1 + return n +} + +// RetireSerial marks serial as no longer in use. +func (gen *serialGenerator) RetireSerial(serial uint32) { + gen.lck.Lock() + defer gen.lck.Unlock() + delete(gen.serialUsed, serial) +} + +// nameTracker records the unique name of a connection and the other names it +// currently owns. +type nameTracker struct { + lck sync.RWMutex + unique string + names map[string]struct{} +} + +// newNameTracker returns an empty nameTracker. +func newNameTracker() *nameTracker { + return &nameTracker{names: map[string]struct{}{}} +} + +// acquireUniqueConnectionName records name as the unique name of the +// connection. +func (tracker *nameTracker) acquireUniqueConnectionName(name string) { + tracker.lck.Lock() + defer tracker.lck.Unlock() + tracker.unique = name +} + +// acquireName adds name to the set of owned names. +func (tracker *nameTracker) acquireName(name string) { + tracker.lck.Lock() + defer tracker.lck.Unlock() + tracker.names[name] = struct{}{} +} + +// loseName removes name from the set of owned names. +func (tracker *nameTracker) loseName(name string) { + tracker.lck.Lock() + defer tracker.lck.Unlock() + delete(tracker.names, name) +} + +func (tracker 
*nameTracker) uniqueNameIsKnown() bool { + tracker.lck.RLock() + defer tracker.lck.RUnlock() + return tracker.unique != "" +} + +// isKnownName reports whether name is the unique name or one of the owned +// names. +func (tracker *nameTracker) isKnownName(name string) bool { + tracker.lck.RLock() + defer tracker.lck.RUnlock() + _, ok := tracker.names[name] + return ok || name == tracker.unique +} + +// listKnownNames returns the unique name followed by the owned names (in no +// particular order). +func (tracker *nameTracker) listKnownNames() []string { + tracker.lck.RLock() + defer tracker.lck.RUnlock() + out := make([]string, 0, len(tracker.names)+1) + out = append(out, tracker.unique) + for k := range tracker.names { + out = append(out, k) + } + return out +} + +// callTracker maps the serials of pending method calls to their Call. +type callTracker struct { + calls map[uint32]*Call + lck sync.RWMutex +} + +// newCallTracker returns an empty callTracker. +func newCallTracker() *callTracker { + return &callTracker{calls: map[uint32]*Call{}} +} + +// track registers call under the serial sn. +func (tracker *callTracker) track(sn uint32, call *Call) { + tracker.lck.Lock() + tracker.calls[sn] = call + tracker.lck.Unlock() +} + +// handleReply completes the call that msg replies to with the body of msg, if +// that call is tracked, and returns the reply serial of msg. +func (tracker *callTracker) handleReply(msg *Message) uint32 { + serial := msg.Headers[FieldReplySerial].value.(uint32) + tracker.lck.RLock() + _, ok := tracker.calls[serial] + tracker.lck.RUnlock() + if ok { + tracker.finalizeWithBody(serial, msg.Body) + } + return serial +} + +// handleDBusError completes the call that msg replies to with an Error built +// from the error name and body of msg, if that call is tracked, and returns +// the reply serial of msg. +func (tracker *callTracker) handleDBusError(msg *Message) uint32 { + serial := msg.Headers[FieldReplySerial].value.(uint32) + tracker.lck.RLock() + _, ok := tracker.calls[serial] + tracker.lck.RUnlock() + if ok { + name, _ := msg.Headers[FieldErrorName].value.(string) + tracker.finalizeWithError(serial, Error{name, msg.Body}) + } + return serial +} + +// handleSendError completes the call tracked under the serial of msg with err. +// It does nothing if err is nil or no such call is tracked. +func (tracker *callTracker) handleSendError(msg *Message, err error) { + if err == nil { + return + } + tracker.lck.RLock() + _, ok := tracker.calls[msg.serial] + tracker.lck.RUnlock() + if ok { + tracker.finalizeWithError(msg.serial, err) + } +} + +// finalize stops tracking the call with serial sn and cancels its context, +// without sending the call on its Done channel. +func (tracker *callTracker) finalize(sn uint32) { + tracker.lck.Lock() + defer tracker.lck.Unlock() + c, ok := tracker.calls[sn] + if ok { + delete(tracker.calls, sn) + 
c.ContextCancel() + } + return +} + +// finalizeWithBody stops tracking the call with serial sn, stores body in it +// and signals its completion. The tracker lock is released before the call is +// completed. +func (tracker *callTracker) finalizeWithBody(sn uint32, body []interface{}) { + tracker.lck.Lock() + c, ok := tracker.calls[sn] + if ok { + delete(tracker.calls, sn) + } + tracker.lck.Unlock() + if ok { + c.Body = body + c.done() + } + return +} + +// finalizeWithError stops tracking the call with serial sn, stores err in it +// and signals its completion. The tracker lock is released before the call is +// completed. +func (tracker *callTracker) finalizeWithError(sn uint32, err error) { + tracker.lck.Lock() + c, ok := tracker.calls[sn] + if ok { + delete(tracker.calls, sn) + } + tracker.lck.Unlock() + if ok { + c.Err = err + c.done() + } + return +} + +// finalizeAllWithError stops tracking all calls and completes each of them +// with err. +func (tracker *callTracker) finalizeAllWithError(err error) { + tracker.lck.Lock() + closedCalls := make([]*Call, 0, len(tracker.calls)) + for sn := range tracker.calls { + closedCalls = append(closedCalls, tracker.calls[sn]) + } + tracker.calls = map[uint32]*Call{} + tracker.lck.Unlock() + for _, call := range closedCalls { + call.Err = err + call.done() + } +} diff --git a/vendor/github.com/godbus/dbus/conn_darwin.go b/vendor/github.com/godbus/dbus/conn_darwin.go new file mode 100644 index 0000000..6e2e402 --- /dev/null +++ b/vendor/github.com/godbus/dbus/conn_darwin.go @@ -0,0 +1,37 @@ +package dbus + +import ( + "errors" + "fmt" + "os" + "os/exec" +) + +const defaultSystemBusAddress = "unix:path=/opt/local/var/run/dbus/system_bus_socket" + +// getSessionBusPlatformAddress asks launchctl for the value of +// DBUS_LAUNCHD_SESSION_BUS_SOCKET and returns it, without its last byte, as a +// unix:path address. +func getSessionBusPlatformAddress() (string, error) { + cmd := exec.Command("launchctl", "getenv", "DBUS_LAUNCHD_SESSION_BUS_SOCKET") + b, err := cmd.CombinedOutput() + + if err != nil { + return "", err + } + + if len(b) == 0 { + return "", errors.New("dbus: couldn't determine address of session bus") + } + + return "unix:path=" + string(b[:len(b)-1]), nil +} + +// getSystemBusPlatformAddress returns a unix:path address built from the +// DBUS_LAUNCHD_SESSION_BUS_SOCKET environment variable if it is set, and +// defaultSystemBusAddress otherwise. +func getSystemBusPlatformAddress() string { + address := os.Getenv("DBUS_LAUNCHD_SESSION_BUS_SOCKET") + if address != "" { + return fmt.Sprintf("unix:path=%s", address) + } + return defaultSystemBusAddress +} + +// tryDiscoverDbusSessionBusAddress always returns "" in this file; no +// discovery is attempted. +func tryDiscoverDbusSessionBusAddress() string { + return "" +} diff --git a/vendor/github.com/godbus/dbus/conn_other.go 
b/vendor/github.com/godbus/dbus/conn_other.go new file mode 100644 index 0000000..289044a --- /dev/null +++ b/vendor/github.com/godbus/dbus/conn_other.go @@ -0,0 +1,91 @@ +// +build !darwin + +package dbus + +import ( + "bytes" + "errors" + "fmt" + "io/ioutil" + "os" + "os/exec" + "os/user" + "path" + "strings" +) + +func getSessionBusPlatformAddress() (string, error) { + cmd := exec.Command("dbus-launch") + b, err := cmd.CombinedOutput() + + if err != nil { + return "", err + } + + i := bytes.IndexByte(b, '=') + j := bytes.IndexByte(b, '\n') + + if i == -1 || j == -1 { + return "", errors.New("dbus: couldn't determine address of session bus") + } + + env, addr := string(b[0:i]), string(b[i+1:j]) + os.Setenv(env, addr) + + return addr, nil +} + +// tryDiscoverDbusSessionBusAddress tries to discover an existing dbus session +// and return the value of its DBUS_SESSION_BUS_ADDRESS. +// It tries different techniques employed by different operating systems, +// returning the first valid address it finds, or an empty string. +// +// * /run/user/<uid>/bus: if this exists, it *is* the bus socket. present on +// Ubuntu 18.04 +// * /run/user/<uid>/dbus-session: if this exists, it can be parsed for the bus +// address.
present on Ubuntu 16.04 +// +// See https://dbus.freedesktop.org/doc/dbus-launch.1.html +func tryDiscoverDbusSessionBusAddress() string { + if runtimeDirectory, err := getRuntimeDirectory(); err == nil { + + if runUserBusFile := path.Join(runtimeDirectory, "bus"); fileExists(runUserBusFile) { + // if /run/user/<uid>/bus exists, that file itself + // *is* the unix socket, so return its path + return fmt.Sprintf("unix:path=%s", runUserBusFile) + } + if runUserSessionDbusFile := path.Join(runtimeDirectory, "dbus-session"); fileExists(runUserSessionDbusFile) { + // if /run/user/<uid>/dbus-session exists, it's a + // text file containing the address of the socket, e.g.: + // DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-E1c73yNqrG + + if f, err := ioutil.ReadFile(runUserSessionDbusFile); err == nil { + fileContent := string(f) + + prefix := "DBUS_SESSION_BUS_ADDRESS=" + + if strings.HasPrefix(fileContent, prefix) { + address := strings.TrimRight(strings.TrimPrefix(fileContent, prefix), "\n\r") + return address + } + } + } + } + return "" +} + +func getRuntimeDirectory() (string, error) { + if currentUser, err := user.Current(); err != nil { + return "", err + } else { + return fmt.Sprintf("/run/user/%s", currentUser.Uid), nil + } +} + +func fileExists(filename string) bool { + if _, err := os.Stat(filename); !os.IsNotExist(err) { + return true + } else { + return false + } +} diff --git a/vendor/github.com/godbus/dbus/conn_unix.go b/vendor/github.com/godbus/dbus/conn_unix.go new file mode 100644 index 0000000..4cba8ae --- /dev/null +++ b/vendor/github.com/godbus/dbus/conn_unix.go @@ -0,0 +1,18 @@ +//+build !windows,!solaris,!darwin + +package dbus + +import ( + "os" + "fmt" +) + +const defaultSystemBusAddress = "unix:path=/var/run/dbus/system_bus_socket" + +func getSystemBusPlatformAddress() string { + address := os.Getenv("DBUS_SYSTEM_BUS_ADDRESS") + if address != "" { + return fmt.Sprintf("unix:path=%s", address) + } + return defaultSystemBusAddress +} \ No newline at end
of file diff --git a/vendor/github.com/godbus/dbus/conn_windows.go b/vendor/github.com/godbus/dbus/conn_windows.go new file mode 100644 index 0000000..4291e45 --- /dev/null +++ b/vendor/github.com/godbus/dbus/conn_windows.go @@ -0,0 +1,15 @@ +//+build windows + +package dbus + +import "os" + +const defaultSystemBusAddress = "tcp:host=127.0.0.1,port=12434" + +func getSystemBusPlatformAddress() string { + address := os.Getenv("DBUS_SYSTEM_BUS_ADDRESS") + if address != "" { + return address + } + return defaultSystemBusAddress +} diff --git a/vendor/github.com/godbus/dbus/dbus.go b/vendor/github.com/godbus/dbus/dbus.go new file mode 100644 index 0000000..c6d0d3c --- /dev/null +++ b/vendor/github.com/godbus/dbus/dbus.go @@ -0,0 +1,427 @@ +package dbus + +import ( + "errors" + "fmt" + "reflect" + "strings" +) + +var ( + byteType = reflect.TypeOf(byte(0)) + boolType = reflect.TypeOf(false) + uint8Type = reflect.TypeOf(uint8(0)) + int16Type = reflect.TypeOf(int16(0)) + uint16Type = reflect.TypeOf(uint16(0)) + intType = reflect.TypeOf(int(0)) + uintType = reflect.TypeOf(uint(0)) + int32Type = reflect.TypeOf(int32(0)) + uint32Type = reflect.TypeOf(uint32(0)) + int64Type = reflect.TypeOf(int64(0)) + uint64Type = reflect.TypeOf(uint64(0)) + float64Type = reflect.TypeOf(float64(0)) + stringType = reflect.TypeOf("") + signatureType = reflect.TypeOf(Signature{""}) + objectPathType = reflect.TypeOf(ObjectPath("")) + variantType = reflect.TypeOf(Variant{Signature{""}, nil}) + interfacesType = reflect.TypeOf([]interface{}{}) + interfaceType = reflect.TypeOf((*interface{})(nil)).Elem() + unixFDType = reflect.TypeOf(UnixFD(0)) + unixFDIndexType = reflect.TypeOf(UnixFDIndex(0)) +) + +// An InvalidTypeError signals that a value which cannot be represented in the +// D-Bus wire format was passed to a function. 
+type InvalidTypeError struct { + Type reflect.Type +} + +func (e InvalidTypeError) Error() string { + return "dbus: invalid type " + e.Type.String() +} + +// Store copies the values contained in src to dest, which must be a slice of +// pointers. It converts slices of interfaces from src to corresponding structs +// in dest. An error is returned if the lengths of src and dest or the types of +// their elements don't match. +func Store(src []interface{}, dest ...interface{}) error { + if len(src) != len(dest) { + return errors.New("dbus.Store: length mismatch") + } + + for i := range src { + if err := storeInterfaces(src[i], dest[i]); err != nil { + return err + } + } + return nil +} + +func storeInterfaces(src, dest interface{}) error { + return store(reflect.ValueOf(dest), reflect.ValueOf(src)) +} + +func store(dest, src reflect.Value) error { + if dest.Kind() == reflect.Ptr { + return store(dest.Elem(), src) + } + switch src.Kind() { + case reflect.Slice: + return storeSlice(dest, src) + case reflect.Map: + return storeMap(dest, src) + default: + return storeBase(dest, src) + } +} + +func storeBase(dest, src reflect.Value) error { + return setDest(dest, src) +} + +func setDest(dest, src reflect.Value) error { + if !isVariant(src.Type()) && isVariant(dest.Type()) { + //special conversion for dbus.Variant + dest.Set(reflect.ValueOf(MakeVariant(src.Interface()))) + return nil + } + if isVariant(src.Type()) && !isVariant(dest.Type()) { + src = getVariantValue(src) + } + if !src.Type().ConvertibleTo(dest.Type()) { + return fmt.Errorf( + "dbus.Store: type mismatch: cannot convert %s to %s", + src.Type(), dest.Type()) + } + dest.Set(src.Convert(dest.Type())) + return nil +} + +func kindsAreCompatible(dest, src reflect.Type) bool { + switch { + case isVariant(dest): + return true + case dest.Kind() == reflect.Interface: + return true + default: + return dest.Kind() == src.Kind() + } +} + +func isConvertibleTo(dest, src reflect.Type) bool { + switch { + case 
isVariant(dest): + return true + case dest.Kind() == reflect.Interface: + return true + case dest.Kind() == reflect.Slice: + return src.Kind() == reflect.Slice && + isConvertibleTo(dest.Elem(), src.Elem()) + case dest.Kind() == reflect.Struct: + return src == interfacesType + default: + return src.ConvertibleTo(dest) + } +} + +func storeMap(dest, src reflect.Value) error { + switch { + case !kindsAreCompatible(dest.Type(), src.Type()): + return fmt.Errorf( + "dbus.Store: type mismatch: "+ + "map: cannot store a value of %s into %s", + src.Type(), dest.Type()) + case isVariant(dest.Type()): + return storeMapIntoVariant(dest, src) + case dest.Kind() == reflect.Interface: + return storeMapIntoInterface(dest, src) + case isConvertibleTo(dest.Type().Key(), src.Type().Key()) && + isConvertibleTo(dest.Type().Elem(), src.Type().Elem()): + return storeMapIntoMap(dest, src) + default: + return fmt.Errorf( + "dbus.Store: type mismatch: "+ + "map: cannot convert a value of %s into %s", + src.Type(), dest.Type()) + } +} + +func storeMapIntoVariant(dest, src reflect.Value) error { + dv := reflect.MakeMap(src.Type()) + err := store(dv, src) + if err != nil { + return err + } + return storeBase(dest, dv) +} + +func storeMapIntoInterface(dest, src reflect.Value) error { + var dv reflect.Value + if isVariant(src.Type().Elem()) { + //Convert variants to interface{} recursively when converting + //to interface{} + dv = reflect.MakeMap( + reflect.MapOf(src.Type().Key(), interfaceType)) + } else { + dv = reflect.MakeMap(src.Type()) + } + err := store(dv, src) + if err != nil { + return err + } + return storeBase(dest, dv) +} + +func storeMapIntoMap(dest, src reflect.Value) error { + if dest.IsNil() { + dest.Set(reflect.MakeMap(dest.Type())) + } + keys := src.MapKeys() + for _, key := range keys { + dkey := key.Convert(dest.Type().Key()) + dval := reflect.New(dest.Type().Elem()).Elem() + err := store(dval, getVariantValue(src.MapIndex(key))) + if err != nil { + return err + } + 
dest.SetMapIndex(dkey, dval) + } + return nil +} + +func storeSlice(dest, src reflect.Value) error { + switch { + case src.Type() == interfacesType && dest.Kind() == reflect.Struct: + //The decoder always decodes structs as slices of interface{} + return storeStruct(dest, src) + case !kindsAreCompatible(dest.Type(), src.Type()): + return fmt.Errorf( + "dbus.Store: type mismatch: "+ + "slice: cannot store a value of %s into %s", + src.Type(), dest.Type()) + case isVariant(dest.Type()): + return storeSliceIntoVariant(dest, src) + case dest.Kind() == reflect.Interface: + return storeSliceIntoInterface(dest, src) + case isConvertibleTo(dest.Type().Elem(), src.Type().Elem()): + return storeSliceIntoSlice(dest, src) + default: + return fmt.Errorf( + "dbus.Store: type mismatch: "+ + "slice: cannot convert a value of %s into %s", + src.Type(), dest.Type()) + } +} + +func storeStruct(dest, src reflect.Value) error { + if isVariant(dest.Type()) { + return storeBase(dest, src) + } + dval := make([]interface{}, 0, dest.NumField()) + dtype := dest.Type() + for i := 0; i < dest.NumField(); i++ { + field := dest.Field(i) + ftype := dtype.Field(i) + if ftype.PkgPath != "" { + continue + } + if ftype.Tag.Get("dbus") == "-" { + continue + } + dval = append(dval, field.Addr().Interface()) + } + if src.Len() != len(dval) { + return fmt.Errorf( + "dbus.Store: type mismatch: "+ + "destination struct does not have "+ + "enough fields need: %d have: %d", + src.Len(), len(dval)) + } + return Store(src.Interface().([]interface{}), dval...) 
+} + +func storeSliceIntoVariant(dest, src reflect.Value) error { + dv := reflect.MakeSlice(src.Type(), src.Len(), src.Cap()) + err := store(dv, src) + if err != nil { + return err + } + return storeBase(dest, dv) +} + +func storeSliceIntoInterface(dest, src reflect.Value) error { + var dv reflect.Value + if isVariant(src.Type().Elem()) { + //Convert variants to interface{} recursively when converting + //to interface{} + dv = reflect.MakeSlice(reflect.SliceOf(interfaceType), + src.Len(), src.Cap()) + } else { + dv = reflect.MakeSlice(src.Type(), src.Len(), src.Cap()) + } + err := store(dv, src) + if err != nil { + return err + } + return storeBase(dest, dv) +} + +func storeSliceIntoSlice(dest, src reflect.Value) error { + if dest.IsNil() || dest.Len() < src.Len() { + dest.Set(reflect.MakeSlice(dest.Type(), src.Len(), src.Cap())) + } + if dest.Len() != src.Len() { + return fmt.Errorf( + "dbus.Store: type mismatch: "+ + "slices are different lengths "+ + "need: %d have: %d", + src.Len(), dest.Len()) + } + for i := 0; i < src.Len(); i++ { + err := store(dest.Index(i), getVariantValue(src.Index(i))) + if err != nil { + return err + } + } + return nil +} + +func getVariantValue(in reflect.Value) reflect.Value { + if isVariant(in.Type()) { + return reflect.ValueOf(in.Interface().(Variant).Value()) + } + return in +} + +func isVariant(t reflect.Type) bool { + return t == variantType +} + +// An ObjectPath is an object path as defined by the D-Bus spec. +type ObjectPath string + +// IsValid returns whether the object path is valid. 
+func (o ObjectPath) IsValid() bool { + s := string(o) + if len(s) == 0 { + return false + } + if s[0] != '/' { + return false + } + if s[len(s)-1] == '/' && len(s) != 1 { + return false + } + // probably not used, but technically possible + if s == "/" { + return true + } + split := strings.Split(s[1:], "/") + for _, v := range split { + if len(v) == 0 { + return false + } + for _, c := range v { + if !isMemberChar(c) { + return false + } + } + } + return true +} + +// A UnixFD is a Unix file descriptor sent over the wire. See the package-level +// documentation for more information about Unix file descriptor passing. +type UnixFD int32 + +// A UnixFDIndex is the representation of a Unix file descriptor in a message. +type UnixFDIndex uint32 + +// alignment returns the alignment of values of type t. +func alignment(t reflect.Type) int { + switch t { + case variantType: + return 1 + case objectPathType: + return 4 + case signatureType: + return 1 + case interfacesType: + return 4 + } + switch t.Kind() { + case reflect.Uint8: + return 1 + case reflect.Uint16, reflect.Int16: + return 2 + case reflect.Uint, reflect.Int, reflect.Uint32, reflect.Int32, reflect.String, reflect.Array, reflect.Slice, reflect.Map: + return 4 + case reflect.Uint64, reflect.Int64, reflect.Float64, reflect.Struct: + return 8 + case reflect.Ptr: + return alignment(t.Elem()) + } + return 1 +} + +// isKeyType returns whether t is a valid key type for a D-Bus dict. +func isKeyType(t reflect.Type) bool { + switch t.Kind() { + case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Int16, reflect.Int32, reflect.Int64, reflect.Float64, + reflect.String, reflect.Uint, reflect.Int: + + return true + } + return false +} + +// isValidInterface returns whether s is a valid name for an interface. +func isValidInterface(s string) bool { + if len(s) == 0 || len(s) > 255 || s[0] == '.'
{ + return false + } + elem := strings.Split(s, ".") + if len(elem) < 2 { + return false + } + for _, v := range elem { + if len(v) == 0 { + return false + } + if v[0] >= '0' && v[0] <= '9' { + return false + } + for _, c := range v { + if !isMemberChar(c) { + return false + } + } + } + return true +} + +// isValidMember returns whether s is a valid name for a member. +func isValidMember(s string) bool { + if len(s) == 0 || len(s) > 255 { + return false + } + i := strings.Index(s, ".") + if i != -1 { + return false + } + if s[0] >= '0' && s[0] <= '9' { + return false + } + for _, c := range s { + if !isMemberChar(c) { + return false + } + } + return true +} + +func isMemberChar(c rune) bool { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || c == '_' +} diff --git a/vendor/github.com/godbus/dbus/decoder.go b/vendor/github.com/godbus/dbus/decoder.go new file mode 100644 index 0000000..5c27d3b --- /dev/null +++ b/vendor/github.com/godbus/dbus/decoder.go @@ -0,0 +1,235 @@ +package dbus + +import ( + "encoding/binary" + "io" + "reflect" +) + +type decoder struct { + in io.Reader + order binary.ByteOrder + pos int +} + +// newDecoder returns a new decoder that reads values from in. The input is +// expected to be in the given byte order. +func newDecoder(in io.Reader, order binary.ByteOrder) *decoder { + dec := new(decoder) + dec.in = in + dec.order = order + return dec +} + +// align aligns the input to the given boundary and panics on error. +func (dec *decoder) align(n int) { + if dec.pos%n != 0 { + newpos := (dec.pos + n - 1) & ^(n - 1) + empty := make([]byte, newpos-dec.pos) + if _, err := io.ReadFull(dec.in, empty); err != nil { + panic(err) + } + dec.pos = newpos + } +} + +// Calls binary.Read(dec.in, dec.order, v) and panics on read errors. 
+func (dec *decoder) binread(v interface{}) { + if err := binary.Read(dec.in, dec.order, v); err != nil { + panic(err) + } +} + +func (dec *decoder) Decode(sig Signature) (vs []interface{}, err error) { + defer func() { + var ok bool + v := recover() + if err, ok = v.(error); ok { + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = FormatError("unexpected EOF") + } + } + }() + vs = make([]interface{}, 0) + s := sig.str + for s != "" { + err, rem := validSingle(s, 0) + if err != nil { + return nil, err + } + v := dec.decode(s[:len(s)-len(rem)], 0) + vs = append(vs, v) + s = rem + } + return vs, nil +} + +func (dec *decoder) decode(s string, depth int) interface{} { + dec.align(alignment(typeFor(s))) + switch s[0] { + case 'y': + var b [1]byte + if _, err := dec.in.Read(b[:]); err != nil { + panic(err) + } + dec.pos++ + return b[0] + case 'b': + i := dec.decode("u", depth).(uint32) + switch { + case i == 0: + return false + case i == 1: + return true + default: + panic(FormatError("invalid value for boolean")) + } + case 'n': + var i int16 + dec.binread(&i) + dec.pos += 2 + return i + case 'i': + var i int32 + dec.binread(&i) + dec.pos += 4 + return i + case 'x': + var i int64 + dec.binread(&i) + dec.pos += 8 + return i + case 'q': + var i uint16 + dec.binread(&i) + dec.pos += 2 + return i + case 'u': + var i uint32 + dec.binread(&i) + dec.pos += 4 + return i + case 't': + var i uint64 + dec.binread(&i) + dec.pos += 8 + return i + case 'd': + var f float64 + dec.binread(&f) + dec.pos += 8 + return f + case 's': + length := dec.decode("u", depth).(uint32) + b := make([]byte, int(length)+1) + if _, err := io.ReadFull(dec.in, b); err != nil { + panic(err) + } + dec.pos += int(length) + 1 + return string(b[:len(b)-1]) + case 'o': + return ObjectPath(dec.decode("s", depth).(string)) + case 'g': + length := dec.decode("y", depth).(byte) + b := make([]byte, int(length)+1) + if _, err := io.ReadFull(dec.in, b); err != nil { + panic(err) + } + dec.pos += int(length) + 
1 + sig, err := ParseSignature(string(b[:len(b)-1])) + if err != nil { + panic(err) + } + return sig + case 'v': + if depth >= 64 { + panic(FormatError("input exceeds container depth limit")) + } + var variant Variant + sig := dec.decode("g", depth).(Signature) + if len(sig.str) == 0 { + panic(FormatError("variant signature is empty")) + } + err, rem := validSingle(sig.str, 0) + if err != nil { + panic(err) + } + if rem != "" { + panic(FormatError("variant signature has multiple types")) + } + variant.sig = sig + variant.value = dec.decode(sig.str, depth+1) + return variant + case 'h': + return UnixFDIndex(dec.decode("u", depth).(uint32)) + case 'a': + if len(s) > 1 && s[1] == '{' { + ksig := s[2:3] + vsig := s[3 : len(s)-1] + v := reflect.MakeMap(reflect.MapOf(typeFor(ksig), typeFor(vsig))) + if depth >= 63 { + panic(FormatError("input exceeds container depth limit")) + } + length := dec.decode("u", depth).(uint32) + // Even for empty maps, the correct padding must be included + dec.align(8) + spos := dec.pos + for dec.pos < spos+int(length) { + dec.align(8) + if !isKeyType(v.Type().Key()) { + panic(InvalidTypeError{v.Type()}) + } + kv := dec.decode(ksig, depth+2) + vv := dec.decode(vsig, depth+2) + v.SetMapIndex(reflect.ValueOf(kv), reflect.ValueOf(vv)) + } + return v.Interface() + } + if depth >= 64 { + panic(FormatError("input exceeds container depth limit")) + } + length := dec.decode("u", depth).(uint32) + v := reflect.MakeSlice(reflect.SliceOf(typeFor(s[1:])), 0, int(length)) + // Even for empty arrays, the correct padding must be included + align := alignment(typeFor(s[1:])) + if len(s) > 1 && s[1] == '(' { + //Special case for arrays of structs + //structs decode as a slice of interface{} values + //but the dbus alignment does not match this + align = 8 + } + dec.align(align) + spos := dec.pos + for dec.pos < spos+int(length) { + ev := dec.decode(s[1:], depth+1) + v = reflect.Append(v, reflect.ValueOf(ev)) + } + return v.Interface() + case '(': + if depth 
>= 64 { + panic(FormatError("input exceeds container depth limit")) + } + dec.align(8) + v := make([]interface{}, 0) + s = s[1 : len(s)-1] + for s != "" { + err, rem := validSingle(s, 0) + if err != nil { + panic(err) + } + ev := dec.decode(s[:len(s)-len(rem)], depth+1) + v = append(v, ev) + s = rem + } + return v + default: + panic(SignatureError{Sig: s}) + } +} + +// A FormatError is an error in the wire format. +type FormatError string + +func (e FormatError) Error() string { + return "dbus: wire format error: " + string(e) +} diff --git a/vendor/github.com/godbus/dbus/default_handler.go b/vendor/github.com/godbus/dbus/default_handler.go new file mode 100644 index 0000000..81dbcc7 --- /dev/null +++ b/vendor/github.com/godbus/dbus/default_handler.go @@ -0,0 +1,321 @@ +package dbus + +import ( + "bytes" + "reflect" + "strings" + "sync" +) + +func newIntrospectIntf(h *defaultHandler) *exportedIntf { + methods := make(map[string]Method) + methods["Introspect"] = exportedMethod{ + reflect.ValueOf(func(msg Message) (string, *Error) { + path := msg.Headers[FieldPath].value.(ObjectPath) + return h.introspectPath(path), nil + }), + } + return newExportedIntf(methods, true) +} + +//NewDefaultHandler returns an instance of the default +//call handler. This is useful if you want to implement only +//one of the two handlers but not both. +// +// Deprecated: this is the default value, don't use it, it will be unexported. 
+func NewDefaultHandler() *defaultHandler { + h := &defaultHandler{ + objects: make(map[ObjectPath]*exportedObj), + defaultIntf: make(map[string]*exportedIntf), + } + h.defaultIntf["org.freedesktop.DBus.Introspectable"] = newIntrospectIntf(h) + return h +} + +type defaultHandler struct { + sync.RWMutex + objects map[ObjectPath]*exportedObj + defaultIntf map[string]*exportedIntf +} + +func (h *defaultHandler) PathExists(path ObjectPath) bool { + _, ok := h.objects[path] + return ok +} + +func (h *defaultHandler) introspectPath(path ObjectPath) string { + subpath := make(map[string]struct{}) + var xml bytes.Buffer + xml.WriteString("") + for obj, _ := range h.objects { + p := string(path) + if p != "/" { + p += "/" + } + if strings.HasPrefix(string(obj), p) { + node_name := strings.Split(string(obj[len(p):]), "/")[0] + subpath[node_name] = struct{}{} + } + } + for s, _ := range subpath { + xml.WriteString("\n\t") + } + xml.WriteString("\n") + return xml.String() +} + +func (h *defaultHandler) LookupObject(path ObjectPath) (ServerObject, bool) { + h.RLock() + defer h.RUnlock() + object, ok := h.objects[path] + if ok { + return object, ok + } + + // If an object wasn't found for this exact path, + // look for a matching subtree registration + subtreeObject := newExportedObject() + path = path[:strings.LastIndex(string(path), "/")] + for len(path) > 0 { + object, ok = h.objects[path] + if ok { + for name, iface := range object.interfaces { + // Only include this handler if it registered for the subtree + if iface.isFallbackInterface() { + subtreeObject.interfaces[name] = iface + } + } + break + } + + path = path[:strings.LastIndex(string(path), "/")] + } + + for name, intf := range h.defaultIntf { + if _, exists := subtreeObject.interfaces[name]; exists { + continue + } + subtreeObject.interfaces[name] = intf + } + + return subtreeObject, true +} + +func (h *defaultHandler) AddObject(path ObjectPath, object *exportedObj) { + h.Lock() + h.objects[path] = object + 
h.Unlock() +} + +func (h *defaultHandler) DeleteObject(path ObjectPath) { + h.Lock() + delete(h.objects, path) + h.Unlock() +} + +type exportedMethod struct { + reflect.Value +} + +func (m exportedMethod) Call(args ...interface{}) ([]interface{}, error) { + t := m.Type() + + params := make([]reflect.Value, len(args)) + for i := 0; i < len(args); i++ { + params[i] = reflect.ValueOf(args[i]).Elem() + } + + ret := m.Value.Call(params) + + err := ret[t.NumOut()-1].Interface().(*Error) + ret = ret[:t.NumOut()-1] + out := make([]interface{}, len(ret)) + for i, val := range ret { + out[i] = val.Interface() + } + if err == nil { + //concrete type to interface nil is a special case + return out, nil + } + return out, err +} + +func (m exportedMethod) NumArguments() int { + return m.Value.Type().NumIn() +} + +func (m exportedMethod) ArgumentValue(i int) interface{} { + return reflect.Zero(m.Type().In(i)).Interface() +} + +func (m exportedMethod) NumReturns() int { + return m.Value.Type().NumOut() +} + +func (m exportedMethod) ReturnValue(i int) interface{} { + return reflect.Zero(m.Type().Out(i)).Interface() +} + +func newExportedObject() *exportedObj { + return &exportedObj{ + interfaces: make(map[string]*exportedIntf), + } +} + +type exportedObj struct { + mu sync.RWMutex + interfaces map[string]*exportedIntf +} + +func (obj *exportedObj) LookupInterface(name string) (Interface, bool) { + if name == "" { + return obj, true + } + obj.mu.RLock() + defer obj.mu.RUnlock() + intf, exists := obj.interfaces[name] + return intf, exists +} + +func (obj *exportedObj) AddInterface(name string, iface *exportedIntf) { + obj.mu.Lock() + defer obj.mu.Unlock() + obj.interfaces[name] = iface +} + +func (obj *exportedObj) DeleteInterface(name string) { + obj.mu.Lock() + defer obj.mu.Unlock() + delete(obj.interfaces, name) +} + +func (obj *exportedObj) LookupMethod(name string) (Method, bool) { + obj.mu.RLock() + defer obj.mu.RUnlock() + for _, intf := range obj.interfaces { + method, exists 
:= intf.LookupMethod(name) + if exists { + return method, exists + } + } + return nil, false +} + +func (obj *exportedObj) isFallbackInterface() bool { + return false +} + +func newExportedIntf(methods map[string]Method, includeSubtree bool) *exportedIntf { + return &exportedIntf{ + methods: methods, + includeSubtree: includeSubtree, + } +} + +type exportedIntf struct { + methods map[string]Method + + // Whether or not this export is for the entire subtree + includeSubtree bool +} + +func (obj *exportedIntf) LookupMethod(name string) (Method, bool) { + out, exists := obj.methods[name] + return out, exists +} + +func (obj *exportedIntf) isFallbackInterface() bool { + return obj.includeSubtree +} + +//NewDefaultSignalHandler returns an instance of the default +//signal handler. This is useful if you want to implement only +//one of the two handlers but not both. +// +// Deprecated: this is the default value, don't use it, it will be unexported. +func NewDefaultSignalHandler() *defaultSignalHandler { + return &defaultSignalHandler{ + closeChan: make(chan struct{}), + } +} + +func isDefaultSignalHandler(handler SignalHandler) bool { + _, ok := handler.(*defaultSignalHandler) + return ok +} + +type defaultSignalHandler struct { + sync.RWMutex + closed bool + signals []chan<- *Signal + closeChan chan struct{} +} + +func (sh *defaultSignalHandler) DeliverSignal(intf, name string, signal *Signal) { + sh.RLock() + defer sh.RUnlock() + if sh.closed { + return + } + for _, ch := range sh.signals { + select { + case ch <- signal: + case <-sh.closeChan: + return + default: + go func() { + select { + case ch <- signal: + case <-sh.closeChan: + return + } + }() + } + } +} + +func (sh *defaultSignalHandler) Init() error { + sh.Lock() + sh.signals = make([]chan<- *Signal, 0) + sh.closeChan = make(chan struct{}) + sh.Unlock() + return nil +} + +func (sh *defaultSignalHandler) Terminate() { + sh.Lock() + if !sh.closed { + close(sh.closeChan) + } + sh.closed = true + for _, ch := 
range sh.signals { + close(ch) + } + sh.signals = nil + sh.Unlock() +} + +func (sh *defaultSignalHandler) addSignal(ch chan<- *Signal) { + sh.Lock() + defer sh.Unlock() + if sh.closed { + return + } + sh.signals = append(sh.signals, ch) + +} + +func (sh *defaultSignalHandler) removeSignal(ch chan<- *Signal) { + sh.Lock() + defer sh.Unlock() + if sh.closed { + return + } + for i := len(sh.signals) - 1; i >= 0; i-- { + if ch == sh.signals[i] { + copy(sh.signals[i:], sh.signals[i+1:]) + sh.signals[len(sh.signals)-1] = nil + sh.signals = sh.signals[:len(sh.signals)-1] + } + } +} diff --git a/vendor/github.com/godbus/dbus/doc.go b/vendor/github.com/godbus/dbus/doc.go new file mode 100644 index 0000000..895036a --- /dev/null +++ b/vendor/github.com/godbus/dbus/doc.go @@ -0,0 +1,69 @@ +/* +Package dbus implements bindings to the D-Bus message bus system. + +To use the message bus API, you first need to connect to a bus (usually the +session or system bus). The acquired connection then can be used to call methods +on remote objects and emit or receive signals. Using the Export method, you can +arrange D-Bus methods calls to be directly translated to method calls on a Go +value. + +Conversion Rules + +For outgoing messages, Go types are automatically converted to the +corresponding D-Bus types. The following types are directly encoded as their +respective D-Bus equivalents: + + Go type | D-Bus type + ------------+----------- + byte | BYTE + bool | BOOLEAN + int16 | INT16 + uint16 | UINT16 + int | INT32 + uint | UINT32 + int32 | INT32 + uint32 | UINT32 + int64 | INT64 + uint64 | UINT64 + float64 | DOUBLE + string | STRING + ObjectPath | OBJECT_PATH + Signature | SIGNATURE + Variant | VARIANT + interface{} | VARIANT + UnixFDIndex | UNIX_FD + +Slices and arrays encode as ARRAYs of their element type. + +Maps encode as DICTs, provided that their key type can be used as a key for +a DICT. 
+ +Structs other than Variant and Signature encode as a STRUCT containing their +exported fields. Fields whose tags contain `dbus:"-"` and unexported fields will +be skipped. + +Pointers encode as the value they point to. + +Types convertible to one of the base types above will be mapped as the +base type. + +Trying to encode any other type or a slice, map or struct containing an +unsupported type will result in an InvalidTypeError. + +For incoming messages, the inverse of these rules is used, with the exception +of STRUCTs. Incoming STRUCTs are represented as a slice of empty interfaces +containing the struct fields in the correct order. The Store function can be +used to convert such values to Go structs. + +Unix FD passing + +Handling Unix file descriptors deserves special mention. To use them, you should +first check that they are supported on a connection by calling SupportsUnixFDs. +If it returns true, all methods of Connection will translate messages containing +UnixFD's to messages that are accompanied by the given file descriptors with the +UnixFD values being substituted by the correct indices. Similarly, the indices +of incoming messages are automatically resolved. It shouldn't be necessary to use +UnixFDIndex. + +*/ +package dbus diff --git a/vendor/github.com/godbus/dbus/encoder.go b/vendor/github.com/godbus/dbus/encoder.go new file mode 100644 index 0000000..8bb7177 --- /dev/null +++ b/vendor/github.com/godbus/dbus/encoder.go @@ -0,0 +1,210 @@ +package dbus + +import ( + "bytes" + "encoding/binary" + "io" + "reflect" +) + +// An encoder encodes values to the D-Bus wire format. +type encoder struct { + out io.Writer + order binary.ByteOrder + pos int +} + +// newEncoder returns a new encoder that writes to out in the given byte order. +func newEncoder(out io.Writer, order binary.ByteOrder) *encoder { + return newEncoderAtOffset(out, 0, order) +} + +// newEncoderAtOffset returns a new encoder that writes to out in the given +// byte order.
Specify the offset to initialize pos for proper alignment +// computation. +func newEncoderAtOffset(out io.Writer, offset int, order binary.ByteOrder) *encoder { + enc := new(encoder) + enc.out = out + enc.order = order + enc.pos = offset + return enc +} + +// Aligns the next output to be on a multiple of n. Panics on write errors. +func (enc *encoder) align(n int) { + pad := enc.padding(0, n) + if pad > 0 { + empty := make([]byte, pad) + if _, err := enc.out.Write(empty); err != nil { + panic(err) + } + enc.pos += pad + } +} + +// padding returns the number of bytes of padding needed, based on the current position, an additional offset, +// and the alignment. +func (enc *encoder) padding(offset, algn int) int { + abs := enc.pos + offset + if abs%algn != 0 { + newabs := (abs + algn - 1) & ^(algn - 1) + return newabs - abs + } + return 0 +} + +// Calls binary.Write(enc.out, enc.order, v) and panics on write errors. +func (enc *encoder) binwrite(v interface{}) { + if err := binary.Write(enc.out, enc.order, v); err != nil { + panic(err) + } +} + +// Encode encodes the given values to the underlying writer. All written values +// are aligned properly as required by the D-Bus spec. +func (enc *encoder) Encode(vs ...interface{}) (err error) { + defer func() { + err, _ = recover().(error) + }() + for _, v := range vs { + enc.encode(reflect.ValueOf(v), 0) + } + return nil +} + +// encode encodes the given value to the writer and panics on error. depth holds +// the depth of the container nesting.
+func (enc *encoder) encode(v reflect.Value, depth int) { + enc.align(alignment(v.Type())) + switch v.Kind() { + case reflect.Uint8: + var b [1]byte + b[0] = byte(v.Uint()) + if _, err := enc.out.Write(b[:]); err != nil { + panic(err) + } + enc.pos++ + case reflect.Bool: + if v.Bool() { + enc.encode(reflect.ValueOf(uint32(1)), depth) + } else { + enc.encode(reflect.ValueOf(uint32(0)), depth) + } + case reflect.Int16: + enc.binwrite(int16(v.Int())) + enc.pos += 2 + case reflect.Uint16: + enc.binwrite(uint16(v.Uint())) + enc.pos += 2 + case reflect.Int, reflect.Int32: + enc.binwrite(int32(v.Int())) + enc.pos += 4 + case reflect.Uint, reflect.Uint32: + enc.binwrite(uint32(v.Uint())) + enc.pos += 4 + case reflect.Int64: + enc.binwrite(v.Int()) + enc.pos += 8 + case reflect.Uint64: + enc.binwrite(v.Uint()) + enc.pos += 8 + case reflect.Float64: + enc.binwrite(v.Float()) + enc.pos += 8 + case reflect.String: + enc.encode(reflect.ValueOf(uint32(len(v.String()))), depth) + b := make([]byte, v.Len()+1) + copy(b, v.String()) + b[len(b)-1] = 0 + n, err := enc.out.Write(b) + if err != nil { + panic(err) + } + enc.pos += n + case reflect.Ptr: + enc.encode(v.Elem(), depth) + case reflect.Slice, reflect.Array: + if depth >= 64 { + panic(FormatError("input exceeds container depth limit")) + } + // Lookahead offset: 4 bytes for uint32 length (with alignment), + // plus alignment for elements. 
+ n := enc.padding(0, 4) + 4 + offset := enc.pos + n + enc.padding(n, alignment(v.Type().Elem())) + + var buf bytes.Buffer + bufenc := newEncoderAtOffset(&buf, offset, enc.order) + + for i := 0; i < v.Len(); i++ { + bufenc.encode(v.Index(i), depth+1) + } + enc.encode(reflect.ValueOf(uint32(buf.Len())), depth) + length := buf.Len() + enc.align(alignment(v.Type().Elem())) + if _, err := buf.WriteTo(enc.out); err != nil { + panic(err) + } + enc.pos += length + case reflect.Struct: + if depth >= 64 && v.Type() != signatureType { + panic(FormatError("input exceeds container depth limit")) + } + switch t := v.Type(); t { + case signatureType: + str := v.Field(0) + enc.encode(reflect.ValueOf(byte(str.Len())), depth+1) + b := make([]byte, str.Len()+1) + copy(b, str.String()) + b[len(b)-1] = 0 + n, err := enc.out.Write(b) + if err != nil { + panic(err) + } + enc.pos += n + case variantType: + variant := v.Interface().(Variant) + enc.encode(reflect.ValueOf(variant.sig), depth+1) + enc.encode(reflect.ValueOf(variant.value), depth+1) + default: + for i := 0; i < v.Type().NumField(); i++ { + field := t.Field(i) + if field.PkgPath == "" && field.Tag.Get("dbus") != "-" { + enc.encode(v.Field(i), depth+1) + } + } + } + case reflect.Map: + // Maps are arrays of structures, so they actually increase the depth by + // 2. 
+ if depth >= 63 { + panic(FormatError("input exceeds container depth limit")) + } + if !isKeyType(v.Type().Key()) { + panic(InvalidTypeError{v.Type()}) + } + keys := v.MapKeys() + // Lookahead offset: 4 bytes for uint32 length (with alignment), + // plus 8-byte alignment + n := enc.padding(0, 4) + 4 + offset := enc.pos + n + enc.padding(n, 8) + + var buf bytes.Buffer + bufenc := newEncoderAtOffset(&buf, offset, enc.order) + for _, k := range keys { + bufenc.align(8) + bufenc.encode(k, depth+2) + bufenc.encode(v.MapIndex(k), depth+2) + } + enc.encode(reflect.ValueOf(uint32(buf.Len())), depth) + length := buf.Len() + enc.align(8) + if _, err := buf.WriteTo(enc.out); err != nil { + panic(err) + } + enc.pos += length + case reflect.Interface: + enc.encode(reflect.ValueOf(MakeVariant(v.Interface())), depth) + default: + panic(InvalidTypeError{v.Type()}) + } +} diff --git a/vendor/github.com/godbus/dbus/export.go b/vendor/github.com/godbus/dbus/export.go new file mode 100644 index 0000000..95d0e29 --- /dev/null +++ b/vendor/github.com/godbus/dbus/export.go @@ -0,0 +1,412 @@ +package dbus + +import ( + "errors" + "fmt" + "reflect" + "strings" +) + +var ( + ErrMsgInvalidArg = Error{ + "org.freedesktop.DBus.Error.InvalidArgs", + []interface{}{"Invalid type / number of args"}, + } + ErrMsgNoObject = Error{ + "org.freedesktop.DBus.Error.NoSuchObject", + []interface{}{"No such object"}, + } + ErrMsgUnknownMethod = Error{ + "org.freedesktop.DBus.Error.UnknownMethod", + []interface{}{"Unknown / invalid method"}, + } + ErrMsgUnknownInterface = Error{ + "org.freedesktop.DBus.Error.UnknownInterface", + []interface{}{"Object does not implement the interface"}, + } +) + +func MakeFailedError(err error) *Error { + return &Error{ + "org.freedesktop.DBus.Error.Failed", + []interface{}{err.Error()}, + } +} + +// Sender is a type which can be used in exported methods to receive the message +// sender. 
+type Sender string + +func computeMethodName(name string, mapping map[string]string) string { + newname, ok := mapping[name] + if ok { + name = newname + } + return name +} + +func getMethods(in interface{}, mapping map[string]string) map[string]reflect.Value { + if in == nil { + return nil + } + methods := make(map[string]reflect.Value) + val := reflect.ValueOf(in) + typ := val.Type() + for i := 0; i < typ.NumMethod(); i++ { + methtype := typ.Method(i) + method := val.Method(i) + t := method.Type() + // only track valid methods must return *Error as last arg + // and must be exported + if t.NumOut() == 0 || + t.Out(t.NumOut()-1) != reflect.TypeOf(&ErrMsgInvalidArg) || + methtype.PkgPath != "" { + continue + } + // map names while building table + methods[computeMethodName(methtype.Name, mapping)] = method + } + return methods +} + +func standardMethodArgumentDecode(m Method, sender string, msg *Message, body []interface{}) ([]interface{}, error) { + pointers := make([]interface{}, m.NumArguments()) + decode := make([]interface{}, 0, len(body)) + + for i := 0; i < m.NumArguments(); i++ { + tp := reflect.TypeOf(m.ArgumentValue(i)) + val := reflect.New(tp) + pointers[i] = val.Interface() + if tp == reflect.TypeOf((*Sender)(nil)).Elem() { + val.Elem().SetString(sender) + } else if tp == reflect.TypeOf((*Message)(nil)).Elem() { + val.Elem().Set(reflect.ValueOf(*msg)) + } else { + decode = append(decode, pointers[i]) + } + } + + if len(decode) != len(body) { + return nil, ErrMsgInvalidArg + } + + if err := Store(body, decode...); err != nil { + return nil, ErrMsgInvalidArg + } + + return pointers, nil +} + +func (conn *Conn) decodeArguments(m Method, sender string, msg *Message) ([]interface{}, error) { + if decoder, ok := m.(ArgumentDecoder); ok { + return decoder.DecodeArguments(conn, sender, msg, msg.Body) + } + return standardMethodArgumentDecode(m, sender, msg, msg.Body) +} + +// handleCall handles the given method call (i.e. 
looks if it's one of the +// pre-implemented ones and searches for a corresponding handler if not). +func (conn *Conn) handleCall(msg *Message) { + name := msg.Headers[FieldMember].value.(string) + path := msg.Headers[FieldPath].value.(ObjectPath) + ifaceName, _ := msg.Headers[FieldInterface].value.(string) + sender, hasSender := msg.Headers[FieldSender].value.(string) + serial := msg.serial + if ifaceName == "org.freedesktop.DBus.Peer" { + switch name { + case "Ping": + conn.sendReply(sender, serial) + case "GetMachineId": + conn.sendReply(sender, serial, conn.uuid) + default: + conn.sendError(ErrMsgUnknownMethod, sender, serial) + } + return + } + if len(name) == 0 { + conn.sendError(ErrMsgUnknownMethod, sender, serial) + } + + object, ok := conn.handler.LookupObject(path) + if !ok { + conn.sendError(ErrMsgNoObject, sender, serial) + return + } + + iface, exists := object.LookupInterface(ifaceName) + if !exists { + conn.sendError(ErrMsgUnknownInterface, sender, serial) + return + } + + m, exists := iface.LookupMethod(name) + if !exists { + conn.sendError(ErrMsgUnknownMethod, sender, serial) + return + } + args, err := conn.decodeArguments(m, sender, msg) + if err != nil { + conn.sendError(err, sender, serial) + return + } + + ret, err := m.Call(args...) + if err != nil { + conn.sendError(err, sender, serial) + return + } + + if msg.Flags&FlagNoReplyExpected == 0 { + reply := new(Message) + reply.Type = TypeMethodReply + reply.serial = conn.getSerial() + reply.Headers = make(map[HeaderField]Variant) + if hasSender { + reply.Headers[FieldDestination] = msg.Headers[FieldSender] + } + reply.Headers[FieldReplySerial] = MakeVariant(msg.serial) + reply.Body = make([]interface{}, len(ret)) + for i := 0; i < len(ret); i++ { + reply.Body[i] = ret[i] + } + reply.Headers[FieldSignature] = MakeVariant(SignatureOf(reply.Body...)) + + conn.sendMessage(reply) + } +} + +// Emit emits the given signal on the message bus. 
The name parameter must be +// formatted as "interface.member", e.g., "org.freedesktop.DBus.NameLost". +func (conn *Conn) Emit(path ObjectPath, name string, values ...interface{}) error { + if !path.IsValid() { + return errors.New("dbus: invalid object path") + } + i := strings.LastIndex(name, ".") + if i == -1 { + return errors.New("dbus: invalid method name") + } + iface := name[:i] + member := name[i+1:] + if !isValidMember(member) { + return errors.New("dbus: invalid method name") + } + if !isValidInterface(iface) { + return errors.New("dbus: invalid interface name") + } + msg := new(Message) + msg.Type = TypeSignal + msg.serial = conn.getSerial() + msg.Headers = make(map[HeaderField]Variant) + msg.Headers[FieldInterface] = MakeVariant(iface) + msg.Headers[FieldMember] = MakeVariant(member) + msg.Headers[FieldPath] = MakeVariant(path) + msg.Body = values + if len(values) > 0 { + msg.Headers[FieldSignature] = MakeVariant(SignatureOf(values...)) + } + + var closed bool + conn.sendMessageAndIfClosed(msg, func() { + closed = true + }) + if closed { + return ErrClosed + } + return nil +} + +// Export registers the given value to be exported as an object on the +// message bus. +// +// If a method call on the given path and interface is received, an exported +// method with the same name is called with v as the receiver if the +// parameters match and the last return value is of type *Error. If this +// *Error is not nil, it is sent back to the caller as an error. +// Otherwise, a method reply is sent with the other return values as its body. +// +// Any parameters with the special type Sender are set to the sender of the +// dbus message when the method is called. Parameters of this type do not +// contribute to the dbus signature of the method (i.e. the method is exposed +// as if the parameters of type Sender were not there). +// +// Similarly, any parameters with the type Message are set to the raw message +// received on the bus. 
// Again, parameters of this type do not contribute to the
// dbus signature of the method.
//
// Every method call is executed in a new goroutine, so the method may be called
// in multiple goroutines at once.
//
// Method calls on the interface org.freedesktop.DBus.Peer will be automatically
// handled for every object.
//
// Passing nil as the first parameter will cause conn to cease handling calls on
// the given combination of path and interface.
//
// Export returns an error if path is not a valid path name.
func (conn *Conn) Export(v interface{}, path ObjectPath, iface string) error {
	return conn.ExportWithMap(v, nil, path, iface)
}

// ExportWithMap works exactly like Export but provides the ability to remap
// method names (e.g. export a lower-case method).
//
// The keys in the map are the real method names (exported on the struct), and
// the values are the method names to be exported on DBus.
func (conn *Conn) ExportWithMap(v interface{}, mapping map[string]string, path ObjectPath, iface string) error {
	return conn.export(getMethods(v, mapping), path, iface, false)
}

// ExportSubtree works exactly like Export but registers the given value for
// the entire subtree under the root path provided, rather than for that path
// alone.
//
// In order to make this useful, one parameter in each of the value's exported
// methods should be a Message, in which case it will contain the raw message
// (allowing one to get access to the path that caused the method to be called).
//
// Note that more specific export paths take precedence over less specific. For
// example, a method call using the ObjectPath /foo/bar/baz will call a method
// exported on /foo/bar before a method exported on /foo.
func (conn *Conn) ExportSubtree(v interface{}, path ObjectPath, iface string) error {
	return conn.ExportSubtreeWithMap(v, nil, path, iface)
}

// ExportSubtreeWithMap works exactly like ExportSubtree but provides the
// ability to remap method names (e.g. export a lower-case method).
//
// The keys in the map are the real method names (exported on the struct), and
// the values are the method names to be exported on DBus.
func (conn *Conn) ExportSubtreeWithMap(v interface{}, mapping map[string]string, path ObjectPath, iface string) error {
	return conn.export(getMethods(v, mapping), path, iface, true)
}

// ExportMethodTable, like Export, registers the given methods as an object
// on the message bus. Unlike Export, it uses a method table to define
// the object instead of a native go object.
//
// The method table is a map from method name to function closure
// representing the method. This allows an object exported on the bus to not
// necessarily be a native go object. It can be useful for generating exposed
// methods on the fly.
//
// Any non-function objects in the method table are ignored.
func (conn *Conn) ExportMethodTable(methods map[string]interface{}, path ObjectPath, iface string) error {
	return conn.exportMethodTable(methods, path, iface, false)
}

// Like ExportSubtree, but with the same caveats as ExportMethodTable.
+func (conn *Conn) ExportSubtreeMethodTable(methods map[string]interface{}, path ObjectPath, iface string) error { + return conn.exportMethodTable(methods, path, iface, true) +} + +func (conn *Conn) exportMethodTable(methods map[string]interface{}, path ObjectPath, iface string, includeSubtree bool) error { + out := make(map[string]reflect.Value) + for name, method := range methods { + rval := reflect.ValueOf(method) + if rval.Kind() != reflect.Func { + continue + } + t := rval.Type() + // only track valid methods must return *Error as last arg + if t.NumOut() == 0 || + t.Out(t.NumOut()-1) != reflect.TypeOf(&ErrMsgInvalidArg) { + continue + } + out[name] = rval + } + return conn.export(out, path, iface, includeSubtree) +} + +func (conn *Conn) unexport(h *defaultHandler, path ObjectPath, iface string) error { + if h.PathExists(path) { + obj := h.objects[path] + obj.DeleteInterface(iface) + if len(obj.interfaces) == 0 { + h.DeleteObject(path) + } + } + return nil +} + +// exportWithMap is the worker function for all exports/registrations. +func (conn *Conn) export(methods map[string]reflect.Value, path ObjectPath, iface string, includeSubtree bool) error { + h, ok := conn.handler.(*defaultHandler) + if !ok { + return fmt.Errorf( + `dbus: export only allowed on the default hander handler have %T"`, + conn.handler) + } + + if !path.IsValid() { + return fmt.Errorf(`dbus: Invalid path name: "%s"`, path) + } + + // Remove a previous export if the interface is nil + if methods == nil { + return conn.unexport(h, path, iface) + } + + // If this is the first handler for this path, make a new map to hold all + // handlers for this path. 
+ if !h.PathExists(path) { + h.AddObject(path, newExportedObject()) + } + + exportedMethods := make(map[string]Method) + for name, method := range methods { + exportedMethods[name] = exportedMethod{method} + } + + // Finally, save this handler + obj := h.objects[path] + obj.AddInterface(iface, newExportedIntf(exportedMethods, includeSubtree)) + + return nil +} + +// ReleaseName calls org.freedesktop.DBus.ReleaseName and awaits a response. +func (conn *Conn) ReleaseName(name string) (ReleaseNameReply, error) { + var r uint32 + err := conn.busObj.Call("org.freedesktop.DBus.ReleaseName", 0, name).Store(&r) + if err != nil { + return 0, err + } + return ReleaseNameReply(r), nil +} + +// RequestName calls org.freedesktop.DBus.RequestName and awaits a response. +func (conn *Conn) RequestName(name string, flags RequestNameFlags) (RequestNameReply, error) { + var r uint32 + err := conn.busObj.Call("org.freedesktop.DBus.RequestName", 0, name, flags).Store(&r) + if err != nil { + return 0, err + } + return RequestNameReply(r), nil +} + +// ReleaseNameReply is the reply to a ReleaseName call. +type ReleaseNameReply uint32 + +const ( + ReleaseNameReplyReleased ReleaseNameReply = 1 + iota + ReleaseNameReplyNonExistent + ReleaseNameReplyNotOwner +) + +// RequestNameFlags represents the possible flags for a RequestName call. +type RequestNameFlags uint32 + +const ( + NameFlagAllowReplacement RequestNameFlags = 1 << iota + NameFlagReplaceExisting + NameFlagDoNotQueue +) + +// RequestNameReply is the reply to a RequestName call. 
+type RequestNameReply uint32 + +const ( + RequestNameReplyPrimaryOwner RequestNameReply = 1 + iota + RequestNameReplyInQueue + RequestNameReplyExists + RequestNameReplyAlreadyOwner +) diff --git a/vendor/github.com/godbus/dbus/go.mod b/vendor/github.com/godbus/dbus/go.mod new file mode 100644 index 0000000..bdcd125 --- /dev/null +++ b/vendor/github.com/godbus/dbus/go.mod @@ -0,0 +1 @@ +module github.com/godbus/dbus diff --git a/vendor/github.com/godbus/dbus/homedir.go b/vendor/github.com/godbus/dbus/homedir.go new file mode 100644 index 0000000..0b745f9 --- /dev/null +++ b/vendor/github.com/godbus/dbus/homedir.go @@ -0,0 +1,28 @@ +package dbus + +import ( + "os" + "sync" +) + +var ( + homeDir string + homeDirLock sync.Mutex +) + +func getHomeDir() string { + homeDirLock.Lock() + defer homeDirLock.Unlock() + + if homeDir != "" { + return homeDir + } + + homeDir = os.Getenv("HOME") + if homeDir != "" { + return homeDir + } + + homeDir = lookupHomeDir() + return homeDir +} diff --git a/vendor/github.com/godbus/dbus/homedir_dynamic.go b/vendor/github.com/godbus/dbus/homedir_dynamic.go new file mode 100644 index 0000000..2732081 --- /dev/null +++ b/vendor/github.com/godbus/dbus/homedir_dynamic.go @@ -0,0 +1,15 @@ +// +build !static_build + +package dbus + +import ( + "os/user" +) + +func lookupHomeDir() string { + u, err := user.Current() + if err != nil { + return "/" + } + return u.HomeDir +} diff --git a/vendor/github.com/godbus/dbus/homedir_static.go b/vendor/github.com/godbus/dbus/homedir_static.go new file mode 100644 index 0000000..b9d9cb5 --- /dev/null +++ b/vendor/github.com/godbus/dbus/homedir_static.go @@ -0,0 +1,45 @@ +// +build static_build + +package dbus + +import ( + "bufio" + "os" + "strconv" + "strings" +) + +func lookupHomeDir() string { + myUid := os.Getuid() + + f, err := os.Open("/etc/passwd") + if err != nil { + return "/" + } + defer f.Close() + + s := bufio.NewScanner(f) + + for s.Scan() { + if err := s.Err(); err != nil { + break + } + + line 
:= strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + parts := strings.Split(line, ":") + + if len(parts) >= 6 { + uid, err := strconv.Atoi(parts[2]) + if err == nil && uid == myUid { + return parts[5] + } + } + } + + // Default to / if we can't get a better value + return "/" +} diff --git a/vendor/github.com/godbus/dbus/message.go b/vendor/github.com/godbus/dbus/message.go new file mode 100644 index 0000000..6a92536 --- /dev/null +++ b/vendor/github.com/godbus/dbus/message.go @@ -0,0 +1,353 @@ +package dbus + +import ( + "bytes" + "encoding/binary" + "errors" + "io" + "reflect" + "strconv" +) + +const protoVersion byte = 1 + +// Flags represents the possible flags of a D-Bus message. +type Flags byte + +const ( + // FlagNoReplyExpected signals that the message is not expected to generate + // a reply. If this flag is set on outgoing messages, any possible reply + // will be discarded. + FlagNoReplyExpected Flags = 1 << iota + // FlagNoAutoStart signals that the message bus should not automatically + // start an application when handling this message. + FlagNoAutoStart + // FlagAllowInteractiveAuthorization may be set on a method call + // message to inform the receiving side that the caller is prepared + // to wait for interactive authorization, which might take a + // considerable time to complete. For instance, if this flag is set, + // it would be appropriate to query the user for passwords or + // confirmation via Polkit or a similar framework. + FlagAllowInteractiveAuthorization +) + +// Type represents the possible types of a D-Bus message. 
type Type byte

// Message types, numbered from 1. typeMax is one past the last valid type and
// is used for range checks and to size requiredFields.
const (
	TypeMethodCall Type = 1 + iota
	TypeMethodReply
	TypeError
	TypeSignal
	typeMax
)

// String returns a human-readable name for the message type, or "invalid" for
// a value that is not one of the defined types.
func (t Type) String() string {
	switch t {
	case TypeMethodCall:
		return "method call"
	case TypeMethodReply:
		return "reply"
	case TypeError:
		return "error"
	case TypeSignal:
		return "signal"
	}
	return "invalid"
}

// HeaderField represents the possible byte codes for the headers
// of a D-Bus message.
type HeaderField byte

// Header field codes, numbered from 1. fieldMax is one past the last valid
// code and is used for range checks and to size fieldTypes.
const (
	FieldPath HeaderField = 1 + iota
	FieldInterface
	FieldMember
	FieldErrorName
	FieldReplySerial
	FieldDestination
	FieldSender
	FieldSignature
	FieldUnixFDs
	fieldMax
)

// An InvalidMessageError describes the reason why a D-Bus message is regarded as
// invalid.
type InvalidMessageError string

// Error returns the reason prefixed with "dbus: invalid message: ".
func (e InvalidMessageError) Error() string {
	return "dbus: invalid message: " + string(e)
}

// fieldTypes holds the Go type required for the value of each header field,
// indexed by field code.
var fieldTypes = [fieldMax]reflect.Type{
	FieldPath:        objectPathType,
	FieldInterface:   stringType,
	FieldMember:      stringType,
	FieldErrorName:   stringType,
	FieldReplySerial: uint32Type,
	FieldDestination: stringType,
	FieldSender:      stringType,
	FieldSignature:   signatureType,
	FieldUnixFDs:     uint32Type,
}

// requiredFields lists the header fields that are required by the different
// message types.
var requiredFields = [typeMax][]HeaderField{
	TypeMethodCall:  {FieldPath, FieldMember},
	TypeMethodReply: {FieldReplySerial},
	TypeError:       {FieldErrorName, FieldReplySerial},
	TypeSignal:      {FieldPath, FieldInterface, FieldMember},
}

// Message represents a single D-Bus message.
type Message struct {
	Type
	Flags
	Headers map[HeaderField]Variant
	Body    []interface{}

	serial uint32
}

// header is a single (field code, value) pair of the header array as it is
// encoded on the wire.
type header struct {
	Field byte
	Variant
}

// DecodeMessage tries to decode a single message in the D-Bus wire format
// from the given reader. The byte order is figured out from the first byte.
+// The possibly returned error can be an error of the underlying reader, an +// InvalidMessageError or a FormatError. +func DecodeMessage(rd io.Reader) (msg *Message, err error) { + var order binary.ByteOrder + var hlength, length uint32 + var typ, flags, proto byte + var headers []header + + b := make([]byte, 1) + _, err = rd.Read(b) + if err != nil { + return + } + switch b[0] { + case 'l': + order = binary.LittleEndian + case 'B': + order = binary.BigEndian + default: + return nil, InvalidMessageError("invalid byte order") + } + + dec := newDecoder(rd, order) + dec.pos = 1 + + msg = new(Message) + vs, err := dec.Decode(Signature{"yyyuu"}) + if err != nil { + return nil, err + } + if err = Store(vs, &typ, &flags, &proto, &length, &msg.serial); err != nil { + return nil, err + } + msg.Type = Type(typ) + msg.Flags = Flags(flags) + + // get the header length separately because we need it later + b = make([]byte, 4) + _, err = io.ReadFull(rd, b) + if err != nil { + return nil, err + } + binary.Read(bytes.NewBuffer(b), order, &hlength) + if hlength+length+16 > 1<<27 { + return nil, InvalidMessageError("message is too long") + } + dec = newDecoder(io.MultiReader(bytes.NewBuffer(b), rd), order) + dec.pos = 12 + vs, err = dec.Decode(Signature{"a(yv)"}) + if err != nil { + return nil, err + } + if err = Store(vs, &headers); err != nil { + return nil, err + } + + msg.Headers = make(map[HeaderField]Variant) + for _, v := range headers { + msg.Headers[HeaderField(v.Field)] = v.Variant + } + + dec.align(8) + body := make([]byte, int(length)) + if length != 0 { + _, err := io.ReadFull(rd, body) + if err != nil { + return nil, err + } + } + + if err = msg.IsValid(); err != nil { + return nil, err + } + sig, _ := msg.Headers[FieldSignature].value.(Signature) + if sig.str != "" { + buf := bytes.NewBuffer(body) + dec = newDecoder(buf, order) + vs, err := dec.Decode(sig) + if err != nil { + return nil, err + } + msg.Body = vs + } + + return +} + +// EncodeTo encodes and sends a 
message to the given writer. The byte order must +// be either binary.LittleEndian or binary.BigEndian. If the message is not +// valid or an error occurs when writing, an error is returned. +func (msg *Message) EncodeTo(out io.Writer, order binary.ByteOrder) error { + if err := msg.IsValid(); err != nil { + return err + } + var vs [7]interface{} + switch order { + case binary.LittleEndian: + vs[0] = byte('l') + case binary.BigEndian: + vs[0] = byte('B') + default: + return errors.New("dbus: invalid byte order") + } + body := new(bytes.Buffer) + enc := newEncoder(body, order) + if len(msg.Body) != 0 { + enc.Encode(msg.Body...) + } + vs[1] = msg.Type + vs[2] = msg.Flags + vs[3] = protoVersion + vs[4] = uint32(len(body.Bytes())) + vs[5] = msg.serial + headers := make([]header, 0, len(msg.Headers)) + for k, v := range msg.Headers { + headers = append(headers, header{byte(k), v}) + } + vs[6] = headers + var buf bytes.Buffer + enc = newEncoder(&buf, order) + enc.Encode(vs[:]...) + enc.align(8) + body.WriteTo(&buf) + if buf.Len() > 1<<27 { + return InvalidMessageError("message is too long") + } + if _, err := buf.WriteTo(out); err != nil { + return err + } + return nil +} + +// IsValid checks whether msg is a valid message and returns an +// InvalidMessageError if it is not. 
+func (msg *Message) IsValid() error { + if msg.Flags & ^(FlagNoAutoStart|FlagNoReplyExpected|FlagAllowInteractiveAuthorization) != 0 { + return InvalidMessageError("invalid flags") + } + if msg.Type == 0 || msg.Type >= typeMax { + return InvalidMessageError("invalid message type") + } + for k, v := range msg.Headers { + if k == 0 || k >= fieldMax { + return InvalidMessageError("invalid header") + } + if reflect.TypeOf(v.value) != fieldTypes[k] { + return InvalidMessageError("invalid type of header field") + } + } + for _, v := range requiredFields[msg.Type] { + if _, ok := msg.Headers[v]; !ok { + return InvalidMessageError("missing required header") + } + } + if path, ok := msg.Headers[FieldPath]; ok { + if !path.value.(ObjectPath).IsValid() { + return InvalidMessageError("invalid path name") + } + } + if iface, ok := msg.Headers[FieldInterface]; ok { + if !isValidInterface(iface.value.(string)) { + return InvalidMessageError("invalid interface name") + } + } + if member, ok := msg.Headers[FieldMember]; ok { + if !isValidMember(member.value.(string)) { + return InvalidMessageError("invalid member name") + } + } + if errname, ok := msg.Headers[FieldErrorName]; ok { + if !isValidInterface(errname.value.(string)) { + return InvalidMessageError("invalid error name") + } + } + if len(msg.Body) != 0 { + if _, ok := msg.Headers[FieldSignature]; !ok { + return InvalidMessageError("missing signature") + } + } + return nil +} + +// Serial returns the message's serial number. The returned value is only valid +// for messages received by eavesdropping. +func (msg *Message) Serial() uint32 { + return msg.serial +} + +// String returns a string representation of a message similar to the format of +// dbus-monitor. 
+func (msg *Message) String() string { + if err := msg.IsValid(); err != nil { + return "" + } + s := msg.Type.String() + if v, ok := msg.Headers[FieldSender]; ok { + s += " from " + v.value.(string) + } + if v, ok := msg.Headers[FieldDestination]; ok { + s += " to " + v.value.(string) + } + s += " serial " + strconv.FormatUint(uint64(msg.serial), 10) + if v, ok := msg.Headers[FieldReplySerial]; ok { + s += " reply_serial " + strconv.FormatUint(uint64(v.value.(uint32)), 10) + } + if v, ok := msg.Headers[FieldUnixFDs]; ok { + s += " unixfds " + strconv.FormatUint(uint64(v.value.(uint32)), 10) + } + if v, ok := msg.Headers[FieldPath]; ok { + s += " path " + string(v.value.(ObjectPath)) + } + if v, ok := msg.Headers[FieldInterface]; ok { + s += " interface " + v.value.(string) + } + if v, ok := msg.Headers[FieldErrorName]; ok { + s += " error " + v.value.(string) + } + if v, ok := msg.Headers[FieldMember]; ok { + s += " member " + v.value.(string) + } + if len(msg.Body) != 0 { + s += "\n" + } + for i, v := range msg.Body { + s += " " + MakeVariant(v).String() + if i != len(msg.Body)-1 { + s += "\n" + } + } + return s +} diff --git a/vendor/github.com/godbus/dbus/object.go b/vendor/github.com/godbus/dbus/object.go new file mode 100644 index 0000000..f27ffe1 --- /dev/null +++ b/vendor/github.com/godbus/dbus/object.go @@ -0,0 +1,219 @@ +package dbus + +import ( + "context" + "errors" + "strings" +) + +// BusObject is the interface of a remote object on which methods can be +// invoked. 
type BusObject interface {
	// Call and CallWithContext invoke a method and wait for the result.
	Call(method string, flags Flags, args ...interface{}) *Call
	CallWithContext(ctx context.Context, method string, flags Flags, args ...interface{}) *Call
	// Go and GoWithContext invoke a method asynchronously.
	Go(method string, flags Flags, ch chan *Call, args ...interface{}) *Call
	GoWithContext(ctx context.Context, method string, flags Flags, ch chan *Call, args ...interface{}) *Call
	// AddMatchSignal and RemoveMatchSignal subscribe to and unsubscribe from
	// signals of the given interface and member.
	AddMatchSignal(iface, member string, options ...MatchOption) *Call
	RemoveMatchSignal(iface, member string, options ...MatchOption) *Call
	// GetProperty fetches the property named in interface.member notation.
	GetProperty(p string) (Variant, error)
	// Destination and Path report where calls on the object are sent.
	Destination() string
	Path() ObjectPath
}

// Object represents a remote object on which methods can be invoked.
type Object struct {
	conn *Conn      // connection the calls are sent over
	dest string     // destination that calls are sent to
	path ObjectPath // object path that calls are sent to
}

// Call calls a method and waits for its reply. It uses a background context
// and a private one-element channel, and returns the Call received from that
// channel.
func (o *Object) Call(method string, flags Flags, args ...interface{}) *Call {
	return <-o.createCall(context.Background(), method, flags, make(chan *Call, 1), args...).Done
}

// CallWithContext acts like Call but takes a context
func (o *Object) CallWithContext(ctx context.Context, method string, flags Flags, args ...interface{}) *Call {
	return <-o.createCall(ctx, method, flags, make(chan *Call, 1), args...).Done
}

// MatchOption specifies option for dbus routing match rule. Options can be constructed with WithMatch* helpers.
+// For full list of available options consult +// https://dbus.freedesktop.org/doc/dbus-specification.html#message-bus-routing-match-rules +type MatchOption struct { + key string + value string +} + +// WithMatchOption creates match option with given key and value +func WithMatchOption(key, value string) MatchOption { + return MatchOption{key, value} +} + +// WithMatchObjectPath creates match option that filters events based on given path +func WithMatchObjectPath(path ObjectPath) MatchOption { + return MatchOption{"path", string(path)} +} + +func formatMatchOptions(options []MatchOption) string { + items := make([]string, 0, len(options)) + for _, option := range options { + items = append(items, option.key+"='"+option.value+"'") + } + + return strings.Join(items, ",") +} + +// AddMatchSignal subscribes BusObject to signals from specified interface, +// method (member). Additional filter rules can be added via WithMatch* option constructors. +// Note: To filter events by object path you have to specify this path via an option. +func (o *Object) AddMatchSignal(iface, member string, options ...MatchOption) *Call { + base := []MatchOption{ + {"type", "signal"}, + {"interface", iface}, + {"member", member}, + } + + options = append(base, options...) + return o.conn.BusObject().Call( + "org.freedesktop.DBus.AddMatch", + 0, + formatMatchOptions(options), + ) +} + +// RemoveMatchSignal unsubscribes BusObject from signals from specified interface, +// method (member). Additional filter rules can be added via WithMatch* option constructors +func (o *Object) RemoveMatchSignal(iface, member string, options ...MatchOption) *Call { + base := []MatchOption{ + {"type", "signal"}, + {"interface", iface}, + {"member", member}, + } + + options = append(base, options...) + return o.conn.BusObject().Call( + "org.freedesktop.DBus.RemoveMatch", + 0, + formatMatchOptions(options), + ) +} + +// Go calls a method with the given arguments asynchronously. 
// createCall builds the method-call message for method on o, sends it on
// o.conn and returns the Call that represents it. It backs Call, Go and
// their context-taking variants.
//
// method may be given as "interface.member": everything before the last dot
// becomes the interface header, the remainder the member. Without a dot the
// whole string is the member and no interface header is set.
//
// It panics when ctx is nil, and when a reply is expected but ch is an
// unbuffered channel.
func (o *Object) createCall(ctx context.Context, method string, flags Flags, ch chan *Call, args ...interface{}) *Call {
	if ctx == nil {
		panic("nil context")
	}
	iface := ""
	i := strings.LastIndex(method, ".")
	if i != -1 {
		iface = method[:i]
	}
	// With no dot i is -1, so this keeps the whole string as the member.
	method = method[i+1:]
	msg := new(Message)
	msg.Type = TypeMethodCall
	msg.serial = o.conn.getSerial()
	// Only these two flags are forwarded; any other bits are dropped.
	msg.Flags = flags & (FlagNoAutoStart | FlagNoReplyExpected)
	msg.Headers = make(map[HeaderField]Variant)
	msg.Headers[FieldPath] = MakeVariant(o.path)
	msg.Headers[FieldDestination] = MakeVariant(o.dest)
	msg.Headers[FieldMember] = MakeVariant(method)
	if iface != "" {
		msg.Headers[FieldInterface] = MakeVariant(iface)
	}
	msg.Body = args
	// The signature header is only set for a non-empty body.
	if len(args) > 0 {
		msg.Headers[FieldSignature] = MakeVariant(SignatureOf(args...))
	}
	if msg.Flags&FlagNoReplyExpected == 0 {
		// A reply is expected: the call is registered under its serial
		// before the message is sent.
		if ch == nil {
			ch = make(chan *Call, 10)
		} else if cap(ch) == 0 {
			panic("dbus: unbuffered channel passed to (*Object).Go")
		}
		// Derive a cancellable context; the cancel function is stored on
		// the Call as ctxCanceler.
		ctx, cancel := context.WithCancel(ctx)
		call := &Call{
			Destination: o.dest,
			Path:        o.path,
			Method:      method,
			Args:        args,
			Done:        ch,
			ctxCanceler: cancel,
			ctx:         ctx,
		}
		o.conn.calls.track(msg.serial, call)
		// The callback runs if the connection is already closed: the call
		// is failed with ErrClosed and its context released.
		o.conn.sendMessageAndIfClosed(msg, func() {
			o.conn.calls.handleSendError(msg, ErrClosed)
			cancel()
		})
		// Watcher goroutine: once the context is done, report ctx.Err()
		// for this message.
		// NOTE(review): this goroutine exits only when ctx is cancelled;
		// presumably the reply path invokes ctxCanceler - confirm.
		go func() {
			<-ctx.Done()
			o.conn.calls.handleSendError(msg, ctx.Err())
		}()

		return call
	}
	// No reply expected: ch is ignored. The returned Call is placed on its
	// own one-slot Done channel, which is then closed.
	done := make(chan *Call, 1)
	call := &Call{
		Err:  nil,
		Done: done,
	}
	// Deferred so that Err, if set by the callback below, is already in
	// place when the Call is published on Done.
	defer func() {
		call.Done <- call
		close(done)
	}()
	o.conn.sendMessageAndIfClosed(msg, func() {
		call.Err = ErrClosed
	})
	return call
}
// ServerObject is the representation of a D-Bus Object.
//
// Objects are registered at a path for a given Handler.
// The Objects implement D-Bus interfaces. The semantics
// of Interface lookup are up to the implementation of
// the ServerObject. The ServerObject implementation may
// choose to accept the empty string as a valid interface
// name representing all methods, or not, per the D-Bus specification.
type ServerObject interface {
	// LookupInterface looks up the Interface called name.
	// NOTE(review): the bool presumably reports whether the interface was
	// found - confirm against the implementations.
	LookupInterface(name string) (Interface, bool)
}
+// The mechanism for doing this will vary based on the +// implementation of the method. A normal approach is provided +// as part of this library, but may be replaced with +// any other decoding scheme. +type ArgumentDecoder interface { + // To decode the arguments of a method the sender and message are + // provided incase the semantics of the implementer provides access + // to these as part of the method invocation. + DecodeArguments(conn *Conn, sender string, msg *Message, args []interface{}) ([]interface{}, error) +} + +// A SignalHandler is responsible for delivering a signal. +// +// Signal delivery may be changed from the default channel +// based approach by Handlers implementing the SignalHandler +// interface. +type SignalHandler interface { + DeliverSignal(iface, name string, signal *Signal) +} + +// A DBusError is used to convert a generic object to a D-Bus error. +// +// Any custom error mechanism may implement this interface to provide +// a custom encoding of the error on D-Bus. By default if a normal +// error is returned, it will be encoded as the generic +// "org.freedesktop.DBus.Error.Failed" error. By implementing this +// interface as well a custom encoding may be provided. +type DBusError interface { + DBusError() (string, []interface{}) +} + +// SerialGenerator is responsible for serials generation. +// +// Different approaches for the serial generation can be used, +// maintaining a map guarded with a mutex (the standard way) or +// simply increment an atomic counter. 
+type SerialGenerator interface { + GetSerial() uint32 + RetireSerial(serial uint32) +} diff --git a/vendor/github.com/godbus/dbus/sig.go b/vendor/github.com/godbus/dbus/sig.go new file mode 100644 index 0000000..c1b8092 --- /dev/null +++ b/vendor/github.com/godbus/dbus/sig.go @@ -0,0 +1,259 @@ +package dbus + +import ( + "fmt" + "reflect" + "strings" +) + +var sigToType = map[byte]reflect.Type{ + 'y': byteType, + 'b': boolType, + 'n': int16Type, + 'q': uint16Type, + 'i': int32Type, + 'u': uint32Type, + 'x': int64Type, + 't': uint64Type, + 'd': float64Type, + 's': stringType, + 'g': signatureType, + 'o': objectPathType, + 'v': variantType, + 'h': unixFDIndexType, +} + +// Signature represents a correct type signature as specified by the D-Bus +// specification. The zero value represents the empty signature, "". +type Signature struct { + str string +} + +// SignatureOf returns the concatenation of all the signatures of the given +// values. It panics if one of them is not representable in D-Bus. +func SignatureOf(vs ...interface{}) Signature { + var s string + for _, v := range vs { + s += getSignature(reflect.TypeOf(v)) + } + return Signature{s} +} + +// SignatureOfType returns the signature of the given type. It panics if the +// type is not representable in D-Bus. +func SignatureOfType(t reflect.Type) Signature { + return Signature{getSignature(t)} +} + +// getSignature returns the signature of the given type and panics on unknown types. 
+func getSignature(t reflect.Type) string { + // handle simple types first + switch t.Kind() { + case reflect.Uint8: + return "y" + case reflect.Bool: + return "b" + case reflect.Int16: + return "n" + case reflect.Uint16: + return "q" + case reflect.Int, reflect.Int32: + if t == unixFDType { + return "h" + } + return "i" + case reflect.Uint, reflect.Uint32: + if t == unixFDIndexType { + return "h" + } + return "u" + case reflect.Int64: + return "x" + case reflect.Uint64: + return "t" + case reflect.Float64: + return "d" + case reflect.Ptr: + return getSignature(t.Elem()) + case reflect.String: + if t == objectPathType { + return "o" + } + return "s" + case reflect.Struct: + if t == variantType { + return "v" + } else if t == signatureType { + return "g" + } + var s string + for i := 0; i < t.NumField(); i++ { + field := t.Field(i) + if field.PkgPath == "" && field.Tag.Get("dbus") != "-" { + s += getSignature(t.Field(i).Type) + } + } + return "(" + s + ")" + case reflect.Array, reflect.Slice: + return "a" + getSignature(t.Elem()) + case reflect.Map: + if !isKeyType(t.Key()) { + panic(InvalidTypeError{t}) + } + return "a{" + getSignature(t.Key()) + getSignature(t.Elem()) + "}" + case reflect.Interface: + return "v" + } + panic(InvalidTypeError{t}) +} + +// ParseSignature returns the signature represented by this string, or a +// SignatureError if the string is not a valid signature. +func ParseSignature(s string) (sig Signature, err error) { + if len(s) == 0 { + return + } + if len(s) > 255 { + return Signature{""}, SignatureError{s, "too long"} + } + sig.str = s + for err == nil && len(s) != 0 { + err, s = validSingle(s, 0) + } + if err != nil { + sig = Signature{""} + } + + return +} + +// ParseSignatureMust behaves like ParseSignature, except that it panics if s +// is not valid. 
+func ParseSignatureMust(s string) Signature { + sig, err := ParseSignature(s) + if err != nil { + panic(err) + } + return sig +} + +// Empty retruns whether the signature is the empty signature. +func (s Signature) Empty() bool { + return s.str == "" +} + +// Single returns whether the signature represents a single, complete type. +func (s Signature) Single() bool { + err, r := validSingle(s.str, 0) + return err != nil && r == "" +} + +// String returns the signature's string representation. +func (s Signature) String() string { + return s.str +} + +// A SignatureError indicates that a signature passed to a function or received +// on a connection is not a valid signature. +type SignatureError struct { + Sig string + Reason string +} + +func (e SignatureError) Error() string { + return fmt.Sprintf("dbus: invalid signature: %q (%s)", e.Sig, e.Reason) +} + +// Try to read a single type from this string. If it was successful, err is nil +// and rem is the remaining unparsed part. Otherwise, err is a non-nil +// SignatureError and rem is "". depth is the current recursion depth which may +// not be greater than 64 and should be given as 0 on the first call. 
+func validSingle(s string, depth int) (err error, rem string) { + if s == "" { + return SignatureError{Sig: s, Reason: "empty signature"}, "" + } + if depth > 64 { + return SignatureError{Sig: s, Reason: "container nesting too deep"}, "" + } + switch s[0] { + case 'y', 'b', 'n', 'q', 'i', 'u', 'x', 't', 'd', 's', 'g', 'o', 'v', 'h': + return nil, s[1:] + case 'a': + if len(s) > 1 && s[1] == '{' { + i := findMatching(s[1:], '{', '}') + if i == -1 { + return SignatureError{Sig: s, Reason: "unmatched '{'"}, "" + } + i++ + rem = s[i+1:] + s = s[2:i] + if err, _ = validSingle(s[:1], depth+1); err != nil { + return err, "" + } + err, nr := validSingle(s[1:], depth+1) + if err != nil { + return err, "" + } + if nr != "" { + return SignatureError{Sig: s, Reason: "too many types in dict"}, "" + } + return nil, rem + } + return validSingle(s[1:], depth+1) + case '(': + i := findMatching(s, '(', ')') + if i == -1 { + return SignatureError{Sig: s, Reason: "unmatched ')'"}, "" + } + rem = s[i+1:] + s = s[1:i] + for err == nil && s != "" { + err, s = validSingle(s, depth+1) + } + if err != nil { + rem = "" + } + return + } + return SignatureError{Sig: s, Reason: "invalid type character"}, "" +} + +func findMatching(s string, left, right rune) int { + n := 0 + for i, v := range s { + if v == left { + n++ + } else if v == right { + n-- + } + if n == 0 { + return i + } + } + return -1 +} + +// typeFor returns the type of the given signature. It ignores any left over +// characters and panics if s doesn't start with a valid type signature. 
// detectEndianness reports the byte order the running machine uses for
// multi-byte integers. It stores a known 32-bit pattern and inspects the
// byte at the lowest address: the most significant byte there means
// big-endian, anything else is treated as little-endian.
func detectEndianness() binary.ByteOrder {
	probe := uint32(0x01020304)
	lowest := *(*byte)(unsafe.Pointer(&probe))
	switch lowest {
	case 0x01:
		return binary.BigEndian
	default:
		return binary.LittleEndian
	}
}
a/vendor/github.com/godbus/dbus/transport_nonce_tcp.go b/vendor/github.com/godbus/dbus/transport_nonce_tcp.go new file mode 100644 index 0000000..697739e --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_nonce_tcp.go @@ -0,0 +1,39 @@ +//+build !windows + +package dbus + +import ( + "errors" + "io/ioutil" + "net" +) + +func init() { + transports["nonce-tcp"] = newNonceTcpTransport +} + +func newNonceTcpTransport(keys string) (transport, error) { + host := getKey(keys, "host") + port := getKey(keys, "port") + noncefile := getKey(keys, "noncefile") + if host == "" || port == "" || noncefile == "" { + return nil, errors.New("dbus: unsupported address (must set host, port and noncefile)") + } + protocol, err := tcpFamily(keys) + if err != nil { + return nil, err + } + socket, err := net.Dial(protocol, net.JoinHostPort(host, port)) + if err != nil { + return nil, err + } + b, err := ioutil.ReadFile(noncefile) + if err != nil { + return nil, err + } + _, err = socket.Write(b) + if err != nil { + return nil, err + } + return NewConn(socket) +} diff --git a/vendor/github.com/godbus/dbus/transport_tcp.go b/vendor/github.com/godbus/dbus/transport_tcp.go new file mode 100644 index 0000000..dd1c8e5 --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_tcp.go @@ -0,0 +1,43 @@ +//+build !windows + +package dbus + +import ( + "errors" + "net" +) + +func init() { + transports["tcp"] = newTcpTransport +} + +func tcpFamily(keys string) (string, error) { + switch getKey(keys, "family") { + case "": + return "tcp", nil + case "ipv4": + return "tcp4", nil + case "ipv6": + return "tcp6", nil + default: + return "", errors.New("dbus: invalid tcp family (must be ipv4 or ipv6)") + } +} + +func newTcpTransport(keys string) (transport, error) { + host := getKey(keys, "host") + port := getKey(keys, "port") + if host == "" || port == "" { + return nil, errors.New("dbus: unsupported address (must set host and port)") + } + + protocol, err := tcpFamily(keys) + if err != nil { + 
return nil, err + } + socket, err := net.Dial(protocol, net.JoinHostPort(host, port)) + if err != nil { + return nil, err + } + return NewConn(socket) +} diff --git a/vendor/github.com/godbus/dbus/transport_unix.go b/vendor/github.com/godbus/dbus/transport_unix.go new file mode 100644 index 0000000..f000c6b --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_unix.go @@ -0,0 +1,214 @@ +//+build !windows,!solaris + +package dbus + +import ( + "bytes" + "encoding/binary" + "errors" + "io" + "net" + "syscall" +) + +type oobReader struct { + conn *net.UnixConn + oob []byte + buf [4096]byte +} + +func (o *oobReader) Read(b []byte) (n int, err error) { + n, oobn, flags, _, err := o.conn.ReadMsgUnix(b, o.buf[:]) + if err != nil { + return n, err + } + if flags&syscall.MSG_CTRUNC != 0 { + return n, errors.New("dbus: control data truncated (too many fds received)") + } + o.oob = append(o.oob, o.buf[:oobn]...) + return n, nil +} + +type unixTransport struct { + *net.UnixConn + rdr *oobReader + hasUnixFDs bool +} + +func newUnixTransport(keys string) (transport, error) { + var err error + + t := new(unixTransport) + abstract := getKey(keys, "abstract") + path := getKey(keys, "path") + switch { + case abstract == "" && path == "": + return nil, errors.New("dbus: invalid address (neither path nor abstract set)") + case abstract != "" && path == "": + t.UnixConn, err = net.DialUnix("unix", nil, &net.UnixAddr{Name: "@" + abstract, Net: "unix"}) + if err != nil { + return nil, err + } + return t, nil + case abstract == "" && path != "": + t.UnixConn, err = net.DialUnix("unix", nil, &net.UnixAddr{Name: path, Net: "unix"}) + if err != nil { + return nil, err + } + return t, nil + default: + return nil, errors.New("dbus: invalid address (both path and abstract set)") + } +} + +func init() { + transports["unix"] = newUnixTransport +} + +func (t *unixTransport) EnableUnixFDs() { + t.hasUnixFDs = true +} + +func (t *unixTransport) ReadMessage() (*Message, error) { + var ( + blen, 
hlen uint32 + csheader [16]byte + headers []header + order binary.ByteOrder + unixfds uint32 + ) + // To be sure that all bytes of out-of-band data are read, we use a special + // reader that uses ReadUnix on the underlying connection instead of Read + // and gathers the out-of-band data in a buffer. + if t.rdr == nil { + t.rdr = &oobReader{conn: t.UnixConn} + } else { + t.rdr.oob = nil + } + + // read the first 16 bytes (the part of the header that has a constant size), + // from which we can figure out the length of the rest of the message + if _, err := io.ReadFull(t.rdr, csheader[:]); err != nil { + return nil, err + } + switch csheader[0] { + case 'l': + order = binary.LittleEndian + case 'B': + order = binary.BigEndian + default: + return nil, InvalidMessageError("invalid byte order") + } + // csheader[4:8] -> length of message body, csheader[12:16] -> length of + // header fields (without alignment) + binary.Read(bytes.NewBuffer(csheader[4:8]), order, &blen) + binary.Read(bytes.NewBuffer(csheader[12:]), order, &hlen) + if hlen%8 != 0 { + hlen += 8 - (hlen % 8) + } + + // decode headers and look for unix fds + headerdata := make([]byte, hlen+4) + copy(headerdata, csheader[12:]) + if _, err := io.ReadFull(t.rdr, headerdata[4:]); err != nil { + return nil, err + } + dec := newDecoder(bytes.NewBuffer(headerdata), order) + dec.pos = 12 + vs, err := dec.Decode(Signature{"a(yv)"}) + if err != nil { + return nil, err + } + Store(vs, &headers) + for _, v := range headers { + if v.Field == byte(FieldUnixFDs) { + unixfds, _ = v.Variant.value.(uint32) + } + } + all := make([]byte, 16+hlen+blen) + copy(all, csheader[:]) + copy(all[16:], headerdata[4:]) + if _, err := io.ReadFull(t.rdr, all[16+hlen:]); err != nil { + return nil, err + } + if unixfds != 0 { + if !t.hasUnixFDs { + return nil, errors.New("dbus: got unix fds on unsupported transport") + } + // read the fds from the OOB data + scms, err := syscall.ParseSocketControlMessage(t.rdr.oob) + if err != nil { + return 
nil, err + } + if len(scms) != 1 { + return nil, errors.New("dbus: received more than one socket control message") + } + fds, err := syscall.ParseUnixRights(&scms[0]) + if err != nil { + return nil, err + } + msg, err := DecodeMessage(bytes.NewBuffer(all)) + if err != nil { + return nil, err + } + // substitute the values in the message body (which are indices for the + // array receiver via OOB) with the actual values + for i, v := range msg.Body { + switch v.(type) { + case UnixFDIndex: + j := v.(UnixFDIndex) + if uint32(j) >= unixfds { + return nil, InvalidMessageError("invalid index for unix fd") + } + msg.Body[i] = UnixFD(fds[j]) + case []UnixFDIndex: + idxArray := v.([]UnixFDIndex) + fdArray := make([]UnixFD, len(idxArray)) + for k, j := range idxArray { + if uint32(j) >= unixfds { + return nil, InvalidMessageError("invalid index for unix fd") + } + fdArray[k] = UnixFD(fds[j]) + } + msg.Body[i] = fdArray + } + } + return msg, nil + } + return DecodeMessage(bytes.NewBuffer(all)) +} + +func (t *unixTransport) SendMessage(msg *Message) error { + fds := make([]int, 0) + for i, v := range msg.Body { + if fd, ok := v.(UnixFD); ok { + msg.Body[i] = UnixFDIndex(len(fds)) + fds = append(fds, int(fd)) + } + } + if len(fds) != 0 { + if !t.hasUnixFDs { + return errors.New("dbus: unix fd passing not enabled") + } + msg.Headers[FieldUnixFDs] = MakeVariant(uint32(len(fds))) + oob := syscall.UnixRights(fds...) 
+ buf := new(bytes.Buffer) + msg.EncodeTo(buf, nativeEndian) + n, oobn, err := t.UnixConn.WriteMsgUnix(buf.Bytes(), oob, nil) + if err != nil { + return err + } + if n != buf.Len() || oobn != len(oob) { + return io.ErrShortWrite + } + } else { + if err := msg.EncodeTo(t, nativeEndian); err != nil { + return nil + } + } + return nil +} + +func (t *unixTransport) SupportsUnixFDs() bool { + return true +} diff --git a/vendor/github.com/godbus/dbus/transport_unixcred_dragonfly.go b/vendor/github.com/godbus/dbus/transport_unixcred_dragonfly.go new file mode 100644 index 0000000..a8cd393 --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_unixcred_dragonfly.go @@ -0,0 +1,95 @@ +// The UnixCredentials system call is currently only implemented on Linux +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// https://golang.org/s/go1.4-syscall +// http://code.google.com/p/go/source/browse/unix/sockcmsg_linux.go?repo=sys + +// Local implementation of the UnixCredentials system call for DragonFly BSD + +package dbus + +/* +#include +*/ +import "C" + +import ( + "io" + "os" + "syscall" + "unsafe" +) + +// http://golang.org/src/pkg/syscall/ztypes_linux_amd64.go +// http://golang.org/src/pkg/syscall/ztypes_dragonfly_amd64.go +type Ucred struct { + Pid int32 + Uid uint32 + Gid uint32 +} + +// http://golang.org/src/pkg/syscall/types_linux.go +// http://golang.org/src/pkg/syscall/types_dragonfly.go +// https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/sys/sys/ucred.h +const ( + SizeofUcred = C.sizeof_struct_ucred +) + +// http://golang.org/src/pkg/syscall/sockcmsg_unix.go +func cmsgAlignOf(salen int) int { + // From http://golang.org/src/pkg/syscall/sockcmsg_unix.go + //salign := sizeofPtr + // NOTE: It seems like 64-bit Darwin and DragonFly BSD kernels + // still require 32-bit aligned access to network subsystem. 
+ //if darwin64Bit || dragonfly64Bit { + // salign = 4 + //} + salign := 4 + return (salen + salign - 1) & ^(salign - 1) +} + +// http://golang.org/src/pkg/syscall/sockcmsg_unix.go +func cmsgData(h *syscall.Cmsghdr) unsafe.Pointer { + return unsafe.Pointer(uintptr(unsafe.Pointer(h)) + uintptr(cmsgAlignOf(syscall.SizeofCmsghdr))) +} + +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// UnixCredentials encodes credentials into a socket control message +// for sending to another process. This can be used for +// authentication. +func UnixCredentials(ucred *Ucred) []byte { + b := make([]byte, syscall.CmsgSpace(SizeofUcred)) + h := (*syscall.Cmsghdr)(unsafe.Pointer(&b[0])) + h.Level = syscall.SOL_SOCKET + h.Type = syscall.SCM_CREDS + h.SetLen(syscall.CmsgLen(SizeofUcred)) + *((*Ucred)(cmsgData(h))) = *ucred + return b +} + +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// ParseUnixCredentials decodes a socket control message that contains +// credentials in a Ucred structure. To receive such a message, the +// SO_PASSCRED option must be enabled on the socket. 
+func ParseUnixCredentials(m *syscall.SocketControlMessage) (*Ucred, error) { + if m.Header.Level != syscall.SOL_SOCKET { + return nil, syscall.EINVAL + } + if m.Header.Type != syscall.SCM_CREDS { + return nil, syscall.EINVAL + } + ucred := *(*Ucred)(unsafe.Pointer(&m.Data[0])) + return &ucred, nil +} + +func (t *unixTransport) SendNullByte() error { + ucred := &Ucred{Pid: int32(os.Getpid()), Uid: uint32(os.Getuid()), Gid: uint32(os.Getgid())} + b := UnixCredentials(ucred) + _, oobn, err := t.UnixConn.WriteMsgUnix([]byte{0}, b, nil) + if err != nil { + return err + } + if oobn != len(b) { + return io.ErrShortWrite + } + return nil +} diff --git a/vendor/github.com/godbus/dbus/transport_unixcred_freebsd.go b/vendor/github.com/godbus/dbus/transport_unixcred_freebsd.go new file mode 100644 index 0000000..0fc5b92 --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_unixcred_freebsd.go @@ -0,0 +1,91 @@ +// The UnixCredentials system call is currently only implemented on Linux +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// https://golang.org/s/go1.4-syscall +// http://code.google.com/p/go/source/browse/unix/sockcmsg_linux.go?repo=sys + +// Local implementation of the UnixCredentials system call for FreeBSD + +package dbus + +/* +const int sizeofPtr = sizeof(void*); +#define _WANT_UCRED +#include +*/ +import "C" + +import ( + "io" + "os" + "syscall" + "unsafe" +) + +// http://golang.org/src/pkg/syscall/ztypes_linux_amd64.go +// https://golang.org/src/syscall/ztypes_freebsd_amd64.go +type Ucred struct { + Pid int32 + Uid uint32 + Gid uint32 +} + +// http://golang.org/src/pkg/syscall/types_linux.go +// https://golang.org/src/syscall/types_freebsd.go +// https://github.com/freebsd/freebsd/blob/master/sys/sys/ucred.h +const ( + SizeofUcred = C.sizeof_struct_ucred +) + +// http://golang.org/src/pkg/syscall/sockcmsg_unix.go +func cmsgAlignOf(salen int) int { + salign := C.sizeofPtr + + return (salen + salign - 1) & ^(salign - 1) +} + +// 
http://golang.org/src/pkg/syscall/sockcmsg_unix.go +func cmsgData(h *syscall.Cmsghdr) unsafe.Pointer { + return unsafe.Pointer(uintptr(unsafe.Pointer(h)) + uintptr(cmsgAlignOf(syscall.SizeofCmsghdr))) +} + +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// UnixCredentials encodes credentials into a socket control message +// for sending to another process. This can be used for +// authentication. +func UnixCredentials(ucred *Ucred) []byte { + b := make([]byte, syscall.CmsgSpace(SizeofUcred)) + h := (*syscall.Cmsghdr)(unsafe.Pointer(&b[0])) + h.Level = syscall.SOL_SOCKET + h.Type = syscall.SCM_CREDS + h.SetLen(syscall.CmsgLen(SizeofUcred)) + *((*Ucred)(cmsgData(h))) = *ucred + return b +} + +// http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// ParseUnixCredentials decodes a socket control message that contains +// credentials in a Ucred structure. To receive such a message, the +// SO_PASSCRED option must be enabled on the socket. +func ParseUnixCredentials(m *syscall.SocketControlMessage) (*Ucred, error) { + if m.Header.Level != syscall.SOL_SOCKET { + return nil, syscall.EINVAL + } + if m.Header.Type != syscall.SCM_CREDS { + return nil, syscall.EINVAL + } + ucred := *(*Ucred)(unsafe.Pointer(&m.Data[0])) + return &ucred, nil +} + +func (t *unixTransport) SendNullByte() error { + ucred := &Ucred{Pid: int32(os.Getpid()), Uid: uint32(os.Getuid()), Gid: uint32(os.Getgid())} + b := UnixCredentials(ucred) + _, oobn, err := t.UnixConn.WriteMsgUnix([]byte{0}, b, nil) + if err != nil { + return err + } + if oobn != len(b) { + return io.ErrShortWrite + } + return nil +} diff --git a/vendor/github.com/godbus/dbus/transport_unixcred_linux.go b/vendor/github.com/godbus/dbus/transport_unixcred_linux.go new file mode 100644 index 0000000..d9dfdf6 --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_unixcred_linux.go @@ -0,0 +1,25 @@ +// The UnixCredentials system call is currently only implemented on Linux +// 
http://golang.org/src/pkg/syscall/sockcmsg_linux.go +// https://golang.org/s/go1.4-syscall +// http://code.google.com/p/go/source/browse/unix/sockcmsg_linux.go?repo=sys + +package dbus + +import ( + "io" + "os" + "syscall" +) + +func (t *unixTransport) SendNullByte() error { + ucred := &syscall.Ucred{Pid: int32(os.Getpid()), Uid: uint32(os.Getuid()), Gid: uint32(os.Getgid())} + b := syscall.UnixCredentials(ucred) + _, oobn, err := t.UnixConn.WriteMsgUnix([]byte{0}, b, nil) + if err != nil { + return err + } + if oobn != len(b) { + return io.ErrShortWrite + } + return nil +} diff --git a/vendor/github.com/godbus/dbus/transport_unixcred_openbsd.go b/vendor/github.com/godbus/dbus/transport_unixcred_openbsd.go new file mode 100644 index 0000000..af7bafd --- /dev/null +++ b/vendor/github.com/godbus/dbus/transport_unixcred_openbsd.go @@ -0,0 +1,14 @@ +package dbus + +import "io" + +func (t *unixTransport) SendNullByte() error { + n, _, err := t.UnixConn.WriteMsgUnix([]byte{0}, nil, nil) + if err != nil { + return err + } + if n != 1 { + return io.ErrShortWrite + } + return nil +} diff --git a/vendor/github.com/godbus/dbus/variant.go b/vendor/github.com/godbus/dbus/variant.go new file mode 100644 index 0000000..0ca123b --- /dev/null +++ b/vendor/github.com/godbus/dbus/variant.go @@ -0,0 +1,144 @@ +package dbus + +import ( + "bytes" + "fmt" + "reflect" + "sort" + "strconv" +) + +// Variant represents the D-Bus variant type. +type Variant struct { + sig Signature + value interface{} +} + +// MakeVariant converts the given value to a Variant. It panics if v cannot be +// represented as a D-Bus type. +func MakeVariant(v interface{}) Variant { + return MakeVariantWithSignature(v, SignatureOf(v)) +} + +// MakeVariantWithSignature converts the given value to a Variant. 
+func MakeVariantWithSignature(v interface{}, s Signature) Variant { + return Variant{s, v} +} + +// ParseVariant parses the given string as a variant as described at +// https://developer.gnome.org/glib/unstable/gvariant-text.html. If sig is not +// empty, it is taken to be the expected signature for the variant. +func ParseVariant(s string, sig Signature) (Variant, error) { + tokens := varLex(s) + p := &varParser{tokens: tokens} + n, err := varMakeNode(p) + if err != nil { + return Variant{}, err + } + if sig.str == "" { + sig, err = varInfer(n) + if err != nil { + return Variant{}, err + } + } + v, err := n.Value(sig) + if err != nil { + return Variant{}, err + } + return MakeVariant(v), nil +} + +// format returns a formatted version of v and whether this string can be parsed +// unambigously. +func (v Variant) format() (string, bool) { + switch v.sig.str[0] { + case 'b', 'i': + return fmt.Sprint(v.value), true + case 'n', 'q', 'u', 'x', 't', 'd', 'h': + return fmt.Sprint(v.value), false + case 's': + return strconv.Quote(v.value.(string)), true + case 'o': + return strconv.Quote(string(v.value.(ObjectPath))), false + case 'g': + return strconv.Quote(v.value.(Signature).str), false + case 'v': + s, unamb := v.value.(Variant).format() + if !unamb { + return "<@" + v.value.(Variant).sig.str + " " + s + ">", true + } + return "<" + s + ">", true + case 'y': + return fmt.Sprintf("%#x", v.value.(byte)), false + } + rv := reflect.ValueOf(v.value) + switch rv.Kind() { + case reflect.Slice: + if rv.Len() == 0 { + return "[]", false + } + unamb := true + buf := bytes.NewBuffer([]byte("[")) + for i := 0; i < rv.Len(); i++ { + // TODO: slooow + s, b := MakeVariant(rv.Index(i).Interface()).format() + unamb = unamb && b + buf.WriteString(s) + if i != rv.Len()-1 { + buf.WriteString(", ") + } + } + buf.WriteByte(']') + return buf.String(), unamb + case reflect.Map: + if rv.Len() == 0 { + return "{}", false + } + unamb := true + var buf bytes.Buffer + kvs := make([]string, 
rv.Len()) + for i, k := range rv.MapKeys() { + s, b := MakeVariant(k.Interface()).format() + unamb = unamb && b + buf.Reset() + buf.WriteString(s) + buf.WriteString(": ") + s, b = MakeVariant(rv.MapIndex(k).Interface()).format() + unamb = unamb && b + buf.WriteString(s) + kvs[i] = buf.String() + } + buf.Reset() + buf.WriteByte('{') + sort.Strings(kvs) + for i, kv := range kvs { + if i > 0 { + buf.WriteString(", ") + } + buf.WriteString(kv) + } + buf.WriteByte('}') + return buf.String(), unamb + } + return `"INVALID"`, true +} + +// Signature returns the D-Bus signature of the underlying value of v. +func (v Variant) Signature() Signature { + return v.sig +} + +// String returns the string representation of the underlying value of v as +// described at https://developer.gnome.org/glib/unstable/gvariant-text.html. +func (v Variant) String() string { + s, unamb := v.format() + if !unamb { + return "@" + v.sig.str + " " + s + } + return s +} + +// Value returns the underlying value of v. +func (v Variant) Value() interface{} { + return v.value +} diff --git a/vendor/github.com/godbus/dbus/variant_lexer.go b/vendor/github.com/godbus/dbus/variant_lexer.go new file mode 100644 index 0000000..332007d --- /dev/null +++ b/vendor/github.com/godbus/dbus/variant_lexer.go @@ -0,0 +1,284 @@ +package dbus + +import ( + "fmt" + "strings" + "unicode" + "unicode/utf8" +) + +// Heavily inspired by the lexer from text/template. 
+ +type varToken struct { + typ varTokenType + val string +} + +type varTokenType byte + +const ( + tokEOF varTokenType = iota + tokError + tokNumber + tokString + tokBool + tokArrayStart + tokArrayEnd + tokDictStart + tokDictEnd + tokVariantStart + tokVariantEnd + tokComma + tokColon + tokType + tokByteString +) + +type varLexer struct { + input string + start int + pos int + width int + tokens []varToken +} + +type lexState func(*varLexer) lexState + +func varLex(s string) []varToken { + l := &varLexer{input: s} + l.run() + return l.tokens +} + +func (l *varLexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +func (l *varLexer) backup() { + l.pos -= l.width +} + +func (l *varLexer) emit(t varTokenType) { + l.tokens = append(l.tokens, varToken{t, l.input[l.start:l.pos]}) + l.start = l.pos +} + +func (l *varLexer) errorf(format string, v ...interface{}) lexState { + l.tokens = append(l.tokens, varToken{ + tokError, + fmt.Sprintf(format, v...), + }) + return nil +} + +func (l *varLexer) ignore() { + l.start = l.pos +} + +func (l *varLexer) next() rune { + var r rune + + if l.pos >= len(l.input) { + l.width = 0 + return -1 + } + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} + +func (l *varLexer) run() { + for state := varLexNormal; state != nil; { + state = state(l) + } +} + +func (l *varLexer) peek() rune { + r := l.next() + l.backup() + return r +} + +func varLexNormal(l *varLexer) lexState { + for { + r := l.next() + switch { + case r == -1: + l.emit(tokEOF) + return nil + case r == '[': + l.emit(tokArrayStart) + case r == ']': + l.emit(tokArrayEnd) + case r == '{': + l.emit(tokDictStart) + case r == '}': + l.emit(tokDictEnd) + case r == '<': + l.emit(tokVariantStart) + case r == '>': + l.emit(tokVariantEnd) + case r == ':': + l.emit(tokColon) + case r == ',': + l.emit(tokComma) + case r == '\'' || r == '"': + l.backup() + return varLexString + 
case r == '@': + l.backup() + return varLexType + case unicode.IsSpace(r): + l.ignore() + case unicode.IsNumber(r) || r == '+' || r == '-': + l.backup() + return varLexNumber + case r == 'b': + pos := l.start + if n := l.peek(); n == '"' || n == '\'' { + return varLexByteString + } + // not a byte string; try to parse it as a type or bool below + l.pos = pos + 1 + l.width = 1 + fallthrough + default: + // either a bool or a type. Try bools first. + l.backup() + if l.pos+4 <= len(l.input) { + if l.input[l.pos:l.pos+4] == "true" { + l.pos += 4 + l.emit(tokBool) + continue + } + } + if l.pos+5 <= len(l.input) { + if l.input[l.pos:l.pos+5] == "false" { + l.pos += 5 + l.emit(tokBool) + continue + } + } + // must be a type. + return varLexType + } + } +} + +var varTypeMap = map[string]string{ + "boolean": "b", + "byte": "y", + "int16": "n", + "uint16": "q", + "int32": "i", + "uint32": "u", + "int64": "x", + "uint64": "t", + "double": "f", + "string": "s", + "objectpath": "o", + "signature": "g", +} + +func varLexByteString(l *varLexer) lexState { + q := l.next() +Loop: + for { + switch l.next() { + case '\\': + if r := l.next(); r != -1 { + break + } + fallthrough + case -1: + return l.errorf("unterminated bytestring") + case q: + break Loop + } + } + l.emit(tokByteString) + return varLexNormal +} + +func varLexNumber(l *varLexer) lexState { + l.accept("+-") + digits := "0123456789" + if l.accept("0") { + if l.accept("x") { + digits = "0123456789abcdefABCDEF" + } else { + digits = "01234567" + } + } + for strings.IndexRune(digits, l.next()) >= 0 { + } + l.backup() + if l.accept(".") { + for strings.IndexRune(digits, l.next()) >= 0 { + } + l.backup() + } + if l.accept("eE") { + l.accept("+-") + for strings.IndexRune("0123456789", l.next()) >= 0 { + } + l.backup() + } + if r := l.peek(); unicode.IsLetter(r) { + l.next() + return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) + } + l.emit(tokNumber) + return varLexNormal +} + +func varLexString(l *varLexer) 
lexState { + q := l.next() +Loop: + for { + switch l.next() { + case '\\': + if r := l.next(); r != -1 { + break + } + fallthrough + case -1: + return l.errorf("unterminated string") + case q: + break Loop + } + } + l.emit(tokString) + return varLexNormal +} + +func varLexType(l *varLexer) lexState { + at := l.accept("@") + for { + r := l.next() + if r == -1 { + break + } + if unicode.IsSpace(r) { + l.backup() + break + } + } + if at { + if _, err := ParseSignature(l.input[l.start+1 : l.pos]); err != nil { + return l.errorf("%s", err) + } + } else { + if _, ok := varTypeMap[l.input[l.start:l.pos]]; ok { + l.emit(tokType) + return varLexNormal + } + return l.errorf("unrecognized type %q", l.input[l.start:l.pos]) + } + l.emit(tokType) + return varLexNormal +} diff --git a/vendor/github.com/godbus/dbus/variant_parser.go b/vendor/github.com/godbus/dbus/variant_parser.go new file mode 100644 index 0000000..d20f5da --- /dev/null +++ b/vendor/github.com/godbus/dbus/variant_parser.go @@ -0,0 +1,817 @@ +package dbus + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" + "unicode/utf8" +) + +type varParser struct { + tokens []varToken + i int +} + +func (p *varParser) backup() { + p.i-- +} + +func (p *varParser) next() varToken { + if p.i < len(p.tokens) { + t := p.tokens[p.i] + p.i++ + return t + } + return varToken{typ: tokEOF} +} + +type varNode interface { + Infer() (Signature, error) + String() string + Sigs() sigSet + Value(Signature) (interface{}, error) +} + +func varMakeNode(p *varParser) (varNode, error) { + var sig Signature + + for { + t := p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + case tokNumber: + return varMakeNumNode(t, sig) + case tokString: + return varMakeStringNode(t, sig) + case tokBool: + if sig.str != "" && sig.str != "b" { + return nil, varTypeError{t.val, sig} + } + b, err := strconv.ParseBool(t.val) + if err != nil { + return nil, err + 
} + return boolNode(b), nil + case tokArrayStart: + return varMakeArrayNode(p, sig) + case tokVariantStart: + return varMakeVariantNode(p, sig) + case tokDictStart: + return varMakeDictNode(p, sig) + case tokType: + if sig.str != "" { + return nil, errors.New("unexpected type annotation") + } + if t.val[0] == '@' { + sig.str = t.val[1:] + } else { + sig.str = varTypeMap[t.val] + } + case tokByteString: + if sig.str != "" && sig.str != "ay" { + return nil, varTypeError{t.val, sig} + } + b, err := varParseByteString(t.val) + if err != nil { + return nil, err + } + return byteStringNode(b), nil + default: + return nil, fmt.Errorf("unexpected %q", t.val) + } + } +} + +type varTypeError struct { + val string + sig Signature +} + +func (e varTypeError) Error() string { + return fmt.Sprintf("dbus: can't parse %q as type %q", e.val, e.sig.str) +} + +type sigSet map[Signature]bool + +func (s sigSet) Empty() bool { + return len(s) == 0 +} + +func (s sigSet) Intersect(s2 sigSet) sigSet { + r := make(sigSet) + for k := range s { + if s2[k] { + r[k] = true + } + } + return r +} + +func (s sigSet) Single() (Signature, bool) { + if len(s) == 1 { + for k := range s { + return k, true + } + } + return Signature{}, false +} + +func (s sigSet) ToArray() sigSet { + r := make(sigSet, len(s)) + for k := range s { + r[Signature{"a" + k.str}] = true + } + return r +} + +type numNode struct { + sig Signature + str string + val interface{} +} + +var numSigSet = sigSet{ + Signature{"y"}: true, + Signature{"n"}: true, + Signature{"q"}: true, + Signature{"i"}: true, + Signature{"u"}: true, + Signature{"x"}: true, + Signature{"t"}: true, + Signature{"d"}: true, +} + +func (n numNode) Infer() (Signature, error) { + if strings.ContainsAny(n.str, ".e") { + return Signature{"d"}, nil + } + return Signature{"i"}, nil +} + +func (n numNode) String() string { + return n.str +} + +func (n numNode) Sigs() sigSet { + if n.sig.str != "" { + return sigSet{n.sig: true} + } + if strings.ContainsAny(n.str, 
".e") { + return sigSet{Signature{"d"}: true} + } + return numSigSet +} + +func (n numNode) Value(sig Signature) (interface{}, error) { + if n.sig.str != "" && n.sig != sig { + return nil, varTypeError{n.str, sig} + } + if n.val != nil { + return n.val, nil + } + return varNumAs(n.str, sig) +} + +func varMakeNumNode(tok varToken, sig Signature) (varNode, error) { + if sig.str == "" { + return numNode{str: tok.val}, nil + } + num, err := varNumAs(tok.val, sig) + if err != nil { + return nil, err + } + return numNode{sig: sig, val: num}, nil +} + +func varNumAs(s string, sig Signature) (interface{}, error) { + isUnsigned := false + size := 32 + switch sig.str { + case "n": + size = 16 + case "i": + case "x": + size = 64 + case "y": + size = 8 + isUnsigned = true + case "q": + size = 16 + isUnsigned = true + case "u": + isUnsigned = true + case "t": + size = 64 + isUnsigned = true + case "d": + d, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, err + } + return d, nil + default: + return nil, varTypeError{s, sig} + } + base := 10 + if strings.HasPrefix(s, "0x") { + base = 16 + s = s[2:] + } + if strings.HasPrefix(s, "0") && len(s) != 1 { + base = 8 + s = s[1:] + } + if isUnsigned { + i, err := strconv.ParseUint(s, base, size) + if err != nil { + return nil, err + } + var v interface{} = i + switch sig.str { + case "y": + v = byte(i) + case "q": + v = uint16(i) + case "u": + v = uint32(i) + } + return v, nil + } + i, err := strconv.ParseInt(s, base, size) + if err != nil { + return nil, err + } + var v interface{} = i + switch sig.str { + case "n": + v = int16(i) + case "i": + v = int32(i) + } + return v, nil +} + +type stringNode struct { + sig Signature + str string // parsed + val interface{} // has correct type +} + +var stringSigSet = sigSet{ + Signature{"s"}: true, + Signature{"g"}: true, + Signature{"o"}: true, +} + +func (n stringNode) Infer() (Signature, error) { + return Signature{"s"}, nil +} + +func (n stringNode) String() string { + return 
n.str +} + +func (n stringNode) Sigs() sigSet { + if n.sig.str != "" { + return sigSet{n.sig: true} + } + return stringSigSet +} + +func (n stringNode) Value(sig Signature) (interface{}, error) { + if n.sig.str != "" && n.sig != sig { + return nil, varTypeError{n.str, sig} + } + if n.val != nil { + return n.val, nil + } + switch { + case sig.str == "g": + return Signature{n.str}, nil + case sig.str == "o": + return ObjectPath(n.str), nil + case sig.str == "s": + return n.str, nil + default: + return nil, varTypeError{n.str, sig} + } +} + +func varMakeStringNode(tok varToken, sig Signature) (varNode, error) { + if sig.str != "" && sig.str != "s" && sig.str != "g" && sig.str != "o" { + return nil, fmt.Errorf("invalid type %q for string", sig.str) + } + s, err := varParseString(tok.val) + if err != nil { + return nil, err + } + n := stringNode{str: s} + if sig.str == "" { + return stringNode{str: s}, nil + } + n.sig = sig + switch sig.str { + case "o": + n.val = ObjectPath(s) + case "g": + n.val = Signature{s} + case "s": + n.val = s + } + return n, nil +} + +func varParseString(s string) (string, error) { + // quotes are guaranteed to be there + s = s[1 : len(s)-1] + buf := new(bytes.Buffer) + for len(s) != 0 { + r, size := utf8.DecodeRuneInString(s) + if r == utf8.RuneError && size == 1 { + return "", errors.New("invalid UTF-8") + } + s = s[size:] + if r != '\\' { + buf.WriteRune(r) + continue + } + r, size = utf8.DecodeRuneInString(s) + if r == utf8.RuneError && size == 1 { + return "", errors.New("invalid UTF-8") + } + s = s[size:] + switch r { + case 'a': + buf.WriteRune(0x7) + case 'b': + buf.WriteRune(0x8) + case 'f': + buf.WriteRune(0xc) + case 'n': + buf.WriteRune('\n') + case 'r': + buf.WriteRune('\r') + case 't': + buf.WriteRune('\t') + case '\n': + case 'u': + if len(s) < 4 { + return "", errors.New("short unicode escape") + } + r, err := strconv.ParseUint(s[:4], 16, 32) + if err != nil { + return "", err + } + buf.WriteRune(rune(r)) + s = s[4:] + case 
'U': + if len(s) < 8 { + return "", errors.New("short unicode escape") + } + r, err := strconv.ParseUint(s[:8], 16, 32) + if err != nil { + return "", err + } + buf.WriteRune(rune(r)) + s = s[8:] + default: + buf.WriteRune(r) + } + } + return buf.String(), nil +} + +var boolSigSet = sigSet{Signature{"b"}: true} + +type boolNode bool + +func (boolNode) Infer() (Signature, error) { + return Signature{"b"}, nil +} + +func (b boolNode) String() string { + if b { + return "true" + } + return "false" +} + +func (boolNode) Sigs() sigSet { + return boolSigSet +} + +func (b boolNode) Value(sig Signature) (interface{}, error) { + if sig.str != "b" { + return nil, varTypeError{b.String(), sig} + } + return bool(b), nil +} + +type arrayNode struct { + set sigSet + children []varNode + val interface{} +} + +func (n arrayNode) Infer() (Signature, error) { + for _, v := range n.children { + csig, err := varInfer(v) + if err != nil { + continue + } + return Signature{"a" + csig.str}, nil + } + return Signature{}, fmt.Errorf("can't infer type for %q", n.String()) +} + +func (n arrayNode) String() string { + s := "[" + for i, v := range n.children { + s += v.String() + if i != len(n.children)-1 { + s += ", " + } + } + return s + "]" +} + +func (n arrayNode) Sigs() sigSet { + return n.set +} + +func (n arrayNode) Value(sig Signature) (interface{}, error) { + if n.set.Empty() { + // no type information whatsoever, so this must be an empty slice + return reflect.MakeSlice(typeFor(sig.str), 0, 0).Interface(), nil + } + if !n.set[sig] { + return nil, varTypeError{n.String(), sig} + } + s := reflect.MakeSlice(typeFor(sig.str), len(n.children), len(n.children)) + for i, v := range n.children { + rv, err := v.Value(Signature{sig.str[1:]}) + if err != nil { + return nil, err + } + s.Index(i).Set(reflect.ValueOf(rv)) + } + return s.Interface(), nil +} + +func varMakeArrayNode(p *varParser, sig Signature) (varNode, error) { + var n arrayNode + if sig.str != "" { + n.set = sigSet{sig: true} + } 
+ if t := p.next(); t.typ == tokArrayEnd { + return n, nil + } else { + p.backup() + } +Loop: + for { + t := p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + } + p.backup() + cn, err := varMakeNode(p) + if err != nil { + return nil, err + } + if cset := cn.Sigs(); !cset.Empty() { + if n.set.Empty() { + n.set = cset.ToArray() + } else { + nset := cset.ToArray().Intersect(n.set) + if nset.Empty() { + return nil, fmt.Errorf("can't parse %q with given type information", cn.String()) + } + n.set = nset + } + } + n.children = append(n.children, cn) + switch t := p.next(); t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + case tokArrayEnd: + break Loop + case tokComma: + continue + default: + return nil, fmt.Errorf("unexpected %q", t.val) + } + } + return n, nil +} + +type variantNode struct { + n varNode +} + +var variantSet = sigSet{ + Signature{"v"}: true, +} + +func (variantNode) Infer() (Signature, error) { + return Signature{"v"}, nil +} + +func (n variantNode) String() string { + return "<" + n.n.String() + ">" +} + +func (variantNode) Sigs() sigSet { + return variantSet +} + +func (n variantNode) Value(sig Signature) (interface{}, error) { + if sig.str != "v" { + return nil, varTypeError{n.String(), sig} + } + sig, err := varInfer(n.n) + if err != nil { + return nil, err + } + v, err := n.n.Value(sig) + if err != nil { + return nil, err + } + return MakeVariant(v), nil +} + +func varMakeVariantNode(p *varParser, sig Signature) (varNode, error) { + n, err := varMakeNode(p) + if err != nil { + return nil, err + } + if t := p.next(); t.typ != tokVariantEnd { + return nil, fmt.Errorf("unexpected %q", t.val) + } + vn := variantNode{n} + if sig.str != "" && sig.str != "v" { + return nil, varTypeError{vn.String(), sig} + } + return variantNode{n}, nil +} + +type dictEntry struct { + key, val varNode +} + +type dictNode struct { + kset, 
vset sigSet + children []dictEntry + val interface{} +} + +func (n dictNode) Infer() (Signature, error) { + for _, v := range n.children { + ksig, err := varInfer(v.key) + if err != nil { + continue + } + vsig, err := varInfer(v.val) + if err != nil { + continue + } + return Signature{"a{" + ksig.str + vsig.str + "}"}, nil + } + return Signature{}, fmt.Errorf("can't infer type for %q", n.String()) +} + +func (n dictNode) String() string { + s := "{" + for i, v := range n.children { + s += v.key.String() + ": " + v.val.String() + if i != len(n.children)-1 { + s += ", " + } + } + return s + "}" +} + +func (n dictNode) Sigs() sigSet { + r := sigSet{} + for k := range n.kset { + for v := range n.vset { + sig := "a{" + k.str + v.str + "}" + r[Signature{sig}] = true + } + } + return r +} + +func (n dictNode) Value(sig Signature) (interface{}, error) { + set := n.Sigs() + if set.Empty() { + // no type information -> empty dict + return reflect.MakeMap(typeFor(sig.str)).Interface(), nil + } + if !set[sig] { + return nil, varTypeError{n.String(), sig} + } + m := reflect.MakeMap(typeFor(sig.str)) + ksig := Signature{sig.str[2:3]} + vsig := Signature{sig.str[3 : len(sig.str)-1]} + for _, v := range n.children { + kv, err := v.key.Value(ksig) + if err != nil { + return nil, err + } + vv, err := v.val.Value(vsig) + if err != nil { + return nil, err + } + m.SetMapIndex(reflect.ValueOf(kv), reflect.ValueOf(vv)) + } + return m.Interface(), nil +} + +func varMakeDictNode(p *varParser, sig Signature) (varNode, error) { + var n dictNode + + if sig.str != "" { + if len(sig.str) < 5 { + return nil, fmt.Errorf("invalid signature %q for dict type", sig) + } + ksig := Signature{string(sig.str[2])} + vsig := Signature{sig.str[3 : len(sig.str)-1]} + n.kset = sigSet{ksig: true} + n.vset = sigSet{vsig: true} + } + if t := p.next(); t.typ == tokDictEnd { + return n, nil + } else { + p.backup() + } +Loop: + for { + t := p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF 
+ case tokError: + return nil, errors.New(t.val) + } + p.backup() + kn, err := varMakeNode(p) + if err != nil { + return nil, err + } + if kset := kn.Sigs(); !kset.Empty() { + if n.kset.Empty() { + n.kset = kset + } else { + n.kset = kset.Intersect(n.kset) + if n.kset.Empty() { + return nil, fmt.Errorf("can't parse %q with given type information", kn.String()) + } + } + } + t = p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + case tokColon: + default: + return nil, fmt.Errorf("unexpected %q", t.val) + } + t = p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + } + p.backup() + vn, err := varMakeNode(p) + if err != nil { + return nil, err + } + if vset := vn.Sigs(); !vset.Empty() { + if n.vset.Empty() { + n.vset = vset + } else { + n.vset = n.vset.Intersect(vset) + if n.vset.Empty() { + return nil, fmt.Errorf("can't parse %q with given type information", vn.String()) + } + } + } + n.children = append(n.children, dictEntry{kn, vn}) + t = p.next() + switch t.typ { + case tokEOF: + return nil, io.ErrUnexpectedEOF + case tokError: + return nil, errors.New(t.val) + case tokDictEnd: + break Loop + case tokComma: + continue + default: + return nil, fmt.Errorf("unexpected %q", t.val) + } + } + return n, nil +} + +type byteStringNode []byte + +var byteStringSet = sigSet{ + Signature{"ay"}: true, +} + +func (byteStringNode) Infer() (Signature, error) { + return Signature{"ay"}, nil +} + +func (b byteStringNode) String() string { + return string(b) +} + +func (b byteStringNode) Sigs() sigSet { + return byteStringSet +} + +func (b byteStringNode) Value(sig Signature) (interface{}, error) { + if sig.str != "ay" { + return nil, varTypeError{b.String(), sig} + } + return []byte(b), nil +} + +func varParseByteString(s string) ([]byte, error) { + // quotes and b at start are guaranteed to be there + b := make([]byte, 0, 1) + s = s[2 : 
len(s)-1] + for len(s) != 0 { + c := s[0] + s = s[1:] + if c != '\\' { + b = append(b, c) + continue + } + c = s[0] + s = s[1:] + switch c { + case 'a': + b = append(b, 0x7) + case 'b': + b = append(b, 0x8) + case 'f': + b = append(b, 0xc) + case 'n': + b = append(b, '\n') + case 'r': + b = append(b, '\r') + case 't': + b = append(b, '\t') + case 'x': + if len(s) < 2 { + return nil, errors.New("short escape") + } + n, err := strconv.ParseUint(s[:2], 16, 8) + if err != nil { + return nil, err + } + b = append(b, byte(n)) + s = s[2:] + case '0': + if len(s) < 3 { + return nil, errors.New("short escape") + } + n, err := strconv.ParseUint(s[:3], 8, 8) + if err != nil { + return nil, err + } + b = append(b, byte(n)) + s = s[3:] + default: + b = append(b, c) + } + } + return append(b, 0), nil +} + +func varInfer(n varNode) (Signature, error) { + if sig, ok := n.Sigs().Single(); ok { + return sig, nil + } + return n.Infer() +} diff --git a/vendor/github.com/golang/protobuf/LICENSE b/vendor/github.com/golang/protobuf/LICENSE new file mode 100644 index 0000000..1b1b192 --- /dev/null +++ b/vendor/github.com/golang/protobuf/LICENSE @@ -0,0 +1,31 @@ +Go support for Protocol Buffers - Google's data interchange format + +Copyright 2010 The Go Authors. All rights reserved. +https://github.com/golang/protobuf + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/vendor/github.com/golang/protobuf/README.md b/vendor/github.com/golang/protobuf/README.md new file mode 100644 index 0000000..9c4c815 --- /dev/null +++ b/vendor/github.com/golang/protobuf/README.md @@ -0,0 +1,244 @@ +# Go support for Protocol Buffers + +[![Build Status](https://travis-ci.org/golang/protobuf.svg?branch=master)](https://travis-ci.org/golang/protobuf) +[![GoDoc](https://godoc.org/github.com/golang/protobuf?status.svg)](https://godoc.org/github.com/golang/protobuf) + +Google's data interchange format. +Copyright 2010 The Go Authors. +https://github.com/golang/protobuf + +This package and the code it generates require at least Go 1.4. + +This software implements Go bindings for protocol buffers. 
For +information about protocol buffers themselves, see + https://developers.google.com/protocol-buffers/ + +## Installation ## + +To use this software, you must: +- Install the standard C++ implementation of protocol buffers from + https://developers.google.com/protocol-buffers/ +- Of course, install the Go compiler and tools from + https://golang.org/ + See + https://golang.org/doc/install + for details or, if you are using gccgo, follow the instructions at + https://golang.org/doc/install/gccgo +- Grab the code from the repository and install the proto package. + The simplest way is to run `go get -u github.com/golang/protobuf/protoc-gen-go`. + The compiler plugin, protoc-gen-go, will be installed in $GOBIN, + defaulting to $GOPATH/bin. It must be in your $PATH for the protocol + compiler, protoc, to find it. + +This software has two parts: a 'protocol compiler plugin' that +generates Go source files that, once compiled, can access and manage +protocol buffers; and a library that implements run-time support for +encoding (marshaling), decoding (unmarshaling), and accessing protocol +buffers. + +There is support for gRPC in Go using protocol buffers. +See the note at the bottom of this file for details. + +There are no insertion points in the plugin. + + +## Using protocol buffers with Go ## + +Once the software is installed, there are two steps to using it. +First you must compile the protocol buffer definitions and then import +them, with the support library, into your program. + +To compile the protocol buffer definition, run protoc with the --go_out +parameter set to the directory you want to output the Go code to. + + protoc --go_out=. *.proto + +The generated files will be suffixed .pb.go. See the Test code below +for an example using such a file. + + +The package comment for the proto library contains text describing +the interface provided in Go for protocol buffers. Here is an edited +version. 
+ +========== + +The proto package converts data structures to and from the +wire format of protocol buffers. It works in concert with the +Go source code generated for .proto files by the protocol compiler. + +A summary of the properties of the protocol buffer interface +for a protocol buffer variable v: + + - Names are turned from camel_case to CamelCase for export. + - There are no methods on v to set fields; just treat + them as structure fields. + - There are getters that return a field's value if set, + and return the field's default value if unset. + The getters work even if the receiver is a nil message. + - The zero value for a struct is its correct initialization state. + All desired fields must be set before marshaling. + - A Reset() method will restore a protobuf struct to its zero state. + - Non-repeated fields are pointers to the values; nil means unset. + That is, optional or required field int32 f becomes F *int32. + - Repeated fields are slices. + - Helper functions are available to aid the setting of fields. + Helpers for getting values are superseded by the + GetFoo methods and their use is deprecated. + msg.Foo = proto.String("hello") // set field + - Constants are defined to hold the default values of all fields that + have them. They have the form Default_StructName_FieldName. + Because the getter methods handle defaulted values, + direct use of these constants should be rare. + - Enums are given type names and maps from names to values. + Enum values are prefixed with the enum's type name. Enum types have + a String method, and an Enum method to assist in message construction. + - Nested groups and enums have type names prefixed with the name of + the surrounding message type. + - Extensions are given descriptor names that start with E_, + followed by an underscore-delimited list of the nested messages + that contain it (if any) followed by the CamelCased name of the + extension field itself. 
HasExtension, ClearExtension, GetExtension + and SetExtension are functions for manipulating extensions. + - Oneof field sets are given a single field in their message, + with distinguished wrapper types for each possible field value. + - Marshal and Unmarshal are functions to encode and decode the wire format. + +When the .proto file specifies `syntax="proto3"`, there are some differences: + + - Non-repeated fields of non-message type are values instead of pointers. + - Enum types do not get an Enum method. + +Consider file test.proto, containing + +```proto + syntax = "proto2"; + package example; + + enum FOO { X = 17; }; + + message Test { + required string label = 1; + optional int32 type = 2 [default=77]; + repeated int64 reps = 3; + optional group OptionalGroup = 4 { + required string RequiredField = 5; + } + } +``` + +To create and play with a Test object from the example package, + +```go + package main + + import ( + "log" + + "github.com/golang/protobuf/proto" + "path/to/example" + ) + + func main() { + test := &example.Test { + Label: proto.String("hello"), + Type: proto.Int32(17), + Reps: []int64{1, 2, 3}, + Optionalgroup: &example.Test_OptionalGroup { + RequiredField: proto.String("good bye"), + }, + } + data, err := proto.Marshal(test) + if err != nil { + log.Fatal("marshaling error: ", err) + } + newTest := &example.Test{} + err = proto.Unmarshal(data, newTest) + if err != nil { + log.Fatal("unmarshaling error: ", err) + } + // Now test and newTest contain the same data. + if test.GetLabel() != newTest.GetLabel() { + log.Fatalf("data mismatch %q != %q", test.GetLabel(), newTest.GetLabel()) + } + // etc. + } +``` + +## Parameters ## + +To pass extra parameters to the plugin, use a comma-separated +parameter list separated from the output directory by a colon: + + + protoc --go_out=plugins=grpc,import_path=mypackage:. *.proto + + +- `import_prefix=xxx` - a prefix that is added onto the beginning of + all imports. 
Useful for things like generating protos in a + subdirectory, or regenerating vendored protobufs in-place. +- `import_path=foo/bar` - used as the package if no input files + declare `go_package`. If it contains slashes, everything up to the + rightmost slash is ignored. +- `plugins=plugin1+plugin2` - specifies the list of sub-plugins to + load. The only plugin in this repo is `grpc`. +- `Mfoo/bar.proto=quux/shme` - declares that foo/bar.proto is + associated with Go package quux/shme. This is subject to the + import_prefix parameter. + +## gRPC Support ## + +If a proto file specifies RPC services, protoc-gen-go can be instructed to +generate code compatible with gRPC (http://www.grpc.io/). To do this, pass +the `plugins` parameter to protoc-gen-go; the usual way is to insert it into +the --go_out argument to protoc: + + protoc --go_out=plugins=grpc:. *.proto + +## Compatibility ## + +The library and the generated code are expected to be stable over time. +However, we reserve the right to make breaking changes without notice for the +following reasons: + +- Security. A security issue in the specification or implementation may come to + light whose resolution requires breaking compatibility. We reserve the right + to address such security issues. +- Unspecified behavior. There are some aspects of the Protocol Buffers + specification that are undefined. Programs that depend on such unspecified + behavior may break in future releases. +- Specification errors or changes. If it becomes necessary to address an + inconsistency, incompleteness, or change in the Protocol Buffers + specification, resolving the issue could affect the meaning or legality of + existing programs. We reserve the right to address such issues, including + updating the implementations. +- Bugs. If the library has a bug that violates the specification, a program + that depends on the buggy behavior may break if the bug is fixed. We reserve + the right to fix such bugs. 
+- Adding methods or fields to generated structs. These may conflict with field + names that already exist in a schema, causing applications to break. When the + code generator encounters a field in the schema that would collide with a + generated field or method name, the code generator will append an underscore + to the generated field or method name. +- Adding, removing, or changing methods or fields in generated structs that + start with `XXX`. These parts of the generated code are exported out of + necessity, but should not be considered part of the public API. +- Adding, removing, or changing unexported symbols in generated code. + +Any breaking changes outside of these will be announced 6 months in advance to +protobuf@googlegroups.com. + +You should, whenever possible, use generated code created by the `protoc-gen-go` +tool built at the same commit as the `proto` package. The `proto` package +declares package-level constants in the form `ProtoPackageIsVersionX`. +Application code and generated code may depend on one of these constants to +ensure that compilation will fail if the available version of the proto library +is too old. Whenever we make a change to the generated code that requires newer +library support, in the same commit we will increment the version number of the +generated code and declare a new package-level constant whose name incorporates +the latest version number. Removing a compatibility constant is considered a +breaking change and would be subject to the announcement policy stated above. + +The `protoc-gen-go/generator` package exposes a plugin interface, +which is used by the gRPC code generation. This interface is not +supported and is subject to incompatible changes without notice. 
diff --git a/vendor/github.com/golang/protobuf/proto/clone.go b/vendor/github.com/golang/protobuf/proto/clone.go new file mode 100644 index 0000000..e392575 --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/clone.go @@ -0,0 +1,229 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2011 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Protocol buffer deep copy and merge. +// TODO: RawMessage. 
+ +package proto + +import ( + "log" + "reflect" + "strings" +) + +// Clone returns a deep copy of a protocol buffer. +func Clone(pb Message) Message { + in := reflect.ValueOf(pb) + if in.IsNil() { + return pb + } + + out := reflect.New(in.Type().Elem()) + // out is empty so a merge is a deep copy. + mergeStruct(out.Elem(), in.Elem()) + return out.Interface().(Message) +} + +// Merge merges src into dst. +// Required and optional fields that are set in src will be set to that value in dst. +// Elements of repeated fields will be appended. +// Merge panics if src and dst are not the same type, or if dst is nil. +func Merge(dst, src Message) { + in := reflect.ValueOf(src) + out := reflect.ValueOf(dst) + if out.IsNil() { + panic("proto: nil destination") + } + if in.Type() != out.Type() { + // Explicit test prior to mergeStruct so that mistyped nils will fail + panic("proto: type mismatch") + } + if in.IsNil() { + // Merging nil into non-nil is a quiet no-op + return + } + mergeStruct(out.Elem(), in.Elem()) +} + +func mergeStruct(out, in reflect.Value) { + sprop := GetProperties(in.Type()) + for i := 0; i < in.NumField(); i++ { + f := in.Type().Field(i) + if strings.HasPrefix(f.Name, "XXX_") { + continue + } + mergeAny(out.Field(i), in.Field(i), false, sprop.Prop[i]) + } + + if emIn, ok := extendable(in.Addr().Interface()); ok { + emOut, _ := extendable(out.Addr().Interface()) + mIn, muIn := emIn.extensionsRead() + if mIn != nil { + mOut := emOut.extensionsWrite() + muIn.Lock() + mergeExtension(mOut, mIn) + muIn.Unlock() + } + } + + uf := in.FieldByName("XXX_unrecognized") + if !uf.IsValid() { + return + } + uin := uf.Bytes() + if len(uin) > 0 { + out.FieldByName("XXX_unrecognized").SetBytes(append([]byte(nil), uin...)) + } +} + +// mergeAny performs a merge between two values of the same type. +// viaPtr indicates whether the values were indirected through a pointer (implying proto2). +// prop is set if this is a struct field (it may be nil). 
+func mergeAny(out, in reflect.Value, viaPtr bool, prop *Properties) { + if in.Type() == protoMessageType { + if !in.IsNil() { + if out.IsNil() { + out.Set(reflect.ValueOf(Clone(in.Interface().(Message)))) + } else { + Merge(out.Interface().(Message), in.Interface().(Message)) + } + } + return + } + switch in.Kind() { + case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int32, reflect.Int64, + reflect.String, reflect.Uint32, reflect.Uint64: + if !viaPtr && isProto3Zero(in) { + return + } + out.Set(in) + case reflect.Interface: + // Probably a oneof field; copy non-nil values. + if in.IsNil() { + return + } + // Allocate destination if it is not set, or set to a different type. + // Otherwise we will merge as normal. + if out.IsNil() || out.Elem().Type() != in.Elem().Type() { + out.Set(reflect.New(in.Elem().Elem().Type())) // interface -> *T -> T -> new(T) + } + mergeAny(out.Elem(), in.Elem(), false, nil) + case reflect.Map: + if in.Len() == 0 { + return + } + if out.IsNil() { + out.Set(reflect.MakeMap(in.Type())) + } + // For maps with value types of *T or []byte we need to deep copy each value. + elemKind := in.Type().Elem().Kind() + for _, key := range in.MapKeys() { + var val reflect.Value + switch elemKind { + case reflect.Ptr: + val = reflect.New(in.Type().Elem().Elem()) + mergeAny(val, in.MapIndex(key), false, nil) + case reflect.Slice: + val = in.MapIndex(key) + val = reflect.ValueOf(append([]byte{}, val.Bytes()...)) + default: + val = in.MapIndex(key) + } + out.SetMapIndex(key, val) + } + case reflect.Ptr: + if in.IsNil() { + return + } + if out.IsNil() { + out.Set(reflect.New(in.Elem().Type())) + } + mergeAny(out.Elem(), in.Elem(), true, nil) + case reflect.Slice: + if in.IsNil() { + return + } + if in.Type().Elem().Kind() == reflect.Uint8 { + // []byte is a scalar bytes field, not a repeated field. + + // Edge case: if this is in a proto3 message, a zero length + // bytes field is considered the zero value, and should not + // be merged. 
+ if prop != nil && prop.proto3 && in.Len() == 0 { + return + } + + // Make a deep copy. + // Append to []byte{} instead of []byte(nil) so that we never end up + // with a nil result. + out.SetBytes(append([]byte{}, in.Bytes()...)) + return + } + n := in.Len() + if out.IsNil() { + out.Set(reflect.MakeSlice(in.Type(), 0, n)) + } + switch in.Type().Elem().Kind() { + case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int32, reflect.Int64, + reflect.String, reflect.Uint32, reflect.Uint64: + out.Set(reflect.AppendSlice(out, in)) + default: + for i := 0; i < n; i++ { + x := reflect.Indirect(reflect.New(in.Type().Elem())) + mergeAny(x, in.Index(i), false, nil) + out.Set(reflect.Append(out, x)) + } + } + case reflect.Struct: + mergeStruct(out, in) + default: + // unknown type, so not a protocol buffer + log.Printf("proto: don't know how to copy %v", in) + } +} + +func mergeExtension(out, in map[int32]Extension) { + for extNum, eIn := range in { + eOut := Extension{desc: eIn.desc} + if eIn.value != nil { + v := reflect.New(reflect.TypeOf(eIn.value)).Elem() + mergeAny(v, reflect.ValueOf(eIn.value), false, nil) + eOut.value = v.Interface() + } + if eIn.enc != nil { + eOut.enc = make([]byte, len(eIn.enc)) + copy(eOut.enc, eIn.enc) + } + + out[extNum] = eOut + } +} diff --git a/vendor/github.com/golang/protobuf/proto/decode.go b/vendor/github.com/golang/protobuf/proto/decode.go new file mode 100644 index 0000000..aa20729 --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/decode.go @@ -0,0 +1,970 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2010 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package proto + +/* + * Routines for decoding protocol buffer data to construct in-memory representations. + */ + +import ( + "errors" + "fmt" + "io" + "os" + "reflect" +) + +// errOverflow is returned when an integer is too large to be represented. +var errOverflow = errors.New("proto: integer overflow") + +// ErrInternalBadWireType is returned by generated code when an incorrect +// wire type is encountered. It does not get returned to user code. +var ErrInternalBadWireType = errors.New("proto: internal error: bad wiretype for oneof") + +// The fundamental decoders that interpret bytes on the wire. +// Those that take integer types all return uint64 and are +// therefore of type valueDecoder. + +// DecodeVarint reads a varint-encoded integer from the slice. 
+// It returns the integer and the number of bytes consumed, or +// zero if there is not enough. +// This is the format for the +// int32, int64, uint32, uint64, bool, and enum +// protocol buffer types. +func DecodeVarint(buf []byte) (x uint64, n int) { + for shift := uint(0); shift < 64; shift += 7 { + if n >= len(buf) { + return 0, 0 + } + b := uint64(buf[n]) + n++ + x |= (b & 0x7F) << shift + if (b & 0x80) == 0 { + return x, n + } + } + + // The number is too large to represent in a 64-bit value. + return 0, 0 +} + +func (p *Buffer) decodeVarintSlow() (x uint64, err error) { + i := p.index + l := len(p.buf) + + for shift := uint(0); shift < 64; shift += 7 { + if i >= l { + err = io.ErrUnexpectedEOF + return + } + b := p.buf[i] + i++ + x |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + p.index = i + return + } + } + + // The number is too large to represent in a 64-bit value. + err = errOverflow + return +} + +// DecodeVarint reads a varint-encoded integer from the Buffer. +// This is the format for the +// int32, int64, uint32, uint64, bool, and enum +// protocol buffer types. 
+func (p *Buffer) DecodeVarint() (x uint64, err error) { + i := p.index + buf := p.buf + + if i >= len(buf) { + return 0, io.ErrUnexpectedEOF + } else if buf[i] < 0x80 { + p.index++ + return uint64(buf[i]), nil + } else if len(buf)-i < 10 { + return p.decodeVarintSlow() + } + + var b uint64 + // we already checked the first byte + x = uint64(buf[i]) - 0x80 + i++ + + b = uint64(buf[i]) + i++ + x += b << 7 + if b&0x80 == 0 { + goto done + } + x -= 0x80 << 7 + + b = uint64(buf[i]) + i++ + x += b << 14 + if b&0x80 == 0 { + goto done + } + x -= 0x80 << 14 + + b = uint64(buf[i]) + i++ + x += b << 21 + if b&0x80 == 0 { + goto done + } + x -= 0x80 << 21 + + b = uint64(buf[i]) + i++ + x += b << 28 + if b&0x80 == 0 { + goto done + } + x -= 0x80 << 28 + + b = uint64(buf[i]) + i++ + x += b << 35 + if b&0x80 == 0 { + goto done + } + x -= 0x80 << 35 + + b = uint64(buf[i]) + i++ + x += b << 42 + if b&0x80 == 0 { + goto done + } + x -= 0x80 << 42 + + b = uint64(buf[i]) + i++ + x += b << 49 + if b&0x80 == 0 { + goto done + } + x -= 0x80 << 49 + + b = uint64(buf[i]) + i++ + x += b << 56 + if b&0x80 == 0 { + goto done + } + x -= 0x80 << 56 + + b = uint64(buf[i]) + i++ + x += b << 63 + if b&0x80 == 0 { + goto done + } + // x -= 0x80 << 63 // Always zero. + + return 0, errOverflow + +done: + p.index = i + return x, nil +} + +// DecodeFixed64 reads a 64-bit integer from the Buffer. +// This is the format for the +// fixed64, sfixed64, and double protocol buffer types. +func (p *Buffer) DecodeFixed64() (x uint64, err error) { + // x, err already 0 + i := p.index + 8 + if i < 0 || i > len(p.buf) { + err = io.ErrUnexpectedEOF + return + } + p.index = i + + x = uint64(p.buf[i-8]) + x |= uint64(p.buf[i-7]) << 8 + x |= uint64(p.buf[i-6]) << 16 + x |= uint64(p.buf[i-5]) << 24 + x |= uint64(p.buf[i-4]) << 32 + x |= uint64(p.buf[i-3]) << 40 + x |= uint64(p.buf[i-2]) << 48 + x |= uint64(p.buf[i-1]) << 56 + return +} + +// DecodeFixed32 reads a 32-bit integer from the Buffer. 
+// This is the format for the +// fixed32, sfixed32, and float protocol buffer types. +func (p *Buffer) DecodeFixed32() (x uint64, err error) { + // x, err already 0 + i := p.index + 4 + if i < 0 || i > len(p.buf) { + err = io.ErrUnexpectedEOF + return + } + p.index = i + + x = uint64(p.buf[i-4]) + x |= uint64(p.buf[i-3]) << 8 + x |= uint64(p.buf[i-2]) << 16 + x |= uint64(p.buf[i-1]) << 24 + return +} + +// DecodeZigzag64 reads a zigzag-encoded 64-bit integer +// from the Buffer. +// This is the format used for the sint64 protocol buffer type. +func (p *Buffer) DecodeZigzag64() (x uint64, err error) { + x, err = p.DecodeVarint() + if err != nil { + return + } + x = (x >> 1) ^ uint64((int64(x&1)<<63)>>63) + return +} + +// DecodeZigzag32 reads a zigzag-encoded 32-bit integer +// from the Buffer. +// This is the format used for the sint32 protocol buffer type. +func (p *Buffer) DecodeZigzag32() (x uint64, err error) { + x, err = p.DecodeVarint() + if err != nil { + return + } + x = uint64((uint32(x) >> 1) ^ uint32((int32(x&1)<<31)>>31)) + return +} + +// These are not ValueDecoders: they produce an array of bytes or a string. +// bytes, embedded messages + +// DecodeRawBytes reads a count-delimited byte buffer from the Buffer. +// This is the format used for the bytes protocol buffer +// type and for embedded messages. +func (p *Buffer) DecodeRawBytes(alloc bool) (buf []byte, err error) { + n, err := p.DecodeVarint() + if err != nil { + return nil, err + } + + nb := int(n) + if nb < 0 { + return nil, fmt.Errorf("proto: bad byte length %d", nb) + } + end := p.index + nb + if end < p.index || end > len(p.buf) { + return nil, io.ErrUnexpectedEOF + } + + if !alloc { + // todo: check if can get more uses of alloc=false + buf = p.buf[p.index:end] + p.index += nb + return + } + + buf = make([]byte, nb) + copy(buf, p.buf[p.index:]) + p.index += nb + return +} + +// DecodeStringBytes reads an encoded string from the Buffer. 
+// This is the format used for the proto2 string type. +func (p *Buffer) DecodeStringBytes() (s string, err error) { + buf, err := p.DecodeRawBytes(false) + if err != nil { + return + } + return string(buf), nil +} + +// Skip the next item in the buffer. Its wire type is decoded and presented as an argument. +// If the protocol buffer has extensions, and the field matches, add it as an extension. +// Otherwise, if the XXX_unrecognized field exists, append the skipped data there. +func (o *Buffer) skipAndSave(t reflect.Type, tag, wire int, base structPointer, unrecField field) error { + oi := o.index + + err := o.skip(t, tag, wire) + if err != nil { + return err + } + + if !unrecField.IsValid() { + return nil + } + + ptr := structPointer_Bytes(base, unrecField) + + // Add the skipped field to struct field + obuf := o.buf + + o.buf = *ptr + o.EncodeVarint(uint64(tag<<3 | wire)) + *ptr = append(o.buf, obuf[oi:o.index]...) + + o.buf = obuf + + return nil +} + +// Skip the next item in the buffer. Its wire type is decoded and presented as an argument. +func (o *Buffer) skip(t reflect.Type, tag, wire int) error { + + var u uint64 + var err error + + switch wire { + case WireVarint: + _, err = o.DecodeVarint() + case WireFixed64: + _, err = o.DecodeFixed64() + case WireBytes: + _, err = o.DecodeRawBytes(false) + case WireFixed32: + _, err = o.DecodeFixed32() + case WireStartGroup: + for { + u, err = o.DecodeVarint() + if err != nil { + break + } + fwire := int(u & 0x7) + if fwire == WireEndGroup { + break + } + ftag := int(u >> 3) + err = o.skip(t, ftag, fwire) + if err != nil { + break + } + } + default: + err = fmt.Errorf("proto: can't skip unknown wire type %d for %s", wire, t) + } + return err +} + +// Unmarshaler is the interface representing objects that can +// unmarshal themselves. The method should reset the receiver before +// decoding starts. 
The argument points to data that may be +// overwritten, so implementations should not keep references to the +// buffer. +type Unmarshaler interface { + Unmarshal([]byte) error +} + +// Unmarshal parses the protocol buffer representation in buf and places the +// decoded result in pb. If the struct underlying pb does not match +// the data in buf, the results can be unpredictable. +// +// Unmarshal resets pb before starting to unmarshal, so any +// existing data in pb is always removed. Use UnmarshalMerge +// to preserve and append to existing data. +func Unmarshal(buf []byte, pb Message) error { + pb.Reset() + return UnmarshalMerge(buf, pb) +} + +// UnmarshalMerge parses the protocol buffer representation in buf and +// writes the decoded result to pb. If the struct underlying pb does not match +// the data in buf, the results can be unpredictable. +// +// UnmarshalMerge merges into existing data in pb. +// Most code should use Unmarshal instead. +func UnmarshalMerge(buf []byte, pb Message) error { + // If the object can unmarshal itself, let it. + if u, ok := pb.(Unmarshaler); ok { + return u.Unmarshal(buf) + } + return NewBuffer(buf).Unmarshal(pb) +} + +// DecodeMessage reads a count-delimited message from the Buffer. +func (p *Buffer) DecodeMessage(pb Message) error { + enc, err := p.DecodeRawBytes(false) + if err != nil { + return err + } + return NewBuffer(enc).Unmarshal(pb) +} + +// DecodeGroup reads a tag-delimited group from the Buffer. +func (p *Buffer) DecodeGroup(pb Message) error { + typ, base, err := getbase(pb) + if err != nil { + return err + } + return p.unmarshalType(typ.Elem(), GetProperties(typ.Elem()), true, base) +} + +// Unmarshal parses the protocol buffer representation in the +// Buffer and places the decoded result in pb. If the struct +// underlying pb does not match the data in the buffer, the results can be +// unpredictable. +// +// Unlike proto.Unmarshal, this does not reset pb before starting to unmarshal. 
+func (p *Buffer) Unmarshal(pb Message) error { + // If the object can unmarshal itself, let it. + if u, ok := pb.(Unmarshaler); ok { + err := u.Unmarshal(p.buf[p.index:]) + p.index = len(p.buf) + return err + } + + typ, base, err := getbase(pb) + if err != nil { + return err + } + + err = p.unmarshalType(typ.Elem(), GetProperties(typ.Elem()), false, base) + + if collectStats { + stats.Decode++ + } + + return err +} + +// unmarshalType does the work of unmarshaling a structure. +func (o *Buffer) unmarshalType(st reflect.Type, prop *StructProperties, is_group bool, base structPointer) error { + var state errorState + required, reqFields := prop.reqCount, uint64(0) + + var err error + for err == nil && o.index < len(o.buf) { + oi := o.index + var u uint64 + u, err = o.DecodeVarint() + if err != nil { + break + } + wire := int(u & 0x7) + if wire == WireEndGroup { + if is_group { + if required > 0 { + // Not enough information to determine the exact field. + // (See below.) + return &RequiredNotSetError{"{Unknown}"} + } + return nil // input is satisfied + } + return fmt.Errorf("proto: %s: wiretype end group for non-group", st) + } + tag := int(u >> 3) + if tag <= 0 { + return fmt.Errorf("proto: %s: illegal tag %d (wire type %d)", st, tag, wire) + } + fieldnum, ok := prop.decoderTags.get(tag) + if !ok { + // Maybe it's an extension? + if prop.extendable { + if e, _ := extendable(structPointer_Interface(base, st)); isExtensionField(e, int32(tag)) { + if err = o.skip(st, tag, wire); err == nil { + extmap := e.extensionsWrite() + ext := extmap[int32(tag)] // may be missing + ext.enc = append(ext.enc, o.buf[oi:o.index]...) + extmap[int32(tag)] = ext + } + continue + } + } + // Maybe it's a oneof? + if prop.oneofUnmarshaler != nil { + m := structPointer_Interface(base, st).(Message) + // First return value indicates whether tag is a oneof field. 
+ ok, err = prop.oneofUnmarshaler(m, tag, wire, o) + if err == ErrInternalBadWireType { + // Map the error to something more descriptive. + // Do the formatting here to save generated code space. + err = fmt.Errorf("bad wiretype for oneof field in %T", m) + } + if ok { + continue + } + } + err = o.skipAndSave(st, tag, wire, base, prop.unrecField) + continue + } + p := prop.Prop[fieldnum] + + if p.dec == nil { + fmt.Fprintf(os.Stderr, "proto: no protobuf decoder for %s.%s\n", st, st.Field(fieldnum).Name) + continue + } + dec := p.dec + if wire != WireStartGroup && wire != p.WireType { + if wire == WireBytes && p.packedDec != nil { + // a packable field + dec = p.packedDec + } else { + err = fmt.Errorf("proto: bad wiretype for field %s.%s: got wiretype %d, want %d", st, st.Field(fieldnum).Name, wire, p.WireType) + continue + } + } + decErr := dec(o, p, base) + if decErr != nil && !state.shouldContinue(decErr, p) { + err = decErr + } + if err == nil && p.Required { + // Successfully decoded a required field. + if tag <= 64 { + // use bitmap for fields 1-64 to catch field reuse. + var mask uint64 = 1 << uint64(tag-1) + if reqFields&mask == 0 { + // new required field + reqFields |= mask + required-- + } + } else { + // This is imprecise. It can be fooled by a required field + // with a tag > 64 that is encoded twice; that's very rare. + // A fully correct implementation would require allocating + // a data structure, which we would like to avoid. + required-- + } + } + } + if err == nil { + if is_group { + return io.ErrUnexpectedEOF + } + if state.err != nil { + return state.err + } + if required > 0 { + // Not enough information to determine the exact field. If we use extra + // CPU, we could determine the field only if the missing required field + // has a tag <= 64 and we check reqFields. 
+ return &RequiredNotSetError{"{Unknown}"} + } + } + return err +} + +// Individual type decoders +// For each, +// u is the decoded value, +// v is a pointer to the field (pointer) in the struct + +// Sizes of the pools to allocate inside the Buffer. +// The goal is modest amortization and allocation +// on at least 16-byte boundaries. +const ( + boolPoolSize = 16 + uint32PoolSize = 8 + uint64PoolSize = 4 +) + +// Decode a bool. +func (o *Buffer) dec_bool(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + if len(o.bools) == 0 { + o.bools = make([]bool, boolPoolSize) + } + o.bools[0] = u != 0 + *structPointer_Bool(base, p.field) = &o.bools[0] + o.bools = o.bools[1:] + return nil +} + +func (o *Buffer) dec_proto3_bool(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + *structPointer_BoolVal(base, p.field) = u != 0 + return nil +} + +// Decode an int32. +func (o *Buffer) dec_int32(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + word32_Set(structPointer_Word32(base, p.field), o, uint32(u)) + return nil +} + +func (o *Buffer) dec_proto3_int32(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + word32Val_Set(structPointer_Word32Val(base, p.field), uint32(u)) + return nil +} + +// Decode an int64. +func (o *Buffer) dec_int64(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + word64_Set(structPointer_Word64(base, p.field), o, u) + return nil +} + +func (o *Buffer) dec_proto3_int64(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + word64Val_Set(structPointer_Word64Val(base, p.field), o, u) + return nil +} + +// Decode a string. 
+func (o *Buffer) dec_string(p *Properties, base structPointer) error { + s, err := o.DecodeStringBytes() + if err != nil { + return err + } + *structPointer_String(base, p.field) = &s + return nil +} + +func (o *Buffer) dec_proto3_string(p *Properties, base structPointer) error { + s, err := o.DecodeStringBytes() + if err != nil { + return err + } + *structPointer_StringVal(base, p.field) = s + return nil +} + +// Decode a slice of bytes ([]byte). +func (o *Buffer) dec_slice_byte(p *Properties, base structPointer) error { + b, err := o.DecodeRawBytes(true) + if err != nil { + return err + } + *structPointer_Bytes(base, p.field) = b + return nil +} + +// Decode a slice of bools ([]bool). +func (o *Buffer) dec_slice_bool(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + v := structPointer_BoolSlice(base, p.field) + *v = append(*v, u != 0) + return nil +} + +// Decode a slice of bools ([]bool) in packed format. +func (o *Buffer) dec_slice_packed_bool(p *Properties, base structPointer) error { + v := structPointer_BoolSlice(base, p.field) + + nn, err := o.DecodeVarint() + if err != nil { + return err + } + nb := int(nn) // number of bytes of encoded bools + fin := o.index + nb + if fin < o.index { + return errOverflow + } + + y := *v + for o.index < fin { + u, err := p.valDec(o) + if err != nil { + return err + } + y = append(y, u != 0) + } + + *v = y + return nil +} + +// Decode a slice of int32s ([]int32). +func (o *Buffer) dec_slice_int32(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + structPointer_Word32Slice(base, p.field).Append(uint32(u)) + return nil +} + +// Decode a slice of int32s ([]int32) in packed format. 
+func (o *Buffer) dec_slice_packed_int32(p *Properties, base structPointer) error { + v := structPointer_Word32Slice(base, p.field) + + nn, err := o.DecodeVarint() + if err != nil { + return err + } + nb := int(nn) // number of bytes of encoded int32s + + fin := o.index + nb + if fin < o.index { + return errOverflow + } + for o.index < fin { + u, err := p.valDec(o) + if err != nil { + return err + } + v.Append(uint32(u)) + } + return nil +} + +// Decode a slice of int64s ([]int64). +func (o *Buffer) dec_slice_int64(p *Properties, base structPointer) error { + u, err := p.valDec(o) + if err != nil { + return err + } + + structPointer_Word64Slice(base, p.field).Append(u) + return nil +} + +// Decode a slice of int64s ([]int64) in packed format. +func (o *Buffer) dec_slice_packed_int64(p *Properties, base structPointer) error { + v := structPointer_Word64Slice(base, p.field) + + nn, err := o.DecodeVarint() + if err != nil { + return err + } + nb := int(nn) // number of bytes of encoded int64s + + fin := o.index + nb + if fin < o.index { + return errOverflow + } + for o.index < fin { + u, err := p.valDec(o) + if err != nil { + return err + } + v.Append(u) + } + return nil +} + +// Decode a slice of strings ([]string). +func (o *Buffer) dec_slice_string(p *Properties, base structPointer) error { + s, err := o.DecodeStringBytes() + if err != nil { + return err + } + v := structPointer_StringSlice(base, p.field) + *v = append(*v, s) + return nil +} + +// Decode a slice of slice of bytes ([][]byte). +func (o *Buffer) dec_slice_slice_byte(p *Properties, base structPointer) error { + b, err := o.DecodeRawBytes(true) + if err != nil { + return err + } + v := structPointer_BytesSlice(base, p.field) + *v = append(*v, b) + return nil +} + +// Decode a map field. 
+func (o *Buffer) dec_new_map(p *Properties, base structPointer) error { + raw, err := o.DecodeRawBytes(false) + if err != nil { + return err + } + oi := o.index // index at the end of this map entry + o.index -= len(raw) // move buffer back to start of map entry + + mptr := structPointer_NewAt(base, p.field, p.mtype) // *map[K]V + if mptr.Elem().IsNil() { + mptr.Elem().Set(reflect.MakeMap(mptr.Type().Elem())) + } + v := mptr.Elem() // map[K]V + + // Prepare addressable doubly-indirect placeholders for the key and value types. + // See enc_new_map for why. + keyptr := reflect.New(reflect.PtrTo(p.mtype.Key())).Elem() // addressable *K + keybase := toStructPointer(keyptr.Addr()) // **K + + var valbase structPointer + var valptr reflect.Value + switch p.mtype.Elem().Kind() { + case reflect.Slice: + // []byte + var dummy []byte + valptr = reflect.ValueOf(&dummy) // *[]byte + valbase = toStructPointer(valptr) // *[]byte + case reflect.Ptr: + // message; valptr is **Msg; need to allocate the intermediate pointer + valptr = reflect.New(reflect.PtrTo(p.mtype.Elem())).Elem() // addressable *V + valptr.Set(reflect.New(valptr.Type().Elem())) + valbase = toStructPointer(valptr) + default: + // everything else + valptr = reflect.New(reflect.PtrTo(p.mtype.Elem())).Elem() // addressable *V + valbase = toStructPointer(valptr.Addr()) // **V + } + + // Decode. + // This parses a restricted wire format, namely the encoding of a message + // with two fields. See enc_new_map for the format. + for o.index < oi { + // tagcode for key and value properties are always a single byte + // because they have tags 1 and 2. + tagcode := o.buf[o.index] + o.index++ + switch tagcode { + case p.mkeyprop.tagcode[0]: + if err := p.mkeyprop.dec(o, p.mkeyprop, keybase); err != nil { + return err + } + case p.mvalprop.tagcode[0]: + if err := p.mvalprop.dec(o, p.mvalprop, valbase); err != nil { + return err + } + default: + // TODO: Should we silently skip this instead? 
+ return fmt.Errorf("proto: bad map data tag %d", raw[0]) + } + } + keyelem, valelem := keyptr.Elem(), valptr.Elem() + if !keyelem.IsValid() { + keyelem = reflect.Zero(p.mtype.Key()) + } + if !valelem.IsValid() { + valelem = reflect.Zero(p.mtype.Elem()) + } + + v.SetMapIndex(keyelem, valelem) + return nil +} + +// Decode a group. +func (o *Buffer) dec_struct_group(p *Properties, base structPointer) error { + bas := structPointer_GetStructPointer(base, p.field) + if structPointer_IsNil(bas) { + // allocate new nested message + bas = toStructPointer(reflect.New(p.stype)) + structPointer_SetStructPointer(base, p.field, bas) + } + return o.unmarshalType(p.stype, p.sprop, true, bas) +} + +// Decode an embedded message. +func (o *Buffer) dec_struct_message(p *Properties, base structPointer) (err error) { + raw, e := o.DecodeRawBytes(false) + if e != nil { + return e + } + + bas := structPointer_GetStructPointer(base, p.field) + if structPointer_IsNil(bas) { + // allocate new nested message + bas = toStructPointer(reflect.New(p.stype)) + structPointer_SetStructPointer(base, p.field, bas) + } + + // If the object can unmarshal itself, let it. + if p.isUnmarshaler { + iv := structPointer_Interface(bas, p.stype) + return iv.(Unmarshaler).Unmarshal(raw) + } + + obuf := o.buf + oi := o.index + o.buf = raw + o.index = 0 + + err = o.unmarshalType(p.stype, p.sprop, false, bas) + o.buf = obuf + o.index = oi + + return err +} + +// Decode a slice of embedded messages. +func (o *Buffer) dec_slice_struct_message(p *Properties, base structPointer) error { + return o.dec_slice_struct(p, false, base) +} + +// Decode a slice of embedded groups. +func (o *Buffer) dec_slice_struct_group(p *Properties, base structPointer) error { + return o.dec_slice_struct(p, true, base) +} + +// Decode a slice of structs ([]*struct). 
+func (o *Buffer) dec_slice_struct(p *Properties, is_group bool, base structPointer) error { + v := reflect.New(p.stype) + bas := toStructPointer(v) + structPointer_StructPointerSlice(base, p.field).Append(bas) + + if is_group { + err := o.unmarshalType(p.stype, p.sprop, is_group, bas) + return err + } + + raw, err := o.DecodeRawBytes(false) + if err != nil { + return err + } + + // If the object can unmarshal itself, let it. + if p.isUnmarshaler { + iv := v.Interface() + return iv.(Unmarshaler).Unmarshal(raw) + } + + obuf := o.buf + oi := o.index + o.buf = raw + o.index = 0 + + err = o.unmarshalType(p.stype, p.sprop, is_group, bas) + + o.buf = obuf + o.index = oi + + return err +} diff --git a/vendor/github.com/golang/protobuf/proto/discard.go b/vendor/github.com/golang/protobuf/proto/discard.go new file mode 100644 index 0000000..bd0e3bb --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/discard.go @@ -0,0 +1,151 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2017 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package proto + +import ( + "fmt" + "reflect" + "strings" +) + +// DiscardUnknown recursively discards all unknown fields from this message +// and all embedded messages. +// +// When unmarshaling a message with unrecognized fields, the tags and values +// of such fields are preserved in the Message. This allows a later call to +// marshal to be able to produce a message that continues to have those +// unrecognized fields. To avoid this, DiscardUnknown is used to +// explicitly clear the unknown fields after unmarshaling. +// +// For proto2 messages, the unknown fields of message extensions are only +// discarded from messages that have been accessed via GetExtension. +func DiscardUnknown(m Message) { + discardLegacy(m) +} + +func discardLegacy(m Message) { + v := reflect.ValueOf(m) + if v.Kind() != reflect.Ptr || v.IsNil() { + return + } + v = v.Elem() + if v.Kind() != reflect.Struct { + return + } + t := v.Type() + + for i := 0; i < v.NumField(); i++ { + f := t.Field(i) + if strings.HasPrefix(f.Name, "XXX_") { + continue + } + vf := v.Field(i) + tf := f.Type + + // Unwrap tf to get its most basic type. 
+ var isPointer, isSlice bool + if tf.Kind() == reflect.Slice && tf.Elem().Kind() != reflect.Uint8 { + isSlice = true + tf = tf.Elem() + } + if tf.Kind() == reflect.Ptr { + isPointer = true + tf = tf.Elem() + } + if isPointer && isSlice && tf.Kind() != reflect.Struct { + panic(fmt.Sprintf("%T.%s cannot be a slice of pointers to primitive types", m, f.Name)) + } + + switch tf.Kind() { + case reflect.Struct: + switch { + case !isPointer: + panic(fmt.Sprintf("%T.%s cannot be a direct struct value", m, f.Name)) + case isSlice: // E.g., []*pb.T + for j := 0; j < vf.Len(); j++ { + discardLegacy(vf.Index(j).Interface().(Message)) + } + default: // E.g., *pb.T + discardLegacy(vf.Interface().(Message)) + } + case reflect.Map: + switch { + case isPointer || isSlice: + panic(fmt.Sprintf("%T.%s cannot be a pointer to a map or a slice of map values", m, f.Name)) + default: // E.g., map[K]V + tv := vf.Type().Elem() + if tv.Kind() == reflect.Ptr && tv.Implements(protoMessageType) { // Proto struct (e.g., *T) + for _, key := range vf.MapKeys() { + val := vf.MapIndex(key) + discardLegacy(val.Interface().(Message)) + } + } + } + case reflect.Interface: + // Must be oneof field. 
+ switch { + case isPointer || isSlice: + panic(fmt.Sprintf("%T.%s cannot be a pointer to a interface or a slice of interface values", m, f.Name)) + default: // E.g., test_proto.isCommunique_Union interface + if !vf.IsNil() && f.Tag.Get("protobuf_oneof") != "" { + vf = vf.Elem() // E.g., *test_proto.Communique_Msg + if !vf.IsNil() { + vf = vf.Elem() // E.g., test_proto.Communique_Msg + vf = vf.Field(0) // E.g., Proto struct (e.g., *T) or primitive value + if vf.Kind() == reflect.Ptr { + discardLegacy(vf.Interface().(Message)) + } + } + } + } + } + } + + if vf := v.FieldByName("XXX_unrecognized"); vf.IsValid() { + if vf.Type() != reflect.TypeOf([]byte{}) { + panic("expected XXX_unrecognized to be of type []byte") + } + vf.Set(reflect.ValueOf([]byte(nil))) + } + + // For proto2 messages, only discard unknown fields in message extensions + // that have been accessed via GetExtension. + if em, ok := extendable(m); ok { + // Ignore lock since discardLegacy is not concurrency safe. + emm, _ := em.extensionsRead() + for _, mx := range emm { + if m, ok := mx.value.(Message); ok { + discardLegacy(m) + } + } + } +} diff --git a/vendor/github.com/golang/protobuf/proto/encode.go b/vendor/github.com/golang/protobuf/proto/encode.go new file mode 100644 index 0000000..8b84d1b --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/encode.go @@ -0,0 +1,1362 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2010 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package proto + +/* + * Routines for encoding data into the wire format for protocol buffers. + */ + +import ( + "errors" + "fmt" + "reflect" + "sort" +) + +// RequiredNotSetError is the error returned if Marshal is called with +// a protocol buffer struct whose required fields have not +// all been initialized. It is also the error returned if Unmarshal is +// called with an encoded protocol buffer that does not include all the +// required fields. +// +// When printed, RequiredNotSetError reports the first unset required field in a +// message. If the field cannot be precisely determined, it is reported as +// "{Unknown}". 
+type RequiredNotSetError struct { + field string +} + +func (e *RequiredNotSetError) Error() string { + return fmt.Sprintf("proto: required field %q not set", e.field) +} + +var ( + // errRepeatedHasNil is the error returned if Marshal is called with + // a struct with a repeated field containing a nil element. + errRepeatedHasNil = errors.New("proto: repeated field has nil element") + + // errOneofHasNil is the error returned if Marshal is called with + // a struct with a oneof field containing a nil element. + errOneofHasNil = errors.New("proto: oneof field has nil value") + + // ErrNil is the error returned if Marshal is called with nil. + ErrNil = errors.New("proto: Marshal called with nil") + + // ErrTooLarge is the error returned if Marshal is called with a + // message that encodes to >2GB. + ErrTooLarge = errors.New("proto: message encodes to over 2 GB") +) + +// The fundamental encoders that put bytes on the wire. +// Those that take integer types all accept uint64 and are +// therefore of type valueEncoder. + +const maxVarintBytes = 10 // maximum length of a varint + +// maxMarshalSize is the largest allowed size of an encoded protobuf, +// since C++ and Java use signed int32s for the size. +const maxMarshalSize = 1<<31 - 1 + +// EncodeVarint returns the varint encoding of x. +// This is the format for the +// int32, int64, uint32, uint64, bool, and enum +// protocol buffer types. +// Not used by the package itself, but helpful to clients +// wishing to use the same encoding. +func EncodeVarint(x uint64) []byte { + var buf [maxVarintBytes]byte + var n int + for n = 0; x > 127; n++ { + buf[n] = 0x80 | uint8(x&0x7F) + x >>= 7 + } + buf[n] = uint8(x) + n++ + return buf[0:n] +} + +// EncodeVarint writes a varint-encoded integer to the Buffer. +// This is the format for the +// int32, int64, uint32, uint64, bool, and enum +// protocol buffer types. 
+func (p *Buffer) EncodeVarint(x uint64) error { + for x >= 1<<7 { + p.buf = append(p.buf, uint8(x&0x7f|0x80)) + x >>= 7 + } + p.buf = append(p.buf, uint8(x)) + return nil +} + +// SizeVarint returns the varint encoding size of an integer. +func SizeVarint(x uint64) int { + return sizeVarint(x) +} + +func sizeVarint(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} + +// EncodeFixed64 writes a 64-bit integer to the Buffer. +// This is the format for the +// fixed64, sfixed64, and double protocol buffer types. +func (p *Buffer) EncodeFixed64(x uint64) error { + p.buf = append(p.buf, + uint8(x), + uint8(x>>8), + uint8(x>>16), + uint8(x>>24), + uint8(x>>32), + uint8(x>>40), + uint8(x>>48), + uint8(x>>56)) + return nil +} + +func sizeFixed64(x uint64) int { + return 8 +} + +// EncodeFixed32 writes a 32-bit integer to the Buffer. +// This is the format for the +// fixed32, sfixed32, and float protocol buffer types. +func (p *Buffer) EncodeFixed32(x uint64) error { + p.buf = append(p.buf, + uint8(x), + uint8(x>>8), + uint8(x>>16), + uint8(x>>24)) + return nil +} + +func sizeFixed32(x uint64) int { + return 4 +} + +// EncodeZigzag64 writes a zigzag-encoded 64-bit integer +// to the Buffer. +// This is the format used for the sint64 protocol buffer type. +func (p *Buffer) EncodeZigzag64(x uint64) error { + // use signed number to get arithmetic right shift. + return p.EncodeVarint((x << 1) ^ uint64((int64(x) >> 63))) +} + +func sizeZigzag64(x uint64) int { + return sizeVarint((x << 1) ^ uint64((int64(x) >> 63))) +} + +// EncodeZigzag32 writes a zigzag-encoded 32-bit integer +// to the Buffer. +// This is the format used for the sint32 protocol buffer type. +func (p *Buffer) EncodeZigzag32(x uint64) error { + // use signed number to get arithmetic right shift. 
+ return p.EncodeVarint(uint64((uint32(x) << 1) ^ uint32((int32(x) >> 31)))) +} + +func sizeZigzag32(x uint64) int { + return sizeVarint(uint64((uint32(x) << 1) ^ uint32((int32(x) >> 31)))) +} + +// EncodeRawBytes writes a count-delimited byte buffer to the Buffer. +// This is the format used for the bytes protocol buffer +// type and for embedded messages. +func (p *Buffer) EncodeRawBytes(b []byte) error { + p.EncodeVarint(uint64(len(b))) + p.buf = append(p.buf, b...) + return nil +} + +func sizeRawBytes(b []byte) int { + return sizeVarint(uint64(len(b))) + + len(b) +} + +// EncodeStringBytes writes an encoded string to the Buffer. +// This is the format used for the proto2 string type. +func (p *Buffer) EncodeStringBytes(s string) error { + p.EncodeVarint(uint64(len(s))) + p.buf = append(p.buf, s...) + return nil +} + +func sizeStringBytes(s string) int { + return sizeVarint(uint64(len(s))) + + len(s) +} + +// Marshaler is the interface representing objects that can marshal themselves. +type Marshaler interface { + Marshal() ([]byte, error) +} + +// Marshal takes the protocol buffer +// and encodes it into the wire format, returning the data. +func Marshal(pb Message) ([]byte, error) { + // Can the object marshal itself? + if m, ok := pb.(Marshaler); ok { + return m.Marshal() + } + p := NewBuffer(nil) + err := p.Marshal(pb) + if p.buf == nil && err == nil { + // Return a non-nil slice on success. + return []byte{}, nil + } + return p.buf, err +} + +// EncodeMessage writes the protocol buffer to the Buffer, +// prefixed by a varint-encoded length. +func (p *Buffer) EncodeMessage(pb Message) error { + t, base, err := getbase(pb) + if structPointer_IsNil(base) { + return ErrNil + } + if err == nil { + var state errorState + err = p.enc_len_struct(GetProperties(t.Elem()), base, &state) + } + return err +} + +// Marshal takes the protocol buffer +// and encodes it into the wire format, writing the result to the +// Buffer. 
+func (p *Buffer) Marshal(pb Message) error { + // Can the object marshal itself? + if m, ok := pb.(Marshaler); ok { + data, err := m.Marshal() + p.buf = append(p.buf, data...) + return err + } + + t, base, err := getbase(pb) + if structPointer_IsNil(base) { + return ErrNil + } + if err == nil { + err = p.enc_struct(GetProperties(t.Elem()), base) + } + + if collectStats { + (stats).Encode++ // Parens are to work around a goimports bug. + } + + if len(p.buf) > maxMarshalSize { + return ErrTooLarge + } + return err +} + +// Size returns the encoded size of a protocol buffer. +func Size(pb Message) (n int) { + // Can the object marshal itself? If so, Size is slow. + // TODO: add Size to Marshaler, or add a Sizer interface. + if m, ok := pb.(Marshaler); ok { + b, _ := m.Marshal() + return len(b) + } + + t, base, err := getbase(pb) + if structPointer_IsNil(base) { + return 0 + } + if err == nil { + n = size_struct(GetProperties(t.Elem()), base) + } + + if collectStats { + (stats).Size++ // Parens are to work around a goimports bug. + } + + return +} + +// Individual type encoders. + +// Encode a bool. +func (o *Buffer) enc_bool(p *Properties, base structPointer) error { + v := *structPointer_Bool(base, p.field) + if v == nil { + return ErrNil + } + x := 0 + if *v { + x = 1 + } + o.buf = append(o.buf, p.tagcode...) + p.valEnc(o, uint64(x)) + return nil +} + +func (o *Buffer) enc_proto3_bool(p *Properties, base structPointer) error { + v := *structPointer_BoolVal(base, p.field) + if !v { + return ErrNil + } + o.buf = append(o.buf, p.tagcode...) 
+ p.valEnc(o, 1) + return nil +} + +func size_bool(p *Properties, base structPointer) int { + v := *structPointer_Bool(base, p.field) + if v == nil { + return 0 + } + return len(p.tagcode) + 1 // each bool takes exactly one byte +} + +func size_proto3_bool(p *Properties, base structPointer) int { + v := *structPointer_BoolVal(base, p.field) + if !v && !p.oneof { + return 0 + } + return len(p.tagcode) + 1 // each bool takes exactly one byte +} + +// Encode an int32. +func (o *Buffer) enc_int32(p *Properties, base structPointer) error { + v := structPointer_Word32(base, p.field) + if word32_IsNil(v) { + return ErrNil + } + x := int32(word32_Get(v)) // permit sign extension to use full 64-bit range + o.buf = append(o.buf, p.tagcode...) + p.valEnc(o, uint64(x)) + return nil +} + +func (o *Buffer) enc_proto3_int32(p *Properties, base structPointer) error { + v := structPointer_Word32Val(base, p.field) + x := int32(word32Val_Get(v)) // permit sign extension to use full 64-bit range + if x == 0 { + return ErrNil + } + o.buf = append(o.buf, p.tagcode...) + p.valEnc(o, uint64(x)) + return nil +} + +func size_int32(p *Properties, base structPointer) (n int) { + v := structPointer_Word32(base, p.field) + if word32_IsNil(v) { + return 0 + } + x := int32(word32_Get(v)) // permit sign extension to use full 64-bit range + n += len(p.tagcode) + n += p.valSize(uint64(x)) + return +} + +func size_proto3_int32(p *Properties, base structPointer) (n int) { + v := structPointer_Word32Val(base, p.field) + x := int32(word32Val_Get(v)) // permit sign extension to use full 64-bit range + if x == 0 && !p.oneof { + return 0 + } + n += len(p.tagcode) + n += p.valSize(uint64(x)) + return +} + +// Encode a uint32. +// Exactly the same as int32, except for no sign extension. +func (o *Buffer) enc_uint32(p *Properties, base structPointer) error { + v := structPointer_Word32(base, p.field) + if word32_IsNil(v) { + return ErrNil + } + x := word32_Get(v) + o.buf = append(o.buf, p.tagcode...) 
+ p.valEnc(o, uint64(x)) + return nil +} + +func (o *Buffer) enc_proto3_uint32(p *Properties, base structPointer) error { + v := structPointer_Word32Val(base, p.field) + x := word32Val_Get(v) + if x == 0 { + return ErrNil + } + o.buf = append(o.buf, p.tagcode...) + p.valEnc(o, uint64(x)) + return nil +} + +func size_uint32(p *Properties, base structPointer) (n int) { + v := structPointer_Word32(base, p.field) + if word32_IsNil(v) { + return 0 + } + x := word32_Get(v) + n += len(p.tagcode) + n += p.valSize(uint64(x)) + return +} + +func size_proto3_uint32(p *Properties, base structPointer) (n int) { + v := structPointer_Word32Val(base, p.field) + x := word32Val_Get(v) + if x == 0 && !p.oneof { + return 0 + } + n += len(p.tagcode) + n += p.valSize(uint64(x)) + return +} + +// Encode an int64. +func (o *Buffer) enc_int64(p *Properties, base structPointer) error { + v := structPointer_Word64(base, p.field) + if word64_IsNil(v) { + return ErrNil + } + x := word64_Get(v) + o.buf = append(o.buf, p.tagcode...) + p.valEnc(o, x) + return nil +} + +func (o *Buffer) enc_proto3_int64(p *Properties, base structPointer) error { + v := structPointer_Word64Val(base, p.field) + x := word64Val_Get(v) + if x == 0 { + return ErrNil + } + o.buf = append(o.buf, p.tagcode...) + p.valEnc(o, x) + return nil +} + +func size_int64(p *Properties, base structPointer) (n int) { + v := structPointer_Word64(base, p.field) + if word64_IsNil(v) { + return 0 + } + x := word64_Get(v) + n += len(p.tagcode) + n += p.valSize(x) + return +} + +func size_proto3_int64(p *Properties, base structPointer) (n int) { + v := structPointer_Word64Val(base, p.field) + x := word64Val_Get(v) + if x == 0 && !p.oneof { + return 0 + } + n += len(p.tagcode) + n += p.valSize(x) + return +} + +// Encode a string. +func (o *Buffer) enc_string(p *Properties, base structPointer) error { + v := *structPointer_String(base, p.field) + if v == nil { + return ErrNil + } + x := *v + o.buf = append(o.buf, p.tagcode...) 
+ o.EncodeStringBytes(x) + return nil +} + +func (o *Buffer) enc_proto3_string(p *Properties, base structPointer) error { + v := *structPointer_StringVal(base, p.field) + if v == "" { + return ErrNil + } + o.buf = append(o.buf, p.tagcode...) + o.EncodeStringBytes(v) + return nil +} + +func size_string(p *Properties, base structPointer) (n int) { + v := *structPointer_String(base, p.field) + if v == nil { + return 0 + } + x := *v + n += len(p.tagcode) + n += sizeStringBytes(x) + return +} + +func size_proto3_string(p *Properties, base structPointer) (n int) { + v := *structPointer_StringVal(base, p.field) + if v == "" && !p.oneof { + return 0 + } + n += len(p.tagcode) + n += sizeStringBytes(v) + return +} + +// All protocol buffer fields are nillable, but be careful. +func isNil(v reflect.Value) bool { + switch v.Kind() { + case reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice: + return v.IsNil() + } + return false +} + +// Encode a message struct. +func (o *Buffer) enc_struct_message(p *Properties, base structPointer) error { + var state errorState + structp := structPointer_GetStructPointer(base, p.field) + if structPointer_IsNil(structp) { + return ErrNil + } + + // Can the object marshal itself? + if p.isMarshaler { + m := structPointer_Interface(structp, p.stype).(Marshaler) + data, err := m.Marshal() + if err != nil && !state.shouldContinue(err, nil) { + return err + } + o.buf = append(o.buf, p.tagcode...) + o.EncodeRawBytes(data) + return state.err + } + + o.buf = append(o.buf, p.tagcode...) + return o.enc_len_struct(p.sprop, structp, &state) +} + +func size_struct_message(p *Properties, base structPointer) int { + structp := structPointer_GetStructPointer(base, p.field) + if structPointer_IsNil(structp) { + return 0 + } + + // Can the object marshal itself? 
+ if p.isMarshaler { + m := structPointer_Interface(structp, p.stype).(Marshaler) + data, _ := m.Marshal() + n0 := len(p.tagcode) + n1 := sizeRawBytes(data) + return n0 + n1 + } + + n0 := len(p.tagcode) + n1 := size_struct(p.sprop, structp) + n2 := sizeVarint(uint64(n1)) // size of encoded length + return n0 + n1 + n2 +} + +// Encode a group struct. +func (o *Buffer) enc_struct_group(p *Properties, base structPointer) error { + var state errorState + b := structPointer_GetStructPointer(base, p.field) + if structPointer_IsNil(b) { + return ErrNil + } + + o.EncodeVarint(uint64((p.Tag << 3) | WireStartGroup)) + err := o.enc_struct(p.sprop, b) + if err != nil && !state.shouldContinue(err, nil) { + return err + } + o.EncodeVarint(uint64((p.Tag << 3) | WireEndGroup)) + return state.err +} + +func size_struct_group(p *Properties, base structPointer) (n int) { + b := structPointer_GetStructPointer(base, p.field) + if structPointer_IsNil(b) { + return 0 + } + + n += sizeVarint(uint64((p.Tag << 3) | WireStartGroup)) + n += size_struct(p.sprop, b) + n += sizeVarint(uint64((p.Tag << 3) | WireEndGroup)) + return +} + +// Encode a slice of bools ([]bool). +func (o *Buffer) enc_slice_bool(p *Properties, base structPointer) error { + s := *structPointer_BoolSlice(base, p.field) + l := len(s) + if l == 0 { + return ErrNil + } + for _, x := range s { + o.buf = append(o.buf, p.tagcode...) + v := uint64(0) + if x { + v = 1 + } + p.valEnc(o, v) + } + return nil +} + +func size_slice_bool(p *Properties, base structPointer) int { + s := *structPointer_BoolSlice(base, p.field) + l := len(s) + if l == 0 { + return 0 + } + return l * (len(p.tagcode) + 1) // each bool takes exactly one byte +} + +// Encode a slice of bools ([]bool) in packed format. +func (o *Buffer) enc_slice_packed_bool(p *Properties, base structPointer) error { + s := *structPointer_BoolSlice(base, p.field) + l := len(s) + if l == 0 { + return ErrNil + } + o.buf = append(o.buf, p.tagcode...) 
+ o.EncodeVarint(uint64(l)) // each bool takes exactly one byte + for _, x := range s { + v := uint64(0) + if x { + v = 1 + } + p.valEnc(o, v) + } + return nil +} + +func size_slice_packed_bool(p *Properties, base structPointer) (n int) { + s := *structPointer_BoolSlice(base, p.field) + l := len(s) + if l == 0 { + return 0 + } + n += len(p.tagcode) + n += sizeVarint(uint64(l)) + n += l // each bool takes exactly one byte + return +} + +// Encode a slice of bytes ([]byte). +func (o *Buffer) enc_slice_byte(p *Properties, base structPointer) error { + s := *structPointer_Bytes(base, p.field) + if s == nil { + return ErrNil + } + o.buf = append(o.buf, p.tagcode...) + o.EncodeRawBytes(s) + return nil +} + +func (o *Buffer) enc_proto3_slice_byte(p *Properties, base structPointer) error { + s := *structPointer_Bytes(base, p.field) + if len(s) == 0 { + return ErrNil + } + o.buf = append(o.buf, p.tagcode...) + o.EncodeRawBytes(s) + return nil +} + +func size_slice_byte(p *Properties, base structPointer) (n int) { + s := *structPointer_Bytes(base, p.field) + if s == nil && !p.oneof { + return 0 + } + n += len(p.tagcode) + n += sizeRawBytes(s) + return +} + +func size_proto3_slice_byte(p *Properties, base structPointer) (n int) { + s := *structPointer_Bytes(base, p.field) + if len(s) == 0 && !p.oneof { + return 0 + } + n += len(p.tagcode) + n += sizeRawBytes(s) + return +} + +// Encode a slice of int32s ([]int32). +func (o *Buffer) enc_slice_int32(p *Properties, base structPointer) error { + s := structPointer_Word32Slice(base, p.field) + l := s.Len() + if l == 0 { + return ErrNil + } + for i := 0; i < l; i++ { + o.buf = append(o.buf, p.tagcode...) 
+ x := int32(s.Index(i)) // permit sign extension to use full 64-bit range + p.valEnc(o, uint64(x)) + } + return nil +} + +func size_slice_int32(p *Properties, base structPointer) (n int) { + s := structPointer_Word32Slice(base, p.field) + l := s.Len() + if l == 0 { + return 0 + } + for i := 0; i < l; i++ { + n += len(p.tagcode) + x := int32(s.Index(i)) // permit sign extension to use full 64-bit range + n += p.valSize(uint64(x)) + } + return +} + +// Encode a slice of int32s ([]int32) in packed format. +func (o *Buffer) enc_slice_packed_int32(p *Properties, base structPointer) error { + s := structPointer_Word32Slice(base, p.field) + l := s.Len() + if l == 0 { + return ErrNil + } + // TODO: Reuse a Buffer. + buf := NewBuffer(nil) + for i := 0; i < l; i++ { + x := int32(s.Index(i)) // permit sign extension to use full 64-bit range + p.valEnc(buf, uint64(x)) + } + + o.buf = append(o.buf, p.tagcode...) + o.EncodeVarint(uint64(len(buf.buf))) + o.buf = append(o.buf, buf.buf...) + return nil +} + +func size_slice_packed_int32(p *Properties, base structPointer) (n int) { + s := structPointer_Word32Slice(base, p.field) + l := s.Len() + if l == 0 { + return 0 + } + var bufSize int + for i := 0; i < l; i++ { + x := int32(s.Index(i)) // permit sign extension to use full 64-bit range + bufSize += p.valSize(uint64(x)) + } + + n += len(p.tagcode) + n += sizeVarint(uint64(bufSize)) + n += bufSize + return +} + +// Encode a slice of uint32s ([]uint32). +// Exactly the same as int32, except for no sign extension. +func (o *Buffer) enc_slice_uint32(p *Properties, base structPointer) error { + s := structPointer_Word32Slice(base, p.field) + l := s.Len() + if l == 0 { + return ErrNil + } + for i := 0; i < l; i++ { + o.buf = append(o.buf, p.tagcode...) 
+ x := s.Index(i) + p.valEnc(o, uint64(x)) + } + return nil +} + +func size_slice_uint32(p *Properties, base structPointer) (n int) { + s := structPointer_Word32Slice(base, p.field) + l := s.Len() + if l == 0 { + return 0 + } + for i := 0; i < l; i++ { + n += len(p.tagcode) + x := s.Index(i) + n += p.valSize(uint64(x)) + } + return +} + +// Encode a slice of uint32s ([]uint32) in packed format. +// Exactly the same as int32, except for no sign extension. +func (o *Buffer) enc_slice_packed_uint32(p *Properties, base structPointer) error { + s := structPointer_Word32Slice(base, p.field) + l := s.Len() + if l == 0 { + return ErrNil + } + // TODO: Reuse a Buffer. + buf := NewBuffer(nil) + for i := 0; i < l; i++ { + p.valEnc(buf, uint64(s.Index(i))) + } + + o.buf = append(o.buf, p.tagcode...) + o.EncodeVarint(uint64(len(buf.buf))) + o.buf = append(o.buf, buf.buf...) + return nil +} + +func size_slice_packed_uint32(p *Properties, base structPointer) (n int) { + s := structPointer_Word32Slice(base, p.field) + l := s.Len() + if l == 0 { + return 0 + } + var bufSize int + for i := 0; i < l; i++ { + bufSize += p.valSize(uint64(s.Index(i))) + } + + n += len(p.tagcode) + n += sizeVarint(uint64(bufSize)) + n += bufSize + return +} + +// Encode a slice of int64s ([]int64). +func (o *Buffer) enc_slice_int64(p *Properties, base structPointer) error { + s := structPointer_Word64Slice(base, p.field) + l := s.Len() + if l == 0 { + return ErrNil + } + for i := 0; i < l; i++ { + o.buf = append(o.buf, p.tagcode...) + p.valEnc(o, s.Index(i)) + } + return nil +} + +func size_slice_int64(p *Properties, base structPointer) (n int) { + s := structPointer_Word64Slice(base, p.field) + l := s.Len() + if l == 0 { + return 0 + } + for i := 0; i < l; i++ { + n += len(p.tagcode) + n += p.valSize(s.Index(i)) + } + return +} + +// Encode a slice of int64s ([]int64) in packed format. 
+func (o *Buffer) enc_slice_packed_int64(p *Properties, base structPointer) error { + s := structPointer_Word64Slice(base, p.field) + l := s.Len() + if l == 0 { + return ErrNil + } + // TODO: Reuse a Buffer. + buf := NewBuffer(nil) + for i := 0; i < l; i++ { + p.valEnc(buf, s.Index(i)) + } + + o.buf = append(o.buf, p.tagcode...) + o.EncodeVarint(uint64(len(buf.buf))) + o.buf = append(o.buf, buf.buf...) + return nil +} + +func size_slice_packed_int64(p *Properties, base structPointer) (n int) { + s := structPointer_Word64Slice(base, p.field) + l := s.Len() + if l == 0 { + return 0 + } + var bufSize int + for i := 0; i < l; i++ { + bufSize += p.valSize(s.Index(i)) + } + + n += len(p.tagcode) + n += sizeVarint(uint64(bufSize)) + n += bufSize + return +} + +// Encode a slice of slice of bytes ([][]byte). +func (o *Buffer) enc_slice_slice_byte(p *Properties, base structPointer) error { + ss := *structPointer_BytesSlice(base, p.field) + l := len(ss) + if l == 0 { + return ErrNil + } + for i := 0; i < l; i++ { + o.buf = append(o.buf, p.tagcode...) + o.EncodeRawBytes(ss[i]) + } + return nil +} + +func size_slice_slice_byte(p *Properties, base structPointer) (n int) { + ss := *structPointer_BytesSlice(base, p.field) + l := len(ss) + if l == 0 { + return 0 + } + n += l * len(p.tagcode) + for i := 0; i < l; i++ { + n += sizeRawBytes(ss[i]) + } + return +} + +// Encode a slice of strings ([]string). +func (o *Buffer) enc_slice_string(p *Properties, base structPointer) error { + ss := *structPointer_StringSlice(base, p.field) + l := len(ss) + for i := 0; i < l; i++ { + o.buf = append(o.buf, p.tagcode...) + o.EncodeStringBytes(ss[i]) + } + return nil +} + +func size_slice_string(p *Properties, base structPointer) (n int) { + ss := *structPointer_StringSlice(base, p.field) + l := len(ss) + n += l * len(p.tagcode) + for i := 0; i < l; i++ { + n += sizeStringBytes(ss[i]) + } + return +} + +// Encode a slice of message structs ([]*struct). 
+func (o *Buffer) enc_slice_struct_message(p *Properties, base structPointer) error { + var state errorState + s := structPointer_StructPointerSlice(base, p.field) + l := s.Len() + + for i := 0; i < l; i++ { + structp := s.Index(i) + if structPointer_IsNil(structp) { + return errRepeatedHasNil + } + + // Can the object marshal itself? + if p.isMarshaler { + m := structPointer_Interface(structp, p.stype).(Marshaler) + data, err := m.Marshal() + if err != nil && !state.shouldContinue(err, nil) { + return err + } + o.buf = append(o.buf, p.tagcode...) + o.EncodeRawBytes(data) + continue + } + + o.buf = append(o.buf, p.tagcode...) + err := o.enc_len_struct(p.sprop, structp, &state) + if err != nil && !state.shouldContinue(err, nil) { + if err == ErrNil { + return errRepeatedHasNil + } + return err + } + } + return state.err +} + +func size_slice_struct_message(p *Properties, base structPointer) (n int) { + s := structPointer_StructPointerSlice(base, p.field) + l := s.Len() + n += l * len(p.tagcode) + for i := 0; i < l; i++ { + structp := s.Index(i) + if structPointer_IsNil(structp) { + return // return the size up to this point + } + + // Can the object marshal itself? + if p.isMarshaler { + m := structPointer_Interface(structp, p.stype).(Marshaler) + data, _ := m.Marshal() + n += sizeRawBytes(data) + continue + } + + n0 := size_struct(p.sprop, structp) + n1 := sizeVarint(uint64(n0)) // size of encoded length + n += n0 + n1 + } + return +} + +// Encode a slice of group structs ([]*struct). 
+func (o *Buffer) enc_slice_struct_group(p *Properties, base structPointer) error { + var state errorState + s := structPointer_StructPointerSlice(base, p.field) + l := s.Len() + + for i := 0; i < l; i++ { + b := s.Index(i) + if structPointer_IsNil(b) { + return errRepeatedHasNil + } + + o.EncodeVarint(uint64((p.Tag << 3) | WireStartGroup)) + + err := o.enc_struct(p.sprop, b) + + if err != nil && !state.shouldContinue(err, nil) { + if err == ErrNil { + return errRepeatedHasNil + } + return err + } + + o.EncodeVarint(uint64((p.Tag << 3) | WireEndGroup)) + } + return state.err +} + +func size_slice_struct_group(p *Properties, base structPointer) (n int) { + s := structPointer_StructPointerSlice(base, p.field) + l := s.Len() + + n += l * sizeVarint(uint64((p.Tag<<3)|WireStartGroup)) + n += l * sizeVarint(uint64((p.Tag<<3)|WireEndGroup)) + for i := 0; i < l; i++ { + b := s.Index(i) + if structPointer_IsNil(b) { + return // return size up to this point + } + + n += size_struct(p.sprop, b) + } + return +} + +// Encode an extension map. +func (o *Buffer) enc_map(p *Properties, base structPointer) error { + exts := structPointer_ExtMap(base, p.field) + if err := encodeExtensionsMap(*exts); err != nil { + return err + } + + return o.enc_map_body(*exts) +} + +func (o *Buffer) enc_exts(p *Properties, base structPointer) error { + exts := structPointer_Extensions(base, p.field) + + v, mu := exts.extensionsRead() + if v == nil { + return nil + } + + mu.Lock() + defer mu.Unlock() + if err := encodeExtensionsMap(v); err != nil { + return err + } + + return o.enc_map_body(v) +} + +func (o *Buffer) enc_map_body(v map[int32]Extension) error { + // Fast-path for common cases: zero or one extensions. + if len(v) <= 1 { + for _, e := range v { + o.buf = append(o.buf, e.enc...) + } + return nil + } + + // Sort keys to provide a deterministic encoding. 
+ keys := make([]int, 0, len(v)) + for k := range v { + keys = append(keys, int(k)) + } + sort.Ints(keys) + + for _, k := range keys { + o.buf = append(o.buf, v[int32(k)].enc...) + } + return nil +} + +func size_map(p *Properties, base structPointer) int { + v := structPointer_ExtMap(base, p.field) + return extensionsMapSize(*v) +} + +func size_exts(p *Properties, base structPointer) int { + v := structPointer_Extensions(base, p.field) + return extensionsSize(v) +} + +// Encode a map field. +func (o *Buffer) enc_new_map(p *Properties, base structPointer) error { + var state errorState // XXX: or do we need to plumb this through? + + /* + A map defined as + map map_field = N; + is encoded in the same way as + message MapFieldEntry { + key_type key = 1; + value_type value = 2; + } + repeated MapFieldEntry map_field = N; + */ + + v := structPointer_NewAt(base, p.field, p.mtype).Elem() // map[K]V + if v.Len() == 0 { + return nil + } + + keycopy, valcopy, keybase, valbase := mapEncodeScratch(p.mtype) + + enc := func() error { + if err := p.mkeyprop.enc(o, p.mkeyprop, keybase); err != nil { + return err + } + if err := p.mvalprop.enc(o, p.mvalprop, valbase); err != nil && err != ErrNil { + return err + } + return nil + } + + // Don't sort map keys. It is not required by the spec, and C++ doesn't do it. + for _, key := range v.MapKeys() { + val := v.MapIndex(key) + + keycopy.Set(key) + valcopy.Set(val) + + o.buf = append(o.buf, p.tagcode...) + if err := o.enc_len_thing(enc, &state); err != nil { + return err + } + } + return nil +} + +func size_new_map(p *Properties, base structPointer) int { + v := structPointer_NewAt(base, p.field, p.mtype).Elem() // map[K]V + + keycopy, valcopy, keybase, valbase := mapEncodeScratch(p.mtype) + + n := 0 + for _, key := range v.MapKeys() { + val := v.MapIndex(key) + keycopy.Set(key) + valcopy.Set(val) + + // Tag codes for key and val are the responsibility of the sub-sizer. 
+ keysize := p.mkeyprop.size(p.mkeyprop, keybase) + valsize := p.mvalprop.size(p.mvalprop, valbase) + entry := keysize + valsize + // Add on tag code and length of map entry itself. + n += len(p.tagcode) + sizeVarint(uint64(entry)) + entry + } + return n +} + +// mapEncodeScratch returns a new reflect.Value matching the map's value type, +// and a structPointer suitable for passing to an encoder or sizer. +func mapEncodeScratch(mapType reflect.Type) (keycopy, valcopy reflect.Value, keybase, valbase structPointer) { + // Prepare addressable doubly-indirect placeholders for the key and value types. + // This is needed because the element-type encoders expect **T, but the map iteration produces T. + + keycopy = reflect.New(mapType.Key()).Elem() // addressable K + keyptr := reflect.New(reflect.PtrTo(keycopy.Type())).Elem() // addressable *K + keyptr.Set(keycopy.Addr()) // + keybase = toStructPointer(keyptr.Addr()) // **K + + // Value types are more varied and require special handling. + switch mapType.Elem().Kind() { + case reflect.Slice: + // []byte + var dummy []byte + valcopy = reflect.ValueOf(&dummy).Elem() // addressable []byte + valbase = toStructPointer(valcopy.Addr()) + case reflect.Ptr: + // message; the generated field type is map[K]*Msg (so V is *Msg), + // so we only need one level of indirection. + valcopy = reflect.New(mapType.Elem()).Elem() // addressable V + valbase = toStructPointer(valcopy.Addr()) + default: + // everything else + valcopy = reflect.New(mapType.Elem()).Elem() // addressable V + valptr := reflect.New(reflect.PtrTo(valcopy.Type())).Elem() // addressable *V + valptr.Set(valcopy.Addr()) // + valbase = toStructPointer(valptr.Addr()) // **V + } + return +} + +// Encode a struct. +func (o *Buffer) enc_struct(prop *StructProperties, base structPointer) error { + var state errorState + // Encode fields in tag order so that decoders may use optimizations + // that depend on the ordering. 
+ // https://developers.google.com/protocol-buffers/docs/encoding#order + for _, i := range prop.order { + p := prop.Prop[i] + if p.enc != nil { + err := p.enc(o, p, base) + if err != nil { + if err == ErrNil { + if p.Required && state.err == nil { + state.err = &RequiredNotSetError{p.Name} + } + } else if err == errRepeatedHasNil { + // Give more context to nil values in repeated fields. + return errors.New("repeated field " + p.OrigName + " has nil element") + } else if !state.shouldContinue(err, p) { + return err + } + } + if len(o.buf) > maxMarshalSize { + return ErrTooLarge + } + } + } + + // Do oneof fields. + if prop.oneofMarshaler != nil { + m := structPointer_Interface(base, prop.stype).(Message) + if err := prop.oneofMarshaler(m, o); err == ErrNil { + return errOneofHasNil + } else if err != nil { + return err + } + } + + // Add unrecognized fields at the end. + if prop.unrecField.IsValid() { + v := *structPointer_Bytes(base, prop.unrecField) + if len(o.buf)+len(v) > maxMarshalSize { + return ErrTooLarge + } + if len(v) > 0 { + o.buf = append(o.buf, v...) + } + } + + return state.err +} + +func size_struct(prop *StructProperties, base structPointer) (n int) { + for _, i := range prop.order { + p := prop.Prop[i] + if p.size != nil { + n += p.size(p, base) + } + } + + // Add unrecognized fields at the end. + if prop.unrecField.IsValid() { + v := *structPointer_Bytes(base, prop.unrecField) + n += len(v) + } + + // Factor in any oneof fields. + if prop.oneofSizer != nil { + m := structPointer_Interface(base, prop.stype).(Message) + n += prop.oneofSizer(m) + } + + return +} + +var zeroes [20]byte // longer than any conceivable sizeVarint + +// Encode a struct, preceded by its encoded length (as a varint). +func (o *Buffer) enc_len_struct(prop *StructProperties, base structPointer, state *errorState) error { + return o.enc_len_thing(func() error { return o.enc_struct(prop, base) }, state) +} + +// Encode something, preceded by its encoded length (as a varint). 
+func (o *Buffer) enc_len_thing(enc func() error, state *errorState) error { + iLen := len(o.buf) + o.buf = append(o.buf, 0, 0, 0, 0) // reserve four bytes for length + iMsg := len(o.buf) + err := enc() + if err != nil && !state.shouldContinue(err, nil) { + return err + } + lMsg := len(o.buf) - iMsg + lLen := sizeVarint(uint64(lMsg)) + switch x := lLen - (iMsg - iLen); { + case x > 0: // actual length is x bytes larger than the space we reserved + // Move msg x bytes right. + o.buf = append(o.buf, zeroes[:x]...) + copy(o.buf[iMsg+x:], o.buf[iMsg:iMsg+lMsg]) + case x < 0: // actual length is x bytes smaller than the space we reserved + // Move msg x bytes left. + copy(o.buf[iMsg+x:], o.buf[iMsg:iMsg+lMsg]) + o.buf = o.buf[:len(o.buf)+x] // x is negative + } + // Encode the length in the reserved space. + o.buf = o.buf[:iLen] + o.EncodeVarint(uint64(lMsg)) + o.buf = o.buf[:len(o.buf)+lMsg] + return state.err +} + +// errorState maintains the first error that occurs and updates that error +// with additional context. +type errorState struct { + err error +} + +// shouldContinue reports whether encoding should continue upon encountering the +// given error. If the error is RequiredNotSetError, shouldContinue returns true +// and, if this is the first appearance of that error, remembers it for future +// reporting. +// +// If prop is not nil, it may update any error with additional context about the +// field with the error. +func (s *errorState) shouldContinue(err error, prop *Properties) bool { + // Ignore unset required fields. + reqNotSet, ok := err.(*RequiredNotSetError) + if !ok { + return false + } + if s.err == nil { + if prop != nil { + err = &RequiredNotSetError{prop.Name + "." 
+ reqNotSet.field} + } + s.err = err + } + return true +} diff --git a/vendor/github.com/golang/protobuf/proto/equal.go b/vendor/github.com/golang/protobuf/proto/equal.go new file mode 100644 index 0000000..2ed1cf5 --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/equal.go @@ -0,0 +1,300 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2011 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Protocol buffer comparison. + +package proto + +import ( + "bytes" + "log" + "reflect" + "strings" +) + +/* +Equal returns true iff protocol buffers a and b are equal. +The arguments must both be pointers to protocol buffer structs. + +Equality is defined in this way: + - Two messages are equal iff they are the same type, + corresponding fields are equal, unknown field sets + are equal, and extensions sets are equal. + - Two set scalar fields are equal iff their values are equal. + If the fields are of a floating-point type, remember that + NaN != x for all x, including NaN. If the message is defined + in a proto3 .proto file, fields are not "set"; specifically, + zero length proto3 "bytes" fields are equal (nil == {}). + - Two repeated fields are equal iff their lengths are the same, + and their corresponding elements are equal. Note a "bytes" field, + although represented by []byte, is not a repeated field and the + rule for the scalar fields described above applies. + - Two unset fields are equal. + - Two unknown field sets are equal if their current + encoded state is equal. + - Two extension sets are equal iff they have corresponding + elements that are pairwise equal. + - Two map fields are equal iff their lengths are the same, + and they contain the same set of elements. Zero-length map + fields are equal. + - Every other combination of things are not equal. + +The return value is undefined if a and b are not protocol buffers. +*/ +func Equal(a, b Message) bool { + if a == nil || b == nil { + return a == b + } + v1, v2 := reflect.ValueOf(a), reflect.ValueOf(b) + if v1.Type() != v2.Type() { + return false + } + if v1.Kind() == reflect.Ptr { + if v1.IsNil() { + return v2.IsNil() + } + if v2.IsNil() { + return false + } + v1, v2 = v1.Elem(), v2.Elem() + } + if v1.Kind() != reflect.Struct { + return false + } + return equalStruct(v1, v2) +} + +// v1 and v2 are known to have the same type. 
+func equalStruct(v1, v2 reflect.Value) bool { + sprop := GetProperties(v1.Type()) + for i := 0; i < v1.NumField(); i++ { + f := v1.Type().Field(i) + if strings.HasPrefix(f.Name, "XXX_") { + continue + } + f1, f2 := v1.Field(i), v2.Field(i) + if f.Type.Kind() == reflect.Ptr { + if n1, n2 := f1.IsNil(), f2.IsNil(); n1 && n2 { + // both unset + continue + } else if n1 != n2 { + // set/unset mismatch + return false + } + b1, ok := f1.Interface().(raw) + if ok { + b2 := f2.Interface().(raw) + // RawMessage + if !bytes.Equal(b1.Bytes(), b2.Bytes()) { + return false + } + continue + } + f1, f2 = f1.Elem(), f2.Elem() + } + if !equalAny(f1, f2, sprop.Prop[i]) { + return false + } + } + + if em1 := v1.FieldByName("XXX_InternalExtensions"); em1.IsValid() { + em2 := v2.FieldByName("XXX_InternalExtensions") + if !equalExtensions(v1.Type(), em1.Interface().(XXX_InternalExtensions), em2.Interface().(XXX_InternalExtensions)) { + return false + } + } + + if em1 := v1.FieldByName("XXX_extensions"); em1.IsValid() { + em2 := v2.FieldByName("XXX_extensions") + if !equalExtMap(v1.Type(), em1.Interface().(map[int32]Extension), em2.Interface().(map[int32]Extension)) { + return false + } + } + + uf := v1.FieldByName("XXX_unrecognized") + if !uf.IsValid() { + return true + } + + u1 := uf.Bytes() + u2 := v2.FieldByName("XXX_unrecognized").Bytes() + if !bytes.Equal(u1, u2) { + return false + } + + return true +} + +// v1 and v2 are known to have the same type. +// prop may be nil. +func equalAny(v1, v2 reflect.Value, prop *Properties) bool { + if v1.Type() == protoMessageType { + m1, _ := v1.Interface().(Message) + m2, _ := v2.Interface().(Message) + return Equal(m1, m2) + } + switch v1.Kind() { + case reflect.Bool: + return v1.Bool() == v2.Bool() + case reflect.Float32, reflect.Float64: + return v1.Float() == v2.Float() + case reflect.Int32, reflect.Int64: + return v1.Int() == v2.Int() + case reflect.Interface: + // Probably a oneof field; compare the inner values. 
+ n1, n2 := v1.IsNil(), v2.IsNil() + if n1 || n2 { + return n1 == n2 + } + e1, e2 := v1.Elem(), v2.Elem() + if e1.Type() != e2.Type() { + return false + } + return equalAny(e1, e2, nil) + case reflect.Map: + if v1.Len() != v2.Len() { + return false + } + for _, key := range v1.MapKeys() { + val2 := v2.MapIndex(key) + if !val2.IsValid() { + // This key was not found in the second map. + return false + } + if !equalAny(v1.MapIndex(key), val2, nil) { + return false + } + } + return true + case reflect.Ptr: + // Maps may have nil values in them, so check for nil. + if v1.IsNil() && v2.IsNil() { + return true + } + if v1.IsNil() != v2.IsNil() { + return false + } + return equalAny(v1.Elem(), v2.Elem(), prop) + case reflect.Slice: + if v1.Type().Elem().Kind() == reflect.Uint8 { + // short circuit: []byte + + // Edge case: if this is in a proto3 message, a zero length + // bytes field is considered the zero value. + if prop != nil && prop.proto3 && v1.Len() == 0 && v2.Len() == 0 { + return true + } + if v1.IsNil() != v2.IsNil() { + return false + } + return bytes.Equal(v1.Interface().([]byte), v2.Interface().([]byte)) + } + + if v1.Len() != v2.Len() { + return false + } + for i := 0; i < v1.Len(); i++ { + if !equalAny(v1.Index(i), v2.Index(i), prop) { + return false + } + } + return true + case reflect.String: + return v1.Interface().(string) == v2.Interface().(string) + case reflect.Struct: + return equalStruct(v1, v2) + case reflect.Uint32, reflect.Uint64: + return v1.Uint() == v2.Uint() + } + + // unknown type, so not a protocol buffer + log.Printf("proto: don't know how to compare %v", v1) + return false +} + +// base is the struct type that the extensions are based on. +// x1 and x2 are InternalExtensions. 
+func equalExtensions(base reflect.Type, x1, x2 XXX_InternalExtensions) bool { + em1, _ := x1.extensionsRead() + em2, _ := x2.extensionsRead() + return equalExtMap(base, em1, em2) +} + +func equalExtMap(base reflect.Type, em1, em2 map[int32]Extension) bool { + if len(em1) != len(em2) { + return false + } + + for extNum, e1 := range em1 { + e2, ok := em2[extNum] + if !ok { + return false + } + + m1, m2 := e1.value, e2.value + + if m1 != nil && m2 != nil { + // Both are unencoded. + if !equalAny(reflect.ValueOf(m1), reflect.ValueOf(m2), nil) { + return false + } + continue + } + + // At least one is encoded. To do a semantically correct comparison + // we need to unmarshal them first. + var desc *ExtensionDesc + if m := extensionMaps[base]; m != nil { + desc = m[extNum] + } + if desc == nil { + log.Printf("proto: don't know how to compare extension %d of %v", extNum, base) + continue + } + var err error + if m1 == nil { + m1, err = decodeExtension(e1.enc, desc) + } + if m2 == nil && err == nil { + m2, err = decodeExtension(e2.enc, desc) + } + if err != nil { + // The encoded form is invalid. + log.Printf("proto: badly encoded extension %d of %v: %v", extNum, base, err) + return false + } + if !equalAny(reflect.ValueOf(m1), reflect.ValueOf(m2), nil) { + return false + } + } + + return true +} diff --git a/vendor/github.com/golang/protobuf/proto/extensions.go b/vendor/github.com/golang/protobuf/proto/extensions.go new file mode 100644 index 0000000..eaad218 --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/extensions.go @@ -0,0 +1,587 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2010 The Go Authors. All rights reserved. 
+// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package proto + +/* + * Types and routines for supporting protocol buffer extensions. + */ + +import ( + "errors" + "fmt" + "reflect" + "strconv" + "sync" +) + +// ErrMissingExtension is the error returned by GetExtension if the named extension is not in the message. +var ErrMissingExtension = errors.New("proto: missing extension") + +// ExtensionRange represents a range of message extensions for a protocol buffer. 
+// Used in code generated by the protocol compiler. +type ExtensionRange struct { + Start, End int32 // both inclusive +} + +// extendableProto is an interface implemented by any protocol buffer generated by the current +// proto compiler that may be extended. +type extendableProto interface { + Message + ExtensionRangeArray() []ExtensionRange + extensionsWrite() map[int32]Extension + extensionsRead() (map[int32]Extension, sync.Locker) +} + +// extendableProtoV1 is an interface implemented by a protocol buffer generated by the previous +// version of the proto compiler that may be extended. +type extendableProtoV1 interface { + Message + ExtensionRangeArray() []ExtensionRange + ExtensionMap() map[int32]Extension +} + +// extensionAdapter is a wrapper around extendableProtoV1 that implements extendableProto. +type extensionAdapter struct { + extendableProtoV1 +} + +func (e extensionAdapter) extensionsWrite() map[int32]Extension { + return e.ExtensionMap() +} + +func (e extensionAdapter) extensionsRead() (map[int32]Extension, sync.Locker) { + return e.ExtensionMap(), notLocker{} +} + +// notLocker is a sync.Locker whose Lock and Unlock methods are nops. +type notLocker struct{} + +func (n notLocker) Lock() {} +func (n notLocker) Unlock() {} + +// extendable returns the extendableProto interface for the given generated proto message. +// If the proto message has the old extension format, it returns a wrapper that implements +// the extendableProto interface. +func extendable(p interface{}) (extendableProto, bool) { + if ep, ok := p.(extendableProto); ok { + return ep, ok + } + if ep, ok := p.(extendableProtoV1); ok { + return extensionAdapter{ep}, ok + } + return nil, false +} + +// XXX_InternalExtensions is an internal representation of proto extensions. +// +// Each generated message struct type embeds an anonymous XXX_InternalExtensions field, +// thus gaining the unexported 'extensions' method, which can be called only from the proto package. 
+// +// The methods of XXX_InternalExtensions are not concurrency safe in general, +// but calls to logically read-only methods such as has and get may be executed concurrently. +type XXX_InternalExtensions struct { + // The struct must be indirect so that if a user inadvertently copies a + // generated message and its embedded XXX_InternalExtensions, they + // avoid the mayhem of a copied mutex. + // + // The mutex serializes all logically read-only operations to p.extensionMap. + // It is up to the client to ensure that write operations to p.extensionMap are + // mutually exclusive with other accesses. + p *struct { + mu sync.Mutex + extensionMap map[int32]Extension + } +} + +// extensionsWrite returns the extension map, creating it on first use. +func (e *XXX_InternalExtensions) extensionsWrite() map[int32]Extension { + if e.p == nil { + e.p = new(struct { + mu sync.Mutex + extensionMap map[int32]Extension + }) + e.p.extensionMap = make(map[int32]Extension) + } + return e.p.extensionMap +} + +// extensionsRead returns the extensions map for read-only use. It may be nil. +// The caller must hold the returned mutex's lock when accessing Elements within the map. +func (e *XXX_InternalExtensions) extensionsRead() (map[int32]Extension, sync.Locker) { + if e.p == nil { + return nil, nil + } + return e.p.extensionMap, &e.p.mu +} + +var extendableProtoType = reflect.TypeOf((*extendableProto)(nil)).Elem() +var extendableProtoV1Type = reflect.TypeOf((*extendableProtoV1)(nil)).Elem() + +// ExtensionDesc represents an extension specification. +// Used in generated code from the protocol compiler. 
+type ExtensionDesc struct { + ExtendedType Message // nil pointer to the type that is being extended + ExtensionType interface{} // nil pointer to the extension type + Field int32 // field number + Name string // fully-qualified name of extension, for text formatting + Tag string // protobuf tag style + Filename string // name of the file in which the extension is defined +} + +func (ed *ExtensionDesc) repeated() bool { + t := reflect.TypeOf(ed.ExtensionType) + return t.Kind() == reflect.Slice && t.Elem().Kind() != reflect.Uint8 +} + +// Extension represents an extension in a message. +type Extension struct { + // When an extension is stored in a message using SetExtension + // only desc and value are set. When the message is marshaled + // enc will be set to the encoded form of the message. + // + // When a message is unmarshaled and contains extensions, each + // extension will have only enc set. When such an extension is + // accessed using GetExtension (or GetExtensions) desc and value + // will be set. + desc *ExtensionDesc + value interface{} + enc []byte +} + +// SetRawExtension is for testing only. +func SetRawExtension(base Message, id int32, b []byte) { + epb, ok := extendable(base) + if !ok { + return + } + extmap := epb.extensionsWrite() + extmap[id] = Extension{enc: b} +} + +// isExtensionField returns true iff the given field number is in an extension range. +func isExtensionField(pb extendableProto, field int32) bool { + for _, er := range pb.ExtensionRangeArray() { + if er.Start <= field && field <= er.End { + return true + } + } + return false +} + +// checkExtensionTypes checks that the given extension is valid for pb. +func checkExtensionTypes(pb extendableProto, extension *ExtensionDesc) error { + var pbi interface{} = pb + // Check the extended type. 
+ if ea, ok := pbi.(extensionAdapter); ok { + pbi = ea.extendableProtoV1 + } + if a, b := reflect.TypeOf(pbi), reflect.TypeOf(extension.ExtendedType); a != b { + return errors.New("proto: bad extended type; " + b.String() + " does not extend " + a.String()) + } + // Check the range. + if !isExtensionField(pb, extension.Field) { + return errors.New("proto: bad extension number; not in declared ranges") + } + return nil +} + +// extPropKey is sufficient to uniquely identify an extension. +type extPropKey struct { + base reflect.Type + field int32 +} + +var extProp = struct { + sync.RWMutex + m map[extPropKey]*Properties +}{ + m: make(map[extPropKey]*Properties), +} + +func extensionProperties(ed *ExtensionDesc) *Properties { + key := extPropKey{base: reflect.TypeOf(ed.ExtendedType), field: ed.Field} + + extProp.RLock() + if prop, ok := extProp.m[key]; ok { + extProp.RUnlock() + return prop + } + extProp.RUnlock() + + extProp.Lock() + defer extProp.Unlock() + // Check again. + if prop, ok := extProp.m[key]; ok { + return prop + } + + prop := new(Properties) + prop.Init(reflect.TypeOf(ed.ExtensionType), "unknown_name", ed.Tag, nil) + extProp.m[key] = prop + return prop +} + +// encode encodes any unmarshaled (unencoded) extensions in e. +func encodeExtensions(e *XXX_InternalExtensions) error { + m, mu := e.extensionsRead() + if m == nil { + return nil // fast path + } + mu.Lock() + defer mu.Unlock() + return encodeExtensionsMap(m) +} + +// encode encodes any unmarshaled (unencoded) extensions in e. +func encodeExtensionsMap(m map[int32]Extension) error { + for k, e := range m { + if e.value == nil || e.desc == nil { + // Extension is only in its encoded form. + continue + } + + // We don't skip extensions that have an encoded form set, + // because the extension value may have been mutated after + // the last time this function was called. 
+ + et := reflect.TypeOf(e.desc.ExtensionType) + props := extensionProperties(e.desc) + + p := NewBuffer(nil) + // If e.value has type T, the encoder expects a *struct{ X T }. + // Pass a *T with a zero field and hope it all works out. + x := reflect.New(et) + x.Elem().Set(reflect.ValueOf(e.value)) + if err := props.enc(p, props, toStructPointer(x)); err != nil { + return err + } + e.enc = p.buf + m[k] = e + } + return nil +} + +func extensionsSize(e *XXX_InternalExtensions) (n int) { + m, mu := e.extensionsRead() + if m == nil { + return 0 + } + mu.Lock() + defer mu.Unlock() + return extensionsMapSize(m) +} + +func extensionsMapSize(m map[int32]Extension) (n int) { + for _, e := range m { + if e.value == nil || e.desc == nil { + // Extension is only in its encoded form. + n += len(e.enc) + continue + } + + // We don't skip extensions that have an encoded form set, + // because the extension value may have been mutated after + // the last time this function was called. + + et := reflect.TypeOf(e.desc.ExtensionType) + props := extensionProperties(e.desc) + + // If e.value has type T, the encoder expects a *struct{ X T }. + // Pass a *T with a zero field and hope it all works out. + x := reflect.New(et) + x.Elem().Set(reflect.ValueOf(e.value)) + n += props.size(props, toStructPointer(x)) + } + return +} + +// HasExtension returns whether the given extension is present in pb. +func HasExtension(pb Message, extension *ExtensionDesc) bool { + // TODO: Check types, field numbers, etc.? + epb, ok := extendable(pb) + if !ok { + return false + } + extmap, mu := epb.extensionsRead() + if extmap == nil { + return false + } + mu.Lock() + _, ok = extmap[extension.Field] + mu.Unlock() + return ok +} + +// ClearExtension removes the given extension from pb. +func ClearExtension(pb Message, extension *ExtensionDesc) { + epb, ok := extendable(pb) + if !ok { + return + } + // TODO: Check types, field numbers, etc.? 
+ extmap := epb.extensionsWrite() + delete(extmap, extension.Field) +} + +// GetExtension parses and returns the given extension of pb. +// If the extension is not present and has no default value it returns ErrMissingExtension. +func GetExtension(pb Message, extension *ExtensionDesc) (interface{}, error) { + epb, ok := extendable(pb) + if !ok { + return nil, errors.New("proto: not an extendable proto") + } + + if err := checkExtensionTypes(epb, extension); err != nil { + return nil, err + } + + emap, mu := epb.extensionsRead() + if emap == nil { + return defaultExtensionValue(extension) + } + mu.Lock() + defer mu.Unlock() + e, ok := emap[extension.Field] + if !ok { + // defaultExtensionValue returns the default value or + // ErrMissingExtension if there is no default. + return defaultExtensionValue(extension) + } + + if e.value != nil { + // Already decoded. Check the descriptor, though. + if e.desc != extension { + // This shouldn't happen. If it does, it means that + // GetExtension was called twice with two different + // descriptors with the same field number. + return nil, errors.New("proto: descriptor conflict") + } + return e.value, nil + } + + v, err := decodeExtension(e.enc, extension) + if err != nil { + return nil, err + } + + // Remember the decoded version and drop the encoded version. + // That way it is safe to mutate what we return. + e.value = v + e.desc = extension + e.enc = nil + emap[extension.Field] = e + return e.value, nil +} + +// defaultExtensionValue returns the default value for extension. +// If no default for an extension is defined ErrMissingExtension is returned. +func defaultExtensionValue(extension *ExtensionDesc) (interface{}, error) { + t := reflect.TypeOf(extension.ExtensionType) + props := extensionProperties(extension) + + sf, _, err := fieldDefault(t, props) + if err != nil { + return nil, err + } + + if sf == nil || sf.value == nil { + // There is no default value. 
+ return nil, ErrMissingExtension + } + + if t.Kind() != reflect.Ptr { + // We do not need to return a Ptr, we can directly return sf.value. + return sf.value, nil + } + + // We need to return an interface{} that is a pointer to sf.value. + value := reflect.New(t).Elem() + value.Set(reflect.New(value.Type().Elem())) + if sf.kind == reflect.Int32 { + // We may have an int32 or an enum, but the underlying data is int32. + // Since we can't set an int32 into a non int32 reflect.value directly + // set it as a int32. + value.Elem().SetInt(int64(sf.value.(int32))) + } else { + value.Elem().Set(reflect.ValueOf(sf.value)) + } + return value.Interface(), nil +} + +// decodeExtension decodes an extension encoded in b. +func decodeExtension(b []byte, extension *ExtensionDesc) (interface{}, error) { + o := NewBuffer(b) + + t := reflect.TypeOf(extension.ExtensionType) + + props := extensionProperties(extension) + + // t is a pointer to a struct, pointer to basic type or a slice. + // Allocate a "field" to store the pointer/slice itself; the + // pointer/slice will be stored here. We pass + // the address of this field to props.dec. + // This passes a zero field and a *t and lets props.dec + // interpret it as a *struct{ x t }. + value := reflect.New(t).Elem() + + for { + // Discard wire type and field number varint. It isn't needed. + if _, err := o.DecodeVarint(); err != nil { + return nil, err + } + + if err := props.dec(o, props, toStructPointer(value.Addr())); err != nil { + return nil, err + } + + if o.index >= len(o.buf) { + break + } + } + return value.Interface(), nil +} + +// GetExtensions returns a slice of the extensions present in pb that are also listed in es. +// The returned slice has the same length as es; missing extensions will appear as nil elements. 
+func GetExtensions(pb Message, es []*ExtensionDesc) (extensions []interface{}, err error) { + epb, ok := extendable(pb) + if !ok { + return nil, errors.New("proto: not an extendable proto") + } + extensions = make([]interface{}, len(es)) + for i, e := range es { + extensions[i], err = GetExtension(epb, e) + if err == ErrMissingExtension { + err = nil + } + if err != nil { + return + } + } + return +} + +// ExtensionDescs returns a new slice containing pb's extension descriptors, in undefined order. +// For non-registered extensions, ExtensionDescs returns an incomplete descriptor containing +// just the Field field, which defines the extension's field number. +func ExtensionDescs(pb Message) ([]*ExtensionDesc, error) { + epb, ok := extendable(pb) + if !ok { + return nil, fmt.Errorf("proto: %T is not an extendable proto.Message", pb) + } + registeredExtensions := RegisteredExtensions(pb) + + emap, mu := epb.extensionsRead() + if emap == nil { + return nil, nil + } + mu.Lock() + defer mu.Unlock() + extensions := make([]*ExtensionDesc, 0, len(emap)) + for extid, e := range emap { + desc := e.desc + if desc == nil { + desc = registeredExtensions[extid] + if desc == nil { + desc = &ExtensionDesc{Field: extid} + } + } + + extensions = append(extensions, desc) + } + return extensions, nil +} + +// SetExtension sets the specified extension of pb to the specified value. +func SetExtension(pb Message, extension *ExtensionDesc, value interface{}) error { + epb, ok := extendable(pb) + if !ok { + return errors.New("proto: not an extendable proto") + } + if err := checkExtensionTypes(epb, extension); err != nil { + return err + } + typ := reflect.TypeOf(extension.ExtensionType) + if typ != reflect.TypeOf(value) { + return errors.New("proto: bad extension value type") + } + // nil extension values need to be caught early, because the + // encoder can't distinguish an ErrNil due to a nil extension + // from an ErrNil due to a missing field. 
Extensions are + // always optional, so the encoder would just swallow the error + // and drop all the extensions from the encoded message. + if reflect.ValueOf(value).IsNil() { + return fmt.Errorf("proto: SetExtension called with nil value of type %T", value) + } + + extmap := epb.extensionsWrite() + extmap[extension.Field] = Extension{desc: extension, value: value} + return nil +} + +// ClearAllExtensions clears all extensions from pb. +func ClearAllExtensions(pb Message) { + epb, ok := extendable(pb) + if !ok { + return + } + m := epb.extensionsWrite() + for k := range m { + delete(m, k) + } +} + +// A global registry of extensions. +// The generated code will register the generated descriptors by calling RegisterExtension. + +var extensionMaps = make(map[reflect.Type]map[int32]*ExtensionDesc) + +// RegisterExtension is called from the generated code. +func RegisterExtension(desc *ExtensionDesc) { + st := reflect.TypeOf(desc.ExtendedType).Elem() + m := extensionMaps[st] + if m == nil { + m = make(map[int32]*ExtensionDesc) + extensionMaps[st] = m + } + if _, ok := m[desc.Field]; ok { + panic("proto: duplicate extension registered: " + st.String() + " " + strconv.Itoa(int(desc.Field))) + } + m[desc.Field] = desc +} + +// RegisteredExtensions returns a map of the registered extensions of a +// protocol buffer struct, indexed by the extension number. +// The argument pb should be a nil pointer to the struct type. +func RegisteredExtensions(pb Message) map[int32]*ExtensionDesc { + return extensionMaps[reflect.TypeOf(pb).Elem()] +} diff --git a/vendor/github.com/golang/protobuf/proto/lib.go b/vendor/github.com/golang/protobuf/proto/lib.go new file mode 100644 index 0000000..1c22550 --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/lib.go @@ -0,0 +1,897 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2010 The Go Authors. All rights reserved. 
+// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* +Package proto converts data structures to and from the wire format of +protocol buffers. It works in concert with the Go source code generated +for .proto files by the protocol compiler. + +A summary of the properties of the protocol buffer interface +for a protocol buffer variable v: + + - Names are turned from camel_case to CamelCase for export. + - There are no methods on v to set fields; just treat + them as structure fields. 
+ - There are getters that return a field's value if set, + and return the field's default value if unset. + The getters work even if the receiver is a nil message. + - The zero value for a struct is its correct initialization state. + All desired fields must be set before marshaling. + - A Reset() method will restore a protobuf struct to its zero state. + - Non-repeated fields are pointers to the values; nil means unset. + That is, optional or required field int32 f becomes F *int32. + - Repeated fields are slices. + - Helper functions are available to aid the setting of fields. + msg.Foo = proto.String("hello") // set field + - Constants are defined to hold the default values of all fields that + have them. They have the form Default_StructName_FieldName. + Because the getter methods handle defaulted values, + direct use of these constants should be rare. + - Enums are given type names and maps from names to values. + Enum values are prefixed by the enclosing message's name, or by the + enum's type name if it is a top-level enum. Enum types have a String + method, and an Enum method to assist in message construction. + - Nested messages, groups and enums have type names prefixed with the name of + the surrounding message type. + - Extensions are given descriptor names that start with E_, + followed by an underscore-delimited list of the nested messages + that contain it (if any) followed by the CamelCased name of the + extension field itself. HasExtension, ClearExtension, GetExtension + and SetExtension are functions for manipulating extensions. + - Oneof field sets are given a single field in their message, + with distinguished wrapper types for each possible field value. + - Marshal and Unmarshal are functions to encode and decode the wire format. + +When the .proto file specifies `syntax="proto3"`, there are some differences: + + - Non-repeated fields of non-message type are values instead of pointers. + - Enum types do not get an Enum method.
+ +The simplest way to describe this is to see an example. +Given file test.proto, containing + + package example; + + enum FOO { X = 17; } + + message Test { + required string label = 1; + optional int32 type = 2 [default=77]; + repeated int64 reps = 3; + optional group OptionalGroup = 4 { + required string RequiredField = 5; + } + oneof union { + int32 number = 6; + string name = 7; + } + } + +The resulting file, test.pb.go, is: + + package example + + import proto "github.com/golang/protobuf/proto" + import math "math" + + type FOO int32 + const ( + FOO_X FOO = 17 + ) + var FOO_name = map[int32]string{ + 17: "X", + } + var FOO_value = map[string]int32{ + "X": 17, + } + + func (x FOO) Enum() *FOO { + p := new(FOO) + *p = x + return p + } + func (x FOO) String() string { + return proto.EnumName(FOO_name, int32(x)) + } + func (x *FOO) UnmarshalJSON(data []byte) error { + value, err := proto.UnmarshalJSONEnum(FOO_value, data) + if err != nil { + return err + } + *x = FOO(value) + return nil + } + + type Test struct { + Label *string `protobuf:"bytes,1,req,name=label" json:"label,omitempty"` + Type *int32 `protobuf:"varint,2,opt,name=type,def=77" json:"type,omitempty"` + Reps []int64 `protobuf:"varint,3,rep,name=reps" json:"reps,omitempty"` + Optionalgroup *Test_OptionalGroup `protobuf:"group,4,opt,name=OptionalGroup" json:"optionalgroup,omitempty"` + // Types that are valid to be assigned to Union: + // *Test_Number + // *Test_Name + Union isTest_Union `protobuf_oneof:"union"` + XXX_unrecognized []byte `json:"-"` + } + func (m *Test) Reset() { *m = Test{} } + func (m *Test) String() string { return proto.CompactTextString(m) } + func (*Test) ProtoMessage() {} + + type isTest_Union interface { + isTest_Union() + } + + type Test_Number struct { + Number int32 `protobuf:"varint,6,opt,name=number"` + } + type Test_Name struct { + Name string `protobuf:"bytes,7,opt,name=name"` + } + + func (*Test_Number) isTest_Union() {} + func (*Test_Name) isTest_Union() {} + + func (m 
*Test) GetUnion() isTest_Union { + if m != nil { + return m.Union + } + return nil + } + const Default_Test_Type int32 = 77 + + func (m *Test) GetLabel() string { + if m != nil && m.Label != nil { + return *m.Label + } + return "" + } + + func (m *Test) GetType() int32 { + if m != nil && m.Type != nil { + return *m.Type + } + return Default_Test_Type + } + + func (m *Test) GetOptionalgroup() *Test_OptionalGroup { + if m != nil { + return m.Optionalgroup + } + return nil + } + + type Test_OptionalGroup struct { + RequiredField *string `protobuf:"bytes,5,req" json:"RequiredField,omitempty"` + } + func (m *Test_OptionalGroup) Reset() { *m = Test_OptionalGroup{} } + func (m *Test_OptionalGroup) String() string { return proto.CompactTextString(m) } + + func (m *Test_OptionalGroup) GetRequiredField() string { + if m != nil && m.RequiredField != nil { + return *m.RequiredField + } + return "" + } + + func (m *Test) GetNumber() int32 { + if x, ok := m.GetUnion().(*Test_Number); ok { + return x.Number + } + return 0 + } + + func (m *Test) GetName() string { + if x, ok := m.GetUnion().(*Test_Name); ok { + return x.Name + } + return "" + } + + func init() { + proto.RegisterEnum("example.FOO", FOO_name, FOO_value) + } + +To create and play with a Test object: + + package main + + import ( + "log" + + "github.com/golang/protobuf/proto" + pb "./example.pb" + ) + + func main() { + test := &pb.Test{ + Label: proto.String("hello"), + Type: proto.Int32(17), + Reps: []int64{1, 2, 3}, + Optionalgroup: &pb.Test_OptionalGroup{ + RequiredField: proto.String("good bye"), + }, + Union: &pb.Test_Name{"fred"}, + } + data, err := proto.Marshal(test) + if err != nil { + log.Fatal("marshaling error: ", err) + } + newTest := &pb.Test{} + err = proto.Unmarshal(data, newTest) + if err != nil { + log.Fatal("unmarshaling error: ", err) + } + // Now test and newTest contain the same data. 
+ if test.GetLabel() != newTest.GetLabel() { + log.Fatalf("data mismatch %q != %q", test.GetLabel(), newTest.GetLabel()) + } + // Use a type switch to determine which oneof was set. + switch u := test.Union.(type) { + case *pb.Test_Number: // u.Number contains the number. + case *pb.Test_Name: // u.Name contains the string. + } + // etc. + } +*/ +package proto + +import ( + "encoding/json" + "fmt" + "log" + "reflect" + "sort" + "strconv" + "sync" +) + +// Message is implemented by generated protocol buffer messages. +type Message interface { + Reset() + String() string + ProtoMessage() +} + +// Stats records allocation details about the protocol buffer encoders +// and decoders. Useful for tuning the library itself. +type Stats struct { + Emalloc uint64 // mallocs in encode + Dmalloc uint64 // mallocs in decode + Encode uint64 // number of encodes + Decode uint64 // number of decodes + Chit uint64 // number of cache hits + Cmiss uint64 // number of cache misses + Size uint64 // number of sizes +} + +// Set to true to enable stats collection. +const collectStats = false + +var stats Stats + +// GetStats returns a copy of the global Stats structure. +func GetStats() Stats { return stats } + +// A Buffer is a buffer manager for marshaling and unmarshaling +// protocol buffers. It may be reused between invocations to +// reduce memory usage. It is not necessary to use a Buffer; +// the global functions Marshal and Unmarshal create a +// temporary Buffer and are fine for most applications. +type Buffer struct { + buf []byte // encode/decode byte stream + index int // read point + + // pools of basic types to amortize allocation. + bools []bool + uint32s []uint32 + uint64s []uint64 + + // extra pools, only used with pointer_reflect.go + int32s []int32 + int64s []int64 + float32s []float32 + float64s []float64 +} + +// NewBuffer allocates a new Buffer and initializes its internal data to +// the contents of the argument slice. 
+func NewBuffer(e []byte) *Buffer { + return &Buffer{buf: e} +} + +// Reset resets the Buffer, ready for marshaling a new protocol buffer. +func (p *Buffer) Reset() { + p.buf = p.buf[0:0] // for reading/writing + p.index = 0 // for reading +} + +// SetBuf replaces the internal buffer with the slice, +// ready for unmarshaling the contents of the slice. +func (p *Buffer) SetBuf(s []byte) { + p.buf = s + p.index = 0 +} + +// Bytes returns the contents of the Buffer. +func (p *Buffer) Bytes() []byte { return p.buf } + +/* + * Helper routines for simplifying the creation of optional fields of basic type. + */ + +// Bool is a helper routine that allocates a new bool value +// to store v and returns a pointer to it. +func Bool(v bool) *bool { + return &v +} + +// Int32 is a helper routine that allocates a new int32 value +// to store v and returns a pointer to it. +func Int32(v int32) *int32 { + return &v +} + +// Int is a helper routine that allocates a new int32 value +// to store v and returns a pointer to it, but unlike Int32 +// its argument value is an int. +func Int(v int) *int32 { + p := new(int32) + *p = int32(v) + return p +} + +// Int64 is a helper routine that allocates a new int64 value +// to store v and returns a pointer to it. +func Int64(v int64) *int64 { + return &v +} + +// Float32 is a helper routine that allocates a new float32 value +// to store v and returns a pointer to it. +func Float32(v float32) *float32 { + return &v +} + +// Float64 is a helper routine that allocates a new float64 value +// to store v and returns a pointer to it. +func Float64(v float64) *float64 { + return &v +} + +// Uint32 is a helper routine that allocates a new uint32 value +// to store v and returns a pointer to it. +func Uint32(v uint32) *uint32 { + return &v +} + +// Uint64 is a helper routine that allocates a new uint64 value +// to store v and returns a pointer to it. 
func Uint64(v uint64) *uint64 {
	return &v
}

// String is a helper routine that allocates a new string value
// to store v and returns a pointer to it.
func String(v string) *string {
	return &v
}

// EnumName is a helper function to simplify printing protocol buffer enums
// by name. Given an enum map and a value, it returns the name registered
// for v, or v's decimal representation when m has no entry for it.
func EnumName(m map[int32]string, v int32) string {
	if s, ok := m[v]; ok {
		return s
	}
	return strconv.Itoa(int(v))
}

// UnmarshalJSONEnum is a helper function to simplify recovering enum int values
// from their JSON-encoded representation. Given a map from the enum's symbolic
// names to its int values, and a byte buffer containing the JSON-encoded
// value, it returns an int32 that can be cast to the enum type by the caller.
// enumName is only used in error messages.
//
// The function can deal with both JSON representations, numeric and symbolic.
// Empty data is reported as an error.
func UnmarshalJSONEnum(m map[string]int32, data []byte, enumName string) (int32, error) {
	// The length check keeps empty input from panicking on data[0]; it then
	// falls through to the numeric path, where json.Unmarshal rejects it.
	if len(data) > 0 && data[0] == '"' {
		// New style: enums are strings.
		var repr string
		if err := json.Unmarshal(data, &repr); err != nil {
			return -1, err
		}
		val, ok := m[repr]
		if !ok {
			return 0, fmt.Errorf("unrecognized enum %s value %q", enumName, repr)
		}
		return val, nil
	}
	// Old style: enums are ints.
	var val int32
	if err := json.Unmarshal(data, &val); err != nil {
		return 0, fmt.Errorf("cannot unmarshal %#q into enum %s", data, enumName)
	}
	return val, nil
}

// DebugPrint dumps the encoded data in b in a debugging format with a header
// including the string s. Used in testing but made available for general debugging.
+func (p *Buffer) DebugPrint(s string, b []byte) { + var u uint64 + + obuf := p.buf + index := p.index + p.buf = b + p.index = 0 + depth := 0 + + fmt.Printf("\n--- %s ---\n", s) + +out: + for { + for i := 0; i < depth; i++ { + fmt.Print(" ") + } + + index := p.index + if index == len(p.buf) { + break + } + + op, err := p.DecodeVarint() + if err != nil { + fmt.Printf("%3d: fetching op err %v\n", index, err) + break out + } + tag := op >> 3 + wire := op & 7 + + switch wire { + default: + fmt.Printf("%3d: t=%3d unknown wire=%d\n", + index, tag, wire) + break out + + case WireBytes: + var r []byte + + r, err = p.DecodeRawBytes(false) + if err != nil { + break out + } + fmt.Printf("%3d: t=%3d bytes [%d]", index, tag, len(r)) + if len(r) <= 6 { + for i := 0; i < len(r); i++ { + fmt.Printf(" %.2x", r[i]) + } + } else { + for i := 0; i < 3; i++ { + fmt.Printf(" %.2x", r[i]) + } + fmt.Printf(" ..") + for i := len(r) - 3; i < len(r); i++ { + fmt.Printf(" %.2x", r[i]) + } + } + fmt.Printf("\n") + + case WireFixed32: + u, err = p.DecodeFixed32() + if err != nil { + fmt.Printf("%3d: t=%3d fix32 err %v\n", index, tag, err) + break out + } + fmt.Printf("%3d: t=%3d fix32 %d\n", index, tag, u) + + case WireFixed64: + u, err = p.DecodeFixed64() + if err != nil { + fmt.Printf("%3d: t=%3d fix64 err %v\n", index, tag, err) + break out + } + fmt.Printf("%3d: t=%3d fix64 %d\n", index, tag, u) + + case WireVarint: + u, err = p.DecodeVarint() + if err != nil { + fmt.Printf("%3d: t=%3d varint err %v\n", index, tag, err) + break out + } + fmt.Printf("%3d: t=%3d varint %d\n", index, tag, u) + + case WireStartGroup: + fmt.Printf("%3d: t=%3d start\n", index, tag) + depth++ + + case WireEndGroup: + depth-- + fmt.Printf("%3d: t=%3d end\n", index, tag) + } + } + + if depth != 0 { + fmt.Printf("%3d: start-end not balanced %d\n", p.index, depth) + } + fmt.Printf("\n") + + p.buf = obuf + p.index = index +} + +// SetDefaults sets unset protocol buffer fields to their default values. 
+// It only modifies fields that are both unset and have defined defaults. +// It recursively sets default values in any non-nil sub-messages. +func SetDefaults(pb Message) { + setDefaults(reflect.ValueOf(pb), true, false) +} + +// v is a pointer to a struct. +func setDefaults(v reflect.Value, recur, zeros bool) { + v = v.Elem() + + defaultMu.RLock() + dm, ok := defaults[v.Type()] + defaultMu.RUnlock() + if !ok { + dm = buildDefaultMessage(v.Type()) + defaultMu.Lock() + defaults[v.Type()] = dm + defaultMu.Unlock() + } + + for _, sf := range dm.scalars { + f := v.Field(sf.index) + if !f.IsNil() { + // field already set + continue + } + dv := sf.value + if dv == nil && !zeros { + // no explicit default, and don't want to set zeros + continue + } + fptr := f.Addr().Interface() // **T + // TODO: Consider batching the allocations we do here. + switch sf.kind { + case reflect.Bool: + b := new(bool) + if dv != nil { + *b = dv.(bool) + } + *(fptr.(**bool)) = b + case reflect.Float32: + f := new(float32) + if dv != nil { + *f = dv.(float32) + } + *(fptr.(**float32)) = f + case reflect.Float64: + f := new(float64) + if dv != nil { + *f = dv.(float64) + } + *(fptr.(**float64)) = f + case reflect.Int32: + // might be an enum + if ft := f.Type(); ft != int32PtrType { + // enum + f.Set(reflect.New(ft.Elem())) + if dv != nil { + f.Elem().SetInt(int64(dv.(int32))) + } + } else { + // int32 field + i := new(int32) + if dv != nil { + *i = dv.(int32) + } + *(fptr.(**int32)) = i + } + case reflect.Int64: + i := new(int64) + if dv != nil { + *i = dv.(int64) + } + *(fptr.(**int64)) = i + case reflect.String: + s := new(string) + if dv != nil { + *s = dv.(string) + } + *(fptr.(**string)) = s + case reflect.Uint8: + // exceptional case: []byte + var b []byte + if dv != nil { + db := dv.([]byte) + b = make([]byte, len(db)) + copy(b, db) + } else { + b = []byte{} + } + *(fptr.(*[]byte)) = b + case reflect.Uint32: + u := new(uint32) + if dv != nil { + *u = dv.(uint32) + } + 
*(fptr.(**uint32)) = u + case reflect.Uint64: + u := new(uint64) + if dv != nil { + *u = dv.(uint64) + } + *(fptr.(**uint64)) = u + default: + log.Printf("proto: can't set default for field %v (sf.kind=%v)", f, sf.kind) + } + } + + for _, ni := range dm.nested { + f := v.Field(ni) + // f is *T or []*T or map[T]*T + switch f.Kind() { + case reflect.Ptr: + if f.IsNil() { + continue + } + setDefaults(f, recur, zeros) + + case reflect.Slice: + for i := 0; i < f.Len(); i++ { + e := f.Index(i) + if e.IsNil() { + continue + } + setDefaults(e, recur, zeros) + } + + case reflect.Map: + for _, k := range f.MapKeys() { + e := f.MapIndex(k) + if e.IsNil() { + continue + } + setDefaults(e, recur, zeros) + } + } + } +} + +var ( + // defaults maps a protocol buffer struct type to a slice of the fields, + // with its scalar fields set to their proto-declared non-zero default values. + defaultMu sync.RWMutex + defaults = make(map[reflect.Type]defaultMessage) + + int32PtrType = reflect.TypeOf((*int32)(nil)) +) + +// defaultMessage represents information about the default values of a message. +type defaultMessage struct { + scalars []scalarField + nested []int // struct field index of nested messages +} + +type scalarField struct { + index int // struct field index + kind reflect.Kind // element type (the T in *T or []T) + value interface{} // the proto-declared default value, or nil +} + +// t is a struct type. +func buildDefaultMessage(t reflect.Type) (dm defaultMessage) { + sprop := GetProperties(t) + for _, prop := range sprop.Prop { + fi, ok := sprop.decoderTags.get(prop.Tag) + if !ok { + // XXX_unrecognized + continue + } + ft := t.Field(fi).Type + + sf, nested, err := fieldDefault(ft, prop) + switch { + case err != nil: + log.Print(err) + case nested: + dm.nested = append(dm.nested, fi) + case sf != nil: + sf.index = fi + dm.scalars = append(dm.scalars, *sf) + } + } + + return dm +} + +// fieldDefault returns the scalarField for field type ft. 
+// sf will be nil if the field can not have a default. +// nestedMessage will be true if this is a nested message. +// Note that sf.index is not set on return. +func fieldDefault(ft reflect.Type, prop *Properties) (sf *scalarField, nestedMessage bool, err error) { + var canHaveDefault bool + switch ft.Kind() { + case reflect.Ptr: + if ft.Elem().Kind() == reflect.Struct { + nestedMessage = true + } else { + canHaveDefault = true // proto2 scalar field + } + + case reflect.Slice: + switch ft.Elem().Kind() { + case reflect.Ptr: + nestedMessage = true // repeated message + case reflect.Uint8: + canHaveDefault = true // bytes field + } + + case reflect.Map: + if ft.Elem().Kind() == reflect.Ptr { + nestedMessage = true // map with message values + } + } + + if !canHaveDefault { + if nestedMessage { + return nil, true, nil + } + return nil, false, nil + } + + // We now know that ft is a pointer or slice. + sf = &scalarField{kind: ft.Elem().Kind()} + + // scalar fields without defaults + if !prop.HasDefault { + return sf, false, nil + } + + // a scalar field: either *T or []byte + switch ft.Elem().Kind() { + case reflect.Bool: + x, err := strconv.ParseBool(prop.Default) + if err != nil { + return nil, false, fmt.Errorf("proto: bad default bool %q: %v", prop.Default, err) + } + sf.value = x + case reflect.Float32: + x, err := strconv.ParseFloat(prop.Default, 32) + if err != nil { + return nil, false, fmt.Errorf("proto: bad default float32 %q: %v", prop.Default, err) + } + sf.value = float32(x) + case reflect.Float64: + x, err := strconv.ParseFloat(prop.Default, 64) + if err != nil { + return nil, false, fmt.Errorf("proto: bad default float64 %q: %v", prop.Default, err) + } + sf.value = x + case reflect.Int32: + x, err := strconv.ParseInt(prop.Default, 10, 32) + if err != nil { + return nil, false, fmt.Errorf("proto: bad default int32 %q: %v", prop.Default, err) + } + sf.value = int32(x) + case reflect.Int64: + x, err := strconv.ParseInt(prop.Default, 10, 64) + if err != 
nil { + return nil, false, fmt.Errorf("proto: bad default int64 %q: %v", prop.Default, err) + } + sf.value = x + case reflect.String: + sf.value = prop.Default + case reflect.Uint8: + // []byte (not *uint8) + sf.value = []byte(prop.Default) + case reflect.Uint32: + x, err := strconv.ParseUint(prop.Default, 10, 32) + if err != nil { + return nil, false, fmt.Errorf("proto: bad default uint32 %q: %v", prop.Default, err) + } + sf.value = uint32(x) + case reflect.Uint64: + x, err := strconv.ParseUint(prop.Default, 10, 64) + if err != nil { + return nil, false, fmt.Errorf("proto: bad default uint64 %q: %v", prop.Default, err) + } + sf.value = x + default: + return nil, false, fmt.Errorf("proto: unhandled def kind %v", ft.Elem().Kind()) + } + + return sf, false, nil +} + +// Map fields may have key types of non-float scalars, strings and enums. +// The easiest way to sort them in some deterministic order is to use fmt. +// If this turns out to be inefficient we can always consider other options, +// such as doing a Schwartzian transform. + +func mapKeys(vs []reflect.Value) sort.Interface { + s := mapKeySorter{ + vs: vs, + // default Less function: textual comparison + less: func(a, b reflect.Value) bool { + return fmt.Sprint(a.Interface()) < fmt.Sprint(b.Interface()) + }, + } + + // Type specialization per https://developers.google.com/protocol-buffers/docs/proto#maps; + // numeric keys are sorted numerically. 
+ if len(vs) == 0 { + return s + } + switch vs[0].Kind() { + case reflect.Int32, reflect.Int64: + s.less = func(a, b reflect.Value) bool { return a.Int() < b.Int() } + case reflect.Uint32, reflect.Uint64: + s.less = func(a, b reflect.Value) bool { return a.Uint() < b.Uint() } + } + + return s +} + +type mapKeySorter struct { + vs []reflect.Value + less func(a, b reflect.Value) bool +} + +func (s mapKeySorter) Len() int { return len(s.vs) } +func (s mapKeySorter) Swap(i, j int) { s.vs[i], s.vs[j] = s.vs[j], s.vs[i] } +func (s mapKeySorter) Less(i, j int) bool { + return s.less(s.vs[i], s.vs[j]) +} + +// isProto3Zero reports whether v is a zero proto3 value. +func isProto3Zero(v reflect.Value) bool { + switch v.Kind() { + case reflect.Bool: + return !v.Bool() + case reflect.Int32, reflect.Int64: + return v.Int() == 0 + case reflect.Uint32, reflect.Uint64: + return v.Uint() == 0 + case reflect.Float32, reflect.Float64: + return v.Float() == 0 + case reflect.String: + return v.String() == "" + } + return false +} + +// ProtoPackageIsVersion2 is referenced from generated protocol buffer files +// to assert that that code is compatible with this version of the proto package. +const ProtoPackageIsVersion2 = true + +// ProtoPackageIsVersion1 is referenced from generated protocol buffer files +// to assert that that code is compatible with this version of the proto package. +const ProtoPackageIsVersion1 = true diff --git a/vendor/github.com/golang/protobuf/proto/message_set.go b/vendor/github.com/golang/protobuf/proto/message_set.go new file mode 100644 index 0000000..fd982de --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/message_set.go @@ -0,0 +1,311 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2010 The Go Authors. All rights reserved. 
+// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package proto + +/* + * Support for message sets. + */ + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "reflect" + "sort" +) + +// errNoMessageTypeID occurs when a protocol buffer does not have a message type ID. +// A message type ID is required for storing a protocol buffer in a message set. 
+var errNoMessageTypeID = errors.New("proto does not have a message type ID") + +// The first two types (_MessageSet_Item and messageSet) +// model what the protocol compiler produces for the following protocol message: +// message MessageSet { +// repeated group Item = 1 { +// required int32 type_id = 2; +// required string message = 3; +// }; +// } +// That is the MessageSet wire format. We can't use a proto to generate these +// because that would introduce a circular dependency between it and this package. + +type _MessageSet_Item struct { + TypeId *int32 `protobuf:"varint,2,req,name=type_id"` + Message []byte `protobuf:"bytes,3,req,name=message"` +} + +type messageSet struct { + Item []*_MessageSet_Item `protobuf:"group,1,rep"` + XXX_unrecognized []byte + // TODO: caching? +} + +// Make sure messageSet is a Message. +var _ Message = (*messageSet)(nil) + +// messageTypeIder is an interface satisfied by a protocol buffer type +// that may be stored in a MessageSet. +type messageTypeIder interface { + MessageTypeId() int32 +} + +func (ms *messageSet) find(pb Message) *_MessageSet_Item { + mti, ok := pb.(messageTypeIder) + if !ok { + return nil + } + id := mti.MessageTypeId() + for _, item := range ms.Item { + if *item.TypeId == id { + return item + } + } + return nil +} + +func (ms *messageSet) Has(pb Message) bool { + if ms.find(pb) != nil { + return true + } + return false +} + +func (ms *messageSet) Unmarshal(pb Message) error { + if item := ms.find(pb); item != nil { + return Unmarshal(item.Message, pb) + } + if _, ok := pb.(messageTypeIder); !ok { + return errNoMessageTypeID + } + return nil // TODO: return error instead? 
+} + +func (ms *messageSet) Marshal(pb Message) error { + msg, err := Marshal(pb) + if err != nil { + return err + } + if item := ms.find(pb); item != nil { + // reuse existing item + item.Message = msg + return nil + } + + mti, ok := pb.(messageTypeIder) + if !ok { + return errNoMessageTypeID + } + + mtid := mti.MessageTypeId() + ms.Item = append(ms.Item, &_MessageSet_Item{ + TypeId: &mtid, + Message: msg, + }) + return nil +} + +func (ms *messageSet) Reset() { *ms = messageSet{} } +func (ms *messageSet) String() string { return CompactTextString(ms) } +func (*messageSet) ProtoMessage() {} + +// Support for the message_set_wire_format message option. + +func skipVarint(buf []byte) []byte { + i := 0 + for ; buf[i]&0x80 != 0; i++ { + } + return buf[i+1:] +} + +// MarshalMessageSet encodes the extension map represented by m in the message set wire format. +// It is called by generated Marshal methods on protocol buffer messages with the message_set_wire_format option. +func MarshalMessageSet(exts interface{}) ([]byte, error) { + var m map[int32]Extension + switch exts := exts.(type) { + case *XXX_InternalExtensions: + if err := encodeExtensions(exts); err != nil { + return nil, err + } + m, _ = exts.extensionsRead() + case map[int32]Extension: + if err := encodeExtensionsMap(exts); err != nil { + return nil, err + } + m = exts + default: + return nil, errors.New("proto: not an extension map") + } + + // Sort extension IDs to provide a deterministic encoding. + // See also enc_map in encode.go. + ids := make([]int, 0, len(m)) + for id := range m { + ids = append(ids, int(id)) + } + sort.Ints(ids) + + ms := &messageSet{Item: make([]*_MessageSet_Item, 0, len(m))} + for _, id := range ids { + e := m[int32(id)] + // Remove the wire type and field number varint, as well as the length varint. 
+ msg := skipVarint(skipVarint(e.enc)) + + ms.Item = append(ms.Item, &_MessageSet_Item{ + TypeId: Int32(int32(id)), + Message: msg, + }) + } + return Marshal(ms) +} + +// UnmarshalMessageSet decodes the extension map encoded in buf in the message set wire format. +// It is called by generated Unmarshal methods on protocol buffer messages with the message_set_wire_format option. +func UnmarshalMessageSet(buf []byte, exts interface{}) error { + var m map[int32]Extension + switch exts := exts.(type) { + case *XXX_InternalExtensions: + m = exts.extensionsWrite() + case map[int32]Extension: + m = exts + default: + return errors.New("proto: not an extension map") + } + + ms := new(messageSet) + if err := Unmarshal(buf, ms); err != nil { + return err + } + for _, item := range ms.Item { + id := *item.TypeId + msg := item.Message + + // Restore wire type and field number varint, plus length varint. + // Be careful to preserve duplicate items. + b := EncodeVarint(uint64(id)<<3 | WireBytes) + if ext, ok := m[id]; ok { + // Existing data; rip off the tag and length varint + // so we join the new data correctly. + // We can assume that ext.enc is set because we are unmarshaling. + o := ext.enc[len(b):] // skip wire type and field number + _, n := DecodeVarint(o) // calculate length of length varint + o = o[n:] // skip length varint + msg = append(o, msg...) // join old data and new data + } + b = append(b, EncodeVarint(uint64(len(msg)))...) + b = append(b, msg...) + + m[id] = Extension{enc: b} + } + return nil +} + +// MarshalMessageSetJSON encodes the extension map represented by m in JSON format. +// It is called by generated MarshalJSON methods on protocol buffer messages with the message_set_wire_format option. 
+func MarshalMessageSetJSON(exts interface{}) ([]byte, error) { + var m map[int32]Extension + switch exts := exts.(type) { + case *XXX_InternalExtensions: + m, _ = exts.extensionsRead() + case map[int32]Extension: + m = exts + default: + return nil, errors.New("proto: not an extension map") + } + var b bytes.Buffer + b.WriteByte('{') + + // Process the map in key order for deterministic output. + ids := make([]int32, 0, len(m)) + for id := range m { + ids = append(ids, id) + } + sort.Sort(int32Slice(ids)) // int32Slice defined in text.go + + for i, id := range ids { + ext := m[id] + if i > 0 { + b.WriteByte(',') + } + + msd, ok := messageSetMap[id] + if !ok { + // Unknown type; we can't render it, so skip it. + continue + } + fmt.Fprintf(&b, `"[%s]":`, msd.name) + + x := ext.value + if x == nil { + x = reflect.New(msd.t.Elem()).Interface() + if err := Unmarshal(ext.enc, x.(Message)); err != nil { + return nil, err + } + } + d, err := json.Marshal(x) + if err != nil { + return nil, err + } + b.Write(d) + } + b.WriteByte('}') + return b.Bytes(), nil +} + +// UnmarshalMessageSetJSON decodes the extension map encoded in buf in JSON format. +// It is called by generated UnmarshalJSON methods on protocol buffer messages with the message_set_wire_format option. +func UnmarshalMessageSetJSON(buf []byte, exts interface{}) error { + // Common-case fast path. + if len(buf) == 0 || bytes.Equal(buf, []byte("{}")) { + return nil + } + + // This is fairly tricky, and it's not clear that it is needed. + return errors.New("TODO: UnmarshalMessageSetJSON not yet implemented") +} + +// A global registry of types that can be used in a MessageSet. + +var messageSetMap = make(map[int32]messageSetDesc) + +type messageSetDesc struct { + t reflect.Type // pointer to struct + name string +} + +// RegisterMessageSetType is called from the generated code. 
+func RegisterMessageSetType(m Message, fieldNum int32, name string) { + messageSetMap[fieldNum] = messageSetDesc{ + t: reflect.TypeOf(m), + name: name, + } +} diff --git a/vendor/github.com/golang/protobuf/proto/pointer_reflect.go b/vendor/github.com/golang/protobuf/proto/pointer_reflect.go new file mode 100644 index 0000000..fb512e2 --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/pointer_reflect.go @@ -0,0 +1,484 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2012 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build appengine js + +// This file contains an implementation of proto field accesses using package reflect. +// It is slower than the code in pointer_unsafe.go but it avoids package unsafe and can +// be used on App Engine. + +package proto + +import ( + "math" + "reflect" +) + +// A structPointer is a pointer to a struct. +type structPointer struct { + v reflect.Value +} + +// toStructPointer returns a structPointer equivalent to the given reflect value. +// The reflect value must itself be a pointer to a struct. +func toStructPointer(v reflect.Value) structPointer { + return structPointer{v} +} + +// IsNil reports whether p is nil. +func structPointer_IsNil(p structPointer) bool { + return p.v.IsNil() +} + +// Interface returns the struct pointer as an interface value. +func structPointer_Interface(p structPointer, _ reflect.Type) interface{} { + return p.v.Interface() +} + +// A field identifies a field in a struct, accessible from a structPointer. +// In this implementation, a field is identified by the sequence of field indices +// passed to reflect's FieldByIndex. +type field []int + +// toField returns a field equivalent to the given reflect field. +func toField(f *reflect.StructField) field { + return f.Index +} + +// invalidField is an invalid field identifier. +var invalidField = field(nil) + +// IsValid reports whether the field identifier is valid. 
+func (f field) IsValid() bool { return f != nil } + +// field returns the given field in the struct as a reflect value. +func structPointer_field(p structPointer, f field) reflect.Value { + // Special case: an extension map entry with a value of type T + // passes a *T to the struct-handling code with a zero field, + // expecting that it will be treated as equivalent to *struct{ X T }, + // which has the same memory layout. We have to handle that case + // specially, because reflect will panic if we call FieldByIndex on a + // non-struct. + if f == nil { + return p.v.Elem() + } + + return p.v.Elem().FieldByIndex(f) +} + +// ifield returns the given field in the struct as an interface value. +func structPointer_ifield(p structPointer, f field) interface{} { + return structPointer_field(p, f).Addr().Interface() +} + +// Bytes returns the address of a []byte field in the struct. +func structPointer_Bytes(p structPointer, f field) *[]byte { + return structPointer_ifield(p, f).(*[]byte) +} + +// BytesSlice returns the address of a [][]byte field in the struct. +func structPointer_BytesSlice(p structPointer, f field) *[][]byte { + return structPointer_ifield(p, f).(*[][]byte) +} + +// Bool returns the address of a *bool field in the struct. +func structPointer_Bool(p structPointer, f field) **bool { + return structPointer_ifield(p, f).(**bool) +} + +// BoolVal returns the address of a bool field in the struct. +func structPointer_BoolVal(p structPointer, f field) *bool { + return structPointer_ifield(p, f).(*bool) +} + +// BoolSlice returns the address of a []bool field in the struct. +func structPointer_BoolSlice(p structPointer, f field) *[]bool { + return structPointer_ifield(p, f).(*[]bool) +} + +// String returns the address of a *string field in the struct. +func structPointer_String(p structPointer, f field) **string { + return structPointer_ifield(p, f).(**string) +} + +// StringVal returns the address of a string field in the struct. 
+func structPointer_StringVal(p structPointer, f field) *string { + return structPointer_ifield(p, f).(*string) +} + +// StringSlice returns the address of a []string field in the struct. +func structPointer_StringSlice(p structPointer, f field) *[]string { + return structPointer_ifield(p, f).(*[]string) +} + +// Extensions returns the address of an extension map field in the struct. +func structPointer_Extensions(p structPointer, f field) *XXX_InternalExtensions { + return structPointer_ifield(p, f).(*XXX_InternalExtensions) +} + +// ExtMap returns the address of an extension map field in the struct. +func structPointer_ExtMap(p structPointer, f field) *map[int32]Extension { + return structPointer_ifield(p, f).(*map[int32]Extension) +} + +// NewAt returns the reflect.Value for a pointer to a field in the struct. +func structPointer_NewAt(p structPointer, f field, typ reflect.Type) reflect.Value { + return structPointer_field(p, f).Addr() +} + +// SetStructPointer writes a *struct field in the struct. +func structPointer_SetStructPointer(p structPointer, f field, q structPointer) { + structPointer_field(p, f).Set(q.v) +} + +// GetStructPointer reads a *struct field in the struct. +func structPointer_GetStructPointer(p structPointer, f field) structPointer { + return structPointer{structPointer_field(p, f)} +} + +// StructPointerSlice the address of a []*struct field in the struct. +func structPointer_StructPointerSlice(p structPointer, f field) structPointerSlice { + return structPointerSlice{structPointer_field(p, f)} +} + +// A structPointerSlice represents the address of a slice of pointers to structs +// (themselves messages or groups). That is, v.Type() is *[]*struct{...}. 
+type structPointerSlice struct { + v reflect.Value +} + +func (p structPointerSlice) Len() int { return p.v.Len() } +func (p structPointerSlice) Index(i int) structPointer { return structPointer{p.v.Index(i)} } +func (p structPointerSlice) Append(q structPointer) { + p.v.Set(reflect.Append(p.v, q.v)) +} + +var ( + int32Type = reflect.TypeOf(int32(0)) + uint32Type = reflect.TypeOf(uint32(0)) + float32Type = reflect.TypeOf(float32(0)) + int64Type = reflect.TypeOf(int64(0)) + uint64Type = reflect.TypeOf(uint64(0)) + float64Type = reflect.TypeOf(float64(0)) +) + +// A word32 represents a field of type *int32, *uint32, *float32, or *enum. +// That is, v.Type() is *int32, *uint32, *float32, or *enum and v is assignable. +type word32 struct { + v reflect.Value +} + +// IsNil reports whether p is nil. +func word32_IsNil(p word32) bool { + return p.v.IsNil() +} + +// Set sets p to point at a newly allocated word with bits set to x. +func word32_Set(p word32, o *Buffer, x uint32) { + t := p.v.Type().Elem() + switch t { + case int32Type: + if len(o.int32s) == 0 { + o.int32s = make([]int32, uint32PoolSize) + } + o.int32s[0] = int32(x) + p.v.Set(reflect.ValueOf(&o.int32s[0])) + o.int32s = o.int32s[1:] + return + case uint32Type: + if len(o.uint32s) == 0 { + o.uint32s = make([]uint32, uint32PoolSize) + } + o.uint32s[0] = x + p.v.Set(reflect.ValueOf(&o.uint32s[0])) + o.uint32s = o.uint32s[1:] + return + case float32Type: + if len(o.float32s) == 0 { + o.float32s = make([]float32, uint32PoolSize) + } + o.float32s[0] = math.Float32frombits(x) + p.v.Set(reflect.ValueOf(&o.float32s[0])) + o.float32s = o.float32s[1:] + return + } + + // must be enum + p.v.Set(reflect.New(t)) + p.v.Elem().SetInt(int64(int32(x))) +} + +// Get gets the bits pointed at by p, as a uint32. 
+func word32_Get(p word32) uint32 { + elem := p.v.Elem() + switch elem.Kind() { + case reflect.Int32: + return uint32(elem.Int()) + case reflect.Uint32: + return uint32(elem.Uint()) + case reflect.Float32: + return math.Float32bits(float32(elem.Float())) + } + panic("unreachable") +} + +// Word32 returns a reference to a *int32, *uint32, *float32, or *enum field in the struct. +func structPointer_Word32(p structPointer, f field) word32 { + return word32{structPointer_field(p, f)} +} + +// A word32Val represents a field of type int32, uint32, float32, or enum. +// That is, v.Type() is int32, uint32, float32, or enum and v is assignable. +type word32Val struct { + v reflect.Value +} + +// Set sets *p to x. +func word32Val_Set(p word32Val, x uint32) { + switch p.v.Type() { + case int32Type: + p.v.SetInt(int64(x)) + return + case uint32Type: + p.v.SetUint(uint64(x)) + return + case float32Type: + p.v.SetFloat(float64(math.Float32frombits(x))) + return + } + + // must be enum + p.v.SetInt(int64(int32(x))) +} + +// Get gets the bits pointed at by p, as a uint32. +func word32Val_Get(p word32Val) uint32 { + elem := p.v + switch elem.Kind() { + case reflect.Int32: + return uint32(elem.Int()) + case reflect.Uint32: + return uint32(elem.Uint()) + case reflect.Float32: + return math.Float32bits(float32(elem.Float())) + } + panic("unreachable") +} + +// Word32Val returns a reference to a int32, uint32, float32, or enum field in the struct. +func structPointer_Word32Val(p structPointer, f field) word32Val { + return word32Val{structPointer_field(p, f)} +} + +// A word32Slice is a slice of 32-bit values. +// That is, v.Type() is []int32, []uint32, []float32, or []enum. 
+type word32Slice struct { + v reflect.Value +} + +func (p word32Slice) Append(x uint32) { + n, m := p.v.Len(), p.v.Cap() + if n < m { + p.v.SetLen(n + 1) + } else { + t := p.v.Type().Elem() + p.v.Set(reflect.Append(p.v, reflect.Zero(t))) + } + elem := p.v.Index(n) + switch elem.Kind() { + case reflect.Int32: + elem.SetInt(int64(int32(x))) + case reflect.Uint32: + elem.SetUint(uint64(x)) + case reflect.Float32: + elem.SetFloat(float64(math.Float32frombits(x))) + } +} + +func (p word32Slice) Len() int { + return p.v.Len() +} + +func (p word32Slice) Index(i int) uint32 { + elem := p.v.Index(i) + switch elem.Kind() { + case reflect.Int32: + return uint32(elem.Int()) + case reflect.Uint32: + return uint32(elem.Uint()) + case reflect.Float32: + return math.Float32bits(float32(elem.Float())) + } + panic("unreachable") +} + +// Word32Slice returns a reference to a []int32, []uint32, []float32, or []enum field in the struct. +func structPointer_Word32Slice(p structPointer, f field) word32Slice { + return word32Slice{structPointer_field(p, f)} +} + +// word64 is like word32 but for 64-bit values. 
+type word64 struct { + v reflect.Value +} + +func word64_Set(p word64, o *Buffer, x uint64) { + t := p.v.Type().Elem() + switch t { + case int64Type: + if len(o.int64s) == 0 { + o.int64s = make([]int64, uint64PoolSize) + } + o.int64s[0] = int64(x) + p.v.Set(reflect.ValueOf(&o.int64s[0])) + o.int64s = o.int64s[1:] + return + case uint64Type: + if len(o.uint64s) == 0 { + o.uint64s = make([]uint64, uint64PoolSize) + } + o.uint64s[0] = x + p.v.Set(reflect.ValueOf(&o.uint64s[0])) + o.uint64s = o.uint64s[1:] + return + case float64Type: + if len(o.float64s) == 0 { + o.float64s = make([]float64, uint64PoolSize) + } + o.float64s[0] = math.Float64frombits(x) + p.v.Set(reflect.ValueOf(&o.float64s[0])) + o.float64s = o.float64s[1:] + return + } + panic("unreachable") +} + +func word64_IsNil(p word64) bool { + return p.v.IsNil() +} + +func word64_Get(p word64) uint64 { + elem := p.v.Elem() + switch elem.Kind() { + case reflect.Int64: + return uint64(elem.Int()) + case reflect.Uint64: + return elem.Uint() + case reflect.Float64: + return math.Float64bits(elem.Float()) + } + panic("unreachable") +} + +func structPointer_Word64(p structPointer, f field) word64 { + return word64{structPointer_field(p, f)} +} + +// word64Val is like word32Val but for 64-bit values. 
+type word64Val struct { + v reflect.Value +} + +func word64Val_Set(p word64Val, o *Buffer, x uint64) { + switch p.v.Type() { + case int64Type: + p.v.SetInt(int64(x)) + return + case uint64Type: + p.v.SetUint(x) + return + case float64Type: + p.v.SetFloat(math.Float64frombits(x)) + return + } + panic("unreachable") +} + +func word64Val_Get(p word64Val) uint64 { + elem := p.v + switch elem.Kind() { + case reflect.Int64: + return uint64(elem.Int()) + case reflect.Uint64: + return elem.Uint() + case reflect.Float64: + return math.Float64bits(elem.Float()) + } + panic("unreachable") +} + +func structPointer_Word64Val(p structPointer, f field) word64Val { + return word64Val{structPointer_field(p, f)} +} + +type word64Slice struct { + v reflect.Value +} + +func (p word64Slice) Append(x uint64) { + n, m := p.v.Len(), p.v.Cap() + if n < m { + p.v.SetLen(n + 1) + } else { + t := p.v.Type().Elem() + p.v.Set(reflect.Append(p.v, reflect.Zero(t))) + } + elem := p.v.Index(n) + switch elem.Kind() { + case reflect.Int64: + elem.SetInt(int64(int64(x))) + case reflect.Uint64: + elem.SetUint(uint64(x)) + case reflect.Float64: + elem.SetFloat(float64(math.Float64frombits(x))) + } +} + +func (p word64Slice) Len() int { + return p.v.Len() +} + +func (p word64Slice) Index(i int) uint64 { + elem := p.v.Index(i) + switch elem.Kind() { + case reflect.Int64: + return uint64(elem.Int()) + case reflect.Uint64: + return uint64(elem.Uint()) + case reflect.Float64: + return math.Float64bits(float64(elem.Float())) + } + panic("unreachable") +} + +func structPointer_Word64Slice(p structPointer, f field) word64Slice { + return word64Slice{structPointer_field(p, f)} +} diff --git a/vendor/github.com/golang/protobuf/proto/pointer_unsafe.go b/vendor/github.com/golang/protobuf/proto/pointer_unsafe.go new file mode 100644 index 0000000..6b5567d --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/pointer_unsafe.go @@ -0,0 +1,270 @@ +// Go support for Protocol Buffers - Google's data interchange 
format +// +// Copyright 2012 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +build !appengine,!js + +// This file contains the implementation of the proto field accesses using package unsafe. + +package proto + +import ( + "reflect" + "unsafe" +) + +// NOTE: These type_Foo functions would more idiomatically be methods, +// but Go does not allow methods on pointer types, and we must preserve +// some pointer type for the garbage collector. 
We use these +// funcs with clunky names as our poor approximation to methods. +// +// An alternative would be +// type structPointer struct { p unsafe.Pointer } +// but that does not registerize as well. + +// A structPointer is a pointer to a struct. +type structPointer unsafe.Pointer + +// toStructPointer returns a structPointer equivalent to the given reflect value. +func toStructPointer(v reflect.Value) structPointer { + return structPointer(unsafe.Pointer(v.Pointer())) +} + +// IsNil reports whether p is nil. +func structPointer_IsNil(p structPointer) bool { + return p == nil +} + +// Interface returns the struct pointer, assumed to have element type t, +// as an interface value. +func structPointer_Interface(p structPointer, t reflect.Type) interface{} { + return reflect.NewAt(t, unsafe.Pointer(p)).Interface() +} + +// A field identifies a field in a struct, accessible from a structPointer. +// In this implementation, a field is identified by its byte offset from the start of the struct. +type field uintptr + +// toField returns a field equivalent to the given reflect field. +func toField(f *reflect.StructField) field { + return field(f.Offset) +} + +// invalidField is an invalid field identifier. +const invalidField = ^field(0) + +// IsValid reports whether the field identifier is valid. +func (f field) IsValid() bool { + return f != ^field(0) +} + +// Bytes returns the address of a []byte field in the struct. +func structPointer_Bytes(p structPointer, f field) *[]byte { + return (*[]byte)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// BytesSlice returns the address of a [][]byte field in the struct. +func structPointer_BytesSlice(p structPointer, f field) *[][]byte { + return (*[][]byte)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// Bool returns the address of a *bool field in the struct. 
+func structPointer_Bool(p structPointer, f field) **bool { + return (**bool)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// BoolVal returns the address of a bool field in the struct. +func structPointer_BoolVal(p structPointer, f field) *bool { + return (*bool)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// BoolSlice returns the address of a []bool field in the struct. +func structPointer_BoolSlice(p structPointer, f field) *[]bool { + return (*[]bool)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// String returns the address of a *string field in the struct. +func structPointer_String(p structPointer, f field) **string { + return (**string)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// StringVal returns the address of a string field in the struct. +func structPointer_StringVal(p structPointer, f field) *string { + return (*string)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// StringSlice returns the address of a []string field in the struct. +func structPointer_StringSlice(p structPointer, f field) *[]string { + return (*[]string)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// ExtMap returns the address of an extension map field in the struct. +func structPointer_Extensions(p structPointer, f field) *XXX_InternalExtensions { + return (*XXX_InternalExtensions)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +func structPointer_ExtMap(p structPointer, f field) *map[int32]Extension { + return (*map[int32]Extension)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// NewAt returns the reflect.Value for a pointer to a field in the struct. +func structPointer_NewAt(p structPointer, f field, typ reflect.Type) reflect.Value { + return reflect.NewAt(typ, unsafe.Pointer(uintptr(p)+uintptr(f))) +} + +// SetStructPointer writes a *struct field in the struct. 
+func structPointer_SetStructPointer(p structPointer, f field, q structPointer) { + *(*structPointer)(unsafe.Pointer(uintptr(p) + uintptr(f))) = q +} + +// GetStructPointer reads a *struct field in the struct. +func structPointer_GetStructPointer(p structPointer, f field) structPointer { + return *(*structPointer)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// StructPointerSlice the address of a []*struct field in the struct. +func structPointer_StructPointerSlice(p structPointer, f field) *structPointerSlice { + return (*structPointerSlice)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// A structPointerSlice represents a slice of pointers to structs (themselves submessages or groups). +type structPointerSlice []structPointer + +func (v *structPointerSlice) Len() int { return len(*v) } +func (v *structPointerSlice) Index(i int) structPointer { return (*v)[i] } +func (v *structPointerSlice) Append(p structPointer) { *v = append(*v, p) } + +// A word32 is the address of a "pointer to 32-bit value" field. +type word32 **uint32 + +// IsNil reports whether *v is nil. +func word32_IsNil(p word32) bool { + return *p == nil +} + +// Set sets *v to point at a newly allocated word set to x. +func word32_Set(p word32, o *Buffer, x uint32) { + if len(o.uint32s) == 0 { + o.uint32s = make([]uint32, uint32PoolSize) + } + o.uint32s[0] = x + *p = &o.uint32s[0] + o.uint32s = o.uint32s[1:] +} + +// Get gets the value pointed at by *v. +func word32_Get(p word32) uint32 { + return **p +} + +// Word32 returns the address of a *int32, *uint32, *float32, or *enum field in the struct. +func structPointer_Word32(p structPointer, f field) word32 { + return word32((**uint32)(unsafe.Pointer(uintptr(p) + uintptr(f)))) +} + +// A word32Val is the address of a 32-bit value field. +type word32Val *uint32 + +// Set sets *p to x. +func word32Val_Set(p word32Val, x uint32) { + *p = x +} + +// Get gets the value pointed at by p. 
+func word32Val_Get(p word32Val) uint32 { + return *p +} + +// Word32Val returns the address of a *int32, *uint32, *float32, or *enum field in the struct. +func structPointer_Word32Val(p structPointer, f field) word32Val { + return word32Val((*uint32)(unsafe.Pointer(uintptr(p) + uintptr(f)))) +} + +// A word32Slice is a slice of 32-bit values. +type word32Slice []uint32 + +func (v *word32Slice) Append(x uint32) { *v = append(*v, x) } +func (v *word32Slice) Len() int { return len(*v) } +func (v *word32Slice) Index(i int) uint32 { return (*v)[i] } + +// Word32Slice returns the address of a []int32, []uint32, []float32, or []enum field in the struct. +func structPointer_Word32Slice(p structPointer, f field) *word32Slice { + return (*word32Slice)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} + +// word64 is like word32 but for 64-bit values. +type word64 **uint64 + +func word64_Set(p word64, o *Buffer, x uint64) { + if len(o.uint64s) == 0 { + o.uint64s = make([]uint64, uint64PoolSize) + } + o.uint64s[0] = x + *p = &o.uint64s[0] + o.uint64s = o.uint64s[1:] +} + +func word64_IsNil(p word64) bool { + return *p == nil +} + +func word64_Get(p word64) uint64 { + return **p +} + +func structPointer_Word64(p structPointer, f field) word64 { + return word64((**uint64)(unsafe.Pointer(uintptr(p) + uintptr(f)))) +} + +// word64Val is like word32Val but for 64-bit values. +type word64Val *uint64 + +func word64Val_Set(p word64Val, o *Buffer, x uint64) { + *p = x +} + +func word64Val_Get(p word64Val) uint64 { + return *p +} + +func structPointer_Word64Val(p structPointer, f field) word64Val { + return word64Val((*uint64)(unsafe.Pointer(uintptr(p) + uintptr(f)))) +} + +// word64Slice is like word32Slice but for 64-bit values. 
+type word64Slice []uint64 + +func (v *word64Slice) Append(x uint64) { *v = append(*v, x) } +func (v *word64Slice) Len() int { return len(*v) } +func (v *word64Slice) Index(i int) uint64 { return (*v)[i] } + +func structPointer_Word64Slice(p structPointer, f field) *word64Slice { + return (*word64Slice)(unsafe.Pointer(uintptr(p) + uintptr(f))) +} diff --git a/vendor/github.com/golang/protobuf/proto/properties.go b/vendor/github.com/golang/protobuf/proto/properties.go new file mode 100644 index 0000000..ec2289c --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/properties.go @@ -0,0 +1,872 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2010 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package proto + +/* + * Routines for encoding data into the wire format for protocol buffers. + */ + +import ( + "fmt" + "log" + "os" + "reflect" + "sort" + "strconv" + "strings" + "sync" +) + +const debug bool = false + +// Constants that identify the encoding of a value on the wire. +const ( + WireVarint = 0 + WireFixed64 = 1 + WireBytes = 2 + WireStartGroup = 3 + WireEndGroup = 4 + WireFixed32 = 5 +) + +const startSize = 10 // initial slice/string sizes + +// Encoders are defined in encode.go +// An encoder outputs the full representation of a field, including its +// tag and encoder type. +type encoder func(p *Buffer, prop *Properties, base structPointer) error + +// A valueEncoder encodes a single integer in a particular encoding. +type valueEncoder func(o *Buffer, x uint64) error + +// Sizers are defined in encode.go +// A sizer returns the encoded size of a field, including its tag and encoder +// type. +type sizer func(prop *Properties, base structPointer) int + +// A valueSizer returns the encoded size of a single integer in a particular +// encoding. +type valueSizer func(x uint64) int + +// Decoders are defined in decode.go +// A decoder creates a value from its wire representation. +// Unrecognized subelements are saved in unrec. +type decoder func(p *Buffer, prop *Properties, base structPointer) error + +// A valueDecoder decodes a single integer in a particular encoding. 
+type valueDecoder func(o *Buffer) (x uint64, err error) + +// A oneofMarshaler does the marshaling for all oneof fields in a message. +type oneofMarshaler func(Message, *Buffer) error + +// A oneofUnmarshaler does the unmarshaling for a oneof field in a message. +type oneofUnmarshaler func(Message, int, int, *Buffer) (bool, error) + +// A oneofSizer does the sizing for all oneof fields in a message. +type oneofSizer func(Message) int + +// tagMap is an optimization over map[int]int for typical protocol buffer +// use-cases. Encoded protocol buffers are often in tag order with small tag +// numbers. +type tagMap struct { + fastTags []int + slowTags map[int]int +} + +// tagMapFastLimit is the upper bound on the tag number that will be stored in +// the tagMap slice rather than its map. +const tagMapFastLimit = 1024 + +func (p *tagMap) get(t int) (int, bool) { + if t > 0 && t < tagMapFastLimit { + if t >= len(p.fastTags) { + return 0, false + } + fi := p.fastTags[t] + return fi, fi >= 0 + } + fi, ok := p.slowTags[t] + return fi, ok +} + +func (p *tagMap) put(t int, fi int) { + if t > 0 && t < tagMapFastLimit { + for len(p.fastTags) < t+1 { + p.fastTags = append(p.fastTags, -1) + } + p.fastTags[t] = fi + return + } + if p.slowTags == nil { + p.slowTags = make(map[int]int) + } + p.slowTags[t] = fi +} + +// StructProperties represents properties for all the fields of a struct. +// decoderTags and decoderOrigNames should only be used by the decoder. 
+type StructProperties struct { + Prop []*Properties // properties for each field + reqCount int // required count + decoderTags tagMap // map from proto tag to struct field number + decoderOrigNames map[string]int // map from original name to struct field number + order []int // list of struct field numbers in tag order + unrecField field // field id of the XXX_unrecognized []byte field + extendable bool // is this an extendable proto + + oneofMarshaler oneofMarshaler + oneofUnmarshaler oneofUnmarshaler + oneofSizer oneofSizer + stype reflect.Type + + // OneofTypes contains information about the oneof fields in this message. + // It is keyed by the original name of a field. + OneofTypes map[string]*OneofProperties +} + +// OneofProperties represents information about a specific field in a oneof. +type OneofProperties struct { + Type reflect.Type // pointer to generated struct type for this oneof field + Field int // struct field number of the containing oneof in the message + Prop *Properties +} + +// Implement the sorting interface so we can sort the fields in tag order, as recommended by the spec. +// See encode.go, (*Buffer).enc_struct. + +func (sp *StructProperties) Len() int { return len(sp.order) } +func (sp *StructProperties) Less(i, j int) bool { + return sp.Prop[sp.order[i]].Tag < sp.Prop[sp.order[j]].Tag +} +func (sp *StructProperties) Swap(i, j int) { sp.order[i], sp.order[j] = sp.order[j], sp.order[i] } + +// Properties represents the protocol-specific behavior of a single struct field. 
+type Properties struct { + Name string // name of the field, for error messages + OrigName string // original name before protocol compiler (always set) + JSONName string // name to use for JSON; determined by protoc + Wire string + WireType int + Tag int + Required bool + Optional bool + Repeated bool + Packed bool // relevant for repeated primitives only + Enum string // set for enum types only + proto3 bool // whether this is known to be a proto3 field; set for []byte only + oneof bool // whether this is a oneof field + + Default string // default value + HasDefault bool // whether an explicit default was provided + def_uint64 uint64 + + enc encoder + valEnc valueEncoder // set for bool and numeric types only + field field + tagcode []byte // encoding of EncodeVarint((Tag<<3)|WireType) + tagbuf [8]byte + stype reflect.Type // set for struct types only + sprop *StructProperties // set for struct types only + isMarshaler bool + isUnmarshaler bool + + mtype reflect.Type // set for map types only + mkeyprop *Properties // set for map types only + mvalprop *Properties // set for map types only + + size sizer + valSize valueSizer // set for bool and numeric types only + + dec decoder + valDec valueDecoder // set for bool and numeric types only + + // If this is a packable field, this will be the decoder for the packed version of the field. + packedDec decoder +} + +// String formats the properties in the protobuf struct field tag style. 
+func (p *Properties) String() string { + s := p.Wire + s = "," + s += strconv.Itoa(p.Tag) + if p.Required { + s += ",req" + } + if p.Optional { + s += ",opt" + } + if p.Repeated { + s += ",rep" + } + if p.Packed { + s += ",packed" + } + s += ",name=" + p.OrigName + if p.JSONName != p.OrigName { + s += ",json=" + p.JSONName + } + if p.proto3 { + s += ",proto3" + } + if p.oneof { + s += ",oneof" + } + if len(p.Enum) > 0 { + s += ",enum=" + p.Enum + } + if p.HasDefault { + s += ",def=" + p.Default + } + return s +} + +// Parse populates p by parsing a string in the protobuf struct field tag style. +func (p *Properties) Parse(s string) { + // "bytes,49,opt,name=foo,def=hello!" + fields := strings.Split(s, ",") // breaks def=, but handled below. + if len(fields) < 2 { + fmt.Fprintf(os.Stderr, "proto: tag has too few fields: %q\n", s) + return + } + + p.Wire = fields[0] + switch p.Wire { + case "varint": + p.WireType = WireVarint + p.valEnc = (*Buffer).EncodeVarint + p.valDec = (*Buffer).DecodeVarint + p.valSize = sizeVarint + case "fixed32": + p.WireType = WireFixed32 + p.valEnc = (*Buffer).EncodeFixed32 + p.valDec = (*Buffer).DecodeFixed32 + p.valSize = sizeFixed32 + case "fixed64": + p.WireType = WireFixed64 + p.valEnc = (*Buffer).EncodeFixed64 + p.valDec = (*Buffer).DecodeFixed64 + p.valSize = sizeFixed64 + case "zigzag32": + p.WireType = WireVarint + p.valEnc = (*Buffer).EncodeZigzag32 + p.valDec = (*Buffer).DecodeZigzag32 + p.valSize = sizeZigzag32 + case "zigzag64": + p.WireType = WireVarint + p.valEnc = (*Buffer).EncodeZigzag64 + p.valDec = (*Buffer).DecodeZigzag64 + p.valSize = sizeZigzag64 + case "bytes", "group": + p.WireType = WireBytes + // no numeric converter for non-numeric types + default: + fmt.Fprintf(os.Stderr, "proto: tag has unknown wire type: %q\n", s) + return + } + + var err error + p.Tag, err = strconv.Atoi(fields[1]) + if err != nil { + return + } + + for i := 2; i < len(fields); i++ { + f := fields[i] + switch { + case f == "req": + 
p.Required = true + case f == "opt": + p.Optional = true + case f == "rep": + p.Repeated = true + case f == "packed": + p.Packed = true + case strings.HasPrefix(f, "name="): + p.OrigName = f[5:] + case strings.HasPrefix(f, "json="): + p.JSONName = f[5:] + case strings.HasPrefix(f, "enum="): + p.Enum = f[5:] + case f == "proto3": + p.proto3 = true + case f == "oneof": + p.oneof = true + case strings.HasPrefix(f, "def="): + p.HasDefault = true + p.Default = f[4:] // rest of string + if i+1 < len(fields) { + // Commas aren't escaped, and def is always last. + p.Default += "," + strings.Join(fields[i+1:], ",") + break + } + } + } +} + +func logNoSliceEnc(t1, t2 reflect.Type) { + fmt.Fprintf(os.Stderr, "proto: no slice oenc for %T = []%T\n", t1, t2) +} + +var protoMessageType = reflect.TypeOf((*Message)(nil)).Elem() + +// Initialize the fields for encoding and decoding. +func (p *Properties) setEncAndDec(typ reflect.Type, f *reflect.StructField, lockGetProp bool) { + p.enc = nil + p.dec = nil + p.size = nil + + switch t1 := typ; t1.Kind() { + default: + fmt.Fprintf(os.Stderr, "proto: no coders for %v\n", t1) + + // proto3 scalar types + + case reflect.Bool: + p.enc = (*Buffer).enc_proto3_bool + p.dec = (*Buffer).dec_proto3_bool + p.size = size_proto3_bool + case reflect.Int32: + p.enc = (*Buffer).enc_proto3_int32 + p.dec = (*Buffer).dec_proto3_int32 + p.size = size_proto3_int32 + case reflect.Uint32: + p.enc = (*Buffer).enc_proto3_uint32 + p.dec = (*Buffer).dec_proto3_int32 // can reuse + p.size = size_proto3_uint32 + case reflect.Int64, reflect.Uint64: + p.enc = (*Buffer).enc_proto3_int64 + p.dec = (*Buffer).dec_proto3_int64 + p.size = size_proto3_int64 + case reflect.Float32: + p.enc = (*Buffer).enc_proto3_uint32 // can just treat them as bits + p.dec = (*Buffer).dec_proto3_int32 + p.size = size_proto3_uint32 + case reflect.Float64: + p.enc = (*Buffer).enc_proto3_int64 // can just treat them as bits + p.dec = (*Buffer).dec_proto3_int64 + p.size = size_proto3_int64 + 
case reflect.String: + p.enc = (*Buffer).enc_proto3_string + p.dec = (*Buffer).dec_proto3_string + p.size = size_proto3_string + + case reflect.Ptr: + switch t2 := t1.Elem(); t2.Kind() { + default: + fmt.Fprintf(os.Stderr, "proto: no encoder function for %v -> %v\n", t1, t2) + break + case reflect.Bool: + p.enc = (*Buffer).enc_bool + p.dec = (*Buffer).dec_bool + p.size = size_bool + case reflect.Int32: + p.enc = (*Buffer).enc_int32 + p.dec = (*Buffer).dec_int32 + p.size = size_int32 + case reflect.Uint32: + p.enc = (*Buffer).enc_uint32 + p.dec = (*Buffer).dec_int32 // can reuse + p.size = size_uint32 + case reflect.Int64, reflect.Uint64: + p.enc = (*Buffer).enc_int64 + p.dec = (*Buffer).dec_int64 + p.size = size_int64 + case reflect.Float32: + p.enc = (*Buffer).enc_uint32 // can just treat them as bits + p.dec = (*Buffer).dec_int32 + p.size = size_uint32 + case reflect.Float64: + p.enc = (*Buffer).enc_int64 // can just treat them as bits + p.dec = (*Buffer).dec_int64 + p.size = size_int64 + case reflect.String: + p.enc = (*Buffer).enc_string + p.dec = (*Buffer).dec_string + p.size = size_string + case reflect.Struct: + p.stype = t1.Elem() + p.isMarshaler = isMarshaler(t1) + p.isUnmarshaler = isUnmarshaler(t1) + if p.Wire == "bytes" { + p.enc = (*Buffer).enc_struct_message + p.dec = (*Buffer).dec_struct_message + p.size = size_struct_message + } else { + p.enc = (*Buffer).enc_struct_group + p.dec = (*Buffer).dec_struct_group + p.size = size_struct_group + } + } + + case reflect.Slice: + switch t2 := t1.Elem(); t2.Kind() { + default: + logNoSliceEnc(t1, t2) + break + case reflect.Bool: + if p.Packed { + p.enc = (*Buffer).enc_slice_packed_bool + p.size = size_slice_packed_bool + } else { + p.enc = (*Buffer).enc_slice_bool + p.size = size_slice_bool + } + p.dec = (*Buffer).dec_slice_bool + p.packedDec = (*Buffer).dec_slice_packed_bool + case reflect.Int32: + if p.Packed { + p.enc = (*Buffer).enc_slice_packed_int32 + p.size = size_slice_packed_int32 + } else { + p.enc = 
(*Buffer).enc_slice_int32 + p.size = size_slice_int32 + } + p.dec = (*Buffer).dec_slice_int32 + p.packedDec = (*Buffer).dec_slice_packed_int32 + case reflect.Uint32: + if p.Packed { + p.enc = (*Buffer).enc_slice_packed_uint32 + p.size = size_slice_packed_uint32 + } else { + p.enc = (*Buffer).enc_slice_uint32 + p.size = size_slice_uint32 + } + p.dec = (*Buffer).dec_slice_int32 + p.packedDec = (*Buffer).dec_slice_packed_int32 + case reflect.Int64, reflect.Uint64: + if p.Packed { + p.enc = (*Buffer).enc_slice_packed_int64 + p.size = size_slice_packed_int64 + } else { + p.enc = (*Buffer).enc_slice_int64 + p.size = size_slice_int64 + } + p.dec = (*Buffer).dec_slice_int64 + p.packedDec = (*Buffer).dec_slice_packed_int64 + case reflect.Uint8: + p.dec = (*Buffer).dec_slice_byte + if p.proto3 { + p.enc = (*Buffer).enc_proto3_slice_byte + p.size = size_proto3_slice_byte + } else { + p.enc = (*Buffer).enc_slice_byte + p.size = size_slice_byte + } + case reflect.Float32, reflect.Float64: + switch t2.Bits() { + case 32: + // can just treat them as bits + if p.Packed { + p.enc = (*Buffer).enc_slice_packed_uint32 + p.size = size_slice_packed_uint32 + } else { + p.enc = (*Buffer).enc_slice_uint32 + p.size = size_slice_uint32 + } + p.dec = (*Buffer).dec_slice_int32 + p.packedDec = (*Buffer).dec_slice_packed_int32 + case 64: + // can just treat them as bits + if p.Packed { + p.enc = (*Buffer).enc_slice_packed_int64 + p.size = size_slice_packed_int64 + } else { + p.enc = (*Buffer).enc_slice_int64 + p.size = size_slice_int64 + } + p.dec = (*Buffer).dec_slice_int64 + p.packedDec = (*Buffer).dec_slice_packed_int64 + default: + logNoSliceEnc(t1, t2) + break + } + case reflect.String: + p.enc = (*Buffer).enc_slice_string + p.dec = (*Buffer).dec_slice_string + p.size = size_slice_string + case reflect.Ptr: + switch t3 := t2.Elem(); t3.Kind() { + default: + fmt.Fprintf(os.Stderr, "proto: no ptr oenc for %T -> %T -> %T\n", t1, t2, t3) + break + case reflect.Struct: + p.stype = t2.Elem() + 
p.isMarshaler = isMarshaler(t2) + p.isUnmarshaler = isUnmarshaler(t2) + if p.Wire == "bytes" { + p.enc = (*Buffer).enc_slice_struct_message + p.dec = (*Buffer).dec_slice_struct_message + p.size = size_slice_struct_message + } else { + p.enc = (*Buffer).enc_slice_struct_group + p.dec = (*Buffer).dec_slice_struct_group + p.size = size_slice_struct_group + } + } + case reflect.Slice: + switch t2.Elem().Kind() { + default: + fmt.Fprintf(os.Stderr, "proto: no slice elem oenc for %T -> %T -> %T\n", t1, t2, t2.Elem()) + break + case reflect.Uint8: + p.enc = (*Buffer).enc_slice_slice_byte + p.dec = (*Buffer).dec_slice_slice_byte + p.size = size_slice_slice_byte + } + } + + case reflect.Map: + p.enc = (*Buffer).enc_new_map + p.dec = (*Buffer).dec_new_map + p.size = size_new_map + + p.mtype = t1 + p.mkeyprop = &Properties{} + p.mkeyprop.init(reflect.PtrTo(p.mtype.Key()), "Key", f.Tag.Get("protobuf_key"), nil, lockGetProp) + p.mvalprop = &Properties{} + vtype := p.mtype.Elem() + if vtype.Kind() != reflect.Ptr && vtype.Kind() != reflect.Slice { + // The value type is not a message (*T) or bytes ([]byte), + // so we need encoders for the pointer to this type. + vtype = reflect.PtrTo(vtype) + } + p.mvalprop.init(vtype, "Value", f.Tag.Get("protobuf_val"), nil, lockGetProp) + } + + // precalculate tag code + wire := p.WireType + if p.Packed { + wire = WireBytes + } + x := uint32(p.Tag)<<3 | uint32(wire) + i := 0 + for i = 0; x > 127; i++ { + p.tagbuf[i] = 0x80 | uint8(x&0x7F) + x >>= 7 + } + p.tagbuf[i] = uint8(x) + p.tagcode = p.tagbuf[0 : i+1] + + if p.stype != nil { + if lockGetProp { + p.sprop = GetProperties(p.stype) + } else { + p.sprop = getPropertiesLocked(p.stype) + } + } +} + +var ( + marshalerType = reflect.TypeOf((*Marshaler)(nil)).Elem() + unmarshalerType = reflect.TypeOf((*Unmarshaler)(nil)).Elem() +) + +// isMarshaler reports whether type t implements Marshaler. 
+func isMarshaler(t reflect.Type) bool { + // We're checking for (likely) pointer-receiver methods + // so if t is not a pointer, something is very wrong. + // The calls above only invoke isMarshaler on pointer types. + if t.Kind() != reflect.Ptr { + panic("proto: misuse of isMarshaler") + } + return t.Implements(marshalerType) +} + +// isUnmarshaler reports whether type t implements Unmarshaler. +func isUnmarshaler(t reflect.Type) bool { + // We're checking for (likely) pointer-receiver methods + // so if t is not a pointer, something is very wrong. + // The calls above only invoke isUnmarshaler on pointer types. + if t.Kind() != reflect.Ptr { + panic("proto: misuse of isUnmarshaler") + } + return t.Implements(unmarshalerType) +} + +// Init populates the properties from a protocol buffer struct tag. +func (p *Properties) Init(typ reflect.Type, name, tag string, f *reflect.StructField) { + p.init(typ, name, tag, f, true) +} + +func (p *Properties) init(typ reflect.Type, name, tag string, f *reflect.StructField, lockGetProp bool) { + // "bytes,49,opt,def=hello!" + p.Name = name + p.OrigName = name + if f != nil { + p.field = toField(f) + } + if tag == "" { + return + } + p.Parse(tag) + p.setEncAndDec(typ, f, lockGetProp) +} + +var ( + propertiesMu sync.RWMutex + propertiesMap = make(map[reflect.Type]*StructProperties) +) + +// GetProperties returns the list of properties for the type represented by t. +// t must represent a generated struct type of a protocol message. +func GetProperties(t reflect.Type) *StructProperties { + if t.Kind() != reflect.Struct { + panic("proto: type must have kind struct") + } + + // Most calls to GetProperties in a long-running program will be + // retrieving details for types we have seen before. 
+ propertiesMu.RLock() + sprop, ok := propertiesMap[t] + propertiesMu.RUnlock() + if ok { + if collectStats { + stats.Chit++ + } + return sprop + } + + propertiesMu.Lock() + sprop = getPropertiesLocked(t) + propertiesMu.Unlock() + return sprop +} + +// getPropertiesLocked requires that propertiesMu is held. +func getPropertiesLocked(t reflect.Type) *StructProperties { + if prop, ok := propertiesMap[t]; ok { + if collectStats { + stats.Chit++ + } + return prop + } + if collectStats { + stats.Cmiss++ + } + + prop := new(StructProperties) + // in case of recursive protos, fill this in now. + propertiesMap[t] = prop + + // build properties + prop.extendable = reflect.PtrTo(t).Implements(extendableProtoType) || + reflect.PtrTo(t).Implements(extendableProtoV1Type) + prop.unrecField = invalidField + prop.Prop = make([]*Properties, t.NumField()) + prop.order = make([]int, t.NumField()) + + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + p := new(Properties) + name := f.Name + p.init(f.Type, name, f.Tag.Get("protobuf"), &f, false) + + if f.Name == "XXX_InternalExtensions" { // special case + p.enc = (*Buffer).enc_exts + p.dec = nil // not needed + p.size = size_exts + } else if f.Name == "XXX_extensions" { // special case + p.enc = (*Buffer).enc_map + p.dec = nil // not needed + p.size = size_map + } else if f.Name == "XXX_unrecognized" { // special case + prop.unrecField = toField(&f) + } + oneof := f.Tag.Get("protobuf_oneof") // special case + if oneof != "" { + // Oneof fields don't use the traditional protobuf tag. + p.OrigName = oneof + } + prop.Prop[i] = p + prop.order[i] = i + if debug { + print(i, " ", f.Name, " ", t.String(), " ") + if p.Tag > 0 { + print(p.String()) + } + print("\n") + } + if p.enc == nil && !strings.HasPrefix(f.Name, "XXX_") && oneof == "" { + fmt.Fprintln(os.Stderr, "proto: no encoder for", f.Name, f.Type.String(), "[GetProperties]") + } + } + + // Re-order prop.order. 
+ sort.Sort(prop) + + type oneofMessage interface { + XXX_OneofFuncs() (func(Message, *Buffer) error, func(Message, int, int, *Buffer) (bool, error), func(Message) int, []interface{}) + } + if om, ok := reflect.Zero(reflect.PtrTo(t)).Interface().(oneofMessage); ok { + var oots []interface{} + prop.oneofMarshaler, prop.oneofUnmarshaler, prop.oneofSizer, oots = om.XXX_OneofFuncs() + prop.stype = t + + // Interpret oneof metadata. + prop.OneofTypes = make(map[string]*OneofProperties) + for _, oot := range oots { + oop := &OneofProperties{ + Type: reflect.ValueOf(oot).Type(), // *T + Prop: new(Properties), + } + sft := oop.Type.Elem().Field(0) + oop.Prop.Name = sft.Name + oop.Prop.Parse(sft.Tag.Get("protobuf")) + // There will be exactly one interface field that + // this new value is assignable to. + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + if f.Type.Kind() != reflect.Interface { + continue + } + if !oop.Type.AssignableTo(f.Type) { + continue + } + oop.Field = i + break + } + prop.OneofTypes[oop.Prop.OrigName] = oop + } + } + + // build required counts + // build tags + reqCount := 0 + prop.decoderOrigNames = make(map[string]int) + for i, p := range prop.Prop { + if strings.HasPrefix(p.Name, "XXX_") { + // Internal fields should not appear in tags/origNames maps. + // They are handled specially when encoding and decoding. + continue + } + if p.Required { + reqCount++ + } + prop.decoderTags.put(p.Tag, i) + prop.decoderOrigNames[p.OrigName] = i + } + prop.reqCount = reqCount + + return prop +} + +// Return the Properties object for the x[0]'th field of the structure. +func propByIndex(t reflect.Type, x []int) *Properties { + if len(x) != 1 { + fmt.Fprintf(os.Stderr, "proto: field index dimension %d (not 1) for type %s\n", len(x), t) + return nil + } + prop := GetProperties(t) + return prop.Prop[x[0]] +} + +// Get the address and type of a pointer to a struct from an interface. 
+func getbase(pb Message) (t reflect.Type, b structPointer, err error) { + if pb == nil { + err = ErrNil + return + } + // get the reflect type of the pointer to the struct. + t = reflect.TypeOf(pb) + // get the address of the struct. + value := reflect.ValueOf(pb) + b = toStructPointer(value) + return +} + +// A global registry of enum types. +// The generated code will register the generated maps by calling RegisterEnum. + +var enumValueMaps = make(map[string]map[string]int32) + +// RegisterEnum is called from the generated code to install the enum descriptor +// maps into the global table to aid parsing text format protocol buffers. +func RegisterEnum(typeName string, unusedNameMap map[int32]string, valueMap map[string]int32) { + if _, ok := enumValueMaps[typeName]; ok { + panic("proto: duplicate enum registered: " + typeName) + } + enumValueMaps[typeName] = valueMap +} + +// EnumValueMap returns the mapping from names to integers of the +// enum type enumType, or a nil if not found. +func EnumValueMap(enumType string) map[string]int32 { + return enumValueMaps[enumType] +} + +// A registry of all linked message types. +// The string is a fully-qualified proto name ("pkg.Message"). +var ( + protoTypes = make(map[string]reflect.Type) + revProtoTypes = make(map[reflect.Type]string) +) + +// RegisterType is called from generated code and maps from the fully qualified +// proto name to the type (pointer to struct) of the protocol buffer. +func RegisterType(x Message, name string) { + if _, ok := protoTypes[name]; ok { + // TODO: Some day, make this a panic. + log.Printf("proto: duplicate proto type registered: %s", name) + return + } + t := reflect.TypeOf(x) + protoTypes[name] = t + revProtoTypes[t] = name +} + +// MessageName returns the fully-qualified proto name for the given message type. 
+func MessageName(x Message) string { + type xname interface { + XXX_MessageName() string + } + if m, ok := x.(xname); ok { + return m.XXX_MessageName() + } + return revProtoTypes[reflect.TypeOf(x)] +} + +// MessageType returns the message type (pointer to struct) for a named message. +func MessageType(name string) reflect.Type { return protoTypes[name] } + +// A registry of all linked proto files. +var ( + protoFiles = make(map[string][]byte) // file name => fileDescriptor +) + +// RegisterFile is called from generated code and maps from the +// full file name of a .proto file to its compressed FileDescriptorProto. +func RegisterFile(filename string, fileDescriptor []byte) { + protoFiles[filename] = fileDescriptor +} + +// FileDescriptor returns the compressed FileDescriptorProto for a .proto file. +func FileDescriptor(filename string) []byte { return protoFiles[filename] } diff --git a/vendor/github.com/golang/protobuf/proto/text.go b/vendor/github.com/golang/protobuf/proto/text.go new file mode 100644 index 0000000..965876b --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/text.go @@ -0,0 +1,854 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2010 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package proto + +// Functions for writing the text protocol buffer format. + +import ( + "bufio" + "bytes" + "encoding" + "errors" + "fmt" + "io" + "log" + "math" + "reflect" + "sort" + "strings" +) + +var ( + newline = []byte("\n") + spaces = []byte(" ") + gtNewline = []byte(">\n") + endBraceNewline = []byte("}\n") + backslashN = []byte{'\\', 'n'} + backslashR = []byte{'\\', 'r'} + backslashT = []byte{'\\', 't'} + backslashDQ = []byte{'\\', '"'} + backslashBS = []byte{'\\', '\\'} + posInf = []byte("inf") + negInf = []byte("-inf") + nan = []byte("nan") +) + +type writer interface { + io.Writer + WriteByte(byte) error +} + +// textWriter is an io.Writer that tracks its indentation level. 
+type textWriter struct { + ind int + complete bool // if the current position is a complete line + compact bool // whether to write out as a one-liner + w writer +} + +func (w *textWriter) WriteString(s string) (n int, err error) { + if !strings.Contains(s, "\n") { + if !w.compact && w.complete { + w.writeIndent() + } + w.complete = false + return io.WriteString(w.w, s) + } + // WriteString is typically called without newlines, so this + // codepath and its copy are rare. We copy to avoid + // duplicating all of Write's logic here. + return w.Write([]byte(s)) +} + +func (w *textWriter) Write(p []byte) (n int, err error) { + newlines := bytes.Count(p, newline) + if newlines == 0 { + if !w.compact && w.complete { + w.writeIndent() + } + n, err = w.w.Write(p) + w.complete = false + return n, err + } + + frags := bytes.SplitN(p, newline, newlines+1) + if w.compact { + for i, frag := range frags { + if i > 0 { + if err := w.w.WriteByte(' '); err != nil { + return n, err + } + n++ + } + nn, err := w.w.Write(frag) + n += nn + if err != nil { + return n, err + } + } + return n, nil + } + + for i, frag := range frags { + if w.complete { + w.writeIndent() + } + nn, err := w.w.Write(frag) + n += nn + if err != nil { + return n, err + } + if i+1 < len(frags) { + if err := w.w.WriteByte('\n'); err != nil { + return n, err + } + n++ + } + } + w.complete = len(frags[len(frags)-1]) == 0 + return n, nil +} + +func (w *textWriter) WriteByte(c byte) error { + if w.compact && c == '\n' { + c = ' ' + } + if !w.compact && w.complete { + w.writeIndent() + } + err := w.w.WriteByte(c) + w.complete = c == '\n' + return err +} + +func (w *textWriter) indent() { w.ind++ } + +func (w *textWriter) unindent() { + if w.ind == 0 { + log.Print("proto: textWriter unindented too far") + return + } + w.ind-- +} + +func writeName(w *textWriter, props *Properties) error { + if _, err := w.WriteString(props.OrigName); err != nil { + return err + } + if props.Wire != "group" { + return w.WriteByte(':') + 
} + return nil +} + +// raw is the interface satisfied by RawMessage. +type raw interface { + Bytes() []byte +} + +func requiresQuotes(u string) bool { + // When type URL contains any characters except [0-9A-Za-z./\-]*, it must be quoted. + for _, ch := range u { + switch { + case ch == '.' || ch == '/' || ch == '_': + continue + case '0' <= ch && ch <= '9': + continue + case 'A' <= ch && ch <= 'Z': + continue + case 'a' <= ch && ch <= 'z': + continue + default: + return true + } + } + return false +} + +// isAny reports whether sv is a google.protobuf.Any message +func isAny(sv reflect.Value) bool { + type wkt interface { + XXX_WellKnownType() string + } + t, ok := sv.Addr().Interface().(wkt) + return ok && t.XXX_WellKnownType() == "Any" +} + +// writeProto3Any writes an expanded google.protobuf.Any message. +// +// It returns (false, nil) if sv value can't be unmarshaled (e.g. because +// required messages are not linked in). +// +// It returns (true, error) when sv was written in expanded format or an error +// was encountered. 
+func (tm *TextMarshaler) writeProto3Any(w *textWriter, sv reflect.Value) (bool, error) { + turl := sv.FieldByName("TypeUrl") + val := sv.FieldByName("Value") + if !turl.IsValid() || !val.IsValid() { + return true, errors.New("proto: invalid google.protobuf.Any message") + } + + b, ok := val.Interface().([]byte) + if !ok { + return true, errors.New("proto: invalid google.protobuf.Any message") + } + + parts := strings.Split(turl.String(), "/") + mt := MessageType(parts[len(parts)-1]) + if mt == nil { + return false, nil + } + m := reflect.New(mt.Elem()) + if err := Unmarshal(b, m.Interface().(Message)); err != nil { + return false, nil + } + w.Write([]byte("[")) + u := turl.String() + if requiresQuotes(u) { + writeString(w, u) + } else { + w.Write([]byte(u)) + } + if w.compact { + w.Write([]byte("]:<")) + } else { + w.Write([]byte("]: <\n")) + w.ind++ + } + if err := tm.writeStruct(w, m.Elem()); err != nil { + return true, err + } + if w.compact { + w.Write([]byte("> ")) + } else { + w.ind-- + w.Write([]byte(">\n")) + } + return true, nil +} + +func (tm *TextMarshaler) writeStruct(w *textWriter, sv reflect.Value) error { + if tm.ExpandAny && isAny(sv) { + if canExpand, err := tm.writeProto3Any(w, sv); canExpand { + return err + } + } + st := sv.Type() + sprops := GetProperties(st) + for i := 0; i < sv.NumField(); i++ { + fv := sv.Field(i) + props := sprops.Prop[i] + name := st.Field(i).Name + + if strings.HasPrefix(name, "XXX_") { + // There are two XXX_ fields: + // XXX_unrecognized []byte + // XXX_extensions map[int32]proto.Extension + // The first is handled here; + // the second is handled at the bottom of this function. + if name == "XXX_unrecognized" && !fv.IsNil() { + if err := writeUnknownStruct(w, fv.Interface().([]byte)); err != nil { + return err + } + } + continue + } + if fv.Kind() == reflect.Ptr && fv.IsNil() { + // Field not filled in. This could be an optional field or + // a required field that wasn't filled in. 
Either way, there + // isn't anything we can show for it. + continue + } + if fv.Kind() == reflect.Slice && fv.IsNil() { + // Repeated field that is empty, or a bytes field that is unused. + continue + } + + if props.Repeated && fv.Kind() == reflect.Slice { + // Repeated field. + for j := 0; j < fv.Len(); j++ { + if err := writeName(w, props); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte(' '); err != nil { + return err + } + } + v := fv.Index(j) + if v.Kind() == reflect.Ptr && v.IsNil() { + // A nil message in a repeated field is not valid, + // but we can handle that more gracefully than panicking. + if _, err := w.Write([]byte("\n")); err != nil { + return err + } + continue + } + if err := tm.writeAny(w, v, props); err != nil { + return err + } + if err := w.WriteByte('\n'); err != nil { + return err + } + } + continue + } + if fv.Kind() == reflect.Map { + // Map fields are rendered as a repeated struct with key/value fields. + keys := fv.MapKeys() + sort.Sort(mapKeys(keys)) + for _, key := range keys { + val := fv.MapIndex(key) + if err := writeName(w, props); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte(' '); err != nil { + return err + } + } + // open struct + if err := w.WriteByte('<'); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte('\n'); err != nil { + return err + } + } + w.indent() + // key + if _, err := w.WriteString("key:"); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte(' '); err != nil { + return err + } + } + if err := tm.writeAny(w, key, props.mkeyprop); err != nil { + return err + } + if err := w.WriteByte('\n'); err != nil { + return err + } + // nil values aren't legal, but we can avoid panicking because of them. 
+ if val.Kind() != reflect.Ptr || !val.IsNil() { + // value + if _, err := w.WriteString("value:"); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte(' '); err != nil { + return err + } + } + if err := tm.writeAny(w, val, props.mvalprop); err != nil { + return err + } + if err := w.WriteByte('\n'); err != nil { + return err + } + } + // close struct + w.unindent() + if err := w.WriteByte('>'); err != nil { + return err + } + if err := w.WriteByte('\n'); err != nil { + return err + } + } + continue + } + if props.proto3 && fv.Kind() == reflect.Slice && fv.Len() == 0 { + // empty bytes field + continue + } + if fv.Kind() != reflect.Ptr && fv.Kind() != reflect.Slice { + // proto3 non-repeated scalar field; skip if zero value + if isProto3Zero(fv) { + continue + } + } + + if fv.Kind() == reflect.Interface { + // Check if it is a oneof. + if st.Field(i).Tag.Get("protobuf_oneof") != "" { + // fv is nil, or holds a pointer to generated struct. + // That generated struct has exactly one field, + // which has a protobuf struct tag. + if fv.IsNil() { + continue + } + inner := fv.Elem().Elem() // interface -> *T -> T + tag := inner.Type().Field(0).Tag.Get("protobuf") + props = new(Properties) // Overwrite the outer props var, but not its pointee. + props.Parse(tag) + // Write the value in the oneof, not the oneof itself. + fv = inner.Field(0) + + // Special case to cope with malformed messages gracefully: + // If the value in the oneof is a nil pointer, don't panic + // in writeAny. + if fv.Kind() == reflect.Ptr && fv.IsNil() { + // Use errors.New so writeAny won't render quotes. 
+ msg := errors.New("/* nil */") + fv = reflect.ValueOf(&msg).Elem() + } + } + } + + if err := writeName(w, props); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte(' '); err != nil { + return err + } + } + if b, ok := fv.Interface().(raw); ok { + if err := writeRaw(w, b.Bytes()); err != nil { + return err + } + continue + } + + // Enums have a String method, so writeAny will work fine. + if err := tm.writeAny(w, fv, props); err != nil { + return err + } + + if err := w.WriteByte('\n'); err != nil { + return err + } + } + + // Extensions (the XXX_extensions field). + pv := sv.Addr() + if _, ok := extendable(pv.Interface()); ok { + if err := tm.writeExtensions(w, pv); err != nil { + return err + } + } + + return nil +} + +// writeRaw writes an uninterpreted raw message. +func writeRaw(w *textWriter, b []byte) error { + if err := w.WriteByte('<'); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte('\n'); err != nil { + return err + } + } + w.indent() + if err := writeUnknownStruct(w, b); err != nil { + return err + } + w.unindent() + if err := w.WriteByte('>'); err != nil { + return err + } + return nil +} + +// writeAny writes an arbitrary field. +func (tm *TextMarshaler) writeAny(w *textWriter, v reflect.Value, props *Properties) error { + v = reflect.Indirect(v) + + // Floats have special cases. + if v.Kind() == reflect.Float32 || v.Kind() == reflect.Float64 { + x := v.Float() + var b []byte + switch { + case math.IsInf(x, 1): + b = posInf + case math.IsInf(x, -1): + b = negInf + case math.IsNaN(x): + b = nan + } + if b != nil { + _, err := w.Write(b) + return err + } + // Other values are handled below. + } + + // We don't attempt to serialise every possible value type; only those + // that can occur in protocol buffers. + switch v.Kind() { + case reflect.Slice: + // Should only be a []byte; repeated fields are handled in writeStruct. 
+ if err := writeString(w, string(v.Bytes())); err != nil { + return err + } + case reflect.String: + if err := writeString(w, v.String()); err != nil { + return err + } + case reflect.Struct: + // Required/optional group/message. + var bra, ket byte = '<', '>' + if props != nil && props.Wire == "group" { + bra, ket = '{', '}' + } + if err := w.WriteByte(bra); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte('\n'); err != nil { + return err + } + } + w.indent() + if etm, ok := v.Interface().(encoding.TextMarshaler); ok { + text, err := etm.MarshalText() + if err != nil { + return err + } + if _, err = w.Write(text); err != nil { + return err + } + } else if err := tm.writeStruct(w, v); err != nil { + return err + } + w.unindent() + if err := w.WriteByte(ket); err != nil { + return err + } + default: + _, err := fmt.Fprint(w, v.Interface()) + return err + } + return nil +} + +// equivalent to C's isprint. +func isprint(c byte) bool { + return c >= 0x20 && c < 0x7f +} + +// writeString writes a string in the protocol buffer text format. +// It is similar to strconv.Quote except we don't use Go escape sequences, +// we treat the string as a byte sequence, and we use octal escapes. +// These differences are to maintain interoperability with the other +// languages' implementations of the text format. +func writeString(w *textWriter, s string) error { + // use WriteByte here to get any needed indent + if err := w.WriteByte('"'); err != nil { + return err + } + // Loop over the bytes, not the runes. + for i := 0; i < len(s); i++ { + var err error + // Divergence from C++: we don't escape apostrophes. + // There's no need to escape them, and the C++ parser + // copes with a naked apostrophe. 
+ switch c := s[i]; c { + case '\n': + _, err = w.w.Write(backslashN) + case '\r': + _, err = w.w.Write(backslashR) + case '\t': + _, err = w.w.Write(backslashT) + case '"': + _, err = w.w.Write(backslashDQ) + case '\\': + _, err = w.w.Write(backslashBS) + default: + if isprint(c) { + err = w.w.WriteByte(c) + } else { + _, err = fmt.Fprintf(w.w, "\\%03o", c) + } + } + if err != nil { + return err + } + } + return w.WriteByte('"') +} + +func writeUnknownStruct(w *textWriter, data []byte) (err error) { + if !w.compact { + if _, err := fmt.Fprintf(w, "/* %d unknown bytes */\n", len(data)); err != nil { + return err + } + } + b := NewBuffer(data) + for b.index < len(b.buf) { + x, err := b.DecodeVarint() + if err != nil { + _, err := fmt.Fprintf(w, "/* %v */\n", err) + return err + } + wire, tag := x&7, x>>3 + if wire == WireEndGroup { + w.unindent() + if _, err := w.Write(endBraceNewline); err != nil { + return err + } + continue + } + if _, err := fmt.Fprint(w, tag); err != nil { + return err + } + if wire != WireStartGroup { + if err := w.WriteByte(':'); err != nil { + return err + } + } + if !w.compact || wire == WireStartGroup { + if err := w.WriteByte(' '); err != nil { + return err + } + } + switch wire { + case WireBytes: + buf, e := b.DecodeRawBytes(false) + if e == nil { + _, err = fmt.Fprintf(w, "%q", buf) + } else { + _, err = fmt.Fprintf(w, "/* %v */", e) + } + case WireFixed32: + x, err = b.DecodeFixed32() + err = writeUnknownInt(w, x, err) + case WireFixed64: + x, err = b.DecodeFixed64() + err = writeUnknownInt(w, x, err) + case WireStartGroup: + err = w.WriteByte('{') + w.indent() + case WireVarint: + x, err = b.DecodeVarint() + err = writeUnknownInt(w, x, err) + default: + _, err = fmt.Fprintf(w, "/* unknown wire type %d */", wire) + } + if err != nil { + return err + } + if err = w.WriteByte('\n'); err != nil { + return err + } + } + return nil +} + +func writeUnknownInt(w *textWriter, x uint64, err error) error { + if err == nil { + _, err = 
fmt.Fprint(w, x) + } else { + _, err = fmt.Fprintf(w, "/* %v */", err) + } + return err +} + +type int32Slice []int32 + +func (s int32Slice) Len() int { return len(s) } +func (s int32Slice) Less(i, j int) bool { return s[i] < s[j] } +func (s int32Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +// writeExtensions writes all the extensions in pv. +// pv is assumed to be a pointer to a protocol message struct that is extendable. +func (tm *TextMarshaler) writeExtensions(w *textWriter, pv reflect.Value) error { + emap := extensionMaps[pv.Type().Elem()] + ep, _ := extendable(pv.Interface()) + + // Order the extensions by ID. + // This isn't strictly necessary, but it will give us + // canonical output, which will also make testing easier. + m, mu := ep.extensionsRead() + if m == nil { + return nil + } + mu.Lock() + ids := make([]int32, 0, len(m)) + for id := range m { + ids = append(ids, id) + } + sort.Sort(int32Slice(ids)) + mu.Unlock() + + for _, extNum := range ids { + ext := m[extNum] + var desc *ExtensionDesc + if emap != nil { + desc = emap[extNum] + } + if desc == nil { + // Unknown extension. + if err := writeUnknownStruct(w, ext.enc); err != nil { + return err + } + continue + } + + pb, err := GetExtension(ep, desc) + if err != nil { + return fmt.Errorf("failed getting extension: %v", err) + } + + // Repeated extensions will appear as a slice. 
+ if !desc.repeated() { + if err := tm.writeExtension(w, desc.Name, pb); err != nil { + return err + } + } else { + v := reflect.ValueOf(pb) + for i := 0; i < v.Len(); i++ { + if err := tm.writeExtension(w, desc.Name, v.Index(i).Interface()); err != nil { + return err + } + } + } + } + return nil +} + +func (tm *TextMarshaler) writeExtension(w *textWriter, name string, pb interface{}) error { + if _, err := fmt.Fprintf(w, "[%s]:", name); err != nil { + return err + } + if !w.compact { + if err := w.WriteByte(' '); err != nil { + return err + } + } + if err := tm.writeAny(w, reflect.ValueOf(pb), nil); err != nil { + return err + } + if err := w.WriteByte('\n'); err != nil { + return err + } + return nil +} + +func (w *textWriter) writeIndent() { + if !w.complete { + return + } + remain := w.ind * 2 + for remain > 0 { + n := remain + if n > len(spaces) { + n = len(spaces) + } + w.w.Write(spaces[:n]) + remain -= n + } + w.complete = false +} + +// TextMarshaler is a configurable text format marshaler. +type TextMarshaler struct { + Compact bool // use compact text format (one line). + ExpandAny bool // expand google.protobuf.Any messages of known types +} + +// Marshal writes a given protocol buffer in text format. +// The only errors returned are from w. +func (tm *TextMarshaler) Marshal(w io.Writer, pb Message) error { + val := reflect.ValueOf(pb) + if pb == nil || val.IsNil() { + w.Write([]byte("")) + return nil + } + var bw *bufio.Writer + ww, ok := w.(writer) + if !ok { + bw = bufio.NewWriter(w) + ww = bw + } + aw := &textWriter{ + w: ww, + complete: true, + compact: tm.Compact, + } + + if etm, ok := pb.(encoding.TextMarshaler); ok { + text, err := etm.MarshalText() + if err != nil { + return err + } + if _, err = aw.Write(text); err != nil { + return err + } + if bw != nil { + return bw.Flush() + } + return nil + } + // Dereference the received pointer so we don't have outer < and >. 
+ v := reflect.Indirect(val) + if err := tm.writeStruct(aw, v); err != nil { + return err + } + if bw != nil { + return bw.Flush() + } + return nil +} + +// Text is the same as Marshal, but returns the string directly. +func (tm *TextMarshaler) Text(pb Message) string { + var buf bytes.Buffer + tm.Marshal(&buf, pb) + return buf.String() +} + +var ( + defaultTextMarshaler = TextMarshaler{} + compactTextMarshaler = TextMarshaler{Compact: true} +) + +// TODO: consider removing some of the Marshal functions below. + +// MarshalText writes a given protocol buffer in text format. +// The only errors returned are from w. +func MarshalText(w io.Writer, pb Message) error { return defaultTextMarshaler.Marshal(w, pb) } + +// MarshalTextString is the same as MarshalText, but returns the string directly. +func MarshalTextString(pb Message) string { return defaultTextMarshaler.Text(pb) } + +// CompactText writes a given protocol buffer in compact text format (one line). +func CompactText(w io.Writer, pb Message) error { return compactTextMarshaler.Marshal(w, pb) } + +// CompactTextString is the same as CompactText, but returns the string directly. +func CompactTextString(pb Message) string { return compactTextMarshaler.Text(pb) } diff --git a/vendor/github.com/golang/protobuf/proto/text_parser.go b/vendor/github.com/golang/protobuf/proto/text_parser.go new file mode 100644 index 0000000..5e14513 --- /dev/null +++ b/vendor/github.com/golang/protobuf/proto/text_parser.go @@ -0,0 +1,895 @@ +// Go support for Protocol Buffers - Google's data interchange format +// +// Copyright 2010 The Go Authors. All rights reserved. +// https://github.com/golang/protobuf +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package proto + +// Functions for parsing the Text protocol buffer format. +// TODO: message sets. 
+ +import ( + "encoding" + "errors" + "fmt" + "reflect" + "strconv" + "strings" + "unicode/utf8" +) + +// Error string emitted when deserializing Any and fields are already set +const anyRepeatedlyUnpacked = "Any message unpacked multiple times, or %q already set" + +type ParseError struct { + Message string + Line int // 1-based line number + Offset int // 0-based byte offset from start of input +} + +func (p *ParseError) Error() string { + if p.Line == 1 { + // show offset only for first line + return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message) + } + return fmt.Sprintf("line %d: %v", p.Line, p.Message) +} + +type token struct { + value string + err *ParseError + line int // line number + offset int // byte number from start of input, not start of line + unquoted string // the unquoted version of value, if it was a quoted string +} + +func (t *token) String() string { + if t.err == nil { + return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset) + } + return fmt.Sprintf("parse error: %v", t.err) +} + +type textParser struct { + s string // remaining input + done bool // whether the parsing is finished (success or error) + backed bool // whether back() was called + offset, line int + cur token +} + +func newTextParser(s string) *textParser { + p := new(textParser) + p.s = s + p.line = 1 + p.cur.line = 1 + return p +} + +func (p *textParser) errorf(format string, a ...interface{}) *ParseError { + pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset} + p.cur.err = pe + p.done = true + return pe +} + +// Numbers and identifiers are matched by [-+._A-Za-z0-9] +func isIdentOrNumberChar(c byte) bool { + switch { + case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z': + return true + case '0' <= c && c <= '9': + return true + } + switch c { + case '-', '+', '.', '_': + return true + } + return false +} + +func isWhitespace(c byte) bool { + switch c { + case ' ', '\t', '\n', '\r': + return true + } + return false +} + +func isQuote(c byte) 
bool { + switch c { + case '"', '\'': + return true + } + return false +} + +func (p *textParser) skipWhitespace() { + i := 0 + for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') { + if p.s[i] == '#' { + // comment; skip to end of line or input + for i < len(p.s) && p.s[i] != '\n' { + i++ + } + if i == len(p.s) { + break + } + } + if p.s[i] == '\n' { + p.line++ + } + i++ + } + p.offset += i + p.s = p.s[i:len(p.s)] + if len(p.s) == 0 { + p.done = true + } +} + +func (p *textParser) advance() { + // Skip whitespace + p.skipWhitespace() + if p.done { + return + } + + // Start of non-whitespace + p.cur.err = nil + p.cur.offset, p.cur.line = p.offset, p.line + p.cur.unquoted = "" + switch p.s[0] { + case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/': + // Single symbol + p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)] + case '"', '\'': + // Quoted string + i := 1 + for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' { + if p.s[i] == '\\' && i+1 < len(p.s) { + // skip escaped char + i++ + } + i++ + } + if i >= len(p.s) || p.s[i] != p.s[0] { + p.errorf("unmatched quote") + return + } + unq, err := unquoteC(p.s[1:i], rune(p.s[0])) + if err != nil { + p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err) + return + } + p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)] + p.cur.unquoted = unq + default: + i := 0 + for i < len(p.s) && isIdentOrNumberChar(p.s[i]) { + i++ + } + if i == 0 { + p.errorf("unexpected byte %#x", p.s[0]) + return + } + p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)] + } + p.offset += len(p.cur.value) +} + +var ( + errBadUTF8 = errors.New("proto: bad UTF-8") + errBadHex = errors.New("proto: bad hexadecimal") +) + +func unquoteC(s string, quote rune) (string, error) { + // This is based on C++'s tokenizer.cc. + // Despite its name, this is *not* parsing C syntax. + // For instance, "\0" is an invalid quoted string. + + // Avoid allocation in trivial cases. 
+ simple := true + for _, r := range s { + if r == '\\' || r == quote { + simple = false + break + } + } + if simple { + return s, nil + } + + buf := make([]byte, 0, 3*len(s)/2) + for len(s) > 0 { + r, n := utf8.DecodeRuneInString(s) + if r == utf8.RuneError && n == 1 { + return "", errBadUTF8 + } + s = s[n:] + if r != '\\' { + if r < utf8.RuneSelf { + buf = append(buf, byte(r)) + } else { + buf = append(buf, string(r)...) + } + continue + } + + ch, tail, err := unescape(s) + if err != nil { + return "", err + } + buf = append(buf, ch...) + s = tail + } + return string(buf), nil +} + +func unescape(s string) (ch string, tail string, err error) { + r, n := utf8.DecodeRuneInString(s) + if r == utf8.RuneError && n == 1 { + return "", "", errBadUTF8 + } + s = s[n:] + switch r { + case 'a': + return "\a", s, nil + case 'b': + return "\b", s, nil + case 'f': + return "\f", s, nil + case 'n': + return "\n", s, nil + case 'r': + return "\r", s, nil + case 't': + return "\t", s, nil + case 'v': + return "\v", s, nil + case '?': + return "?", s, nil // trigraph workaround + case '\'', '"', '\\': + return string(r), s, nil + case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X': + if len(s) < 2 { + return "", "", fmt.Errorf(`\%c requires 2 following digits`, r) + } + base := 8 + ss := s[:2] + s = s[2:] + if r == 'x' || r == 'X' { + base = 16 + } else { + ss = string(r) + ss + } + i, err := strconv.ParseUint(ss, base, 8) + if err != nil { + return "", "", err + } + return string([]byte{byte(i)}), s, nil + case 'u', 'U': + n := 4 + if r == 'U' { + n = 8 + } + if len(s) < n { + return "", "", fmt.Errorf(`\%c requires %d digits`, r, n) + } + + bs := make([]byte, n/2) + for i := 0; i < n; i += 2 { + a, ok1 := unhex(s[i]) + b, ok2 := unhex(s[i+1]) + if !ok1 || !ok2 { + return "", "", errBadHex + } + bs[i/2] = a<<4 | b + } + s = s[n:] + return string(bs), s, nil + } + return "", "", fmt.Errorf(`unknown escape \%c`, r) +} + +// Adapted from src/pkg/strconv/quote.go. 
+func unhex(b byte) (v byte, ok bool) { + switch { + case '0' <= b && b <= '9': + return b - '0', true + case 'a' <= b && b <= 'f': + return b - 'a' + 10, true + case 'A' <= b && b <= 'F': + return b - 'A' + 10, true + } + return 0, false +} + +// Back off the parser by one token. Can only be done between calls to next(). +// It makes the next advance() a no-op. +func (p *textParser) back() { p.backed = true } + +// Advances the parser and returns the new current token. +func (p *textParser) next() *token { + if p.backed || p.done { + p.backed = false + return &p.cur + } + p.advance() + if p.done { + p.cur.value = "" + } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) { + // Look for multiple quoted strings separated by whitespace, + // and concatenate them. + cat := p.cur + for { + p.skipWhitespace() + if p.done || !isQuote(p.s[0]) { + break + } + p.advance() + if p.cur.err != nil { + return &p.cur + } + cat.value += " " + p.cur.value + cat.unquoted += p.cur.unquoted + } + p.done = false // parser may have seen EOF, but we want to return cat + p.cur = cat + } + return &p.cur +} + +func (p *textParser) consumeToken(s string) error { + tok := p.next() + if tok.err != nil { + return tok.err + } + if tok.value != s { + p.back() + return p.errorf("expected %q, found %q", s, tok.value) + } + return nil +} + +// Return a RequiredNotSetError indicating which required field was not set. +func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError { + st := sv.Type() + sprops := GetProperties(st) + for i := 0; i < st.NumField(); i++ { + if !isNil(sv.Field(i)) { + continue + } + + props := sprops.Prop[i] + if props.Required { + return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)} + } + } + return &RequiredNotSetError{fmt.Sprintf("%v.", st)} // should not happen +} + +// Returns the index in the struct for the named field, as well as the parsed tag properties. 
+func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) { + i, ok := sprops.decoderOrigNames[name] + if ok { + return i, sprops.Prop[i], true + } + return -1, nil, false +} + +// Consume a ':' from the input stream (if the next token is a colon), +// returning an error if a colon is needed but not present. +func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError { + tok := p.next() + if tok.err != nil { + return tok.err + } + if tok.value != ":" { + // Colon is optional when the field is a group or message. + needColon := true + switch props.Wire { + case "group": + needColon = false + case "bytes": + // A "bytes" field is either a message, a string, or a repeated field; + // those three become *T, *string and []T respectively, so we can check for + // this field being a pointer to a non-string. + if typ.Kind() == reflect.Ptr { + // *T or *string + if typ.Elem().Kind() == reflect.String { + break + } + } else if typ.Kind() == reflect.Slice { + // []T or []*T + if typ.Elem().Kind() != reflect.Ptr { + break + } + } else if typ.Kind() == reflect.String { + // The proto3 exception is for a string field, + // which requires a colon. + break + } + needColon = false + } + if needColon { + return p.errorf("expected ':', found %q", tok.value) + } + p.back() + } + return nil +} + +func (p *textParser) readStruct(sv reflect.Value, terminator string) error { + st := sv.Type() + sprops := GetProperties(st) + reqCount := sprops.reqCount + var reqFieldErr error + fieldSet := make(map[string]bool) + // A struct is a sequence of "name: value", terminated by one of + // '>' or '}', or the end of the input. A name may also be + // "[extension]" or "[type/url]". + // + // The whole struct can also be an expanded Any message, like: + // [type/url] < ... struct contents ... 
> + for { + tok := p.next() + if tok.err != nil { + return tok.err + } + if tok.value == terminator { + break + } + if tok.value == "[" { + // Looks like an extension or an Any. + // + // TODO: Check whether we need to handle + // namespace rooted names (e.g. ".something.Foo"). + extName, err := p.consumeExtName() + if err != nil { + return err + } + + if s := strings.LastIndex(extName, "/"); s >= 0 { + // If it contains a slash, it's an Any type URL. + messageName := extName[s+1:] + mt := MessageType(messageName) + if mt == nil { + return p.errorf("unrecognized message %q in google.protobuf.Any", messageName) + } + tok = p.next() + if tok.err != nil { + return tok.err + } + // consume an optional colon + if tok.value == ":" { + tok = p.next() + if tok.err != nil { + return tok.err + } + } + var terminator string + switch tok.value { + case "<": + terminator = ">" + case "{": + terminator = "}" + default: + return p.errorf("expected '{' or '<', found %q", tok.value) + } + v := reflect.New(mt.Elem()) + if pe := p.readStruct(v.Elem(), terminator); pe != nil { + return pe + } + b, err := Marshal(v.Interface().(Message)) + if err != nil { + return p.errorf("failed to marshal message of type %q: %v", messageName, err) + } + if fieldSet["type_url"] { + return p.errorf(anyRepeatedlyUnpacked, "type_url") + } + if fieldSet["value"] { + return p.errorf(anyRepeatedlyUnpacked, "value") + } + sv.FieldByName("TypeUrl").SetString(extName) + sv.FieldByName("Value").SetBytes(b) + fieldSet["type_url"] = true + fieldSet["value"] = true + continue + } + + var desc *ExtensionDesc + // This could be faster, but it's functional. + // TODO: Do something smarter than a linear scan. 
+ for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) { + if d.Name == extName { + desc = d + break + } + } + if desc == nil { + return p.errorf("unrecognized extension %q", extName) + } + + props := &Properties{} + props.Parse(desc.Tag) + + typ := reflect.TypeOf(desc.ExtensionType) + if err := p.checkForColon(props, typ); err != nil { + return err + } + + rep := desc.repeated() + + // Read the extension structure, and set it in + // the value we're constructing. + var ext reflect.Value + if !rep { + ext = reflect.New(typ).Elem() + } else { + ext = reflect.New(typ.Elem()).Elem() + } + if err := p.readAny(ext, props); err != nil { + if _, ok := err.(*RequiredNotSetError); !ok { + return err + } + reqFieldErr = err + } + ep := sv.Addr().Interface().(Message) + if !rep { + SetExtension(ep, desc, ext.Interface()) + } else { + old, err := GetExtension(ep, desc) + var sl reflect.Value + if err == nil { + sl = reflect.ValueOf(old) // existing slice + } else { + sl = reflect.MakeSlice(typ, 0, 1) + } + sl = reflect.Append(sl, ext) + SetExtension(ep, desc, sl.Interface()) + } + if err := p.consumeOptionalSeparator(); err != nil { + return err + } + continue + } + + // This is a normal, non-extension field. + name := tok.value + var dst reflect.Value + fi, props, ok := structFieldByName(sprops, name) + if ok { + dst = sv.Field(fi) + } else if oop, ok := sprops.OneofTypes[name]; ok { + // It is a oneof. + props = oop.Prop + nv := reflect.New(oop.Type.Elem()) + dst = nv.Elem().Field(0) + field := sv.Field(oop.Field) + if !field.IsNil() { + return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, sv.Type().Field(oop.Field).Name) + } + field.Set(nv) + } + if !dst.IsValid() { + return p.errorf("unknown field name %q in %v", name, st) + } + + if dst.Kind() == reflect.Map { + // Consume any colon. + if err := p.checkForColon(props, dst.Type()); err != nil { + return err + } + + // Construct the map if it doesn't already exist. 
+ if dst.IsNil() { + dst.Set(reflect.MakeMap(dst.Type())) + } + key := reflect.New(dst.Type().Key()).Elem() + val := reflect.New(dst.Type().Elem()).Elem() + + // The map entry should be this sequence of tokens: + // < key : KEY value : VALUE > + // However, implementations may omit key or value, and technically + // we should support them in any order. See b/28924776 for a time + // this went wrong. + + tok := p.next() + var terminator string + switch tok.value { + case "<": + terminator = ">" + case "{": + terminator = "}" + default: + return p.errorf("expected '{' or '<', found %q", tok.value) + } + for { + tok := p.next() + if tok.err != nil { + return tok.err + } + if tok.value == terminator { + break + } + switch tok.value { + case "key": + if err := p.consumeToken(":"); err != nil { + return err + } + if err := p.readAny(key, props.mkeyprop); err != nil { + return err + } + if err := p.consumeOptionalSeparator(); err != nil { + return err + } + case "value": + if err := p.checkForColon(props.mvalprop, dst.Type().Elem()); err != nil { + return err + } + if err := p.readAny(val, props.mvalprop); err != nil { + return err + } + if err := p.consumeOptionalSeparator(); err != nil { + return err + } + default: + p.back() + return p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value) + } + } + + dst.SetMapIndex(key, val) + continue + } + + // Check that it's not already set if it's not a repeated field. + if !props.Repeated && fieldSet[name] { + return p.errorf("non-repeated field %q was repeated", name) + } + + if err := p.checkForColon(props, dst.Type()); err != nil { + return err + } + + // Parse into the field. 
+ fieldSet[name] = true + if err := p.readAny(dst, props); err != nil { + if _, ok := err.(*RequiredNotSetError); !ok { + return err + } + reqFieldErr = err + } + if props.Required { + reqCount-- + } + + if err := p.consumeOptionalSeparator(); err != nil { + return err + } + + } + + if reqCount > 0 { + return p.missingRequiredFieldError(sv) + } + return reqFieldErr +} + +// consumeExtName consumes extension name or expanded Any type URL and the +// following ']'. It returns the name or URL consumed. +func (p *textParser) consumeExtName() (string, error) { + tok := p.next() + if tok.err != nil { + return "", tok.err + } + + // If extension name or type url is quoted, it's a single token. + if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] { + name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0])) + if err != nil { + return "", err + } + return name, p.consumeToken("]") + } + + // Consume everything up to "]" + var parts []string + for tok.value != "]" { + parts = append(parts, tok.value) + tok = p.next() + if tok.err != nil { + return "", p.errorf("unrecognized type_url or extension name: %s", tok.err) + } + } + return strings.Join(parts, ""), nil +} + +// consumeOptionalSeparator consumes an optional semicolon or comma. +// It is used in readStruct to provide backward compatibility. 
+func (p *textParser) consumeOptionalSeparator() error { + tok := p.next() + if tok.err != nil { + return tok.err + } + if tok.value != ";" && tok.value != "," { + p.back() + } + return nil +} + +func (p *textParser) readAny(v reflect.Value, props *Properties) error { + tok := p.next() + if tok.err != nil { + return tok.err + } + if tok.value == "" { + return p.errorf("unexpected EOF") + } + + switch fv := v; fv.Kind() { + case reflect.Slice: + at := v.Type() + if at.Elem().Kind() == reflect.Uint8 { + // Special case for []byte + if tok.value[0] != '"' && tok.value[0] != '\'' { + // Deliberately written out here, as the error after + // this switch statement would write "invalid []byte: ...", + // which is not as user-friendly. + return p.errorf("invalid string: %v", tok.value) + } + bytes := []byte(tok.unquoted) + fv.Set(reflect.ValueOf(bytes)) + return nil + } + // Repeated field. + if tok.value == "[" { + // Repeated field with list notation, like [1,2,3]. + for { + fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem())) + err := p.readAny(fv.Index(fv.Len()-1), props) + if err != nil { + return err + } + tok := p.next() + if tok.err != nil { + return tok.err + } + if tok.value == "]" { + break + } + if tok.value != "," { + return p.errorf("Expected ']' or ',' found %q", tok.value) + } + } + return nil + } + // One value of the repeated field. + p.back() + fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem())) + return p.readAny(fv.Index(fv.Len()-1), props) + case reflect.Bool: + // true/1/t/True or false/f/0/False. + switch tok.value { + case "true", "1", "t", "True": + fv.SetBool(true) + return nil + case "false", "0", "f", "False": + fv.SetBool(false) + return nil + } + case reflect.Float32, reflect.Float64: + v := tok.value + // Ignore 'f' for compatibility with output generated by C++, but don't + // remove 'f' when the value is "-inf" or "inf". 
+ if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" { + v = v[:len(v)-1] + } + if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil { + fv.SetFloat(f) + return nil + } + case reflect.Int32: + if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil { + fv.SetInt(x) + return nil + } + + if len(props.Enum) == 0 { + break + } + m, ok := enumValueMaps[props.Enum] + if !ok { + break + } + x, ok := m[tok.value] + if !ok { + break + } + fv.SetInt(int64(x)) + return nil + case reflect.Int64: + if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil { + fv.SetInt(x) + return nil + } + + case reflect.Ptr: + // A basic field (indirected through pointer), or a repeated message/group + p.back() + fv.Set(reflect.New(fv.Type().Elem())) + return p.readAny(fv.Elem(), props) + case reflect.String: + if tok.value[0] == '"' || tok.value[0] == '\'' { + fv.SetString(tok.unquoted) + return nil + } + case reflect.Struct: + var terminator string + switch tok.value { + case "{": + terminator = "}" + case "<": + terminator = ">" + default: + return p.errorf("expected '{' or '<', found %q", tok.value) + } + // TODO: Handle nested messages which implement encoding.TextUnmarshaler. + return p.readStruct(fv, terminator) + case reflect.Uint32: + if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil { + fv.SetUint(x) + return nil + } + case reflect.Uint64: + if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil { + fv.SetUint(x) + return nil + } + } + return p.errorf("invalid %v: %v", v.Type(), tok.value) +} + +// UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb +// before starting to unmarshal, so any existing data in pb is always removed. +// If a required field is not set and no other error occurs, +// UnmarshalText returns *RequiredNotSetError. 
+func UnmarshalText(s string, pb Message) error { + if um, ok := pb.(encoding.TextUnmarshaler); ok { + err := um.UnmarshalText([]byte(s)) + return err + } + pb.Reset() + v := reflect.ValueOf(pb) + if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil { + return pe + } + return nil +} diff --git a/vendor/github.com/mrunalp/fileutils/LICENSE b/vendor/github.com/mrunalp/fileutils/LICENSE new file mode 100644 index 0000000..2744858 --- /dev/null +++ b/vendor/github.com/mrunalp/fileutils/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2014 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
// CopyFile copies the filesystem object at source to dest, preserving its
// owner and group and, for everything except symbolic links, its mode.
//
// source is inspected with Lstat, so it is never followed:
//   - a symbolic link is recreated at dest with the same target;
//   - a block or character device is recreated with mknod(2) using the same
//     device number;
//   - a regular file has its contents copied (dest is created or truncated).
//
// For any other file type nothing is created; the ownership and mode changes
// are then applied to whatever already exists at dest and fail if nothing
// does. The first error encountered is returned unchanged.
func CopyFile(source string, dest string) error {
	si, err := os.Lstat(source)
	if err != nil {
		return err
	}

	st, ok := si.Sys().(*syscall.Stat_t)
	if !ok {
		return fmt.Errorf("could not convert to syscall.Stat_t")
	}

	isSymlink := si.Mode()&os.ModeSymlink != 0
	fileType := st.Mode & syscall.S_IFMT

	// The three handled file types are mutually exclusive.
	switch {
	case isSymlink:
		target, err := os.Readlink(source)
		if err != nil {
			return err
		}
		if err := os.Symlink(target, dest); err != nil {
			return err
		}
	case fileType == syscall.S_IFBLK || fileType == syscall.S_IFCHR:
		devMajor := int64(major(uint64(st.Rdev)))
		devMinor := int64(minor(uint64(st.Rdev)))
		// Permission bits from the source plus the same device-type bit.
		mode := uint32(si.Mode()&07777) | uint32(fileType)
		if err := syscall.Mknod(dest, mode, int(mkdev(devMajor, devMinor))); err != nil {
			return err
		}
	case si.Mode().IsRegular():
		if err := copyRegularFile(source, dest); err != nil {
			return err
		}
	}

	// Lchown so that a symlink itself, not its target, gets the new owner.
	if err := os.Lchown(dest, int(st.Uid), int(st.Gid)); err != nil {
		return err
	}

	// os.Chmod follows symlinks, so it is skipped for links: it would change
	// the link target rather than the link.
	if !isSymlink {
		if err := os.Chmod(dest, si.Mode()); err != nil {
			return err
		}
	}

	return nil
}

// copyRegularFile streams the contents of source into dest, creating or
// truncating dest.
func copyRegularFile(source, dest string) error {
	sf, err := os.Open(source)
	if err != nil {
		return err
	}
	defer sf.Close() // read-only handle; nothing can be lost on close

	df, err := os.Create(dest)
	if err != nil {
		return err
	}
	if _, err := io.Copy(df, sf); err != nil {
		df.Close()
		return err
	}
	// Close explicitly instead of deferring: for a file that was just written,
	// Close is the last place a failed write can be reported.
	return df.Close()
}

// CopyDirectory copies the files under the source directory
// to dest directory. The dest directory is created if it
// does not exist.
//
// dest (and any missing parents) is created with source's mode and owner via
// MkdirAllNewAs. Sub-directories are created with os.Mkdir and chowned to
// match the original; every other entry is copied with CopyFile. The walk
// stops at, and returns, the first error.
func CopyDirectory(source string, dest string) error {
	fi, err := os.Stat(source)
	if err != nil {
		return err
	}

	// Get owner.
	st, ok := fi.Sys().(*syscall.Stat_t)
	if !ok {
		return fmt.Errorf("could not convert to syscall.Stat_t")
	}

	// We have to pick an owner here anyway.
	if err := MkdirAllNewAs(dest, fi.Mode(), int(st.Uid), int(st.Gid)); err != nil {
		return err
	}

	return filepath.Walk(source, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// Get the relative path. A failure here used to be swallowed, which
		// silently dropped the entry from the copy; report it instead.
		relPath, err := filepath.Rel(source, path)
		if err != nil {
			return err
		}
		target := filepath.Join(dest, relPath)

		if !info.IsDir() {
			return CopyFile(path, target)
		}

		// The source directory itself was already created above.
		if path == source {
			return nil
		}

		st, ok := info.Sys().(*syscall.Stat_t)
		if !ok {
			return fmt.Errorf("could not convert to syscall.Stat_t")
		}
		if err := os.Mkdir(target, info.Mode()); err != nil {
			return err
		}
		return os.Lchown(target, int(st.Uid), int(st.Gid))
	})
}

// major returns the major number of device: bits 8-19 of the device number.
func major(device uint64) uint64 {
	return (device >> 8) & 0xfff
}

// minor returns the minor number of device: its low 8 bits combined with
// bits 20-31 moved down to bits 8-19.
func minor(device uint64) uint64 {
	return (device & 0xff) | ((device >> 12) & 0xfff00)
}

// mkdev packs a major and minor number into a device number using the layout
// that major and minor decode.
func mkdev(major int64, minor int64) uint32 {
	return uint32(((minor & 0xfff00) << 12) | ((major & 0xfff) << 8) | (minor & 0xff))
}

// MkdirAllNewAs creates a directory (include any along the path) and then modifies
// ownership ONLY of newly created directories to the requested uid/gid. If the
// directories along the path exist, no change of ownership will be performed.
//
// If path itself already exists the call is a no-op and returns nil. Both
// absolute and relative paths are accepted.
func MkdirAllNewAs(path string, mode os.FileMode, ownerUID, ownerGID int) error {
	// created collects path plus every ancestor that does not exist before
	// MkdirAll runs, so that exactly those can be chowned afterwards.
	var created []string
	if _, err := os.Stat(path); err != nil && os.IsNotExist(err) {
		created = []string{path}
	} else if err == nil {
		// nothing to do; directory path fully exists already
		return nil
	}

	// Walk up towards the root looking for ancestors which do not exist.
	// filepath.Dir converges on "/" for absolute paths and on "." for relative
	// ones; stopping when it no longer changes handles both. (Comparing
	// against "/" only never terminated for a relative path.)
	dir := filepath.Dir(path)
	for {
		parent := filepath.Dir(dir)
		if parent == dir {
			break
		}
		if _, err := os.Stat(dir); err != nil && os.IsNotExist(err) {
			created = append(created, dir)
		}
		dir = parent
	}

	if err := os.MkdirAll(path, mode); err != nil && !os.IsExist(err) {
		return err
	}

	// Chown only what was recorded as missing before MkdirAll ran.
	for _, pathComponent := range created {
		if err := os.Chown(pathComponent, ownerUID, ownerGID); err != nil {
			return err
		}
	}
	return nil
}
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2015 The Linux Foundation. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/runtime-spec/README.md b/vendor/github.com/opencontainers/runtime-spec/README.md new file mode 100644 index 0000000..b40dba1 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/README.md @@ -0,0 +1,153 @@ +# Open Container Initiative Runtime Specification + +The [Open Container Initiative][oci] develops specifications for standards on Operating System process and application containers. + +The specification can be found [here](spec.md). + +## Table of Contents + +Additional documentation about how this group operates: + +- [Code of Conduct][code-of-conduct] +- [Style and Conventions](style.md) +- [Implementations](implementations.md) +- [Releases](RELEASES.md) +- [project](project.md) +- [charter][charter] + +## Use Cases + +To provide context for users the following section gives example use cases for each part of the spec. + +### Application Bundle Builders + +Application bundle builders can create a [bundle](bundle.md) directory that includes all of the files required for launching an application as a container. +The bundle contains an OCI [configuration file](config.md) where the builder can specify host-independent details such as [which executable to launch](config.md#process) and host-specific settings such as [mount](config.md#mounts) locations, [hook](config.md#posix-platform-hooks) paths, Linux [namespaces](config-linux.md#namespaces) and [cgroups](config-linux.md#control-groups). +Because the configuration includes host-specific settings, application bundle directories copied between two hosts may require configuration adjustments. + +### Hook Developers + +[Hook](config.md#posix-platform-hooks) developers can extend the functionality of an OCI-compliant runtime by hooking into a container's lifecycle with an external application. 
+Example use cases include sophisticated network configuration, volume garbage collection, etc. + +### Runtime Developers + +Runtime developers can build runtime implementations that run OCI-compliant bundles and container configuration, containing low-level OS and host-specific details, on a particular platform. + +## Contributing + +Development happens on GitHub for the spec. +Issues are used for bugs and actionable items and longer discussions can happen on the [mailing list](#mailing-list). + +The specification and code is licensed under the Apache 2.0 license found in the [LICENSE](./LICENSE) file. + +### Discuss your design + +The project welcomes submissions, but please let everyone know what you are working on. + +Before undertaking a nontrivial change to this specification, send mail to the [mailing list](#mailing-list) to discuss what you plan to do. +This gives everyone a chance to validate the design, helps prevent duplication of effort, and ensures that the idea fits. +It also guarantees that the design is sound before code is written; a GitHub pull-request is not the place for high-level discussions. + +Typos and grammatical errors can go straight to a pull-request. +When in doubt, start on the [mailing-list](#mailing-list). + +### Meetings + +The contributors and maintainers of all OCI projects have monthly meetings, which are usually at 2:00 PM (USA Pacific) on the first Wednesday of every month. +There is an [iCalendar][rfc5545] format for the meetings [here](meeting.ics). +Everyone is welcome to participate via [UberConference web][uberconference] or audio-only: +1 415 968 0849 (no PIN needed). +An initial agenda will be posted to the [mailing list](#mailing-list) in the week before each meeting, and everyone is welcome to propose additional topics or suggest other agenda alterations there. 
+Minutes are posted to the [mailing list](#mailing-list) and minutes from past calls are archived [here][minutes], with minutes from especially old meetings (September 2015 and earlier) archived [here][runtime-wiki]. + +### Mailing List + +You can subscribe and join the mailing list on [Google Groups][dev-list]. + +### IRC + +OCI discussion happens on #opencontainers on Freenode ([logs][irc-logs]). + +### Git commit + +#### Sign your work + +The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open-source patch. +The rules are pretty simple: if you can certify the below (from http://developercertificate.org): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. 
+ +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +then you just add a line to every git commit message: + + Signed-off-by: Joe Smith <joe.smith@example.com> + +using your real name (sorry, no pseudonyms or anonymous contributions). + +You can add the sign-off when creating the git commit via `git commit -s`. + +#### Commit Style + +Simple house-keeping for clean git history. +Read more on [How to Write a Git Commit Message][how-to-git-commit] or the Discussion section of [git-commit(1)][git-commit.1]. + +1. Separate the subject from the body with a blank line +2. Limit the subject line to 50 characters +3. Capitalize the subject line +4. Do not end the subject line with a period +5. Use the imperative mood in the subject line +6. Wrap the body at 72 characters +7. Use the body to explain what and why vs. how + * If there was important/useful/essential conversation or information, copy or include a reference +8. When possible, one keyword to scope the change in the subject (e.g.
"README: ...", "runtime: ...") + + +[charter]: https://www.opencontainers.org/about/governance +[code-of-conduct]: https://github.com/opencontainers/tob/blob/master/code-of-conduct.md +[dev-list]: https://groups.google.com/a/opencontainers.org/forum/#!forum/dev +[how-to-git-commit]: http://chris.beams.io/posts/git-commit +[irc-logs]: http://ircbot.wl.linuxfoundation.org/eavesdrop/%23opencontainers/ +[iso-week]: https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_of_a_given_date +[minutes]: http://ircbot.wl.linuxfoundation.org/meetings/opencontainers/ +[oci]: https://www.opencontainers.org +[rfc5545]: https://tools.ietf.org/html/rfc5545 +[runtime-wiki]: https://github.com/opencontainers/runtime-spec/wiki +[uberconference]: https://www.uberconference.com/opencontainers + +[git-commit.1]: http://git-scm.com/docs/git-commit diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go new file mode 100644 index 0000000..27268f9 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go @@ -0,0 +1,642 @@ +package specs + +import "os" + +// Spec is the base configuration for the container. +type Spec struct { + // Version of the Open Container Initiative Runtime Specification with which the bundle complies. + Version string `json:"ociVersion"` + // Process configures the container process. + Process *Process `json:"process,omitempty"` + // Root configures the container's root filesystem. + Root *Root `json:"root,omitempty"` + // Hostname configures the container's hostname. + Hostname string `json:"hostname,omitempty"` + // Mounts configures additional mounts (on top of Root). + Mounts []Mount `json:"mounts,omitempty"` + // Hooks configures callbacks for container lifecycle events. + Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"` + // Annotations contains arbitrary metadata for the container. 
+ Annotations map[string]string `json:"annotations,omitempty"` + + // Linux is platform-specific configuration for Linux based containers. + Linux *Linux `json:"linux,omitempty" platform:"linux"` + // Solaris is platform-specific configuration for Solaris based containers. + Solaris *Solaris `json:"solaris,omitempty" platform:"solaris"` + // Windows is platform-specific configuration for Windows based containers. + Windows *Windows `json:"windows,omitempty" platform:"windows"` + // VM specifies configuration for virtual-machine-based containers. + VM *VM `json:"vm,omitempty" platform:"vm"` +} + +// Process contains information to start a specific application inside the container. +type Process struct { + // Terminal creates an interactive terminal for the container. + Terminal bool `json:"terminal,omitempty"` + // ConsoleSize specifies the size of the console. + ConsoleSize *Box `json:"consoleSize,omitempty"` + // User specifies user information for the process. + User User `json:"user"` + // Args specifies the binary and arguments for the application to execute. + Args []string `json:"args,omitempty"` + // CommandLine specifies the full command line for the application to execute on Windows. + CommandLine string `json:"commandLine,omitempty" platform:"windows"` + // Env populates the process environment for the process. + Env []string `json:"env,omitempty"` + // Cwd is the current working directory for the process and must be + // relative to the container's root. + Cwd string `json:"cwd"` + // Capabilities are Linux capabilities that are kept for the process. + Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"` + // Rlimits specifies rlimit options to apply to the process. + Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"` + // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. 
+ NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"` + // ApparmorProfile specifies the apparmor profile for the container. + ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"` + // OOMScoreAdj specifies the oom_score_adj for the container. + OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"` + // SelinuxLabel specifies the selinux context that the container process is run as. + SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` +} + +// LinuxCapabilities specifies the whitelist of capabilities that are kept for a process. +// http://man7.org/linux/man-pages/man7/capabilities.7.html +type LinuxCapabilities struct { + // Bounding is the set of capabilities checked by the kernel. + Bounding []string `json:"bounding,omitempty" platform:"linux"` + // Effective is the set of capabilities checked by the kernel. + Effective []string `json:"effective,omitempty" platform:"linux"` + // Inheritable is the set of capabilities preserved across execve. + Inheritable []string `json:"inheritable,omitempty" platform:"linux"` + // Permitted is the limiting superset for effective capabilities. + Permitted []string `json:"permitted,omitempty" platform:"linux"` + // Ambient is the ambient set of capabilities that are kept. + Ambient []string `json:"ambient,omitempty" platform:"linux"` +} + +// Box specifies dimensions of a rectangle. Used for specifying the size of a console. +type Box struct { + // Height is the vertical dimension of a box. + Height uint `json:"height"` + // Width is the horizontal dimension of a box. + Width uint `json:"width"` +} + +// User specifies user (and group) information for the container process. +type User struct { + // UID is the user id. + UID uint32 `json:"uid" platform:"linux,solaris"` + // GID is the group id. + GID uint32 `json:"gid" platform:"linux,solaris"` + // AdditionalGids are additional group ids set for the container's process.
+ AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"` + // Username is the user name. + Username string `json:"username,omitempty" platform:"windows"` +} + +// Root contains information about the container's root filesystem on the host. +type Root struct { + // Path is the absolute path to the container's root filesystem. + Path string `json:"path"` + // Readonly makes the root filesystem for the container readonly before the process is executed. + Readonly bool `json:"readonly,omitempty"` +} + +// Mount specifies a mount for a container. +type Mount struct { + // Destination is the absolute path where the mount will be placed in the container. + Destination string `json:"destination"` + // Type specifies the mount kind. + Type string `json:"type,omitempty" platform:"linux,solaris"` + // Source specifies the source path of the mount. + Source string `json:"source,omitempty"` + // Options are fstab style mount options. + Options []string `json:"options,omitempty"` +} + +// Hook specifies a command that is run at a particular event in the lifecycle of a container. +type Hook struct { + Path string `json:"path"` + Args []string `json:"args,omitempty"` + Env []string `json:"env,omitempty"` + Timeout *int `json:"timeout,omitempty"` +} + +// Hooks specifies the hooks for container setup and teardown. +type Hooks struct { + // Prestart is a list of hooks to be run before the container process is executed. + Prestart []Hook `json:"prestart,omitempty"` + // Poststart is a list of hooks to be run after the container process is started. + Poststart []Hook `json:"poststart,omitempty"` + // Poststop is a list of hooks to be run after the container process exits. + Poststop []Hook `json:"poststop,omitempty"` +} + +// Linux contains platform-specific configuration for Linux based containers. +type Linux struct { + // UIDMappings specifies user mappings for supporting user namespaces.
+ UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty"` + // GIDMappings specifies group mappings for supporting user namespaces. + GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty"` + // Sysctl is a set of key-value pairs that are set for the container on start. + Sysctl map[string]string `json:"sysctl,omitempty"` + // Resources contains cgroup information for handling resource constraints + // for the container. + Resources *LinuxResources `json:"resources,omitempty"` + // CgroupsPath specifies the path to cgroups that are created and/or joined by the container. + // The path is expected to be relative to the cgroups mountpoint. + // If resources are specified, the cgroups at CgroupsPath will be updated based on resources. + CgroupsPath string `json:"cgroupsPath,omitempty"` + // Namespaces contains the namespaces that are created and/or joined by the container. + Namespaces []LinuxNamespace `json:"namespaces,omitempty"` + // Devices is a list of device nodes that are created for the container. + Devices []LinuxDevice `json:"devices,omitempty"` + // Seccomp specifies the seccomp security settings for the container. + Seccomp *LinuxSeccomp `json:"seccomp,omitempty"` + // RootfsPropagation is the rootfs mount propagation mode for the container. + RootfsPropagation string `json:"rootfsPropagation,omitempty"` + // MaskedPaths masks over the provided paths inside the container. + MaskedPaths []string `json:"maskedPaths,omitempty"` + // ReadonlyPaths sets the provided paths as RO inside the container. + ReadonlyPaths []string `json:"readonlyPaths,omitempty"` + // MountLabel specifies the selinux context for the mounts in the container.
+ MountLabel string `json:"mountLabel,omitempty"` + // IntelRdt contains Intel Resource Director Technology (RDT) information for + // handling resource constraints (e.g., L3 cache, memory bandwidth) for the container. + IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` +} + +// LinuxNamespace is the configuration for a Linux namespace. +type LinuxNamespace struct { + // Type is the type of namespace. + Type LinuxNamespaceType `json:"type"` + // Path is a path to an existing namespace persisted on disk that can be joined + // and is of the same type. + Path string `json:"path,omitempty"` +} + +// LinuxNamespaceType is one of the Linux namespaces. NOTE(review): of the constants below only PIDNamespace is declared with this type; the others are untyped string constants - confirm this is intended. +type LinuxNamespaceType string + +const ( + // PIDNamespace for isolating process IDs + PIDNamespace LinuxNamespaceType = "pid" + // NetworkNamespace for isolating network devices, stacks, ports, etc. + NetworkNamespace = "network" + // MountNamespace for isolating mount points + MountNamespace = "mount" + // IPCNamespace for isolating System V IPC, POSIX message queues + IPCNamespace = "ipc" + // UTSNamespace for isolating hostname and NIS domain name + UTSNamespace = "uts" + // UserNamespace for isolating user and group IDs + UserNamespace = "user" + // CgroupNamespace for isolating cgroup hierarchies + CgroupNamespace = "cgroup" +) + +// LinuxIDMapping specifies UID/GID mappings. +type LinuxIDMapping struct { + // ContainerID is the starting UID/GID in the container + ContainerID uint32 `json:"containerID"` + // HostID is the starting UID/GID on the host to be mapped to 'ContainerID' + HostID uint32 `json:"hostID"` + // Size is the number of IDs to be mapped + Size uint32 `json:"size"` +} + +// POSIXRlimit describes an rlimit: its type and its hard and soft limits. +type POSIXRlimit struct { + // Type of the rlimit to set + Type string `json:"type"` + // Hard is the hard limit for the specified type + Hard uint64 `json:"hard"` + // Soft is the soft limit for the specified type + Soft uint64 `json:"soft"` +} + +// LinuxHugepageLimit structure corresponds to
limiting kernel hugepages +type LinuxHugepageLimit struct { + // Pagesize is the hugepage size + Pagesize string `json:"pageSize"` + // Limit is the limit of "hugepagesize" hugetlb usage + Limit uint64 `json:"limit"` +} + +// LinuxInterfacePriority for network interfaces +type LinuxInterfacePriority struct { + // Name is the name of the network interface + Name string `json:"name"` + // Priority for the interface + Priority uint32 `json:"priority"` +} + +// linuxBlockIODevice holds major:minor format supported in blkio cgroup +type linuxBlockIODevice struct { + // Major is the device's major number. + Major int64 `json:"major"` + // Minor is the device's minor number. + Minor int64 `json:"minor"` +} + +// LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice +type LinuxWeightDevice struct { + linuxBlockIODevice + // Weight is the bandwidth rate for the device. + Weight *uint16 `json:"weight,omitempty"` + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only + LeafWeight *uint16 `json:"leafWeight,omitempty"` +} + +// LinuxThrottleDevice struct holds a `major:minor rate_per_second` pair +type LinuxThrottleDevice struct { + linuxBlockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// LinuxBlockIO for Linux cgroup 'blkio' resource management +type LinuxBlockIO struct { + // Specifies per cgroup weight + Weight *uint16 `json:"weight,omitempty"` + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, CFQ scheduler only + LeafWeight *uint16 `json:"leafWeight,omitempty"` + // Weight per cgroup per device, can override BlkioWeight + WeightDevice []LinuxWeightDevice `json:"weightDevice,omitempty"` + // IO read rate limit per cgroup per device, bytes per second + ThrottleReadBpsDevice []LinuxThrottleDevice `json:"throttleReadBpsDevice,omitempty"` + // IO write rate limit per cgroup per device, bytes 
per second + ThrottleWriteBpsDevice []LinuxThrottleDevice `json:"throttleWriteBpsDevice,omitempty"` + // IO read rate limit per cgroup per device, IO per second + ThrottleReadIOPSDevice []LinuxThrottleDevice `json:"throttleReadIOPSDevice,omitempty"` + // IO write rate limit per cgroup per device, IO per second + ThrottleWriteIOPSDevice []LinuxThrottleDevice `json:"throttleWriteIOPSDevice,omitempty"` +} + +// LinuxMemory for Linux cgroup 'memory' resource management +type LinuxMemory struct { + // Memory limit (in bytes). + Limit *int64 `json:"limit,omitempty"` + // Memory reservation or soft_limit (in bytes). + Reservation *int64 `json:"reservation,omitempty"` + // Total memory limit (memory + swap). + Swap *int64 `json:"swap,omitempty"` + // Kernel memory limit (in bytes). + Kernel *int64 `json:"kernel,omitempty"` + // Kernel memory limit for tcp (in bytes) + KernelTCP *int64 `json:"kernelTCP,omitempty"` + // How aggressive the kernel will swap memory pages. + Swappiness *uint64 `json:"swappiness,omitempty"` + // DisableOOMKiller disables the OOM killer for out of memory conditions + DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"` +} + +// LinuxCPU for Linux cgroup 'cpu' resource management +type LinuxCPU struct { + // CPU shares (relative weight (ratio) vs. other cgroups with cpu shares). + Shares *uint64 `json:"shares,omitempty"` + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + Quota *int64 `json:"quota,omitempty"` + // CPU period to be used for hardcapping (in usecs). + Period *uint64 `json:"period,omitempty"` + // How much time realtime scheduling may use (in usecs). + RealtimeRuntime *int64 `json:"realtimeRuntime,omitempty"` + // CPU period to be used for realtime scheduling (in usecs). + RealtimePeriod *uint64 `json:"realtimePeriod,omitempty"` + // CPUs to use within the cpuset. Default is to use any CPU available. + Cpus string `json:"cpus,omitempty"` + // List of memory nodes in the cpuset. 
Default is to use any available memory node. + Mems string `json:"mems,omitempty"` +} + +// LinuxPids for Linux cgroup 'pids' resource management (Linux 4.3) +type LinuxPids struct { + // Maximum number of PIDs. Default is "no limit". + Limit int64 `json:"limit"` +} + +// LinuxNetwork identification and priority configuration +type LinuxNetwork struct { + // Set class identifier for container's network packets + ClassID *uint32 `json:"classID,omitempty"` + // Set priority of network traffic for container + Priorities []LinuxInterfacePriority `json:"priorities,omitempty"` +} + +// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11) +type LinuxRdma struct { + // Maximum number of HCA handles that can be opened. Default is "no limit". + HcaHandles *uint32 `json:"hcaHandles,omitempty"` + // Maximum number of HCA objects that can be created. Default is "no limit". + HcaObjects *uint32 `json:"hcaObjects,omitempty"` +} + +// LinuxResources has container runtime resource constraints +type LinuxResources struct { + // Devices configures the device whitelist. + Devices []LinuxDeviceCgroup `json:"devices,omitempty"` + // Memory restriction configuration + Memory *LinuxMemory `json:"memory,omitempty"` + // CPU resource restriction configuration + CPU *LinuxCPU `json:"cpu,omitempty"` + // Task resource restriction configuration. + Pids *LinuxPids `json:"pids,omitempty"` + // BlockIO restriction configuration + BlockIO *LinuxBlockIO `json:"blockIO,omitempty"` + // Hugetlb limit (in bytes) + HugepageLimits []LinuxHugepageLimit `json:"hugepageLimits,omitempty"` + // Network restriction configuration + Network *LinuxNetwork `json:"network,omitempty"` + // Rdma resource restriction configuration. + // Limits are a set of key value pairs that define RDMA resource limits, + // where the key is device name and value is resource limits. 
+ Rdma map[string]LinuxRdma `json:"rdma,omitempty"` +} + +// LinuxDevice represents the mknod information for a Linux special device file. +type LinuxDevice struct { + // Path to the device. + Path string `json:"path"` + // Device type, block, char, etc. + Type string `json:"type"` + // Major is the device's major number. + Major int64 `json:"major"` + // Minor is the device's minor number. + Minor int64 `json:"minor"` + // FileMode permission bits for the device. + FileMode *os.FileMode `json:"fileMode,omitempty"` + // UID of the device. + UID *uint32 `json:"uid,omitempty"` + // GID of the device. + GID *uint32 `json:"gid,omitempty"` +} + +// LinuxDeviceCgroup represents a device rule for the whitelist controller. +type LinuxDeviceCgroup struct { + // Allow is true for an allow rule and false for a deny rule. + Allow bool `json:"allow"` + // Device type, block, char, etc. + Type string `json:"type,omitempty"` + // Major is the device's major number. + Major *int64 `json:"major,omitempty"` + // Minor is the device's minor number. + Minor *int64 `json:"minor,omitempty"` + // Cgroup access permissions format, rwm. + Access string `json:"access,omitempty"` +} + +// Solaris contains platform-specific configuration for Solaris application containers. +type Solaris struct { + // SMF FMRI which should go "online" before we start the container process. + Milestone string `json:"milestone,omitempty"` + // Maximum set of privileges any process in this container can obtain. + LimitPriv string `json:"limitpriv,omitempty"` + // The maximum amount of shared memory allowed for this container. + MaxShmMemory string `json:"maxShmMemory,omitempty"` + // Specification for automatic creation of network resources for this container. + Anet []SolarisAnet `json:"anet,omitempty"` + // Set limit on the amount of CPU time that can be used by container. + CappedCPU *SolarisCappedCPU `json:"cappedCPU,omitempty"` + // The physical and swap caps on the memory that can be used by this container.
+ CappedMemory *SolarisCappedMemory `json:"cappedMemory,omitempty"` +} + +// SolarisCappedCPU allows users to set limit on the amount of CPU time that can be used by container. +type SolarisCappedCPU struct { + Ncpus string `json:"ncpus,omitempty"` +} + +// SolarisCappedMemory allows users to set the physical and swap caps on the memory that can be used by this container. +type SolarisCappedMemory struct { + Physical string `json:"physical,omitempty"` + Swap string `json:"swap,omitempty"` +} + +// SolarisAnet provides the specification for automatic creation of network resources for this container. +type SolarisAnet struct { + // Specify a name for the automatically created VNIC datalink. + Linkname string `json:"linkname,omitempty"` + // Specify the link over which the VNIC will be created. + Lowerlink string `json:"lowerLink,omitempty"` + // The set of IP addresses that the container can use. + Allowedaddr string `json:"allowedAddress,omitempty"` + // Specifies whether allowedAddress limitation is to be applied to the VNIC. + Configallowedaddr string `json:"configureAllowedAddress,omitempty"` + // The value of the optional default router. + Defrouter string `json:"defrouter,omitempty"` + // Enable one or more types of link protection. + Linkprotection string `json:"linkProtection,omitempty"` + // Set the VNIC's macAddress + Macaddress string `json:"macAddress,omitempty"` +} + +// Windows defines the runtime configuration for Windows based containers, including Hyper-V containers. +type Windows struct { + // LayerFolders contains a list of absolute paths to directories containing image layers. + LayerFolders []string `json:"layerFolders"` + // Devices are the list of devices to be mapped into the container. + Devices []WindowsDevice `json:"devices,omitempty"` + // Resources contains information for handling resource constraints for the container. 
+ Resources *WindowsResources `json:"resources,omitempty"` + // CredentialSpec contains a JSON object describing a group Managed Service Account (gMSA) specification. + CredentialSpec interface{} `json:"credentialSpec,omitempty"` + // Servicing indicates if the container is being started in a mode to apply a Windows Update servicing operation. + Servicing bool `json:"servicing,omitempty"` + // IgnoreFlushesDuringBoot indicates if the container is being started in a mode where disk writes are not flushed during its boot process. + IgnoreFlushesDuringBoot bool `json:"ignoreFlushesDuringBoot,omitempty"` + // HyperV contains information for running a container with Hyper-V isolation. + HyperV *WindowsHyperV `json:"hyperv,omitempty"` + // Network restriction configuration. + Network *WindowsNetwork `json:"network,omitempty"` +} + +// WindowsDevice represents information about a host device to be mapped into the container. +type WindowsDevice struct { + // Device identifier: interface class GUID, etc. + ID string `json:"id"` + // Device identifier type: "class", etc. + IDType string `json:"idType"` +} + +// WindowsResources has container runtime resource constraints for containers running on Windows. +type WindowsResources struct { + // Memory restriction configuration. + Memory *WindowsMemoryResources `json:"memory,omitempty"` + // CPU resource restriction configuration. + CPU *WindowsCPUResources `json:"cpu,omitempty"` + // Storage restriction configuration. + Storage *WindowsStorageResources `json:"storage,omitempty"` +} + +// WindowsMemoryResources contains memory resource management settings. +type WindowsMemoryResources struct { + // Memory limit in bytes. + Limit *uint64 `json:"limit,omitempty"` +} + +// WindowsCPUResources contains CPU resource management settings. +type WindowsCPUResources struct { + // Number of CPUs available to the container. + Count *uint64 `json:"count,omitempty"` + // CPU shares (relative weight to other containers with cpu shares). 
+ Shares *uint16 `json:"shares,omitempty"` + // Specifies the portion of processor cycles that this container can use as a percentage times 100. + Maximum *uint16 `json:"maximum,omitempty"` +} + +// WindowsStorageResources contains storage resource management settings. +type WindowsStorageResources struct { + // Specifies maximum Iops for the system drive. + Iops *uint64 `json:"iops,omitempty"` + // Specifies maximum bytes per second for the system drive. + Bps *uint64 `json:"bps,omitempty"` + // SandboxSize specifies the minimum size of the system drive in bytes. + SandboxSize *uint64 `json:"sandboxSize,omitempty"` +} + +// WindowsNetwork contains network settings for Windows containers. +type WindowsNetwork struct { + // List of HNS endpoints that the container should connect to. + EndpointList []string `json:"endpointList,omitempty"` + // Specifies if unqualified DNS name resolution is allowed. + AllowUnqualifiedDNSQuery bool `json:"allowUnqualifiedDNSQuery,omitempty"` + // List of DNS suffixes to use for name resolution. + DNSSearchList []string `json:"DNSSearchList,omitempty"` + // Name (ID) of the container that we will share the network stack with. + NetworkSharedContainerName string `json:"networkSharedContainerName,omitempty"` + // Name (ID) of the network namespace that will be used for the container. + NetworkNamespace string `json:"networkNamespace,omitempty"` +} + +// WindowsHyperV contains information for configuring a container to run with Hyper-V isolation. +type WindowsHyperV struct { + // UtilityVMPath is an optional path to the image used for the Utility VM. + UtilityVMPath string `json:"utilityVMPath,omitempty"` +} + +// VM contains information for virtual-machine-based containers. +type VM struct { + // Hypervisor specifies hypervisor-related configuration for virtual-machine-based containers.
+ Hypervisor VMHypervisor `json:"hypervisor,omitempty"` + // Kernel specifies kernel-related configuration for virtual-machine-based containers. + Kernel VMKernel `json:"kernel"` + // Image specifies guest image related configuration for virtual-machine-based containers. + Image VMImage `json:"image,omitempty"` +} + +// VMHypervisor contains information about the hypervisor to use for a virtual machine. +type VMHypervisor struct { + // Path is the host path to the hypervisor used to manage the virtual machine. + Path string `json:"path"` + // Parameters specifies parameters to pass to the hypervisor. + Parameters []string `json:"parameters,omitempty"` +} + +// VMKernel contains information about the kernel to use for a virtual machine. +type VMKernel struct { + // Path is the host path to the kernel used to boot the virtual machine. + Path string `json:"path"` + // Parameters specifies parameters to pass to the kernel. + Parameters []string `json:"parameters,omitempty"` + // InitRD is the host path to an initial ramdisk to be used by the kernel. + InitRD string `json:"initrd,omitempty"` +} + +// VMImage contains information about the virtual machine root image. +type VMImage struct { + // Path is the host path to the root image that the VM kernel would boot into. + Path string `json:"path"` + // Format is the root image format type (e.g. "qcow2", "raw", "vhd", etc). 
+ Format string `json:"format"` +} + +// LinuxSeccomp represents syscall restrictions +type LinuxSeccomp struct { + DefaultAction LinuxSeccompAction `json:"defaultAction"` + Architectures []Arch `json:"architectures,omitempty"` + Syscalls []LinuxSyscall `json:"syscalls,omitempty"` +} + +// Arch used for additional architectures +type Arch string + +// Additional architectures permitted to be used for system calls +// By default only the native architecture of the kernel is permitted +const ( + ArchX86 Arch = "SCMP_ARCH_X86" + ArchX86_64 Arch = "SCMP_ARCH_X86_64" + ArchX32 Arch = "SCMP_ARCH_X32" + ArchARM Arch = "SCMP_ARCH_ARM" + ArchAARCH64 Arch = "SCMP_ARCH_AARCH64" + ArchMIPS Arch = "SCMP_ARCH_MIPS" + ArchMIPS64 Arch = "SCMP_ARCH_MIPS64" + ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32" + ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL" + ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64" + ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32" + ArchPPC Arch = "SCMP_ARCH_PPC" + ArchPPC64 Arch = "SCMP_ARCH_PPC64" + ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE" + ArchS390 Arch = "SCMP_ARCH_S390" + ArchS390X Arch = "SCMP_ARCH_S390X" + ArchPARISC Arch = "SCMP_ARCH_PARISC" + ArchPARISC64 Arch = "SCMP_ARCH_PARISC64" +) + +// LinuxSeccompAction taken upon Seccomp rule match +type LinuxSeccompAction string + +// Define actions for Seccomp rules +const ( + ActKill LinuxSeccompAction = "SCMP_ACT_KILL" + ActTrap LinuxSeccompAction = "SCMP_ACT_TRAP" + ActErrno LinuxSeccompAction = "SCMP_ACT_ERRNO" + ActTrace LinuxSeccompAction = "SCMP_ACT_TRACE" + ActAllow LinuxSeccompAction = "SCMP_ACT_ALLOW" +) + +// LinuxSeccompOperator used to match syscall arguments in Seccomp +type LinuxSeccompOperator string + +// Define operators for syscall arguments in Seccomp +const ( + OpNotEqual LinuxSeccompOperator = "SCMP_CMP_NE" + OpLessThan LinuxSeccompOperator = "SCMP_CMP_LT" + OpLessEqual LinuxSeccompOperator = "SCMP_CMP_LE" + OpEqualTo LinuxSeccompOperator = "SCMP_CMP_EQ" + OpGreaterEqual LinuxSeccompOperator = "SCMP_CMP_GE" + 
OpGreaterThan LinuxSeccompOperator = "SCMP_CMP_GT" + OpMaskedEqual LinuxSeccompOperator = "SCMP_CMP_MASKED_EQ" +) + +// LinuxSeccompArg is used for matching specific syscall arguments in Seccomp +type LinuxSeccompArg struct { + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"valueTwo,omitempty"` + Op LinuxSeccompOperator `json:"op"` +} + +// LinuxSyscall is used to match a syscall in Seccomp +type LinuxSyscall struct { + Names []string `json:"names"` + Action LinuxSeccompAction `json:"action"` + Args []LinuxSeccompArg `json:"args,omitempty"` +} + +// LinuxIntelRdt has container runtime resource constraints for Intel RDT +// CAT and MBA features, which were introduced in the Linux 4.10 and 4.12 kernels +type LinuxIntelRdt struct { + // The identity for RDT Class of Service + ClosID string `json:"closID,omitempty"` + // The schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." + L3CacheSchema string `json:"l3CacheSchema,omitempty"` + + // The schema of memory bandwidth per L3 cache id + // Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." + // The unit of memory bandwidth is specified in "percentages" by + // default, and in "MBps" if MBA Software Controller is enabled. + MemBwSchema string `json:"memBwSchema,omitempty"` +} diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/state.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/state.go new file mode 100644 index 0000000..89dce34 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/state.go @@ -0,0 +1,17 @@ +package specs + +// State holds information about the runtime state of the container. +type State struct { + // Version is the version of the specification that is supported. + Version string `json:"ociVersion"` + // ID is the container ID + ID string `json:"id"` + // Status is the runtime status of the container. + Status string `json:"status"` + // Pid is the process ID for the container process.
+ Pid int `json:"pid,omitempty"` + // Bundle is the path to the container's bundle directory. + Bundle string `json:"bundle"` + // Annotations are key-value pairs associated with the container. + Annotations map[string]string `json:"annotations,omitempty"` +} diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go new file mode 100644 index 0000000..b920fc1 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go @@ -0,0 +1,18 @@ +package specs + +import "fmt" + +const ( + // VersionMajor is for API-incompatible changes + VersionMajor = 1 + // VersionMinor is for functionality added in a backwards-compatible manner + VersionMinor = 0 + // VersionPatch is for backwards-compatible bug fixes + VersionPatch = 1 + + // VersionDev indicates a development branch. Releases will use an empty string. + VersionDev = "-dev" +) + +// Version is the specification version that the package types support. +var Version = fmt.Sprintf("%d.%d.%d%s", VersionMajor, VersionMinor, VersionPatch, VersionDev) diff --git a/vendor/github.com/opencontainers/selinux/LICENSE b/vendor/github.com/opencontainers/selinux/LICENSE new file mode 100644 index 0000000..8dada3e --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/selinux/README.md b/vendor/github.com/opencontainers/selinux/README.md new file mode 100644 index 0000000..04594bd --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/README.md @@ -0,0 +1,18 @@ +# selinux + +[![GoDoc](https://godoc.org/github.com/opencontainers/selinux?status.svg)](https://godoc.org/github.com/opencontainers/selinux) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/selinux)](https://goreportcard.com/report/github.com/opencontainers/selinux) [![Build Status](https://travis-ci.org/opencontainers/selinux.svg?branch=master)](https://travis-ci.org/opencontainers/selinux) + +Common SELinux package used across the container ecosystem. + +Please see the [godoc](https://godoc.org/github.com/opencontainers/selinux) for more information. + +## Code of Conduct + +Participation in the OpenContainers community is governed by [OpenContainer's Code of Conduct][code-of-conduct]. + +## Security + +If you find an issue, please follow the [security][security] protocol to report it. 
+ +[security]: https://github.com/opencontainers/org/blob/master/security +[code-of-conduct]: https://github.com/opencontainers/org/blob/master/CODE_OF_CONDUCT.md diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go new file mode 100644 index 0000000..e178568 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go @@ -0,0 +1,109 @@ +// +build !selinux !linux + +package label + +// InitLabels returns the process label and file labels to be used within +// the container. A list of options can be passed into this function to alter +// the labels. +func InitLabels(options []string) (string, string, error) { + return "", "", nil +} + +func ROMountLabel() string { + return "" +} + +func GenLabels(options string) (string, string, error) { + return "", "", nil +} + +func FormatMountLabel(src string, mountLabel string) string { + return src +} + +func SetProcessLabel(processLabel string) error { + return nil +} + +func ProcessLabel() (string, error) { + return "", nil +} + +func SetSocketLabel(processLabel string) error { + return nil +} + +func SocketLabel() (string, error) { + return "", nil +} + +func SetKeyLabel(processLabel string) error { + return nil +} + +func KeyLabel() (string, error) { + return "", nil +} + +func FileLabel(path string) (string, error) { + return "", nil +} + +func SetFileLabel(path string, fileLabel string) error { + return nil +} + +func SetFileCreateLabel(fileLabel string) error { + return nil +} + +func Relabel(path string, fileLabel string, shared bool) error { + return nil +} + +func PidLabel(pid int) (string, error) { + return "", nil +} + +func Init() { +} + +// ClearLabels clears all reserved labels +func ClearLabels() { + return +} + +func ReserveLabel(label string) error { + return nil +} + +func ReleaseLabel(label string) error { + return nil +} + +// DupSecOpt takes a process label and returns security options 
that +// can be used to set duplicate labels on future container processes +func DupSecOpt(src string) ([]string, error) { + return nil, nil +} + +// DisableSecOpt returns a security opt that can disable labeling +// support for future container processes +func DisableSecOpt() []string { + return nil +} + +// Validate checks that the label does not include unexpected options +func Validate(label string) error { + return nil +} + +// RelabelNeeded checks whether the user requested a relabel +func RelabelNeeded(label string) bool { + return false +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(label string) bool { + return false +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_selinux.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_selinux.go new file mode 100644 index 0000000..2730fcf --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_selinux.go @@ -0,0 +1,291 @@ +// +build selinux,linux + +package label + +import ( + "fmt" + "os" + "os/user" + "strings" + + "github.com/opencontainers/selinux/go-selinux" +) + +// Valid Label Options +var validOptions = map[string]bool{ + "disable": true, + "type": true, + "filetype": true, + "user": true, + "role": true, + "level": true, +} + +var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together") + +// InitLabels returns the process label and file labels to be used within +// the container. A list of options can be passed into this function to alter +// the labels. The labels returned will include a random MCS String, that is +// guaranteed to be unique. 
+func InitLabels(options []string) (plabel string, mlabel string, Err error) { + if !selinux.GetEnabled() { + return "", "", nil + } + processLabel, mountLabel := selinux.ContainerLabels() + if processLabel != "" { + defer func() { + if Err != nil { + ReleaseLabel(mountLabel) + } + }() + pcon, err := selinux.NewContext(processLabel) + if err != nil { + return "", "", err + } + + mcon, err := selinux.NewContext(mountLabel) + if err != nil { + return "", "", err + } + for _, opt := range options { + if opt == "disable" { + return "", mountLabel, nil + } + if i := strings.Index(opt, ":"); i == -1 { + return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt) + } + con := strings.SplitN(opt, ":", 2) + if !validOptions[con[0]] { + return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0]) + + } + if con[0] == "filetype" { + mcon["type"] = con[1] + } + pcon[con[0]] = con[1] + if con[0] == "level" || con[0] == "user" { + mcon[con[0]] = con[1] + } + } + _ = ReleaseLabel(processLabel) + processLabel = pcon.Get() + mountLabel = mcon.Get() + _ = ReserveLabel(processLabel) + } + return processLabel, mountLabel, nil +} + +func ROMountLabel() string { + return selinux.ROFileLabel() +} + +// DEPRECATED: The GenLabels function is only to be used during the transition to the official API. +func GenLabels(options string) (string, string, error) { + return InitLabels(strings.Fields(options)) +} + +// FormatMountLabel returns a string to be used by the mount command. +// The format of this string will be used to alter the labeling of the mountpoint. +// The string returned is suitable to be used as the options field of the mount command. +// If you need to have additional mount point options, you can pass them in as +// the first parameter. Second parameter is the label that you wish to apply +// to all content in the mount point. 
+func FormatMountLabel(src, mountLabel string) string { + if mountLabel != "" { + switch src { + case "": + src = fmt.Sprintf("context=%q", mountLabel) + default: + src = fmt.Sprintf("%s,context=%q", src, mountLabel) + } + } + return src +} + +// SetProcessLabel takes a process label and tells the kernel to assign the +// label to the next program executed by the current process. +func SetProcessLabel(processLabel string) error { + return selinux.SetExecLabel(processLabel) +} + +// SetSocketLabel takes a process label and tells the kernel to assign the +// label to the next socket that gets created +func SetSocketLabel(processLabel string) error { + return selinux.SetSocketLabel(processLabel) +} + +// SocketLabel retrieves the current default socket label setting +func SocketLabel() (string, error) { + return selinux.SocketLabel() +} + +// SetKeyLabel takes a process label and tells the kernel to assign the +// label to the next kernel keyring that gets created +func SetKeyLabel(processLabel string) error { + return selinux.SetKeyLabel(processLabel) +} + +// KeyLabel retrieves the current default kernel keyring label setting +func KeyLabel() (string, error) { + return selinux.KeyLabel() +} + +// ProcessLabel returns the process label that the kernel will assign +// to the next program executed by the current process. If "" is returned +// this indicates that the default labeling will happen for the process. 
+func ProcessLabel() (string, error) { + return selinux.ExecLabel() +} + +// FileLabel returns the label for specified path +func FileLabel(path string) (string, error) { + return selinux.FileLabel(path) +} + +// SetFileLabel modifies the "path" label to the specified file label +func SetFileLabel(path string, fileLabel string) error { + if selinux.GetEnabled() && fileLabel != "" { + return selinux.SetFileLabel(path, fileLabel) + } + return nil +} + +// SetFileCreateLabel tells the kernel the label for all files to be created +func SetFileCreateLabel(fileLabel string) error { + if selinux.GetEnabled() { + return selinux.SetFSCreateLabel(fileLabel) + } + return nil +} + +// Relabel changes the label of path to the filelabel string. +// It changes the MCS label to s0 if shared is true. +// This will allow all containers to share the content. +func Relabel(path string, fileLabel string, shared bool) error { + if !selinux.GetEnabled() { + return nil + } + + if fileLabel == "" { + return nil + } + + exclude_paths := map[string]bool{ + "/": true, + "/bin": true, + "/boot": true, + "/dev": true, + "/etc": true, + "/etc/passwd": true, + "/etc/pki": true, + "/etc/shadow": true, + "/home": true, + "/lib": true, + "/lib64": true, + "/media": true, + "/opt": true, + "/proc": true, + "/root": true, + "/run": true, + "/sbin": true, + "/srv": true, + "/sys": true, + "/tmp": true, + "/usr": true, + "/var": true, + "/var/lib": true, + "/var/log": true, + } + + if home := os.Getenv("HOME"); home != "" { + exclude_paths[home] = true + } + + if sudoUser := os.Getenv("SUDO_USER"); sudoUser != "" { + if usr, err := user.Lookup(sudoUser); err == nil { + exclude_paths[usr.HomeDir] = true + } + } + + if path != "/" { + path = strings.TrimSuffix(path, "/") + } + if exclude_paths[path] { + return fmt.Errorf("SELinux relabeling of %s is not allowed", path) + } + + if shared { + c, err := selinux.NewContext(fileLabel) + if err != nil { + return err + } + + c["level"] = "s0" + fileLabel = 
c.Get() + } + if err := selinux.Chcon(path, fileLabel, true); err != nil { + return err + } + return nil +} + +// PidLabel will return the label of the process running with the specified pid +func PidLabel(pid int) (string, error) { + return selinux.PidLabel(pid) +} + +// Init initialises the labeling system +func Init() { + selinux.GetEnabled() +} + +// ClearLabels will clear all reserved labels +func ClearLabels() { + selinux.ClearLabels() +} + +// ReserveLabel will record the fact that the MCS label has already been used. +// This will prevent InitLabels from using the MCS label in a newly created +// container +func ReserveLabel(label string) error { + selinux.ReserveLabel(label) + return nil +} + +// ReleaseLabel will remove the reservation of the MCS label. +// This will allow InitLabels to use the MCS label in a newly created +// containers +func ReleaseLabel(label string) error { + selinux.ReleaseLabel(label) + return nil +} + +// DupSecOpt takes a process label and returns security options that +// can be used to set duplicate labels on future container processes +func DupSecOpt(src string) ([]string, error) { + return selinux.DupSecOpt(src) +} + +// DisableSecOpt returns a security opt that can disable labeling +// support for future container processes +func DisableSecOpt() []string { + return selinux.DisableSecOpt() +} + +// Validate checks that the label does not include unexpected options +func Validate(label string) error { + if strings.Contains(label, "z") && strings.Contains(label, "Z") { + return ErrIncompatibleLabel + } + return nil +} + +// RelabelNeeded checks whether the user requested a relabel +func RelabelNeeded(label string) bool { + return strings.Contains(label, "z") || strings.Contains(label, "Z") +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(label string) bool { + return strings.Contains(label, "z") +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go 
b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go new file mode 100644 index 0000000..8cdf1b0 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go @@ -0,0 +1,813 @@ +// +build selinux,linux + +package selinux + +import ( + "bufio" + "bytes" + "crypto/rand" + "encoding/binary" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "sync" + "syscall" + + "golang.org/x/sys/unix" +) + +const ( + // Enforcing constant indicate SELinux is in enforcing mode + Enforcing = 1 + // Permissive constant to indicate SELinux is in permissive mode + Permissive = 0 + // Disabled constant to indicate SELinux is disabled + Disabled = -1 + + selinuxDir = "/etc/selinux/" + selinuxConfig = selinuxDir + "config" + selinuxfsMount = "/sys/fs/selinux" + selinuxTypeTag = "SELINUXTYPE" + selinuxTag = "SELINUX" + xattrNameSelinux = "security.selinux" + stRdOnly = 0x01 + selinuxfsMagic = 0xf97cff8c +) + +type selinuxState struct { + enabledSet bool + enabled bool + selinuxfsSet bool + selinuxfs string + mcsList map[string]bool + sync.Mutex +} + +var ( + // ErrMCSAlreadyExists is returned when trying to allocate a duplicate MCS. + ErrMCSAlreadyExists = errors.New("MCS label already exists") + // ErrEmptyPath is returned when an empty path has been specified. + ErrEmptyPath = errors.New("empty path") + // InvalidLabel is returned when an invalid label is specified. 
+ InvalidLabel = errors.New("Invalid Label") + + assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`) + roFileLabel string + state = selinuxState{ + mcsList: make(map[string]bool), + } +) + +// Context is a representation of the SELinux label broken into 4 parts +type Context map[string]string + +func (s *selinuxState) setEnable(enabled bool) bool { + s.Lock() + defer s.Unlock() + s.enabledSet = true + s.enabled = enabled + return s.enabled +} + +func (s *selinuxState) getEnabled() bool { + s.Lock() + enabled := s.enabled + enabledSet := s.enabledSet + s.Unlock() + if enabledSet { + return enabled + } + + enabled = false + if fs := getSelinuxMountPoint(); fs != "" { + if con, _ := CurrentLabel(); con != "kernel" { + enabled = true + } + } + return s.setEnable(enabled) +} + +// SetDisabled disables selinux support for the package +func SetDisabled() { + state.setEnable(false) +} + +func (s *selinuxState) setSELinuxfs(selinuxfs string) string { + s.Lock() + defer s.Unlock() + s.selinuxfsSet = true + s.selinuxfs = selinuxfs + return s.selinuxfs +} + +func verifySELinuxfsMount(mnt string) bool { + var buf syscall.Statfs_t + for { + err := syscall.Statfs(mnt, &buf) + if err == nil { + break + } + if err == syscall.EAGAIN { + continue + } + return false + } + if uint32(buf.Type) != uint32(selinuxfsMagic) { + return false + } + if (buf.Flags & stRdOnly) != 0 { + return false + } + + return true +} + +func findSELinuxfs() string { + // fast path: check the default mount first + if verifySELinuxfsMount(selinuxfsMount) { + return selinuxfsMount + } + + // check if selinuxfs is available before going the slow path + fs, err := ioutil.ReadFile("/proc/filesystems") + if err != nil { + return "" + } + if !bytes.Contains(fs, []byte("\tselinuxfs\n")) { + return "" + } + + // slow path: try to find among the mounts + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "" + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for { + mnt := 
findSELinuxfsMount(scanner) + if mnt == "" { // error or not found + return "" + } + if verifySELinuxfsMount(mnt) { + return mnt + } + } +} + +// findSELinuxfsMount returns a next selinuxfs mount point found, +// if there is one, or an empty string in case of EOF or error. +func findSELinuxfsMount(s *bufio.Scanner) string { + for s.Scan() { + txt := s.Text() + // The first field after - is fs type. + // Safe as spaces in mountpoints are encoded as \040 + if !strings.Contains(txt, " - selinuxfs ") { + continue + } + const mPos = 5 // mount point is 5th field + fields := strings.SplitN(txt, " ", mPos+1) + if len(fields) < mPos+1 { + continue + } + return fields[mPos-1] + } + + return "" +} + +func (s *selinuxState) getSELinuxfs() string { + s.Lock() + selinuxfs := s.selinuxfs + selinuxfsSet := s.selinuxfsSet + s.Unlock() + if selinuxfsSet { + return selinuxfs + } + + return s.setSELinuxfs(findSELinuxfs()) +} + +// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs +// filesystem or an empty string if no mountpoint is found. Selinuxfs is +// a proc-like pseudo-filesystem that exposes the selinux policy API to +// processes. The existence of an selinuxfs mount is used to determine +// whether selinux is currently enabled or not. +func getSelinuxMountPoint() string { + return state.getSELinuxfs() +} + +// GetEnabled returns whether selinux is currently enabled. 
+func GetEnabled() bool { + return state.getEnabled() +} + +func readConfig(target string) string { + var ( + val, key string + bufin *bufio.Reader + ) + + in, err := os.Open(selinuxConfig) + if err != nil { + return "" + } + defer in.Close() + + bufin = bufio.NewReader(in) + + for done := false; !done; { + var line string + if line, err = bufin.ReadString('\n'); err != nil { + if err != io.EOF { + return "" + } + done = true + } + line = strings.TrimSpace(line) + if len(line) == 0 { + // Skip blank lines + continue + } + if line[0] == ';' || line[0] == '#' { + // Skip comments + continue + } + if groups := assignRegex.FindStringSubmatch(line); groups != nil { + key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) + if key == target { + return strings.Trim(val, "\"") + } + } + } + return "" +} + +func getSELinuxPolicyRoot() string { + return filepath.Join(selinuxDir, readConfig(selinuxTypeTag)) +} + +func isProcHandle(fh *os.File) (bool, error) { + var buf unix.Statfs_t + err := unix.Fstatfs(int(fh.Fd()), &buf) + return buf.Type == unix.PROC_SUPER_MAGIC, err +} + +func readCon(fpath string) (string, error) { + if fpath == "" { + return "", ErrEmptyPath + } + + in, err := os.Open(fpath) + if err != nil { + return "", err + } + defer in.Close() + + if ok, err := isProcHandle(in); err != nil { + return "", err + } else if !ok { + return "", fmt.Errorf("%s not on procfs", fpath) + } + + var retval string + if _, err := fmt.Fscanf(in, "%s", &retval); err != nil { + return "", err + } + return strings.Trim(retval, "\x00"), nil +} + +// SetFileLabel sets the SELinux label for this path or returns an error. +func SetFileLabel(fpath string, label string) error { + if fpath == "" { + return ErrEmptyPath + } + return lsetxattr(fpath, xattrNameSelinux, []byte(label), 0) +} + +// FileLabel returns the SELinux label for this path or returns an error. 
+func FileLabel(fpath string) (string, error) { + if fpath == "" { + return "", ErrEmptyPath + } + + label, err := lgetxattr(fpath, xattrNameSelinux) + if err != nil { + return "", err + } + // Trim the NUL byte at the end of the byte buffer, if present. + if len(label) > 0 && label[len(label)-1] == '\x00' { + label = label[:len(label)-1] + } + return string(label), nil +} + +/* +SetFSCreateLabel tells kernel the label to create all file system objects +created by this task. Setting label="" to return to default. +*/ +func SetFSCreateLabel(label string) error { + return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), label) +} + +/* +FSCreateLabel returns the default label the kernel which the kernel is using +for file system objects created by this task. "" indicates default. +*/ +func FSCreateLabel() (string, error) { + return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid())) +} + +// CurrentLabel returns the SELinux label of the current process thread, or an error. +func CurrentLabel() (string, error) { + return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid())) +} + +// PidLabel returns the SELinux label of the given pid, or an error. +func PidLabel(pid int) (string, error) { + return readCon(fmt.Sprintf("/proc/%d/attr/current", pid)) +} + +/* +ExecLabel returns the SELinux label that the kernel will use for any programs +that are executed by the current process thread, or an error. 
+*/ +func ExecLabel() (string, error) { + return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid())) +} + +func writeCon(fpath string, val string) error { + if fpath == "" { + return ErrEmptyPath + } + if val == "" { + if !GetEnabled() { + return nil + } + } + + out, err := os.OpenFile(fpath, os.O_WRONLY, 0) + if err != nil { + return err + } + defer out.Close() + + if ok, err := isProcHandle(out); err != nil { + return err + } else if !ok { + return fmt.Errorf("%s not on procfs", fpath) + } + + if val != "" { + _, err = out.Write([]byte(val)) + } else { + _, err = out.Write(nil) + } + return err +} + +/* +CanonicalizeContext takes a context string and writes it to the kernel +the function then returns the context that the kernel will use. This function +can be used to see if two contexts are equivalent +*/ +func CanonicalizeContext(val string) (string, error) { + return readWriteCon(filepath.Join(getSelinuxMountPoint(), "context"), val) +} + +func readWriteCon(fpath string, val string) (string, error) { + if fpath == "" { + return "", ErrEmptyPath + } + f, err := os.OpenFile(fpath, os.O_RDWR, 0) + if err != nil { + return "", err + } + defer f.Close() + + _, err = f.Write([]byte(val)) + if err != nil { + return "", err + } + + var retval string + if _, err := fmt.Fscanf(f, "%s", &retval); err != nil { + return "", err + } + return strings.Trim(retval, "\x00"), nil +} + +/* +SetExecLabel sets the SELinux label that the kernel will use for any programs +that are executed by the current process thread, or an error. +*/ +func SetExecLabel(label string) error { + return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), label) +} + +/* +SetTaskLabel sets the SELinux label for the current thread, or an error. +This requires the dyntransition permission. 
+*/ +func SetTaskLabel(label string) error { + return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid()), label) +} + +// SetSocketLabel takes a process label and tells the kernel to assign the +// label to the next socket that gets created +func SetSocketLabel(label string) error { + return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/sockcreate", syscall.Gettid()), label) +} + +// SocketLabel retrieves the current socket label setting +func SocketLabel() (string, error) { + return readCon(fmt.Sprintf("/proc/self/task/%d/attr/sockcreate", syscall.Gettid())) +} + +// PeerLabel retrieves the label of the client on the other side of a socket +func PeerLabel(fd uintptr) (string, error) { + return unix.GetsockoptString(int(fd), syscall.SOL_SOCKET, syscall.SO_PEERSEC) +} + +// SetKeyLabel takes a process label and tells the kernel to assign the +// label to the next kernel keyring that gets created +func SetKeyLabel(label string) error { + err := writeCon("/proc/self/attr/keycreate", label) + if os.IsNotExist(err) { + return nil + } + if label == "" && os.IsPermission(err) && !GetEnabled() { + return nil + } + return err +} + +// KeyLabel retrieves the current kernel keyring label setting +func KeyLabel() (string, error) { + return readCon("/proc/self/attr/keycreate") +} + +// Get returns the Context as a string +func (c Context) Get() string { + if c["level"] != "" { + return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"]) + } + return fmt.Sprintf("%s:%s:%s", c["user"], c["role"], c["type"]) +} + +// NewContext creates a new Context struct from the specified label +func NewContext(label string) (Context, error) { + c := make(Context) + + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + if len(con) < 3 { + return c, InvalidLabel + } + c["user"] = con[0] + c["role"] = con[1] + c["type"] = con[2] + if len(con) > 3 { + c["level"] = con[3] + } + } + return c, nil +} + +// ClearLabels clears all reserved labels 
+func ClearLabels() { + state.Lock() + state.mcsList = make(map[string]bool) + state.Unlock() +} + +// ReserveLabel reserves the MLS/MCS level component of the specified label +func ReserveLabel(label string) { + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + if len(con) > 3 { + mcsAdd(con[3]) + } + } +} + +func selinuxEnforcePath() string { + return fmt.Sprintf("%s/enforce", getSelinuxMountPoint()) +} + +// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled +func EnforceMode() int { + var enforce int + + enforceS, err := readCon(selinuxEnforcePath()) + if err != nil { + return -1 + } + + enforce, err = strconv.Atoi(string(enforceS)) + if err != nil { + return -1 + } + return enforce +} + +/* +SetEnforceMode sets the current SELinux mode Enforcing, Permissive. +Disabled is not valid, since this needs to be set at boot time. +*/ +func SetEnforceMode(mode int) error { + return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode)) +} + +/* +DefaultEnforceMode returns the systems default SELinux mode Enforcing, +Permissive or Disabled. Note this is is just the default at boot time. +EnforceMode tells you the systems current mode. 
+*/ +func DefaultEnforceMode() int { + switch readConfig(selinuxTag) { + case "enforcing": + return Enforcing + case "permissive": + return Permissive + } + return Disabled +} + +func mcsAdd(mcs string) error { + if mcs == "" { + return nil + } + state.Lock() + defer state.Unlock() + if state.mcsList[mcs] { + return ErrMCSAlreadyExists + } + state.mcsList[mcs] = true + return nil +} + +func mcsDelete(mcs string) { + if mcs == "" { + return + } + state.Lock() + defer state.Unlock() + state.mcsList[mcs] = false +} + +func intToMcs(id int, catRange uint32) string { + var ( + SETSIZE = int(catRange) + TIER = SETSIZE + ORD = id + ) + + if id < 1 || id > 523776 { + return "" + } + + for ORD > TIER { + ORD = ORD - TIER + TIER-- + } + TIER = SETSIZE - TIER + ORD = ORD + TIER + return fmt.Sprintf("s0:c%d,c%d", TIER, ORD) +} + +func uniqMcs(catRange uint32) string { + var ( + n uint32 + c1, c2 uint32 + mcs string + ) + + for { + binary.Read(rand.Reader, binary.LittleEndian, &n) + c1 = n % catRange + binary.Read(rand.Reader, binary.LittleEndian, &n) + c2 = n % catRange + if c1 == c2 { + continue + } else { + if c1 > c2 { + c1, c2 = c2, c1 + } + } + mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2) + if err := mcsAdd(mcs); err != nil { + continue + } + break + } + return mcs +} + +/* +ReleaseLabel will unreserve the MLS/MCS Level field of the specified label. +Allowing it to be used by another process. +*/ +func ReleaseLabel(label string) { + if len(label) != 0 { + con := strings.SplitN(label, ":", 4) + if len(con) > 3 { + mcsDelete(con[3]) + } + } +} + +// ROFileLabel returns the specified SELinux readonly file label +func ROFileLabel() string { + return roFileLabel +} + +/* +ContainerLabels returns an allocated processLabel and fileLabel to be used for +container labeling by the calling process. 
+*/ +func ContainerLabels() (processLabel string, fileLabel string) { + var ( + val, key string + bufin *bufio.Reader + ) + + if !GetEnabled() { + return "", "" + } + lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot()) + in, err := os.Open(lxcPath) + if err != nil { + return "", "" + } + defer in.Close() + + bufin = bufio.NewReader(in) + + for done := false; !done; { + var line string + if line, err = bufin.ReadString('\n'); err != nil { + if err == io.EOF { + done = true + } else { + goto exit + } + } + line = strings.TrimSpace(line) + if len(line) == 0 { + // Skip blank lines + continue + } + if line[0] == ';' || line[0] == '#' { + // Skip comments + continue + } + if groups := assignRegex.FindStringSubmatch(line); groups != nil { + key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2]) + if key == "process" { + processLabel = strings.Trim(val, "\"") + } + if key == "file" { + fileLabel = strings.Trim(val, "\"") + } + if key == "ro_file" { + roFileLabel = strings.Trim(val, "\"") + } + } + } + + if processLabel == "" || fileLabel == "" { + return "", "" + } + + if roFileLabel == "" { + roFileLabel = fileLabel + } +exit: + scon, _ := NewContext(processLabel) + if scon["level"] != "" { + mcs := uniqMcs(1024) + scon["level"] = mcs + processLabel = scon.Get() + scon, _ = NewContext(fileLabel) + scon["level"] = mcs + fileLabel = scon.Get() + } + return processLabel, fileLabel +} + +// SecurityCheckContext validates that the SELinux label is understood by the kernel +func SecurityCheckContext(val string) error { + return writeCon(fmt.Sprintf("%s/context", getSelinuxMountPoint()), val) +} + +/* +CopyLevel returns a label with the MLS/MCS level from src label replaced on +the dest label. 
+*/ +func CopyLevel(src, dest string) (string, error) { + if src == "" { + return "", nil + } + if err := SecurityCheckContext(src); err != nil { + return "", err + } + if err := SecurityCheckContext(dest); err != nil { + return "", err + } + scon, err := NewContext(src) + if err != nil { + return "", err + } + tcon, err := NewContext(dest) + if err != nil { + return "", err + } + mcsDelete(tcon["level"]) + mcsAdd(scon["level"]) + tcon["level"] = scon["level"] + return tcon.Get(), nil +} + +// Prevent users from relabing system files +func badPrefix(fpath string) error { + if fpath == "" { + return ErrEmptyPath + } + + badPrefixes := []string{"/usr"} + for _, prefix := range badPrefixes { + if strings.HasPrefix(fpath, prefix) { + return fmt.Errorf("relabeling content in %s is not allowed", prefix) + } + } + return nil +} + +// Chcon changes the `fpath` file object to the SELinux label `label`. +// If `fpath` is a directory and `recurse`` is true, Chcon will walk the +// directory tree setting the label. +func Chcon(fpath string, label string, recurse bool) error { + if fpath == "" { + return ErrEmptyPath + } + if label == "" { + return nil + } + if err := badPrefix(fpath); err != nil { + return err + } + callback := func(p string, info os.FileInfo, err error) error { + e := SetFileLabel(p, label) + if os.IsNotExist(e) { + return nil + } + return e + } + + if recurse { + return filepath.Walk(fpath, callback) + } + + return SetFileLabel(fpath, label) +} + +// DupSecOpt takes an SELinux process label and returns security options that +// can be used to set the SELinux Type and Level for future container processes. 
+func DupSecOpt(src string) ([]string, error) { + if src == "" { + return nil, nil + } + con, err := NewContext(src) + if err != nil { + return nil, err + } + if con["user"] == "" || + con["role"] == "" || + con["type"] == "" { + return nil, nil + } + dup := []string{"user:" + con["user"], + "role:" + con["role"], + "type:" + con["type"], + } + + if con["level"] != "" { + dup = append(dup, "level:"+con["level"]) + } + + return dup, nil +} + +// DisableSecOpt returns a security opt that can be used to disable SELinux +// labeling support for future container processes. +func DisableSecOpt() []string { + return []string{"disable"} +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go new file mode 100644 index 0000000..0c2e1cd --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go @@ -0,0 +1,230 @@ +// +build !selinux + +package selinux + +import ( + "errors" +) + +const ( + // Enforcing constant indicate SELinux is in enforcing mode + Enforcing = 1 + // Permissive constant to indicate SELinux is in permissive mode + Permissive = 0 + // Disabled constant to indicate SELinux is disabled + Disabled = -1 +) + +var ( + // ErrMCSAlreadyExists is returned when trying to allocate a duplicate MCS. + ErrMCSAlreadyExists = errors.New("MCS label already exists") + // ErrEmptyPath is returned when an empty path has been specified. + ErrEmptyPath = errors.New("empty path") +) + +// Context is a representation of the SELinux label broken into 4 parts +type Context map[string]string + +// SetDisabled disables selinux support for the package +func SetDisabled() { + return +} + +// GetEnabled returns whether selinux is currently enabled. +func GetEnabled() bool { + return false +} + +// SetFileLabel sets the SELinux label for this path or returns an error. 
+func SetFileLabel(fpath string, label string) error { + return nil +} + +// FileLabel returns the SELinux label for this path or returns an error. +func FileLabel(fpath string) (string, error) { + return "", nil +} + +/* +SetFSCreateLabel tells kernel the label to create all file system objects +created by this task. Setting label="" to return to default. +*/ +func SetFSCreateLabel(label string) error { + return nil +} + +/* +FSCreateLabel returns the default label the kernel which the kernel is using +for file system objects created by this task. "" indicates default. +*/ +func FSCreateLabel() (string, error) { + return "", nil +} + +// CurrentLabel returns the SELinux label of the current process thread, or an error. +func CurrentLabel() (string, error) { + return "", nil +} + +// PidLabel returns the SELinux label of the given pid, or an error. +func PidLabel(pid int) (string, error) { + return "", nil +} + +/* +ExecLabel returns the SELinux label that the kernel will use for any programs +that are executed by the current process thread, or an error. +*/ +func ExecLabel() (string, error) { + return "", nil +} + +/* +CanonicalizeContext takes a context string and writes it to the kernel +the function then returns the context that the kernel will use. This function +can be used to see if two contexts are equivalent +*/ +func CanonicalizeContext(val string) (string, error) { + return "", nil +} + +/* +SetExecLabel sets the SELinux label that the kernel will use for any programs +that are executed by the current process thread, or an error. +*/ +func SetExecLabel(label string) error { + return nil +} + +/* +SetTaskLabel sets the SELinux label for the current thread, or an error. +This requires the dyntransition permission. +*/ +func SetTaskLabel(label string) error { + return nil +} + +/* +SetSocketLabel sets the SELinux label that the kernel will use for any programs +that are executed by the current process thread, or an error. 
+*/ +func SetSocketLabel(label string) error { + return nil +} + +// SocketLabel retrieves the current socket label setting +func SocketLabel() (string, error) { + return "", nil +} + +// PeerLabel retrieves the label of the client on the other side of a socket +func PeerLabel(fd uintptr) (string, error) { + return "", nil +} + +// SetKeyLabel takes a process label and tells the kernel to assign the +// label to the next kernel keyring that gets created +func SetKeyLabel(label string) error { + return nil +} + +// KeyLabel retrieves the current kernel keyring label setting +func KeyLabel() (string, error) { + return "", nil +} + +// Get returns the Context as a string +func (c Context) Get() string { + return "" +} + +// NewContext creates a new Context struct from the specified label +func NewContext(label string) (Context, error) { + c := make(Context) + return c, nil +} + +// ClearLabels clears all reserved MLS/MCS levels +func ClearLabels() { + return +} + +// ReserveLabel reserves the MLS/MCS level component of the specified label +func ReserveLabel(label string) { + return +} + +// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled +func EnforceMode() int { + return Disabled +} + +/* +SetEnforceMode sets the current SELinux mode Enforcing, Permissive. +Disabled is not valid, since this needs to be set at boot time. +*/ +func SetEnforceMode(mode int) error { + return nil +} + +/* +DefaultEnforceMode returns the systems default SELinux mode Enforcing, +Permissive or Disabled. Note this is is just the default at boot time. +EnforceMode tells you the systems current mode. +*/ +func DefaultEnforceMode() int { + return Disabled +} + +/* +ReleaseLabel will unreserve the MLS/MCS Level field of the specified label. +Allowing it to be used by another process. 
+*/ +func ReleaseLabel(label string) { + return +} + +// ROFileLabel returns the specified SELinux readonly file label +func ROFileLabel() string { + return "" +} + +/* +ContainerLabels returns an allocated processLabel and fileLabel to be used for +container labeling by the calling process. +*/ +func ContainerLabels() (processLabel string, fileLabel string) { + return "", "" +} + +// SecurityCheckContext validates that the SELinux label is understood by the kernel +func SecurityCheckContext(val string) error { + return nil +} + +/* +CopyLevel returns a label with the MLS/MCS level from src label replaced on +the dest label. +*/ +func CopyLevel(src, dest string) (string, error) { + return "", nil +} + +// Chcon changes the `fpath` file object to the SELinux label `label`. +// If `fpath` is a directory and `recurse`` is true, Chcon will walk the +// directory tree setting the label. +func Chcon(fpath string, label string, recurse bool) error { + return nil +} + +// DupSecOpt takes an SELinux process label and returns security options that +// can be used to set the SELinux Type and Level for future container processes. +func DupSecOpt(src string) ([]string, error) { + return nil, nil +} + +// DisableSecOpt returns a security opt that can be used to disable SELinux +// labeling support for future container processes. 
+func DisableSecOpt() []string { + return []string{"disable"} +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go new file mode 100644 index 0000000..67a9d8e --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go @@ -0,0 +1,78 @@ +// +build selinux,linux + +package selinux + +import ( + "syscall" + "unsafe" +) + +var _zero uintptr + +// Returns a []byte slice if the xattr is set and nil otherwise +// Requires path and its attribute as arguments +func lgetxattr(path string, attr string) ([]byte, error) { + var sz int + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return nil, err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return nil, err + } + + // Start with a 128 length byte array + sz = 128 + dest := make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + _sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + + switch { + case errno == syscall.ENODATA: + return nil, errno + case errno == syscall.ENOTSUP: + return nil, errno + case errno == syscall.ERANGE: + // 128 byte array might just not be good enough, + // A dummy buffer is used ``uintptr(0)`` to get real size + // of the xattrs on disk + _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0) + sz = int(_sz) + if sz < 0 { + return nil, errno + } + dest = make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + if errno != 0 { + return nil, errno + } + case errno != 0: + return nil, errno + } + sz = int(_sz) + return 
dest[:sz], nil +} + +func lsetxattr(path string, attr string, data []byte, flags int) error { + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return err + } + var dataBytes unsafe.Pointer + if len(data) > 0 { + dataBytes = unsafe.Pointer(&data[0]) + } else { + dataBytes = unsafe.Pointer(&_zero) + } + _, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0) + if errno != 0 { + return errno + } + return nil +} diff --git a/vendor/github.com/pkg/errors/LICENSE b/vendor/github.com/pkg/errors/LICENSE new file mode 100644 index 0000000..835ba3e --- /dev/null +++ b/vendor/github.com/pkg/errors/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) 2015, Dave Cheney +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/pkg/errors/README.md b/vendor/github.com/pkg/errors/README.md new file mode 100644 index 0000000..6483ba2 --- /dev/null +++ b/vendor/github.com/pkg/errors/README.md @@ -0,0 +1,52 @@ +# errors [![Travis-CI](https://travis-ci.org/pkg/errors.svg)](https://travis-ci.org/pkg/errors) [![AppVeyor](https://ci.appveyor.com/api/projects/status/b98mptawhudj53ep/branch/master?svg=true)](https://ci.appveyor.com/project/davecheney/errors/branch/master) [![GoDoc](https://godoc.org/github.com/pkg/errors?status.svg)](http://godoc.org/github.com/pkg/errors) [![Report card](https://goreportcard.com/badge/github.com/pkg/errors)](https://goreportcard.com/report/github.com/pkg/errors) [![Sourcegraph](https://sourcegraph.com/github.com/pkg/errors/-/badge.svg)](https://sourcegraph.com/github.com/pkg/errors?badge) + +Package errors provides simple error handling primitives. + +`go get github.com/pkg/errors` + +The traditional error handling idiom in Go is roughly akin to +```go +if err != nil { + return err +} +``` +which applied recursively up the call stack results in error reports without context or debugging information. The errors package allows programmers to add context to the failure path in their code in a way that does not destroy the original value of the error. + +## Adding context to an error + +The errors.Wrap function returns a new error that adds context to the original error. 
For example +```go +_, err := ioutil.ReadAll(r) +if err != nil { + return errors.Wrap(err, "read failed") +} +``` +## Retrieving the cause of an error + +Using `errors.Wrap` constructs a stack of errors, adding context to the preceding error. Depending on the nature of the error it may be necessary to reverse the operation of errors.Wrap to retrieve the original error for inspection. Any error value which implements this interface can be inspected by `errors.Cause`. +```go +type causer interface { + Cause() error +} +``` +`errors.Cause` will recursively retrieve the topmost error which does not implement `causer`, which is assumed to be the original cause. For example: +```go +switch err := errors.Cause(err).(type) { +case *MyError: + // handle specifically +default: + // unknown error +} +``` + +[Read the package documentation for more information](https://godoc.org/github.com/pkg/errors). + +## Contributing + +We welcome pull requests, bug fixes and issue reports. With that said, the bar for adding new symbols to this package is intentionally set high. + +Before proposing a change, please discuss your change by raising an issue. + +## License + +BSD-2-Clause diff --git a/vendor/github.com/pkg/errors/errors.go b/vendor/github.com/pkg/errors/errors.go new file mode 100644 index 0000000..7421f32 --- /dev/null +++ b/vendor/github.com/pkg/errors/errors.go @@ -0,0 +1,282 @@ +// Package errors provides simple error handling primitives. +// +// The traditional error handling idiom in Go is roughly akin to +// +// if err != nil { +// return err +// } +// +// which when applied recursively up the call stack results in error reports +// without context or debugging information. The errors package allows +// programmers to add context to the failure path in their code in a way +// that does not destroy the original value of the error. 
+// +// Adding context to an error +// +// The errors.Wrap function returns a new error that adds context to the +// original error by recording a stack trace at the point Wrap is called, +// together with the supplied message. For example +// +// _, err := ioutil.ReadAll(r) +// if err != nil { +// return errors.Wrap(err, "read failed") +// } +// +// If additional control is required, the errors.WithStack and +// errors.WithMessage functions destructure errors.Wrap into its component +// operations: annotating an error with a stack trace and with a message, +// respectively. +// +// Retrieving the cause of an error +// +// Using errors.Wrap constructs a stack of errors, adding context to the +// preceding error. Depending on the nature of the error it may be necessary +// to reverse the operation of errors.Wrap to retrieve the original error +// for inspection. Any error value which implements this interface +// +// type causer interface { +// Cause() error +// } +// +// can be inspected by errors.Cause. errors.Cause will recursively retrieve +// the topmost error that does not implement causer, which is assumed to be +// the original cause. For example: +// +// switch err := errors.Cause(err).(type) { +// case *MyError: +// // handle specifically +// default: +// // unknown error +// } +// +// Although the causer interface is not exported by this package, it is +// considered a part of its stable public interface. +// +// Formatted printing of errors +// +// All error values returned from this package implement fmt.Formatter and can +// be formatted by the fmt package. The following verbs are supported: +// +// %s print the error. If the error has a Cause it will be +// printed recursively. +// %v see %s +// %+v extended format. Each Frame of the error's StackTrace will +// be printed in detail. +// +// Retrieving the stack trace of an error or wrapper +// +// New, Errorf, Wrap, and Wrapf record a stack trace at the point they are +// invoked. 
This information can be retrieved with the following interface: +// +// type stackTracer interface { +// StackTrace() errors.StackTrace +// } +// +// The returned errors.StackTrace type is defined as +// +// type StackTrace []Frame +// +// The Frame type represents a call site in the stack trace. Frame supports +// the fmt.Formatter interface that can be used for printing information about +// the stack trace of this error. For example: +// +// if err, ok := err.(stackTracer); ok { +// for _, f := range err.StackTrace() { +// fmt.Printf("%+s:%d", f) +// } +// } +// +// Although the stackTracer interface is not exported by this package, it is +// considered a part of its stable public interface. +// +// See the documentation for Frame.Format for more details. +package errors + +import ( + "fmt" + "io" +) + +// New returns an error with the supplied message. +// New also records the stack trace at the point it was called. +func New(message string) error { + return &fundamental{ + msg: message, + stack: callers(), + } +} + +// Errorf formats according to a format specifier and returns the string +// as a value that satisfies error. +// Errorf also records the stack trace at the point it was called. +func Errorf(format string, args ...interface{}) error { + return &fundamental{ + msg: fmt.Sprintf(format, args...), + stack: callers(), + } +} + +// fundamental is an error that has a message and a stack, but no caller. +type fundamental struct { + msg string + *stack +} + +func (f *fundamental) Error() string { return f.msg } + +func (f *fundamental) Format(s fmt.State, verb rune) { + switch verb { + case 'v': + if s.Flag('+') { + io.WriteString(s, f.msg) + f.stack.Format(s, verb) + return + } + fallthrough + case 's': + io.WriteString(s, f.msg) + case 'q': + fmt.Fprintf(s, "%q", f.msg) + } +} + +// WithStack annotates err with a stack trace at the point WithStack was called. +// If err is nil, WithStack returns nil. 
+func WithStack(err error) error { + if err == nil { + return nil + } + return &withStack{ + err, + callers(), + } +} + +type withStack struct { + error + *stack +} + +func (w *withStack) Cause() error { return w.error } + +func (w *withStack) Format(s fmt.State, verb rune) { + switch verb { + case 'v': + if s.Flag('+') { + fmt.Fprintf(s, "%+v", w.Cause()) + w.stack.Format(s, verb) + return + } + fallthrough + case 's': + io.WriteString(s, w.Error()) + case 'q': + fmt.Fprintf(s, "%q", w.Error()) + } +} + +// Wrap returns an error annotating err with a stack trace +// at the point Wrap is called, and the supplied message. +// If err is nil, Wrap returns nil. +func Wrap(err error, message string) error { + if err == nil { + return nil + } + err = &withMessage{ + cause: err, + msg: message, + } + return &withStack{ + err, + callers(), + } +} + +// Wrapf returns an error annotating err with a stack trace +// at the point Wrapf is called, and the format specifier. +// If err is nil, Wrapf returns nil. +func Wrapf(err error, format string, args ...interface{}) error { + if err == nil { + return nil + } + err = &withMessage{ + cause: err, + msg: fmt.Sprintf(format, args...), + } + return &withStack{ + err, + callers(), + } +} + +// WithMessage annotates err with a new message. +// If err is nil, WithMessage returns nil. +func WithMessage(err error, message string) error { + if err == nil { + return nil + } + return &withMessage{ + cause: err, + msg: message, + } +} + +// WithMessagef annotates err with the format specifier. +// If err is nil, WithMessagef returns nil. 
+func WithMessagef(err error, format string, args ...interface{}) error { + if err == nil { + return nil + } + return &withMessage{ + cause: err, + msg: fmt.Sprintf(format, args...), + } +} + +type withMessage struct { + cause error + msg string +} + +func (w *withMessage) Error() string { return w.msg + ": " + w.cause.Error() } +func (w *withMessage) Cause() error { return w.cause } + +func (w *withMessage) Format(s fmt.State, verb rune) { + switch verb { + case 'v': + if s.Flag('+') { + fmt.Fprintf(s, "%+v\n", w.Cause()) + io.WriteString(s, w.msg) + return + } + fallthrough + case 's', 'q': + io.WriteString(s, w.Error()) + } +} + +// Cause returns the underlying cause of the error, if possible. +// An error value has a cause if it implements the following +// interface: +// +// type causer interface { +// Cause() error +// } +// +// If the error does not implement Cause, the original error will +// be returned. If the error is nil, nil will be returned without further +// investigation. +func Cause(err error) error { + type causer interface { + Cause() error + } + + for err != nil { + cause, ok := err.(causer) + if !ok { + break + } + err = cause.Cause() + } + return err +} diff --git a/vendor/github.com/pkg/errors/stack.go b/vendor/github.com/pkg/errors/stack.go new file mode 100644 index 0000000..2874a04 --- /dev/null +++ b/vendor/github.com/pkg/errors/stack.go @@ -0,0 +1,147 @@ +package errors + +import ( + "fmt" + "io" + "path" + "runtime" + "strings" +) + +// Frame represents a program counter inside a stack frame. +type Frame uintptr + +// pc returns the program counter for this frame; +// multiple frames may have the same PC value. +func (f Frame) pc() uintptr { return uintptr(f) - 1 } + +// file returns the full path to the file that contains the +// function for this Frame's pc. 
+func (f Frame) file() string { + fn := runtime.FuncForPC(f.pc()) + if fn == nil { + return "unknown" + } + file, _ := fn.FileLine(f.pc()) + return file +} + +// line returns the line number of source code of the +// function for this Frame's pc. +func (f Frame) line() int { + fn := runtime.FuncForPC(f.pc()) + if fn == nil { + return 0 + } + _, line := fn.FileLine(f.pc()) + return line +} + +// Format formats the frame according to the fmt.Formatter interface. +// +// %s source file +// %d source line +// %n function name +// %v equivalent to %s:%d +// +// Format accepts flags that alter the printing of some verbs, as follows: +// +// %+s function name and path of source file relative to the compile time +// GOPATH separated by \n\t (\n\t) +// %+v equivalent to %+s:%d +func (f Frame) Format(s fmt.State, verb rune) { + switch verb { + case 's': + switch { + case s.Flag('+'): + pc := f.pc() + fn := runtime.FuncForPC(pc) + if fn == nil { + io.WriteString(s, "unknown") + } else { + file, _ := fn.FileLine(pc) + fmt.Fprintf(s, "%s\n\t%s", fn.Name(), file) + } + default: + io.WriteString(s, path.Base(f.file())) + } + case 'd': + fmt.Fprintf(s, "%d", f.line()) + case 'n': + name := runtime.FuncForPC(f.pc()).Name() + io.WriteString(s, funcname(name)) + case 'v': + f.Format(s, 's') + io.WriteString(s, ":") + f.Format(s, 'd') + } +} + +// StackTrace is stack of Frames from innermost (newest) to outermost (oldest). +type StackTrace []Frame + +// Format formats the stack of Frames according to the fmt.Formatter interface. +// +// %s lists source files for each Frame in the stack +// %v lists the source file and line number for each Frame in the stack +// +// Format accepts flags that alter the printing of some verbs, as follows: +// +// %+v Prints filename, function, and line number for each Frame in the stack. 
+func (st StackTrace) Format(s fmt.State, verb rune) { + switch verb { + case 'v': + switch { + case s.Flag('+'): + for _, f := range st { + fmt.Fprintf(s, "\n%+v", f) + } + case s.Flag('#'): + fmt.Fprintf(s, "%#v", []Frame(st)) + default: + fmt.Fprintf(s, "%v", []Frame(st)) + } + case 's': + fmt.Fprintf(s, "%s", []Frame(st)) + } +} + +// stack represents a stack of program counters. +type stack []uintptr + +func (s *stack) Format(st fmt.State, verb rune) { + switch verb { + case 'v': + switch { + case st.Flag('+'): + for _, pc := range *s { + f := Frame(pc) + fmt.Fprintf(st, "\n%+v", f) + } + } + } +} + +func (s *stack) StackTrace() StackTrace { + f := make([]Frame, len(*s)) + for i := 0; i < len(f); i++ { + f[i] = Frame((*s)[i]) + } + return f +} + +func callers() *stack { + const depth = 32 + var pcs [depth]uintptr + n := runtime.Callers(3, pcs[:]) + var st stack = pcs[0:n] + return &st +} + +// funcname removes the path prefix component of a function's name reported by func.Name(). +func funcname(name string) string { + i := strings.LastIndex(name, "/") + name = name[i+1:] + i = strings.Index(name, ".") + return name[i+1:] +} diff --git a/vendor/github.com/seccomp/libseccomp-golang/LICENSE b/vendor/github.com/seccomp/libseccomp-golang/LICENSE new file mode 100644 index 0000000..81cf60d --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2015 Matthew Heon +Copyright (c) 2015 Paul Moore +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/seccomp/libseccomp-golang/README b/vendor/github.com/seccomp/libseccomp-golang/README new file mode 100644 index 0000000..66839a4 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/README @@ -0,0 +1,51 @@ +libseccomp-golang: Go Language Bindings for the libseccomp Project +=============================================================================== +https://github.com/seccomp/libseccomp-golang +https://github.com/seccomp/libseccomp + +The libseccomp library provides an easy to use, platform independent, interface +to the Linux Kernel's syscall filtering mechanism. The libseccomp API is +designed to abstract away the underlying BPF based syscall filter language and +present a more conventional function-call based filtering interface that should +be familiar to, and easily adopted by, application developers. + +The libseccomp-golang library provides a Go based interface to the libseccomp +library. 
+ +* Online Resources + +The library source repository currently lives on GitHub at the following URLs: + + -> https://github.com/seccomp/libseccomp-golang + -> https://github.com/seccomp/libseccomp + +The project mailing list is currently hosted on Google Groups at the URL below, +please note that a Google account is not required to subscribe to the mailing +list. + + -> https://groups.google.com/d/forum/libseccomp + +Documentation is also available at: + + -> https://godoc.org/github.com/seccomp/libseccomp-golang + +* Installing the package + +The libseccomp-golang bindings require at least Go v1.2.1 and GCC v4.8.4; +earlier versions may yield unpredictable results. If you meet these +requirements you can install this package using the command below: + + $ go get github.com/seccomp/libseccomp-golang + +* Testing the Library + +A number of tests and lint related recipes are provided in the Makefile, if +you want to run the standard regression tests, you can execute the following: + + $ make check + +In order to execute the 'make lint' recipe the 'golint' tool is needed, it +can be found at: + + -> https://github.com/golang/lint + diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go new file mode 100644 index 0000000..a3cc538 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go @@ -0,0 +1,935 @@ +// +build linux + +// Public API specification for libseccomp Go bindings +// Contains public API for the bindings + +// Package seccomp provides bindings for libseccomp, a library wrapping the Linux +// seccomp syscall. Seccomp enables an application to restrict system call use +// for itself and its children.
+package seccomp + +import ( + "fmt" + "os" + "runtime" + "strings" + "sync" + "syscall" + "unsafe" +) + +// C wrapping code + +// #cgo pkg-config: libseccomp +// #include <stdlib.h> +// #include <seccomp.h> +import "C" + +// Exported types + +// VersionError denotes that the system libseccomp version is incompatible +// with this package. +type VersionError struct { + message string + minimum string +} + +func (e VersionError) Error() string { + format := "Libseccomp version too low: " + if e.message != "" { + format += e.message + ": " + } + format += "minimum supported is " + if e.minimum != "" { + format += e.minimum + ": " + } else { + format += "2.2.0: " + } + format += "detected %d.%d.%d" + return fmt.Sprintf(format, verMajor, verMinor, verMicro) +} + +// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a +// per-architecture basis. +type ScmpArch uint + +// ScmpAction represents an action to be taken on a filter rule match in +// libseccomp +type ScmpAction uint + +// ScmpCompareOp represents a comparison operator which can be used in a filter +// rule +type ScmpCompareOp uint + +// ScmpCondition represents a rule in a libseccomp filter context +type ScmpCondition struct { + Argument uint `json:"argument,omitempty"` + Op ScmpCompareOp `json:"operator,omitempty"` + Operand1 uint64 `json:"operand_one,omitempty"` + Operand2 uint64 `json:"operand_two,omitempty"` +} + +// ScmpSyscall represents a Linux System Call +type ScmpSyscall int32 + +// Exported Constants + +const ( + // Valid architectures recognized by libseccomp + // PowerPC and S390(x) architectures are unavailable below library version + // v2.3.0 and will return errors if used with incompatible libraries + + // ArchInvalid is a placeholder to ensure uninitialized ScmpArch + // variables are invalid + ArchInvalid ScmpArch = iota + // ArchNative is the native architecture of the kernel + ArchNative ScmpArch = iota + // ArchX86 represents 32-bit x86 syscalls + ArchX86 ScmpArch = iota + // ArchAMD64
represents 64-bit x86-64 syscalls + ArchAMD64 ScmpArch = iota + // ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers) + ArchX32 ScmpArch = iota + // ArchARM represents 32-bit ARM syscalls + ArchARM ScmpArch = iota + // ArchARM64 represents 64-bit ARM syscalls + ArchARM64 ScmpArch = iota + // ArchMIPS represents 32-bit MIPS syscalls + ArchMIPS ScmpArch = iota + // ArchMIPS64 represents 64-bit MIPS syscalls + ArchMIPS64 ScmpArch = iota + // ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers) + ArchMIPS64N32 ScmpArch = iota + // ArchMIPSEL represents 32-bit MIPS syscalls (little endian) + ArchMIPSEL ScmpArch = iota + // ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian) + ArchMIPSEL64 ScmpArch = iota + // ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian, + // 32-bit pointers) + ArchMIPSEL64N32 ScmpArch = iota + // ArchPPC represents 32-bit POWERPC syscalls + ArchPPC ScmpArch = iota + // ArchPPC64 represents 64-bit POWER syscalls (big endian) + ArchPPC64 ScmpArch = iota + // ArchPPC64LE represents 64-bit POWER syscalls (little endian) + ArchPPC64LE ScmpArch = iota + // ArchS390 represents 31-bit System z/390 syscalls + ArchS390 ScmpArch = iota + // ArchS390X represents 64-bit System z/390 syscalls + ArchS390X ScmpArch = iota +) + +const ( + // Supported actions on filter match + + // ActInvalid is a placeholder to ensure uninitialized ScmpAction + // variables are invalid + ActInvalid ScmpAction = iota + // ActKill kills the process + ActKill ScmpAction = iota + // ActTrap throws SIGSYS + ActTrap ScmpAction = iota + // ActErrno causes the syscall to return a negative error code. This + // code can be set with the SetReturnCode method + ActErrno ScmpAction = iota + // ActTrace causes the syscall to notify tracing processes with the + // given error code. 
This code can be set with the SetReturnCode method + ActTrace ScmpAction = iota + // ActAllow permits the syscall to continue execution + ActAllow ScmpAction = iota + // ActLog permits the syscall to continue execution after logging it. + // This action is only usable when libseccomp API level 3 or higher is + // supported. + ActLog ScmpAction = iota +) + +const ( + // These are comparison operators used in conditional seccomp rules + // They are used to compare the value of a single argument of a syscall + // against a user-defined constant + + // CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp + // variables are invalid + CompareInvalid ScmpCompareOp = iota + // CompareNotEqual returns true if the argument is not equal to the + // given value + CompareNotEqual ScmpCompareOp = iota + // CompareLess returns true if the argument is less than the given value + CompareLess ScmpCompareOp = iota + // CompareLessOrEqual returns true if the argument is less than or equal + // to the given value + CompareLessOrEqual ScmpCompareOp = iota + // CompareEqual returns true if the argument is equal to the given value + CompareEqual ScmpCompareOp = iota + // CompareGreaterEqual returns true if the argument is greater than or + // equal to the given value + CompareGreaterEqual ScmpCompareOp = iota + // CompareGreater returns true if the argument is greater than the given + // value + CompareGreater ScmpCompareOp = iota + // CompareMaskedEqual returns true if the argument is equal to the given + // value, when masked (bitwise &) against the second given value + CompareMaskedEqual ScmpCompareOp = iota +) + +// Helpers for types + +// GetArchFromString returns an ScmpArch constant from a string representing an +// architecture +func GetArchFromString(arch string) (ScmpArch, error) { + if err := ensureSupportedVersion(); err != nil { + return ArchInvalid, err + } + + switch strings.ToLower(arch) { + case "x86": + return ArchX86, nil + case "amd64", "x86-64", 
"x86_64", "x64": + return ArchAMD64, nil + case "x32": + return ArchX32, nil + case "arm": + return ArchARM, nil + case "arm64", "aarch64": + return ArchARM64, nil + case "mips": + return ArchMIPS, nil + case "mips64": + return ArchMIPS64, nil + case "mips64n32": + return ArchMIPS64N32, nil + case "mipsel": + return ArchMIPSEL, nil + case "mipsel64": + return ArchMIPSEL64, nil + case "mipsel64n32": + return ArchMIPSEL64N32, nil + case "ppc": + return ArchPPC, nil + case "ppc64": + return ArchPPC64, nil + case "ppc64le": + return ArchPPC64LE, nil + case "s390": + return ArchS390, nil + case "s390x": + return ArchS390X, nil + default: + return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %q", arch) + } +} + +// String returns a string representation of an architecture constant +func (a ScmpArch) String() string { + switch a { + case ArchX86: + return "x86" + case ArchAMD64: + return "amd64" + case ArchX32: + return "x32" + case ArchARM: + return "arm" + case ArchARM64: + return "arm64" + case ArchMIPS: + return "mips" + case ArchMIPS64: + return "mips64" + case ArchMIPS64N32: + return "mips64n32" + case ArchMIPSEL: + return "mipsel" + case ArchMIPSEL64: + return "mipsel64" + case ArchMIPSEL64N32: + return "mipsel64n32" + case ArchPPC: + return "ppc" + case ArchPPC64: + return "ppc64" + case ArchPPC64LE: + return "ppc64le" + case ArchS390: + return "s390" + case ArchS390X: + return "s390x" + case ArchNative: + return "native" + case ArchInvalid: + return "Invalid architecture" + default: + return fmt.Sprintf("Unknown architecture %#x", uint(a)) + } +} + +// String returns a string representation of a comparison operator constant +func (a ScmpCompareOp) String() string { + switch a { + case CompareNotEqual: + return "Not equal" + case CompareLess: + return "Less than" + case CompareLessOrEqual: + return "Less than or equal to" + case CompareEqual: + return "Equal" + case CompareGreaterEqual: + return "Greater than or equal to" + case CompareGreater: + 
return "Greater than" + case CompareMaskedEqual: + return "Masked equality" + case CompareInvalid: + return "Invalid comparison operator" + default: + return fmt.Sprintf("Unrecognized comparison operator %#x", uint(a)) + } +} + +// String returns a string representation of a seccomp match action +func (a ScmpAction) String() string { + switch a & 0xFFFF { + case ActKill: + return "Action: Kill Process" + case ActTrap: + return "Action: Send SIGSYS" + case ActErrno: + return fmt.Sprintf("Action: Return error code %d", (a >> 16)) + case ActTrace: + return fmt.Sprintf("Action: Notify tracing processes with code %d", + (a >> 16)) + case ActLog: + return "Action: Log system call" + case ActAllow: + return "Action: Allow system call" + default: + return fmt.Sprintf("Unrecognized Action %#x", uint(a)) + } +} + +// SetReturnCode adds a return code to a supporting ScmpAction, clearing any +// existing code. Only valid on ActErrno and ActTrace. Takes no action otherwise. +// Accepts 16-bit return code as argument. +// Returns a valid ScmpAction of the original type with the new error code set. +func (a ScmpAction) SetReturnCode(code int16) ScmpAction { + aTmp := a & 0x0000FFFF + if aTmp == ActErrno || aTmp == ActTrace { + return (aTmp | (ScmpAction(code)&0xFFFF)<<16) + } + return a +} + +// GetReturnCode returns the return code of an ScmpAction +func (a ScmpAction) GetReturnCode() int16 { + return int16(a >> 16) +} + +// General utility functions + +// GetLibraryVersion returns the version of the library the bindings are built +// against. +// The version is formatted as follows: Major.Minor.Micro +func GetLibraryVersion() (major, minor, micro uint) { + return verMajor, verMinor, verMicro +} + +// GetApi returns the API level supported by the system. +// Returns a positive int containing the API level, or 0 with an error if the +// API level could not be detected due to the library being older than v2.4.0.
+// See the seccomp_api_get(3) man page for details on available API levels: +// https://github.com/seccomp/libseccomp/blob/master/doc/man/man3/seccomp_api_get.3 +func GetApi() (uint, error) { + return getApi() +} + +// SetApi forcibly sets the API level. General use of this function is strongly +// discouraged. +// Returns an error if the API level could not be set. An error is always +// returned if the library is older than v2.4.0. +// See the seccomp_api_get(3) man page for details on available API levels: +// https://github.com/seccomp/libseccomp/blob/master/doc/man/man3/seccomp_api_get.3 +func SetApi(api uint) error { + return setApi(api) +} + +// Syscall functions + +// GetName retrieves the name of a syscall from its number. +// Acts on any syscall number. +// Returns either a string containing the name of the syscall, or an error. +func (s ScmpSyscall) GetName() (string, error) { + return s.GetNameByArch(ArchNative) +} + +// GetNameByArch retrieves the name of a syscall from its number for a given +// architecture. +// Acts on any syscall number. +// Accepts a valid architecture constant. +// Returns either a string containing the name of the syscall, or an error +// if the syscall is unrecognized or an issue occurred. +func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) { + if err := sanitizeArch(arch); err != nil { + return "", err + } + + cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s)) + if cString == nil { + return "", fmt.Errorf("could not resolve syscall name for %#x", int32(s)) + } + defer C.free(unsafe.Pointer(cString)) + + finalStr := C.GoString(cString) + return finalStr, nil +} + +// GetSyscallFromName returns the number of a syscall by name on the kernel's +// native architecture. +// Accepts a string containing the name of a syscall. +// Returns the number of the syscall, or an error if no syscall with that name +// was found.
+func GetSyscallFromName(name string) (ScmpSyscall, error) { + if err := ensureSupportedVersion(); err != nil { + return 0, err + } + + cString := C.CString(name) + defer C.free(unsafe.Pointer(cString)) + + result := C.seccomp_syscall_resolve_name(cString) + if result == scmpError { + return 0, fmt.Errorf("could not resolve name to syscall: %q", name) + } + + return ScmpSyscall(result), nil +} + +// GetSyscallFromNameByArch returns the number of a syscall by name for a given +// architecture's ABI. +// Accepts the name of a syscall and an architecture constant. +// Returns the number of the syscall, or an error if an invalid architecture is +// passed or a syscall with that name was not found. +func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) { + if err := ensureSupportedVersion(); err != nil { + return 0, err + } + if err := sanitizeArch(arch); err != nil { + return 0, err + } + + cString := C.CString(name) + defer C.free(unsafe.Pointer(cString)) + + result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString) + if result == scmpError { + return 0, fmt.Errorf("could not resolve name to syscall: %q on %v", name, arch) + } + + return ScmpSyscall(result), nil +} + +// MakeCondition creates and returns a new condition to attach to a filter rule. +// Associated rules will only match if this condition is true. +// Accepts the number of the argument we are checking, and a comparison operator +// and value to compare to. +// The rule will match if argument $arg (zero-indexed) of the syscall is +// $COMPARE_OP the provided comparison value. +// Some comparison operators accept two values. Masked equals, for example, +// will mask $arg of the syscall with the second value provided (via bitwise +// AND) and then compare against the first value provided. +// For example, in the less than or equal case, if the syscall argument was +// 0 and the value provided was 1, the condition would match, as 0 is less +// than or equal to 1.
+// Return either an error on bad argument or a valid ScmpCondition struct. +func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) { + var condStruct ScmpCondition + + if err := ensureSupportedVersion(); err != nil { + return condStruct, err + } + + if comparison == CompareInvalid { + return condStruct, fmt.Errorf("invalid comparison operator") + } else if arg > 5 { + return condStruct, fmt.Errorf("syscalls only have up to 6 arguments (%d given)", arg) + } else if len(values) > 2 { + return condStruct, fmt.Errorf("conditions can have at most 2 arguments (%d given)", len(values)) + } else if len(values) == 0 { + return condStruct, fmt.Errorf("must provide at least one value to compare against") + } + + condStruct.Argument = arg + condStruct.Op = comparison + condStruct.Operand1 = values[0] + if len(values) == 2 { + condStruct.Operand2 = values[1] + } else { + condStruct.Operand2 = 0 // Unused + } + + return condStruct, nil +} + +// Utility Functions + +// GetNativeArch returns architecture token representing the native kernel +// architecture +func GetNativeArch() (ScmpArch, error) { + if err := ensureSupportedVersion(); err != nil { + return ArchInvalid, err + } + + arch := C.seccomp_arch_native() + + return archFromNative(arch) +} + +// Public Filter API + +// ScmpFilter represents a filter context in libseccomp. +// A filter context is initially empty. Rules can be added to it, and it can +// then be loaded into the kernel. +type ScmpFilter struct { + filterCtx C.scmp_filter_ctx + valid bool + lock sync.Mutex +} + +// NewFilter creates and returns a new filter context. +// Accepts a default action to be taken for syscalls which match no rules in +// the filter. +// Returns a reference to a valid filter context, or nil and an error if the +// filter context could not be created or an invalid default action was given. 
+func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) { + if err := ensureSupportedVersion(); err != nil { + return nil, err + } + + if err := sanitizeAction(defaultAction); err != nil { + return nil, err + } + + fPtr := C.seccomp_init(defaultAction.toNative()) + if fPtr == nil { + return nil, fmt.Errorf("could not create filter") + } + + filter := new(ScmpFilter) + filter.filterCtx = fPtr + filter.valid = true + runtime.SetFinalizer(filter, filterFinalizer) + + // Enable TSync so all goroutines will receive the same rules + // If the kernel does not support TSYNC, allow us to continue without error + if err := filter.setFilterAttr(filterAttrTsync, 0x1); err != nil && err != syscall.ENOTSUP { + filter.Release() + return nil, fmt.Errorf("could not create filter - error setting tsync bit: %v", err) + } + + return filter, nil +} + +// IsValid determines whether a filter context is valid to use. +// Some operations (Release and Merge) render filter contexts invalid and +// consequently prevent further use. +func (f *ScmpFilter) IsValid() bool { + f.lock.Lock() + defer f.lock.Unlock() + + return f.valid +} + +// Reset resets a filter context, removing all its existing state. +// Accepts a new default action to be taken for syscalls which do not match. +// Returns an error if the filter or action provided are invalid. +func (f *ScmpFilter) Reset(defaultAction ScmpAction) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeAction(defaultAction); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative()) + if retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Release releases a filter context, freeing its memory. Should be called after +// loading into the kernel, when the filter is no longer needed. +// After calling this function, the given filter is no longer valid and cannot +// be used. 
+// Release() will be invoked automatically when a filter context is garbage +// collected, but can also be called manually to free memory. +func (f *ScmpFilter) Release() { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return + } + + f.valid = false + C.seccomp_release(f.filterCtx) +} + +// Merge merges two filter contexts. +// The source filter src will be released as part of the process, and will no +// longer be usable or valid after this call. +// To be merged, filters must NOT share any architectures, and all their +// attributes (Default Action, Bad Arch Action, and No New Privs bools) +// must match. +// The filter src will be merged into the filter this is called on. +// The architectures of the src filter not present in the destination, and all +// associated rules, will be added to the destination. +// Returns an error if merging the filters failed. +func (f *ScmpFilter) Merge(src *ScmpFilter) error { + f.lock.Lock() + defer f.lock.Unlock() + + src.lock.Lock() + defer src.lock.Unlock() + + if !src.valid || !f.valid { + return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized") + } + + // Merge the filters + retCode := C.seccomp_merge(f.filterCtx, src.filterCtx) + if syscall.Errno(-1*retCode) == syscall.EINVAL { + return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter") + } else if retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + src.valid = false + + return nil +} + +// IsArchPresent checks if an architecture is present in a filter. +// If a filter contains an architecture, it uses its default action for +// syscalls which do not match rules in it, and its rules can match syscalls +// for that ABI. +// If a filter does not contain an architecture, all syscalls made to that +// kernel ABI will fail with the filter's default Bad Architecture Action +// (by default, killing the process). +// Accepts an architecture constant. 
+// Returns true if the architecture is present in the filter, false otherwise, +// and an error on an invalid filter context, architecture constant, or an +// issue with the call to libseccomp. +func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return false, err + } else if !f.valid { + return false, errBadFilter + } + + retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative()) + if syscall.Errno(-1*retCode) == syscall.EEXIST { + // -EEXIST is "arch not present" + return false, nil + } else if retCode != 0 { + return false, syscall.Errno(-1 * retCode) + } + + return true, nil +} + +// AddArch adds an architecture to the filter. +// Accepts an architecture constant. +// Returns an error on invalid filter context or architecture token, or an +// issue with the call to libseccomp. +func (f *ScmpFilter) AddArch(arch ScmpArch) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + // Libseccomp returns -EEXIST if the specified architecture is already + // present. Succeed silently in this case, as it's not fatal, and the + // architecture is present already. + retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative()) + if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// RemoveArch removes an architecture from the filter. +// Accepts an architecture constant. +// Returns an error on invalid filter context or architecture token, or an +// issue with the call to libseccomp. 
+func (f *ScmpFilter) RemoveArch(arch ScmpArch) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + // Similar to AddArch, -EEXIST is returned if the arch is not present + // Succeed silently in that case, this is not fatal and the architecture + // is not present in the filter after RemoveArch + retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative()) + if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Load loads a filter context into the kernel. +// Returns an error if the filter context is invalid or the syscall failed. +func (f *ScmpFilter) Load() error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_load(f.filterCtx); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// GetDefaultAction returns the default action taken on a syscall which does not +// match a rule in the filter, or an error if an issue was encountered +// retrieving the value. +func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) { + action, err := f.getFilterAttr(filterAttrActDefault) + if err != nil { + return 0x0, err + } + + return actionFromNative(action) +} + +// GetBadArchAction returns the default action taken on a syscall for an +// architecture not in the filter, or an error if an issue was encountered +// retrieving the value. +func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) { + action, err := f.getFilterAttr(filterAttrActBadArch) + if err != nil { + return 0x0, err + } + + return actionFromNative(action) +} + +// GetNoNewPrivsBit returns the current state the No New Privileges bit will be set +// to on the filter being loaded, or an error if an issue was encountered +// retrieving the value. 
+// The No New Privileges bit tells the kernel that new processes run with exec() +// cannot gain more privileges than the process that ran exec(). +// For example, a process with No New Privileges set would be unable to exec +// setuid/setgid executables. +func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) { + noNewPrivs, err := f.getFilterAttr(filterAttrNNP) + if err != nil { + return false, err + } + + if noNewPrivs == 0 { + return false, nil + } + + return true, nil +} + +// GetLogBit returns the current state the Log bit will be set to on the filter +// being loaded, or an error if an issue was encountered retrieving the value. +// The Log bit tells the kernel that all actions taken by the filter, with the +// exception of ActAllow, should be logged. +// The Log bit is only usable when libseccomp API level 3 or higher is +// supported. +func (f *ScmpFilter) GetLogBit() (bool, error) { + log, err := f.getFilterAttr(filterAttrLog) + if err != nil { + api, apiErr := getApi() + if (apiErr != nil && api == 0) || (apiErr == nil && api < 3) { + return false, fmt.Errorf("getting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher") + } + + return false, err + } + + if log == 0 { + return false, nil + } + + return true, nil +} + +// SetBadArchAction sets the default action taken on a syscall for an +// architecture not in the filter, or an error if an issue was encountered +// setting the value. +func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error { + if err := sanitizeAction(action); err != nil { + return err + } + + return f.setFilterAttr(filterAttrActBadArch, action.toNative()) +} + +// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be +// applied on filter load, or an error if an issue was encountered setting the +// value. +// Filters with No New Privileges set to 0 can only be loaded if the process +// has the CAP_SYS_ADMIN capability. 
+func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error { + var toSet C.uint32_t = 0x0 + + if state { + toSet = 0x1 + } + + return f.setFilterAttr(filterAttrNNP, toSet) +} + +// SetLogBit sets the state of the Log bit, which will be applied on filter +// load, or an error if an issue was encountered setting the value. +// The Log bit is only usable when libseccomp API level 3 or higher is +// supported. +func (f *ScmpFilter) SetLogBit(state bool) error { + var toSet C.uint32_t = 0x0 + + if state { + toSet = 0x1 + } + + err := f.setFilterAttr(filterAttrLog, toSet) + if err != nil { + api, apiErr := getApi() + if (apiErr != nil && api == 0) || (apiErr == nil && api < 3) { + return fmt.Errorf("setting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher") + } + } + + return err +} + +// SetSyscallPriority sets a syscall's priority. +// This provides a hint to the filter generator in libseccomp about the +// importance of this syscall. High-priority syscalls are placed +// first in the filter code, and incur less overhead (at the expense of +// lower-priority syscalls). +func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call), + C.uint8_t(priority)); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// AddRule adds a single rule for an unconditional action on a syscall. +// Accepts the number of the syscall and the action to be taken on the call +// being made. +// Returns an error if an issue was encountered adding the rule. +func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error { + return f.addRuleGeneric(call, action, false, nil) +} + +// AddRuleExact adds a single rule for an unconditional action on a syscall. +// Accepts the number of the syscall and the action to be taken on the call +// being made. 
+// No modifications will be made to the rule, and it will fail to add if it +// cannot be applied to the current architecture without modification. +// The rule will function exactly as described, but it may not function identically +// (or be able to be applied to) all architectures. +// Returns an error if an issue was encountered adding the rule. +func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error { + return f.addRuleGeneric(call, action, true, nil) +} + +// AddRuleConditional adds a single rule for a conditional action on a syscall. +// Returns an error if an issue was encountered adding the rule. +// All conditions must match for the rule to match. +// There is a bug in library versions below v2.2.1 which can, in some cases, +// cause conditions to be lost when more than one are used. Consequently, +// AddRuleConditional is disabled on library versions lower than v2.2.1 +func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { + return f.addRuleGeneric(call, action, false, conds) +} + +// AddRuleConditionalExact adds a single rule for a conditional action on a +// syscall. +// No modifications will be made to the rule, and it will fail to add if it +// cannot be applied to the current architecture without modification. +// The rule will function exactly as described, but it may not function identically +// (or be able to be applied to) all architectures. +// Returns an error if an issue was encountered adding the rule. +// There is a bug in library versions below v2.2.1 which can, in some cases, +// cause conditions to be lost when more than one are used. 
Consequently, +// AddRuleConditionalExact is disabled on library versions lower than v2.2.1 +func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { + return f.addRuleGeneric(call, action, true, conds) +} + +// ExportPFC output PFC-formatted, human-readable dump of a filter context's +// rules to a file. +// Accepts file to write to (must be open for writing). +// Returns an error if writing to the file fails. +func (f *ScmpFilter) ExportPFC(file *os.File) error { + f.lock.Lock() + defer f.lock.Unlock() + + fd := file.Fd() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// ExportBPF outputs Berkeley Packet Filter-formatted, kernel-readable dump of a +// filter context's rules to a file. +// Accepts file to write to (must be open for writing). +// Returns an error if writing to the file fails. +func (f *ScmpFilter) ExportBPF(file *os.File) error { + f.lock.Lock() + defer f.lock.Unlock() + + fd := file.Fd() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go new file mode 100644 index 0000000..4e36b27 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go @@ -0,0 +1,571 @@ +// +build linux + +// Internal functions for libseccomp Go bindings +// No exported functions + +package seccomp + +import ( + "fmt" + "syscall" +) + +// Unexported C wrapping code - provides the C-Golang interface +// Get the seccomp header in scope +// Need stdlib.h for free() on cstrings + +// #cgo pkg-config: libseccomp +/* +#include +#include +#include + +#if SCMP_VER_MAJOR < 2 +#error Minimum supported 
version of Libseccomp is v2.2.0 +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2 +#error Minimum supported version of Libseccomp is v2.2.0 +#endif + +#define ARCH_BAD ~0 + +const uint32_t C_ARCH_BAD = ARCH_BAD; + +#ifndef SCMP_ARCH_PPC +#define SCMP_ARCH_PPC ARCH_BAD +#endif + +#ifndef SCMP_ARCH_PPC64 +#define SCMP_ARCH_PPC64 ARCH_BAD +#endif + +#ifndef SCMP_ARCH_PPC64LE +#define SCMP_ARCH_PPC64LE ARCH_BAD +#endif + +#ifndef SCMP_ARCH_S390 +#define SCMP_ARCH_S390 ARCH_BAD +#endif + +#ifndef SCMP_ARCH_S390X +#define SCMP_ARCH_S390X ARCH_BAD +#endif + +const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE; +const uint32_t C_ARCH_X86 = SCMP_ARCH_X86; +const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64; +const uint32_t C_ARCH_X32 = SCMP_ARCH_X32; +const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM; +const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64; +const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS; +const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64; +const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32; +const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL; +const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64; +const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32; +const uint32_t C_ARCH_PPC = SCMP_ARCH_PPC; +const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64; +const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE; +const uint32_t C_ARCH_S390 = SCMP_ARCH_S390; +const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X; + +#ifndef SCMP_ACT_LOG +#define SCMP_ACT_LOG 0x7ffc0000U +#endif + +const uint32_t C_ACT_KILL = SCMP_ACT_KILL; +const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP; +const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0); +const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0); +const uint32_t C_ACT_LOG = SCMP_ACT_LOG; +const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW; + +// The libseccomp SCMP_FLTATR_CTL_LOG member of the scmp_filter_attr enum was +// added in v2.4.0 +#if (SCMP_VER_MAJOR < 2) || \ + (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4) +#define SCMP_FLTATR_CTL_LOG _SCMP_FLTATR_MIN +#endif + +const uint32_t 
C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT; +const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH; +const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP; +const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC; +const uint32_t C_ATTRIBUTE_LOG = (uint32_t)SCMP_FLTATR_CTL_LOG; + +const int C_CMP_NE = (int)SCMP_CMP_NE; +const int C_CMP_LT = (int)SCMP_CMP_LT; +const int C_CMP_LE = (int)SCMP_CMP_LE; +const int C_CMP_EQ = (int)SCMP_CMP_EQ; +const int C_CMP_GE = (int)SCMP_CMP_GE; +const int C_CMP_GT = (int)SCMP_CMP_GT; +const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ; + +const int C_VERSION_MAJOR = SCMP_VER_MAJOR; +const int C_VERSION_MINOR = SCMP_VER_MINOR; +const int C_VERSION_MICRO = SCMP_VER_MICRO; + +#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 3 +unsigned int get_major_version() +{ + return seccomp_version()->major; +} + +unsigned int get_minor_version() +{ + return seccomp_version()->minor; +} + +unsigned int get_micro_version() +{ + return seccomp_version()->micro; +} +#else +unsigned int get_major_version() +{ + return (unsigned int)C_VERSION_MAJOR; +} + +unsigned int get_minor_version() +{ + return (unsigned int)C_VERSION_MINOR; +} + +unsigned int get_micro_version() +{ + return (unsigned int)C_VERSION_MICRO; +} +#endif + +// The libseccomp API level functions were added in v2.4.0 +#if (SCMP_VER_MAJOR < 2) || \ + (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4) +const unsigned int seccomp_api_get(void) +{ + // libseccomp-golang requires libseccomp v2.2.0, at a minimum, which + // supported API level 2. However, the kernel may not support API level + // 2 constructs which are the seccomp() system call and the TSYNC + // filter flag. Return the "reserved" value of 0 here to indicate that + // proper API level support is not available in libseccomp. 
+ return 0; +} + +int seccomp_api_set(unsigned int level) +{ + return -EOPNOTSUPP; +} +#endif + +typedef struct scmp_arg_cmp* scmp_cast_t; + +void* make_arg_cmp_array(unsigned int length) +{ + return calloc(length, sizeof(struct scmp_arg_cmp)); +} + +// Wrapper to add an scmp_arg_cmp struct to an existing arg_cmp array +void add_struct_arg_cmp( + struct scmp_arg_cmp* arr, + unsigned int pos, + unsigned int arg, + int compare, + uint64_t a, + uint64_t b + ) +{ + arr[pos].arg = arg; + arr[pos].op = compare; + arr[pos].datum_a = a; + arr[pos].datum_b = b; + + return; +} +*/ +import "C" + +// Nonexported types +type scmpFilterAttr uint32 + +// Nonexported constants + +const ( + filterAttrActDefault scmpFilterAttr = iota + filterAttrActBadArch scmpFilterAttr = iota + filterAttrNNP scmpFilterAttr = iota + filterAttrTsync scmpFilterAttr = iota + filterAttrLog scmpFilterAttr = iota +) + +const ( + // An error return from certain libseccomp functions + scmpError C.int = -1 + // Comparison boundaries to check for architecture validity + archStart ScmpArch = ArchNative + archEnd ScmpArch = ArchS390X + // Comparison boundaries to check for action validity + actionStart ScmpAction = ActKill + actionEnd ScmpAction = ActLog + // Comparison boundaries to check for comparison operator validity + compareOpStart ScmpCompareOp = CompareNotEqual + compareOpEnd ScmpCompareOp = CompareMaskedEqual +) + +var ( + // Error thrown on bad filter context + errBadFilter = fmt.Errorf("filter is invalid or uninitialized") + // Constants representing library major, minor, and micro versions + verMajor = uint(C.get_major_version()) + verMinor = uint(C.get_minor_version()) + verMicro = uint(C.get_micro_version()) +) + +// Nonexported functions + +// Check if library version is greater than or equal to the given one +func checkVersionAbove(major, minor, micro uint) bool { + return (verMajor > major) || + (verMajor == major && verMinor > minor) || + (verMajor == major && verMinor == minor && verMicro 
>= micro) +} + +// Ensure that the library is supported, i.e. >= 2.2.0. +func ensureSupportedVersion() error { + if !checkVersionAbove(2, 2, 0) { + return VersionError{} + } + return nil +} + +// Get the API level +func getApi() (uint, error) { + api := C.seccomp_api_get() + if api == 0 { + return 0, fmt.Errorf("API level operations are not supported") + } + + return uint(api), nil +} + +// Set the API level +func setApi(api uint) error { + if retCode := C.seccomp_api_set(C.uint(api)); retCode != 0 { + if syscall.Errno(-1*retCode) == syscall.EOPNOTSUPP { + return fmt.Errorf("API level operations are not supported") + } + + return fmt.Errorf("could not set API level: %v", retCode) + } + + return nil +} + +// Filter helpers + +// Filter finalizer - ensure that kernel context for filters is freed +func filterFinalizer(f *ScmpFilter) { + f.Release() +} + +// Get a raw filter attribute +func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return 0x0, errBadFilter + } + + var attribute C.uint32_t + + retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute) + if retCode != 0 { + return 0x0, syscall.Errno(-1 * retCode) + } + + return attribute, nil +} + +// Set a raw filter attribute +func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value) + if retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// DOES NOT LOCK OR CHECK VALIDITY +// Assumes caller has already done this +// Wrapper for seccomp_rule_add_... 
functions +func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, length C.uint, cond C.scmp_cast_t) error { + if length != 0 && cond == nil { + return fmt.Errorf("null conditions list, but length is nonzero") + } + + var retCode C.int + if exact { + retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond) + } else { + retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond) + } + + if syscall.Errno(-1*retCode) == syscall.EFAULT { + return fmt.Errorf("unrecognized syscall %#x", int32(call)) + } else if syscall.Errno(-1*retCode) == syscall.EPERM { + return fmt.Errorf("requested action matches default action of filter") + } else if syscall.Errno(-1*retCode) == syscall.EINVAL { + return fmt.Errorf("two checks on same syscall argument") + } else if retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Generic add function for filter rules +func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + if len(conds) == 0 { + if err := f.addRuleWrapper(call, action, exact, 0, nil); err != nil { + return err + } + } else { + // We don't support conditional filtering in library version v2.1 + if !checkVersionAbove(2, 2, 1) { + return VersionError{ + message: "conditional filtering is not supported", + minimum: "2.2.1", + } + } + + argsArr := C.make_arg_cmp_array(C.uint(len(conds))) + if argsArr == nil { + return fmt.Errorf("error allocating memory for conditions") + } + defer C.free(argsArr) + + for i, cond := range conds { + C.add_struct_arg_cmp(C.scmp_cast_t(argsArr), C.uint(i), + C.uint(cond.Argument), cond.Op.toNative(), + C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2)) + } + + if err := f.addRuleWrapper(call, action, exact, C.uint(len(conds)), C.scmp_cast_t(argsArr)); err != nil { + return 
err + } + } + + return nil +} + +// Generic Helpers + +// Helper - Sanitize Arch token input +func sanitizeArch(in ScmpArch) error { + if in < archStart || in > archEnd { + return fmt.Errorf("unrecognized architecture %#x", uint(in)) + } + + if in.toNative() == C.C_ARCH_BAD { + return fmt.Errorf("architecture %v is not supported on this version of the library", in) + } + + return nil +} + +func sanitizeAction(in ScmpAction) error { + inTmp := in & 0x0000FFFF + if inTmp < actionStart || inTmp > actionEnd { + return fmt.Errorf("unrecognized action %#x", uint(inTmp)) + } + + if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 { + return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno") + } + + return nil +} + +func sanitizeCompareOp(in ScmpCompareOp) error { + if in < compareOpStart || in > compareOpEnd { + return fmt.Errorf("unrecognized comparison operator %#x", uint(in)) + } + + return nil +} + +func archFromNative(a C.uint32_t) (ScmpArch, error) { + switch a { + case C.C_ARCH_X86: + return ArchX86, nil + case C.C_ARCH_X86_64: + return ArchAMD64, nil + case C.C_ARCH_X32: + return ArchX32, nil + case C.C_ARCH_ARM: + return ArchARM, nil + case C.C_ARCH_NATIVE: + return ArchNative, nil + case C.C_ARCH_AARCH64: + return ArchARM64, nil + case C.C_ARCH_MIPS: + return ArchMIPS, nil + case C.C_ARCH_MIPS64: + return ArchMIPS64, nil + case C.C_ARCH_MIPS64N32: + return ArchMIPS64N32, nil + case C.C_ARCH_MIPSEL: + return ArchMIPSEL, nil + case C.C_ARCH_MIPSEL64: + return ArchMIPSEL64, nil + case C.C_ARCH_MIPSEL64N32: + return ArchMIPSEL64N32, nil + case C.C_ARCH_PPC: + return ArchPPC, nil + case C.C_ARCH_PPC64: + return ArchPPC64, nil + case C.C_ARCH_PPC64LE: + return ArchPPC64LE, nil + case C.C_ARCH_S390: + return ArchS390, nil + case C.C_ARCH_S390X: + return ArchS390X, nil + default: + return 0x0, fmt.Errorf("unrecognized architecture %#x", uint32(a)) + } +} + +// Only use with sanitized arches, no error handling +func (a ScmpArch) 
toNative() C.uint32_t { + switch a { + case ArchX86: + return C.C_ARCH_X86 + case ArchAMD64: + return C.C_ARCH_X86_64 + case ArchX32: + return C.C_ARCH_X32 + case ArchARM: + return C.C_ARCH_ARM + case ArchARM64: + return C.C_ARCH_AARCH64 + case ArchMIPS: + return C.C_ARCH_MIPS + case ArchMIPS64: + return C.C_ARCH_MIPS64 + case ArchMIPS64N32: + return C.C_ARCH_MIPS64N32 + case ArchMIPSEL: + return C.C_ARCH_MIPSEL + case ArchMIPSEL64: + return C.C_ARCH_MIPSEL64 + case ArchMIPSEL64N32: + return C.C_ARCH_MIPSEL64N32 + case ArchPPC: + return C.C_ARCH_PPC + case ArchPPC64: + return C.C_ARCH_PPC64 + case ArchPPC64LE: + return C.C_ARCH_PPC64LE + case ArchS390: + return C.C_ARCH_S390 + case ArchS390X: + return C.C_ARCH_S390X + case ArchNative: + return C.C_ARCH_NATIVE + default: + return 0x0 + } +} + +// Only use with sanitized ops, no error handling +func (a ScmpCompareOp) toNative() C.int { + switch a { + case CompareNotEqual: + return C.C_CMP_NE + case CompareLess: + return C.C_CMP_LT + case CompareLessOrEqual: + return C.C_CMP_LE + case CompareEqual: + return C.C_CMP_EQ + case CompareGreaterEqual: + return C.C_CMP_GE + case CompareGreater: + return C.C_CMP_GT + case CompareMaskedEqual: + return C.C_CMP_MASKED_EQ + default: + return 0x0 + } +} + +func actionFromNative(a C.uint32_t) (ScmpAction, error) { + aTmp := a & 0xFFFF + switch a & 0xFFFF0000 { + case C.C_ACT_KILL: + return ActKill, nil + case C.C_ACT_TRAP: + return ActTrap, nil + case C.C_ACT_ERRNO: + return ActErrno.SetReturnCode(int16(aTmp)), nil + case C.C_ACT_TRACE: + return ActTrace.SetReturnCode(int16(aTmp)), nil + case C.C_ACT_LOG: + return ActLog, nil + case C.C_ACT_ALLOW: + return ActAllow, nil + default: + return 0x0, fmt.Errorf("unrecognized action %#x", uint32(a)) + } +} + +// Only use with sanitized actions, no error handling +func (a ScmpAction) toNative() C.uint32_t { + switch a & 0xFFFF { + case ActKill: + return C.C_ACT_KILL + case ActTrap: + return C.C_ACT_TRAP + case ActErrno: + return 
C.C_ACT_ERRNO | (C.uint32_t(a) >> 16) + case ActTrace: + return C.C_ACT_TRACE | (C.uint32_t(a) >> 16) + case ActLog: + return C.C_ACT_LOG + case ActAllow: + return C.C_ACT_ALLOW + default: + return 0x0 + } +} + +// Internal only, assumes safe attribute +func (a scmpFilterAttr) toNative() uint32 { + switch a { + case filterAttrActDefault: + return uint32(C.C_ATTRIBUTE_DEFAULT) + case filterAttrActBadArch: + return uint32(C.C_ATTRIBUTE_BADARCH) + case filterAttrNNP: + return uint32(C.C_ATTRIBUTE_NNP) + case filterAttrTsync: + return uint32(C.C_ATTRIBUTE_TSYNC) + case filterAttrLog: + return uint32(C.C_ATTRIBUTE_LOG) + default: + return 0x0 + } +} diff --git a/vendor/github.com/sirupsen/logrus/LICENSE b/vendor/github.com/sirupsen/logrus/LICENSE new file mode 100644 index 0000000..f090cb4 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Simon Eskildsen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/vendor/github.com/sirupsen/logrus/README.md b/vendor/github.com/sirupsen/logrus/README.md new file mode 100644 index 0000000..a4796eb --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/README.md @@ -0,0 +1,495 @@ +# Logrus :walrus: [![Build Status](https://travis-ci.org/sirupsen/logrus.svg?branch=master)](https://travis-ci.org/sirupsen/logrus) [![GoDoc](https://godoc.org/github.com/sirupsen/logrus?status.svg)](https://godoc.org/github.com/sirupsen/logrus) + +Logrus is a structured logger for Go (golang), completely API compatible with +the standard library logger. + +**Seeing weird case-sensitive problems?** It's in the past been possible to +import Logrus as both upper- and lower-case. Due to the Go package environment, +this caused issues in the community and we needed a standard. Some environments +experienced problems with the upper-case variant, so the lower-case was decided. +Everything using `logrus` will need to use the lower-case: +`github.com/sirupsen/logrus`. Any package that isn't, should be changed. + +To fix Glide, see [these +comments](https://github.com/sirupsen/logrus/issues/553#issuecomment-306591437). +For an in-depth explanation of the casing issue, see [this +comment](https://github.com/sirupsen/logrus/issues/570#issuecomment-313933276). + +**Are you interested in assisting in maintaining Logrus?** Currently I have a +lot of obligations, and I am unable to provide Logrus with the maintainership it +needs. If you'd like to help, please reach out to me at `simon at author's +username dot com`. 
+ +Nicely color-coded in development (when a TTY is attached, otherwise just +plain text): + +![Colored](http://i.imgur.com/PY7qMwd.png) + +With `log.SetFormatter(&log.JSONFormatter{})`, for easy parsing by logstash +or Splunk: + +```json +{"animal":"walrus","level":"info","msg":"A group of walrus emerges from the +ocean","size":10,"time":"2014-03-10 19:57:38.562264131 -0400 EDT"} + +{"level":"warning","msg":"The group's number increased tremendously!", +"number":122,"omg":true,"time":"2014-03-10 19:57:38.562471297 -0400 EDT"} + +{"animal":"walrus","level":"info","msg":"A giant walrus appears!", +"size":10,"time":"2014-03-10 19:57:38.562500591 -0400 EDT"} + +{"animal":"walrus","level":"info","msg":"Tremendously sized cow enters the ocean.", +"size":9,"time":"2014-03-10 19:57:38.562527896 -0400 EDT"} + +{"level":"fatal","msg":"The ice breaks!","number":100,"omg":true, +"time":"2014-03-10 19:57:38.562543128 -0400 EDT"} +``` + +With the default `log.SetFormatter(&log.TextFormatter{})` when a TTY is not +attached, the output is compatible with the +[logfmt](http://godoc.org/github.com/kr/logfmt) format: + +```text +time="2015-03-26T01:27:38-04:00" level=debug msg="Started observing beach" animal=walrus number=8 +time="2015-03-26T01:27:38-04:00" level=info msg="A group of walrus emerges from the ocean" animal=walrus size=10 +time="2015-03-26T01:27:38-04:00" level=warning msg="The group's number increased tremendously!" number=122 omg=true +time="2015-03-26T01:27:38-04:00" level=debug msg="Temperature changes" temperature=-4 +time="2015-03-26T01:27:38-04:00" level=panic msg="It's over 9000!" animal=orca size=9009 +time="2015-03-26T01:27:38-04:00" level=fatal msg="The ice breaks!" 
err=&{0x2082280c0 map[animal:orca size:9009] 2015-03-26 01:27:38.441574009 -0400 EDT panic It's over 9000!} number=100 omg=true +``` +To ensure this behaviour even if a TTY is attached, set your formatter as follows: + +```go + log.SetFormatter(&log.TextFormatter{ + DisableColors: true, + FullTimestamp: true, + }) +``` + +#### Logging Method Name + +If you wish to add the calling method as a field, instruct the logger via: +```go +log.SetReportCaller(true) +``` +This adds the caller as 'method' like so: + +```json +{"animal":"penguin","level":"fatal","method":"github.com/sirupsen/arcticcreatures.migrate","msg":"a penguin swims by", +"time":"2014-03-10 19:57:38.562543129 -0400 EDT"} +``` + +```text +time="2015-03-26T01:27:38-04:00" level=fatal method=github.com/sirupsen/arcticcreatures.migrate msg="a penguin swims by" animal=penguin +``` +Note that this does add measurable overhead - the cost will depend on the version of Go, but is +between 20 and 40% in recent tests with 1.6 and 1.7. You can validate this in your +environment via benchmarks: +``` +go test -bench=.*CallerTracing +``` + + +#### Case-sensitivity + +The organization's name was changed to lower-case--and this will not be changed +back. If you are getting import conflicts due to case sensitivity, please use +the lower-case import: `github.com/sirupsen/logrus`. + +#### Example + +The simplest way to use Logrus is simply the package-level exported logger: + +```go +package main + +import ( + log "github.com/sirupsen/logrus" +) + +func main() { + log.WithFields(log.Fields{ + "animal": "walrus", + }).Info("A walrus appears") +} +``` + +Note that it's completely api-compatible with the stdlib logger, so you can +replace your `log` imports everywhere with `log "github.com/sirupsen/logrus"` +and you'll now have the flexibility of Logrus. 
You can customize it all you +want: + +```go +package main + +import ( + "os" + log "github.com/sirupsen/logrus" +) + +func init() { + // Log as JSON instead of the default ASCII formatter. + log.SetFormatter(&log.JSONFormatter{}) + + // Output to stdout instead of the default stderr + // Can be any io.Writer, see below for File example + log.SetOutput(os.Stdout) + + // Only log the warning severity or above. + log.SetLevel(log.WarnLevel) +} + +func main() { + log.WithFields(log.Fields{ + "animal": "walrus", + "size": 10, + }).Info("A group of walrus emerges from the ocean") + + log.WithFields(log.Fields{ + "omg": true, + "number": 122, + }).Warn("The group's number increased tremendously!") + + log.WithFields(log.Fields{ + "omg": true, + "number": 100, + }).Fatal("The ice breaks!") + + // A common pattern is to re-use fields between logging statements by re-using + // the logrus.Entry returned from WithFields() + contextLogger := log.WithFields(log.Fields{ + "common": "this is a common field", + "other": "I also should be logged always", + }) + + contextLogger.Info("I'll be logged with common and other field") + contextLogger.Info("Me too") +} +``` + +For more advanced usage such as logging to multiple locations from the same +application, you can also create an instance of the `logrus` Logger: + +```go +package main + +import ( + "os" + "github.com/sirupsen/logrus" +) + +// Create a new instance of the logger. You can have any number of instances. +var log = logrus.New() + +func main() { + // The API for setting attributes is a little different than the package level + // exported logger. See Godoc. 
+ log.Out = os.Stdout + + // You could set this to any `io.Writer` such as a file + // file, err := os.OpenFile("logrus.log", os.O_CREATE|os.O_WRONLY, 0666) + // if err == nil { + // log.Out = file + // } else { + // log.Info("Failed to log to file, using default stderr") + // } + + log.WithFields(logrus.Fields{ + "animal": "walrus", + "size": 10, + }).Info("A group of walrus emerges from the ocean") +} +``` + +#### Fields + +Logrus encourages careful, structured logging through logging fields instead of +long, unparseable error messages. For example, instead of: `log.Fatalf("Failed +to send event %s to topic %s with key %d")`, you should log the much more +discoverable: + +```go +log.WithFields(log.Fields{ + "event": event, + "topic": topic, + "key": key, +}).Fatal("Failed to send event") +``` + +We've found this API forces you to think about logging in a way that produces +much more useful logging messages. We've been in countless situations where just +a single added field to a log statement that was already there would've saved us +hours. The `WithFields` call is optional. + +In general, with Logrus using any of the `printf`-family functions should be +seen as a hint you should add a field; however, you can still use the +`printf`-family functions with Logrus. + +#### Default Fields + +Often it's helpful to have fields _always_ attached to log statements in an +application or parts of one. For example, you may want to always log the +`request_id` and `user_ip` in the context of a request. Instead of writing +`log.WithFields(log.Fields{"request_id": request_id, "user_ip": user_ip})` on +every line, you can create a `logrus.Entry` to pass around instead: + +```go +requestLogger := log.WithFields(log.Fields{"request_id": request_id, "user_ip": user_ip}) +requestLogger.Info("something happened on that request") // will log request_id and user_ip +requestLogger.Warn("something not great happened") +``` + +#### Hooks + +You can add hooks for logging levels.
For example, to send errors to an exception +tracking service on `Error`, `Fatal` and `Panic`, info to StatsD or log to +multiple places simultaneously, e.g. syslog. + +Logrus comes with [built-in hooks](hooks/). Add those, or your custom hook, in +`init`: + +```go +import ( + log "github.com/sirupsen/logrus" + "gopkg.in/gemnasium/logrus-airbrake-hook.v2" // the package is named "airbrake" + logrus_syslog "github.com/sirupsen/logrus/hooks/syslog" + "log/syslog" +) + +func init() { + + // Use the Airbrake hook to report errors that have Error severity or above to + // an exception tracker. You can create custom hooks, see the Hooks section. + log.AddHook(airbrake.NewHook(123, "xyz", "production")) + + hook, err := logrus_syslog.NewSyslogHook("udp", "localhost:514", syslog.LOG_INFO, "") + if err != nil { + log.Error("Unable to connect to local syslog daemon") + } else { + log.AddHook(hook) + } +} +``` +Note: The syslog hook also supports connecting to the local syslog (e.g. "/dev/log" or "/var/run/syslog" or "/var/run/log"). For details, please check the [syslog hook README](hooks/syslog/README.md). + +A list of currently known service hooks can be found on this wiki [page](https://github.com/sirupsen/logrus/wiki/Hooks) + + +#### Level logging + +Logrus has seven logging levels: Trace, Debug, Info, Warning, Error, Fatal and Panic. + +```go +log.Trace("Something very low level.") +log.Debug("Useful debugging information.") +log.Info("Something noteworthy happened!") +log.Warn("You should probably take a look at this.") +log.Error("Something failed but I'm not quitting.") +// Calls os.Exit(1) after logging +log.Fatal("Bye.") +// Calls panic() after logging +log.Panic("I'm bailing.") +``` + +You can set the logging level on a `Logger`, then it will only log entries with +that severity or anything above it: + +```go +// Will log anything that is info or above (warn, error, fatal, panic). Default.
+log.SetLevel(log.InfoLevel) +``` + +It may be useful to set `log.Level = logrus.DebugLevel` in a debug or verbose +environment if your application has that. + +#### Entries + +Besides the fields added with `WithField` or `WithFields`, some fields are +automatically added to all logging events: + +1. `time`. The timestamp when the entry was created. +2. `msg`. The logging message passed to `{Info,Warn,Error,Fatal,Panic}` after + the `WithFields` call. E.g. `Failed to send event.` +3. `level`. The logging level. E.g. `info`. + +#### Environments + +Logrus has no notion of environment. + +If you wish for hooks and formatters to only be used in specific environments, +you should handle that yourself. For example, if your application has a global +variable `Environment`, which is a string representation of the environment, you +could do: + +```go +import ( + log "github.com/sirupsen/logrus" +) + +func init() { + // do something here to set environment depending on an environment variable + // or command-line flag + if Environment == "production" { + log.SetFormatter(&log.JSONFormatter{}) + } else { + // The TextFormatter is default, you don't actually have to do this. + log.SetFormatter(&log.TextFormatter{}) + } +} +``` + +This configuration is how `logrus` was intended to be used, but JSON in +production is mostly only useful if you do log aggregation with tools like +Splunk or Logstash. + +#### Formatters + +The built-in logging formatters are: + +* `logrus.TextFormatter`. Logs the event in colors if stdout is a tty, otherwise + without colors. + * *Note:* to force colored output when there is no TTY, set the `ForceColors` + field to `true`. To force no colored output even if there is a TTY set the + `DisableColors` field to `true`. For Windows, see + [github.com/mattn/go-colorable](https://github.com/mattn/go-colorable). + * When colors are enabled, levels are truncated to 4 characters by default. To disable + truncation set the `DisableLevelTruncation` field to `true`.
+ * All options are listed in the [generated docs](https://godoc.org/github.com/sirupsen/logrus#TextFormatter). +* `logrus.JSONFormatter`. Logs fields as JSON. + * All options are listed in the [generated docs](https://godoc.org/github.com/sirupsen/logrus#JSONFormatter). + +Third party logging formatters: + +* [`FluentdFormatter`](https://github.com/joonix/log). Formats entries that can be parsed by Kubernetes and Google Container Engine. +* [`GELF`](https://github.com/fabienm/go-logrus-formatters). Formats entries so they comply to Graylog's [GELF 1.1 specification](http://docs.graylog.org/en/2.4/pages/gelf.html). +* [`logstash`](https://github.com/bshuster-repo/logrus-logstash-hook). Logs fields as [Logstash](http://logstash.net) Events. +* [`prefixed`](https://github.com/x-cray/logrus-prefixed-formatter). Displays log entry source along with alternative layout. +* [`zalgo`](https://github.com/aybabtme/logzalgo). Invoking the P͉̫o̳̼̊w̖͈̰͎e̬͔̭͂r͚̼̹̲ ̫͓͉̳͈ō̠͕͖̚f̝͍̠ ͕̲̞͖͑Z̖̫̤̫ͪa͉̬͈̗l͖͎g̳̥o̰̥̅!̣͔̲̻͊̄ ̙̘̦̹̦. +* [`nested-logrus-formatter`](https://github.com/antonfisher/nested-logrus-formatter). Converts logrus fields to a nested structure. + +You can define your formatter by implementing the `Formatter` interface, +requiring a `Format` method. `Format` takes an `*Entry`. `entry.Data` is a +`Fields` type (`map[string]interface{}`) with all your fields as well as the +default ones (see Entries section above): + +```go +type MyJSONFormatter struct { +} + +log.SetFormatter(new(MyJSONFormatter)) + +func (f *MyJSONFormatter) Format(entry *Entry) ([]byte, error) { + // Note this doesn't include Time, Level and Message which are available on + // the Entry. Consult `godoc` on information about those fields or read the + // source of the official loggers. 
+ serialized, err := json.Marshal(entry.Data) + if err != nil { + return nil, fmt.Errorf("Failed to marshal fields to JSON, %v", err) + } + return append(serialized, '\n'), nil +} +``` + +#### Logger as an `io.Writer` + +Logrus can be transformed into an `io.Writer`. That writer is the end of an `io.Pipe` and it is your responsibility to close it. + +```go +w := logger.Writer() +defer w.Close() + +srv := http.Server{ + // create a stdlib log.Logger that writes to + // logrus.Logger. + ErrorLog: log.New(w, "", 0), +} +``` + +Each line written to that writer will be printed the usual way, using formatters +and hooks. The level for those entries is `info`. + +This means that we can override the standard library logger easily: + +```go +logger := logrus.New() +logger.Formatter = &logrus.JSONFormatter{} + +// Use logrus for standard log output +// Note that `log` here references stdlib's log +// Not logrus imported under the name `log`. +log.SetOutput(logger.Writer()) +``` + +#### Rotation + +Log rotation is not provided with Logrus. Log rotation should be done by an +external program (like `logrotate(8)`) that can compress and delete old log +entries. It should not be a feature of the application-level logger. + +#### Tools + +| Tool | Description | +| ---- | ----------- | +|[Logrus Mate](https://github.com/gogap/logrus_mate)|Logrus mate is a tool for Logrus to manage loggers, you can initial logger's level, hook and formatter by config file, the logger will generated with different config at different environment.| +|[Logrus Viper Helper](https://github.com/heirko/go-contrib/tree/master/logrusHelper)|An Helper around Logrus to wrap with spf13/Viper to load configuration with fangs! And to simplify Logrus configuration use some behavior of [Logrus Mate](https://github.com/gogap/logrus_mate). 
[sample](https://github.com/heirko/iris-contrib/blob/master/middleware/logrus-logger/example) | + +#### Testing + +Logrus has a built in facility for asserting the presence of log messages. This is implemented through the `test` hook and provides: + +* decorators for existing logger (`test.NewLocal` and `test.NewGlobal`) which basically just add the `test` hook +* a test logger (`test.NewNullLogger`) that just records log messages (and does not output any): + +```go +import( + "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus/hooks/test" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestSomething(t*testing.T){ + logger, hook := test.NewNullLogger() + logger.Error("Helloerror") + + assert.Equal(t, 1, len(hook.Entries)) + assert.Equal(t, logrus.ErrorLevel, hook.LastEntry().Level) + assert.Equal(t, "Helloerror", hook.LastEntry().Message) + + hook.Reset() + assert.Nil(t, hook.LastEntry()) +} +``` + +#### Fatal handlers + +Logrus can register one or more functions that will be called when any `fatal` +level message is logged. The registered handlers will be executed before +logrus performs a `os.Exit(1)`. This behavior may be helpful if callers need +to gracefully shutdown. Unlike a `panic("Something went wrong...")` call which can be intercepted with a deferred `recover` a call to `os.Exit(1)` can not be intercepted. + +``` +... +handler := func() { + // gracefully shutdown something... +} +logrus.RegisterExitHandler(handler) +... +``` + +#### Thread safety + +By default, Logger is protected by a mutex for concurrent writes. The mutex is held when calling hooks and writing logs. +If you are sure such locking is not needed, you can call logger.SetNoLock() to disable the locking. + +Situation when locking is not needed includes: + +* You have no hooks registered, or hooks calling is already thread-safe. + +* Writing to logger.Out is already thread-safe, for example: + + 1) logger.Out is protected by locks. 
+ + 2) logger.Out is an os.File handler opened with `O_APPEND` flag, and every write is smaller than 4k. (This allows multi-thread/multi-process writing) + + (Refer to http://www.notthewizard.com/2014/06/17/are-files-appends-really-atomic/) diff --git a/vendor/github.com/sirupsen/logrus/alt_exit.go b/vendor/github.com/sirupsen/logrus/alt_exit.go new file mode 100644 index 0000000..8fd189e --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/alt_exit.go @@ -0,0 +1,76 @@ +package logrus + +// The following code was sourced and modified from the +// https://github.com/tebeka/atexit package governed by the following license: +// +// Copyright (c) 2012 Miki Tebeka . +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software is furnished to do so, +// subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ +import ( + "fmt" + "os" +) + +var handlers = []func(){} + +func runHandler(handler func()) { + defer func() { + if err := recover(); err != nil { + fmt.Fprintln(os.Stderr, "Error: Logrus exit handler error:", err) + } + }() + + handler() +} + +func runHandlers() { + for _, handler := range handlers { + runHandler(handler) + } +} + +// Exit runs all the Logrus atexit handlers and then terminates the program using os.Exit(code) +func Exit(code int) { + runHandlers() + os.Exit(code) +} + +// RegisterExitHandler appends a Logrus Exit handler to the list of handlers, +// call logrus.Exit to invoke all handlers. The handlers will also be invoked when +// any Fatal log entry is made. +// +// This method is useful when a caller wishes to use logrus to log a fatal +// message but also needs to gracefully shut down. An example usecase could be +// closing database connections, or sending an alert that the application is +// closing. +func RegisterExitHandler(handler func()) { + handlers = append(handlers, handler) +} + +// DeferExitHandler prepends a Logrus Exit handler to the list of handlers, +// call logrus.Exit to invoke all handlers. The handlers will also be invoked when +// any Fatal log entry is made. +// +// This method is useful when a caller wishes to use logrus to log a fatal +// message but also needs to gracefully shut down. An example usecase could be +// closing database connections, or sending an alert that the application is +// closing. +func DeferExitHandler(handler func()) { + handlers = append([]func(){handler}, handlers...) +} diff --git a/vendor/github.com/sirupsen/logrus/doc.go b/vendor/github.com/sirupsen/logrus/doc.go new file mode 100644 index 0000000..da67aba --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/doc.go @@ -0,0 +1,26 @@ +/* +Package logrus is a structured logger for Go, completely API compatible with the standard library logger.
+ + +The simplest way to use Logrus is simply the package-level exported logger: + + package main + + import ( + log "github.com/sirupsen/logrus" + ) + + func main() { + log.WithFields(log.Fields{ + "animal": "walrus", + "number": 1, + "size": 10, + }).Info("A walrus appears") + } + +Output: + time="2015-09-07T08:48:33Z" level=info msg="A walrus appears" animal=walrus number=1 size=10 + +For a full guide visit https://github.com/sirupsen/logrus +*/ +package logrus diff --git a/vendor/github.com/sirupsen/logrus/entry.go b/vendor/github.com/sirupsen/logrus/entry.go new file mode 100644 index 0000000..63e2558 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/entry.go @@ -0,0 +1,407 @@ +package logrus + +import ( + "bytes" + "context" + "fmt" + "os" + "reflect" + "runtime" + "strings" + "sync" + "time" +) + +var ( + bufferPool *sync.Pool + + // qualified package name, cached at first use + logrusPackage string + + // Positions in the call stack when tracing to report the calling method + minimumCallerDepth int + + // Used for caller information initialisation + callerInitOnce sync.Once +) + +const ( + maximumCallerDepth int = 25 + knownLogrusFrames int = 4 +) + +func init() { + bufferPool = &sync.Pool{ + New: func() interface{} { + return new(bytes.Buffer) + }, + } + + // start at the bottom of the stack before the package-name cache is primed + minimumCallerDepth = 1 +} + +// Defines the key when adding errors using WithError. +var ErrorKey = "error" + +// An entry is the final or intermediate Logrus logging entry. It contains all +// the fields passed with WithField{,s}. It's finally logged when Trace, Debug, +// Info, Warn, Error, Fatal or Panic is called on it. These objects can be +// reused and passed around as much as you wish to avoid field duplication. +type Entry struct { + Logger *Logger + + // Contains all the fields set by the user. 
+ Data Fields + + // Time at which the log entry was created + Time time.Time + + // Level the log entry was logged at: Trace, Debug, Info, Warn, Error, Fatal or Panic + // This field will be set on entry firing and the value will be equal to the one in Logger struct field. + Level Level + + // Calling method, with package name + Caller *runtime.Frame + + // Message passed to Trace, Debug, Info, Warn, Error, Fatal or Panic + Message string + + // When formatter is called in entry.log(), a Buffer may be set to entry + Buffer *bytes.Buffer + + // Contains the context set by the user. Useful for hook processing etc. + Context context.Context + + // err may contain a field formatting error + err string +} + +func NewEntry(logger *Logger) *Entry { + return &Entry{ + Logger: logger, + // Default is three fields, plus one optional. Give a little extra room. + Data: make(Fields, 6), + } +} + +// Returns the string representation from the reader and ultimately the +// formatter. +func (entry *Entry) String() (string, error) { + serialized, err := entry.Logger.Formatter.Format(entry) + if err != nil { + return "", err + } + str := string(serialized) + return str, nil +} + +// Add an error as single field (using the key defined in ErrorKey) to the Entry. +func (entry *Entry) WithError(err error) *Entry { + return entry.WithField(ErrorKey, err) +} + +// Add a context to the Entry. +func (entry *Entry) WithContext(ctx context.Context) *Entry { + return &Entry{Logger: entry.Logger, Data: entry.Data, Time: entry.Time, err: entry.err, Context: ctx} +} + +// Add a single field to the Entry. +func (entry *Entry) WithField(key string, value interface{}) *Entry { + return entry.WithFields(Fields{key: value}) +} + +// Add a map of fields to the Entry. 
+func (entry *Entry) WithFields(fields Fields) *Entry { + data := make(Fields, len(entry.Data)+len(fields)) + for k, v := range entry.Data { + data[k] = v + } + fieldErr := entry.err + for k, v := range fields { + isErrField := false + if t := reflect.TypeOf(v); t != nil { + switch t.Kind() { + case reflect.Func: + isErrField = true + case reflect.Ptr: + isErrField = t.Elem().Kind() == reflect.Func + } + } + if isErrField { + tmp := fmt.Sprintf("can not add field %q", k) + if fieldErr != "" { + fieldErr = entry.err + ", " + tmp + } else { + fieldErr = tmp + } + } else { + data[k] = v + } + } + return &Entry{Logger: entry.Logger, Data: data, Time: entry.Time, err: fieldErr, Context: entry.Context} +} + +// Overrides the time of the Entry. +func (entry *Entry) WithTime(t time.Time) *Entry { + return &Entry{Logger: entry.Logger, Data: entry.Data, Time: t, err: entry.err, Context: entry.Context} +} + +// getPackageName reduces a fully qualified function name to the package name +// There really ought to be a better way...
+func getPackageName(f string) string { + for { + lastPeriod := strings.LastIndex(f, ".") + lastSlash := strings.LastIndex(f, "/") + if lastPeriod > lastSlash { + f = f[:lastPeriod] + } else { + break + } + } + + return f +} + +// getCaller retrieves the name of the first non-logrus calling function +func getCaller() *runtime.Frame { + + // cache this package's fully-qualified name + callerInitOnce.Do(func() { + pcs := make([]uintptr, 2) + _ = runtime.Callers(0, pcs) + logrusPackage = getPackageName(runtime.FuncForPC(pcs[1]).Name()) + + // now that we have the cache, we can skip a minimum count of known-logrus functions + // XXX this is dubious, the number of frames may vary + minimumCallerDepth = knownLogrusFrames + }) + + // Restrict the lookback frames to avoid runaway lookups + pcs := make([]uintptr, maximumCallerDepth) + depth := runtime.Callers(minimumCallerDepth, pcs) + frames := runtime.CallersFrames(pcs[:depth]) + + for f, again := frames.Next(); again; f, again = frames.Next() { + pkg := getPackageName(f.Function) + + // If the caller isn't part of this package, we're done + if pkg != logrusPackage { + return &f + } + } + + // if we got here, we failed to find the caller's context + return nil +} + +func (entry Entry) HasCaller() (has bool) { + return entry.Logger != nil && + entry.Logger.ReportCaller && + entry.Caller != nil +} + +// This function is not declared with a pointer value because otherwise +// race conditions will occur when using multiple goroutines +func (entry Entry) log(level Level, msg string) { + var buffer *bytes.Buffer + + // Default to now, but allow users to override if they want. + // + // We don't have to worry about polluting future calls to Entry#log() + // with this assignment because this function is declared with a + // non-pointer receiver. 
+ if entry.Time.IsZero() { + entry.Time = time.Now() + } + + entry.Level = level + entry.Message = msg + if entry.Logger.ReportCaller { + entry.Caller = getCaller() + } + + entry.fireHooks() + + buffer = bufferPool.Get().(*bytes.Buffer) + buffer.Reset() + defer bufferPool.Put(buffer) + entry.Buffer = buffer + + entry.write() + + entry.Buffer = nil + + // To avoid Entry#log() returning a value that only would make sense for + // panic() to use in Entry#Panic(), we avoid the allocation by checking + // directly here. + if level <= PanicLevel { + panic(&entry) + } +} + +func (entry *Entry) fireHooks() { + entry.Logger.mu.Lock() + defer entry.Logger.mu.Unlock() + err := entry.Logger.Hooks.Fire(entry.Level, entry) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to fire hook: %v\n", err) + } +} + +func (entry *Entry) write() { + entry.Logger.mu.Lock() + defer entry.Logger.mu.Unlock() + serialized, err := entry.Logger.Formatter.Format(entry) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to obtain reader, %v\n", err) + } else { + _, err = entry.Logger.Out.Write(serialized) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to write to log, %v\n", err) + } + } +} + +func (entry *Entry) Log(level Level, args ...interface{}) { + if entry.Logger.IsLevelEnabled(level) { + entry.log(level, fmt.Sprint(args...)) + } +} + +func (entry *Entry) Trace(args ...interface{}) { + entry.Log(TraceLevel, args...) +} + +func (entry *Entry) Debug(args ...interface{}) { + entry.Log(DebugLevel, args...) +} + +func (entry *Entry) Print(args ...interface{}) { + entry.Info(args...) +} + +func (entry *Entry) Info(args ...interface{}) { + entry.Log(InfoLevel, args...) +} + +func (entry *Entry) Warn(args ...interface{}) { + entry.Log(WarnLevel, args...) +} + +func (entry *Entry) Warning(args ...interface{}) { + entry.Warn(args...) +} + +func (entry *Entry) Error(args ...interface{}) { + entry.Log(ErrorLevel, args...) 
+} + +func (entry *Entry) Fatal(args ...interface{}) { + entry.Log(FatalLevel, args...) + entry.Logger.Exit(1) +} + +func (entry *Entry) Panic(args ...interface{}) { + entry.Log(PanicLevel, args...) + panic(fmt.Sprint(args...)) +} + +// Entry Printf family functions + +func (entry *Entry) Logf(level Level, format string, args ...interface{}) { + if entry.Logger.IsLevelEnabled(level) { + entry.Log(level, fmt.Sprintf(format, args...)) + } +} + +func (entry *Entry) Tracef(format string, args ...interface{}) { + entry.Logf(TraceLevel, format, args...) +} + +func (entry *Entry) Debugf(format string, args ...interface{}) { + entry.Logf(DebugLevel, format, args...) +} + +func (entry *Entry) Infof(format string, args ...interface{}) { + entry.Logf(InfoLevel, format, args...) +} + +func (entry *Entry) Printf(format string, args ...interface{}) { + entry.Infof(format, args...) +} + +func (entry *Entry) Warnf(format string, args ...interface{}) { + entry.Logf(WarnLevel, format, args...) +} + +func (entry *Entry) Warningf(format string, args ...interface{}) { + entry.Warnf(format, args...) +} + +func (entry *Entry) Errorf(format string, args ...interface{}) { + entry.Logf(ErrorLevel, format, args...) +} + +func (entry *Entry) Fatalf(format string, args ...interface{}) { + entry.Logf(FatalLevel, format, args...) + entry.Logger.Exit(1) +} + +func (entry *Entry) Panicf(format string, args ...interface{}) { + entry.Logf(PanicLevel, format, args...) +} + +// Entry Println family functions + +func (entry *Entry) Logln(level Level, args ...interface{}) { + if entry.Logger.IsLevelEnabled(level) { + entry.Log(level, entry.sprintlnn(args...)) + } +} + +func (entry *Entry) Traceln(args ...interface{}) { + entry.Logln(TraceLevel, args...) +} + +func (entry *Entry) Debugln(args ...interface{}) { + entry.Logln(DebugLevel, args...) +} + +func (entry *Entry) Infoln(args ...interface{}) { + entry.Logln(InfoLevel, args...) 
+} + +func (entry *Entry) Println(args ...interface{}) { + entry.Infoln(args...) +} + +func (entry *Entry) Warnln(args ...interface{}) { + entry.Logln(WarnLevel, args...) +} + +func (entry *Entry) Warningln(args ...interface{}) { + entry.Warnln(args...) +} + +func (entry *Entry) Errorln(args ...interface{}) { + entry.Logln(ErrorLevel, args...) +} + +func (entry *Entry) Fatalln(args ...interface{}) { + entry.Logln(FatalLevel, args...) + entry.Logger.Exit(1) +} + +func (entry *Entry) Panicln(args ...interface{}) { + entry.Logln(PanicLevel, args...) +} + +// Sprintlnn => Sprint no newline. This is to get the behavior of how +// fmt.Sprintln where spaces are always added between operands, regardless of +// their type. Instead of vendoring the Sprintln implementation to spare a +// string allocation, we do the simplest thing. +func (entry *Entry) sprintlnn(args ...interface{}) string { + msg := fmt.Sprintln(args...) + return msg[:len(msg)-1] +} diff --git a/vendor/github.com/sirupsen/logrus/exported.go b/vendor/github.com/sirupsen/logrus/exported.go new file mode 100644 index 0000000..62fc2f2 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/exported.go @@ -0,0 +1,225 @@ +package logrus + +import ( + "context" + "io" + "time" +) + +var ( + // std is the name of the standard logger in stdlib `log` + std = New() +) + +func StandardLogger() *Logger { + return std +} + +// SetOutput sets the standard logger output. +func SetOutput(out io.Writer) { + std.SetOutput(out) +} + +// SetFormatter sets the standard logger formatter. +func SetFormatter(formatter Formatter) { + std.SetFormatter(formatter) +} + +// SetReportCaller sets whether the standard logger will include the calling +// method as a field. +func SetReportCaller(include bool) { + std.SetReportCaller(include) +} + +// SetLevel sets the standard logger level. +func SetLevel(level Level) { + std.SetLevel(level) +} + +// GetLevel returns the standard logger level. 
+func GetLevel() Level { + return std.GetLevel() +} + +// IsLevelEnabled checks if the log level of the standard logger is greater than the level param +func IsLevelEnabled(level Level) bool { + return std.IsLevelEnabled(level) +} + +// AddHook adds a hook to the standard logger hooks. +func AddHook(hook Hook) { + std.AddHook(hook) +} + +// WithError creates an entry from the standard logger and adds an error to it, using the value defined in ErrorKey as key. +func WithError(err error) *Entry { + return std.WithField(ErrorKey, err) +} + +// WithContext creates an entry from the standard logger and adds a context to it. +func WithContext(ctx context.Context) *Entry { + return std.WithContext(ctx) +} + +// WithField creates an entry from the standard logger and adds a field to +// it. If you want multiple fields, use `WithFields`. +// +// Note that it doesn't log until you call Debug, Print, Info, Warn, Fatal +// or Panic on the Entry it returns. +func WithField(key string, value interface{}) *Entry { + return std.WithField(key, value) +} + +// WithFields creates an entry from the standard logger and adds multiple +// fields to it. This is simply a helper for `WithField`, invoking it +// once for each field. +// +// Note that it doesn't log until you call Debug, Print, Info, Warn, Fatal +// or Panic on the Entry it returns. +func WithFields(fields Fields) *Entry { + return std.WithFields(fields) +} + +// WithTime creates an entry from the standard logger and overrides the time of +// logs generated with it. +// +// Note that it doesn't log until you call Debug, Print, Info, Warn, Fatal +// or Panic on the Entry it returns. +func WithTime(t time.Time) *Entry { + return std.WithTime(t) +} + +// Trace logs a message at level Trace on the standard logger. +func Trace(args ...interface{}) { + std.Trace(args...) +} + +// Debug logs a message at level Debug on the standard logger. +func Debug(args ...interface{}) { + std.Debug(args...)
+} + +// Print logs a message at level Info on the standard logger. +func Print(args ...interface{}) { + std.Print(args...) +} + +// Info logs a message at level Info on the standard logger. +func Info(args ...interface{}) { + std.Info(args...) +} + +// Warn logs a message at level Warn on the standard logger. +func Warn(args ...interface{}) { + std.Warn(args...) +} + +// Warning logs a message at level Warn on the standard logger. +func Warning(args ...interface{}) { + std.Warning(args...) +} + +// Error logs a message at level Error on the standard logger. +func Error(args ...interface{}) { + std.Error(args...) +} + +// Panic logs a message at level Panic on the standard logger. +func Panic(args ...interface{}) { + std.Panic(args...) +} + +// Fatal logs a message at level Fatal on the standard logger then the process will exit with status set to 1. +func Fatal(args ...interface{}) { + std.Fatal(args...) +} + +// Tracef logs a message at level Trace on the standard logger. +func Tracef(format string, args ...interface{}) { + std.Tracef(format, args...) +} + +// Debugf logs a message at level Debug on the standard logger. +func Debugf(format string, args ...interface{}) { + std.Debugf(format, args...) +} + +// Printf logs a message at level Info on the standard logger. +func Printf(format string, args ...interface{}) { + std.Printf(format, args...) +} + +// Infof logs a message at level Info on the standard logger. +func Infof(format string, args ...interface{}) { + std.Infof(format, args...) +} + +// Warnf logs a message at level Warn on the standard logger. +func Warnf(format string, args ...interface{}) { + std.Warnf(format, args...) +} + +// Warningf logs a message at level Warn on the standard logger. +func Warningf(format string, args ...interface{}) { + std.Warningf(format, args...) +} + +// Errorf logs a message at level Error on the standard logger. +func Errorf(format string, args ...interface{}) { + std.Errorf(format, args...) 
+} + +// Panicf logs a message at level Panic on the standard logger. +func Panicf(format string, args ...interface{}) { + std.Panicf(format, args...) +} + +// Fatalf logs a message at level Fatal on the standard logger then the process will exit with status set to 1. +func Fatalf(format string, args ...interface{}) { + std.Fatalf(format, args...) +} + +// Traceln logs a message at level Trace on the standard logger. +func Traceln(args ...interface{}) { + std.Traceln(args...) +} + +// Debugln logs a message at level Debug on the standard logger. +func Debugln(args ...interface{}) { + std.Debugln(args...) +} + +// Println logs a message at level Info on the standard logger. +func Println(args ...interface{}) { + std.Println(args...) +} + +// Infoln logs a message at level Info on the standard logger. +func Infoln(args ...interface{}) { + std.Infoln(args...) +} + +// Warnln logs a message at level Warn on the standard logger. +func Warnln(args ...interface{}) { + std.Warnln(args...) +} + +// Warningln logs a message at level Warn on the standard logger. +func Warningln(args ...interface{}) { + std.Warningln(args...) +} + +// Errorln logs a message at level Error on the standard logger. +func Errorln(args ...interface{}) { + std.Errorln(args...) +} + +// Panicln logs a message at level Panic on the standard logger. +func Panicln(args ...interface{}) { + std.Panicln(args...) +} + +// Fatalln logs a message at level Fatal on the standard logger then the process will exit with status set to 1. +func Fatalln(args ...interface{}) { + std.Fatalln(args...) 
+} diff --git a/vendor/github.com/sirupsen/logrus/formatter.go b/vendor/github.com/sirupsen/logrus/formatter.go new file mode 100644 index 0000000..4088837 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/formatter.go @@ -0,0 +1,78 @@ +package logrus + +import "time" + +// Default key names for the default fields +const ( + defaultTimestampFormat = time.RFC3339 + FieldKeyMsg = "msg" + FieldKeyLevel = "level" + FieldKeyTime = "time" + FieldKeyLogrusError = "logrus_error" + FieldKeyFunc = "func" + FieldKeyFile = "file" +) + +// The Formatter interface is used to implement a custom Formatter. It takes an +// `Entry`. It exposes all the fields, including the default ones: +// +// * `entry.Data["msg"]`. The message passed from Info, Warn, Error .. +// * `entry.Data["time"]`. The timestamp. +// * `entry.Data["level"]`. The level the entry was logged at. +// +// Any additional fields added with `WithField` or `WithFields` are also in +// `entry.Data`. Format is expected to return an array of bytes which are then +// logged to `logger.Out`. +type Formatter interface { + Format(*Entry) ([]byte, error) +} + +// This is to not silently overwrite `time`, `msg`, `func` and `level` fields when +// dumping it. If this code wasn't there doing: +// +// logrus.WithField("level", 1).Info("hello") +// +// Would just silently drop the user provided level. Instead with this code +// it'll be logged as: +// +// {"level": "info", "fields.level": 1, "msg": "hello", "time": "..."} +// +// It's not exported because it's still using Data in an opinionated way. It's to +// avoid code duplication between the two default formatters.
+func prefixFieldClashes(data Fields, fieldMap FieldMap, reportCaller bool) { + timeKey := fieldMap.resolve(FieldKeyTime) + if t, ok := data[timeKey]; ok { + data["fields."+timeKey] = t + delete(data, timeKey) + } + + msgKey := fieldMap.resolve(FieldKeyMsg) + if m, ok := data[msgKey]; ok { + data["fields."+msgKey] = m + delete(data, msgKey) + } + + levelKey := fieldMap.resolve(FieldKeyLevel) + if l, ok := data[levelKey]; ok { + data["fields."+levelKey] = l + delete(data, levelKey) + } + + logrusErrKey := fieldMap.resolve(FieldKeyLogrusError) + if l, ok := data[logrusErrKey]; ok { + data["fields."+logrusErrKey] = l + delete(data, logrusErrKey) + } + + // If reportCaller is not set, 'func' will not conflict. + if reportCaller { + funcKey := fieldMap.resolve(FieldKeyFunc) + if l, ok := data[funcKey]; ok { + data["fields."+funcKey] = l + } + fileKey := fieldMap.resolve(FieldKeyFile) + if l, ok := data[fileKey]; ok { + data["fields."+fileKey] = l + } + } +} diff --git a/vendor/github.com/sirupsen/logrus/go.mod b/vendor/github.com/sirupsen/logrus/go.mod new file mode 100644 index 0000000..8261a2b --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/go.mod @@ -0,0 +1,10 @@ +module github.com/sirupsen/logrus + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/konsorten/go-windows-terminal-sequences v1.0.1 + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/objx v0.1.1 // indirect + github.com/stretchr/testify v1.2.2 + golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33 +) diff --git a/vendor/github.com/sirupsen/logrus/hooks.go b/vendor/github.com/sirupsen/logrus/hooks.go new file mode 100644 index 0000000..3f151cd --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/hooks.go @@ -0,0 +1,34 @@ +package logrus + +// A hook to be fired when logging on the logging levels returned from +// `Levels()` on your implementation of the interface. 
Note that this is not +// fired in a goroutine or a channel with workers, you should handle such +// functionality yourself if your call is non-blocking and you don't wish for +// the logging calls for levels returned from `Levels()` to block. +type Hook interface { + Levels() []Level + Fire(*Entry) error +} + +// Internal type for storing the hooks on a logger instance. +type LevelHooks map[Level][]Hook + +// Add a hook to an instance of logger. This is called with +// `log.Hooks.Add(new(MyHook))` where `MyHook` implements the `Hook` interface. +func (hooks LevelHooks) Add(hook Hook) { + for _, level := range hook.Levels() { + hooks[level] = append(hooks[level], hook) + } +} + +// Fire all the hooks for the passed level. Used by `entry.log` to fire +// appropriate hooks for a log entry. +func (hooks LevelHooks) Fire(level Level, entry *Entry) error { + for _, hook := range hooks[level] { + if err := hook.Fire(entry); err != nil { + return err + } + } + + return nil +} diff --git a/vendor/github.com/sirupsen/logrus/json_formatter.go b/vendor/github.com/sirupsen/logrus/json_formatter.go new file mode 100644 index 0000000..098a21a --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/json_formatter.go @@ -0,0 +1,121 @@ +package logrus + +import ( + "bytes" + "encoding/json" + "fmt" + "runtime" +) + +type fieldKey string + +// FieldMap allows customization of the key names for default fields. +type FieldMap map[fieldKey]string + +func (f FieldMap) resolve(key fieldKey) string { + if k, ok := f[key]; ok { + return k + } + + return string(key) +} + +// JSONFormatter formats logs into parsable json +type JSONFormatter struct { + // TimestampFormat sets the format used for marshaling timestamps. + TimestampFormat string + + // DisableTimestamp allows disabling automatic timestamps in output + DisableTimestamp bool + + // DataKey allows users to put all the log entry parameters into a nested dictionary at a given key. 
+ DataKey string + + // FieldMap allows users to customize the names of keys for default fields. + // As an example: + // formatter := &JSONFormatter{ + // FieldMap: FieldMap{ + // FieldKeyTime: "@timestamp", + // FieldKeyLevel: "@level", + // FieldKeyMsg: "@message", + // FieldKeyFunc: "@caller", + // }, + // } + FieldMap FieldMap + + // CallerPrettyfier can be set by the user to modify the content + // of the function and file keys in the json data when ReportCaller is + // activated. If any of the returned value is the empty string the + // corresponding key will be removed from json fields. + CallerPrettyfier func(*runtime.Frame) (function string, file string) + + // PrettyPrint will indent all json logs + PrettyPrint bool +} + +// Format renders a single log entry +func (f *JSONFormatter) Format(entry *Entry) ([]byte, error) { + data := make(Fields, len(entry.Data)+4) + for k, v := range entry.Data { + switch v := v.(type) { + case error: + // Otherwise errors are ignored by `encoding/json` + // https://github.com/sirupsen/logrus/issues/137 + data[k] = v.Error() + default: + data[k] = v + } + } + + if f.DataKey != "" { + newData := make(Fields, 4) + newData[f.DataKey] = data + data = newData + } + + prefixFieldClashes(data, f.FieldMap, entry.HasCaller()) + + timestampFormat := f.TimestampFormat + if timestampFormat == "" { + timestampFormat = defaultTimestampFormat + } + + if entry.err != "" { + data[f.FieldMap.resolve(FieldKeyLogrusError)] = entry.err + } + if !f.DisableTimestamp { + data[f.FieldMap.resolve(FieldKeyTime)] = entry.Time.Format(timestampFormat) + } + data[f.FieldMap.resolve(FieldKeyMsg)] = entry.Message + data[f.FieldMap.resolve(FieldKeyLevel)] = entry.Level.String() + if entry.HasCaller() { + funcVal := entry.Caller.Function + fileVal := fmt.Sprintf("%s:%d", entry.Caller.File, entry.Caller.Line) + if f.CallerPrettyfier != nil { + funcVal, fileVal = f.CallerPrettyfier(entry.Caller) + } + if funcVal != "" { + 
data[f.FieldMap.resolve(FieldKeyFunc)] = funcVal + } + if fileVal != "" { + data[f.FieldMap.resolve(FieldKeyFile)] = fileVal + } + } + + var b *bytes.Buffer + if entry.Buffer != nil { + b = entry.Buffer + } else { + b = &bytes.Buffer{} + } + + encoder := json.NewEncoder(b) + if f.PrettyPrint { + encoder.SetIndent("", " ") + } + if err := encoder.Encode(data); err != nil { + return nil, fmt.Errorf("failed to marshal fields to JSON, %v", err) + } + + return b.Bytes(), nil +} diff --git a/vendor/github.com/sirupsen/logrus/logger.go b/vendor/github.com/sirupsen/logrus/logger.go new file mode 100644 index 0000000..c0c0b1e --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/logger.go @@ -0,0 +1,351 @@ +package logrus + +import ( + "context" + "io" + "os" + "sync" + "sync/atomic" + "time" +) + +type Logger struct { + // The logs are `io.Copy`'d to this in a mutex. It's common to set this to a + // file, or leave it default which is `os.Stderr`. You can also set this to + // something more adventurous, such as logging to Kafka. + Out io.Writer + // Hooks for the logger instance. These allow firing events based on logging + // levels and log entries. For example, to send errors to an error tracking + // service, log to StatsD or dump the core on fatal errors. + Hooks LevelHooks + // All log entries pass through the formatter before logged to Out. The + // included formatters are `TextFormatter` and `JSONFormatter` for which + // TextFormatter is the default. In development (when a TTY is attached) it + // logs with colors, but to a file it wouldn't. You can easily implement your + // own that implements the `Formatter` interface, see the `README` or included + // formatters for examples. + Formatter Formatter + + // Flag for whether to log caller info (off by default) + ReportCaller bool + + // The logging level the logger should log at. This is typically (and defaults + // to) `logrus.Info`, which allows Info(), Warn(), Error() and Fatal() to be + // logged. 
+ Level Level + // Used to sync writing to the log. Locking is enabled by Default + mu MutexWrap + // Reusable empty entry + entryPool sync.Pool + // Function to exit the application, defaults to `os.Exit()` + ExitFunc exitFunc +} + +type exitFunc func(int) + +type MutexWrap struct { + lock sync.Mutex + disabled bool +} + +func (mw *MutexWrap) Lock() { + if !mw.disabled { + mw.lock.Lock() + } +} + +func (mw *MutexWrap) Unlock() { + if !mw.disabled { + mw.lock.Unlock() + } +} + +func (mw *MutexWrap) Disable() { + mw.disabled = true +} + +// Creates a new logger. Configuration should be set by changing `Formatter`, +// `Out` and `Hooks` directly on the default logger instance. You can also just +// instantiate your own: +// +// var log = &Logger{ +// Out: os.Stderr, +// Formatter: new(JSONFormatter), +// Hooks: make(LevelHooks), +// Level: logrus.DebugLevel, +// } +// +// It's recommended to make this a global instance called `log`. +func New() *Logger { + return &Logger{ + Out: os.Stderr, + Formatter: new(TextFormatter), + Hooks: make(LevelHooks), + Level: InfoLevel, + ExitFunc: os.Exit, + ReportCaller: false, + } +} + +func (logger *Logger) newEntry() *Entry { + entry, ok := logger.entryPool.Get().(*Entry) + if ok { + return entry + } + return NewEntry(logger) +} + +func (logger *Logger) releaseEntry(entry *Entry) { + entry.Data = map[string]interface{}{} + logger.entryPool.Put(entry) +} + +// Adds a field to the log entry, note that it doesn't log until you call +// Debug, Print, Info, Warn, Error, Fatal or Panic. It only creates a log entry. +// If you want multiple fields, use `WithFields`. +func (logger *Logger) WithField(key string, value interface{}) *Entry { + entry := logger.newEntry() + defer logger.releaseEntry(entry) + return entry.WithField(key, value) +} + +// Adds a struct of fields to the log entry. All it does is call `WithField` for +// each `Field`. 
+func (logger *Logger) WithFields(fields Fields) *Entry { + entry := logger.newEntry() + defer logger.releaseEntry(entry) + return entry.WithFields(fields) +} + +// Add an error as single field to the log entry. All it does is call +// `WithError` for the given `error`. +func (logger *Logger) WithError(err error) *Entry { + entry := logger.newEntry() + defer logger.releaseEntry(entry) + return entry.WithError(err) +} + +// Add a context to the log entry. +func (logger *Logger) WithContext(ctx context.Context) *Entry { + entry := logger.newEntry() + defer logger.releaseEntry(entry) + return entry.WithContext(ctx) +} + +// Overrides the time of the log entry. +func (logger *Logger) WithTime(t time.Time) *Entry { + entry := logger.newEntry() + defer logger.releaseEntry(entry) + return entry.WithTime(t) +} + +func (logger *Logger) Logf(level Level, format string, args ...interface{}) { + if logger.IsLevelEnabled(level) { + entry := logger.newEntry() + entry.Logf(level, format, args...) + logger.releaseEntry(entry) + } +} + +func (logger *Logger) Tracef(format string, args ...interface{}) { + logger.Logf(TraceLevel, format, args...) +} + +func (logger *Logger) Debugf(format string, args ...interface{}) { + logger.Logf(DebugLevel, format, args...) +} + +func (logger *Logger) Infof(format string, args ...interface{}) { + logger.Logf(InfoLevel, format, args...) +} + +func (logger *Logger) Printf(format string, args ...interface{}) { + entry := logger.newEntry() + entry.Printf(format, args...) + logger.releaseEntry(entry) +} + +func (logger *Logger) Warnf(format string, args ...interface{}) { + logger.Logf(WarnLevel, format, args...) +} + +func (logger *Logger) Warningf(format string, args ...interface{}) { + logger.Warnf(format, args...) +} + +func (logger *Logger) Errorf(format string, args ...interface{}) { + logger.Logf(ErrorLevel, format, args...) +} + +func (logger *Logger) Fatalf(format string, args ...interface{}) { + logger.Logf(FatalLevel, format, args...) 
+ logger.Exit(1) +} + +func (logger *Logger) Panicf(format string, args ...interface{}) { + logger.Logf(PanicLevel, format, args...) +} + +func (logger *Logger) Log(level Level, args ...interface{}) { + if logger.IsLevelEnabled(level) { + entry := logger.newEntry() + entry.Log(level, args...) + logger.releaseEntry(entry) + } +} + +func (logger *Logger) Trace(args ...interface{}) { + logger.Log(TraceLevel, args...) +} + +func (logger *Logger) Debug(args ...interface{}) { + logger.Log(DebugLevel, args...) +} + +func (logger *Logger) Info(args ...interface{}) { + logger.Log(InfoLevel, args...) +} + +func (logger *Logger) Print(args ...interface{}) { + entry := logger.newEntry() + entry.Print(args...) + logger.releaseEntry(entry) +} + +func (logger *Logger) Warn(args ...interface{}) { + logger.Log(WarnLevel, args...) +} + +func (logger *Logger) Warning(args ...interface{}) { + logger.Warn(args...) +} + +func (logger *Logger) Error(args ...interface{}) { + logger.Log(ErrorLevel, args...) +} + +func (logger *Logger) Fatal(args ...interface{}) { + logger.Log(FatalLevel, args...) + logger.Exit(1) +} + +func (logger *Logger) Panic(args ...interface{}) { + logger.Log(PanicLevel, args...) +} + +func (logger *Logger) Logln(level Level, args ...interface{}) { + if logger.IsLevelEnabled(level) { + entry := logger.newEntry() + entry.Logln(level, args...) + logger.releaseEntry(entry) + } +} + +func (logger *Logger) Traceln(args ...interface{}) { + logger.Logln(TraceLevel, args...) +} + +func (logger *Logger) Debugln(args ...interface{}) { + logger.Logln(DebugLevel, args...) +} + +func (logger *Logger) Infoln(args ...interface{}) { + logger.Logln(InfoLevel, args...) +} + +func (logger *Logger) Println(args ...interface{}) { + entry := logger.newEntry() + entry.Println(args...) + logger.releaseEntry(entry) +} + +func (logger *Logger) Warnln(args ...interface{}) { + logger.Logln(WarnLevel, args...) +} + +func (logger *Logger) Warningln(args ...interface{}) { + logger.Warnln(args...) 
+} + +func (logger *Logger) Errorln(args ...interface{}) { + logger.Logln(ErrorLevel, args...) +} + +func (logger *Logger) Fatalln(args ...interface{}) { + logger.Logln(FatalLevel, args...) + logger.Exit(1) +} + +func (logger *Logger) Panicln(args ...interface{}) { + logger.Logln(PanicLevel, args...) +} + +func (logger *Logger) Exit(code int) { + runHandlers() + if logger.ExitFunc == nil { + logger.ExitFunc = os.Exit + } + logger.ExitFunc(code) +} + +// When a file is opened in append mode, it is safe to +// write concurrently to it (for messages within 4k on Linux). +// In these cases the user can choose to disable the lock. +func (logger *Logger) SetNoLock() { + logger.mu.Disable() +} + +func (logger *Logger) level() Level { + return Level(atomic.LoadUint32((*uint32)(&logger.Level))) +} + +// SetLevel sets the logger level. +func (logger *Logger) SetLevel(level Level) { + atomic.StoreUint32((*uint32)(&logger.Level), uint32(level)) +} + +// GetLevel returns the logger level. +func (logger *Logger) GetLevel() Level { + return logger.level() +} + +// AddHook adds a hook to the logger hooks. +func (logger *Logger) AddHook(hook Hook) { + logger.mu.Lock() + defer logger.mu.Unlock() + logger.Hooks.Add(hook) +} + +// IsLevelEnabled checks if the log level of the logger is greater than or equal to the level param +func (logger *Logger) IsLevelEnabled(level Level) bool { + return logger.level() >= level +} + +// SetFormatter sets the logger formatter. +func (logger *Logger) SetFormatter(formatter Formatter) { + logger.mu.Lock() + defer logger.mu.Unlock() + logger.Formatter = formatter +} + +// SetOutput sets the logger output.
+func (logger *Logger) SetOutput(output io.Writer) { + logger.mu.Lock() + defer logger.mu.Unlock() + logger.Out = output +} + +func (logger *Logger) SetReportCaller(reportCaller bool) { + logger.mu.Lock() + defer logger.mu.Unlock() + logger.ReportCaller = reportCaller +} + +// ReplaceHooks replaces the logger hooks and returns the old ones +func (logger *Logger) ReplaceHooks(hooks LevelHooks) LevelHooks { + logger.mu.Lock() + oldHooks := logger.Hooks + logger.Hooks = hooks + logger.mu.Unlock() + return oldHooks +} diff --git a/vendor/github.com/sirupsen/logrus/logrus.go b/vendor/github.com/sirupsen/logrus/logrus.go new file mode 100644 index 0000000..8644761 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/logrus.go @@ -0,0 +1,186 @@ +package logrus + +import ( + "fmt" + "log" + "strings" +) + +// Fields type, used to pass to `WithFields`. +type Fields map[string]interface{} + +// Level type +type Level uint32 + +// Convert the Level to a string. E.g. PanicLevel becomes "panic". +func (level Level) String() string { + if b, err := level.MarshalText(); err == nil { + return string(b) + } else { + return "unknown" + } +} + +// ParseLevel takes a string level and returns the Logrus log level constant. +func ParseLevel(lvl string) (Level, error) { + switch strings.ToLower(lvl) { + case "panic": + return PanicLevel, nil + case "fatal": + return FatalLevel, nil + case "error": + return ErrorLevel, nil + case "warn", "warning": + return WarnLevel, nil + case "info": + return InfoLevel, nil + case "debug": + return DebugLevel, nil + case "trace": + return TraceLevel, nil + } + + var l Level + return l, fmt.Errorf("not a valid logrus Level: %q", lvl) +} + +// UnmarshalText implements encoding.TextUnmarshaler. 
+func (level *Level) UnmarshalText(text []byte) error { + l, err := ParseLevel(string(text)) + if err != nil { + return err + } + + *level = Level(l) + + return nil +} + +func (level Level) MarshalText() ([]byte, error) { + switch level { + case TraceLevel: + return []byte("trace"), nil + case DebugLevel: + return []byte("debug"), nil + case InfoLevel: + return []byte("info"), nil + case WarnLevel: + return []byte("warning"), nil + case ErrorLevel: + return []byte("error"), nil + case FatalLevel: + return []byte("fatal"), nil + case PanicLevel: + return []byte("panic"), nil + } + + return nil, fmt.Errorf("not a valid logrus level %d", level) +} + +// A constant exposing all logging levels +var AllLevels = []Level{ + PanicLevel, + FatalLevel, + ErrorLevel, + WarnLevel, + InfoLevel, + DebugLevel, + TraceLevel, +} + +// These are the different logging levels. You can set the logging level to log +// on your instance of logger, obtained with `logrus.New()`. +const ( + // PanicLevel level, highest level of severity. Logs and then calls panic with the + // message passed to Debug, Info, ... + PanicLevel Level = iota + // FatalLevel level. Logs and then calls `logger.Exit(1)`. It will exit even if the + // logging level is set to Panic. + FatalLevel + // ErrorLevel level. Logs. Used for errors that should definitely be noted. + // Commonly used for hooks to send errors to an error tracking service. + ErrorLevel + // WarnLevel level. Non-critical entries that deserve eyes. + WarnLevel + // InfoLevel level. General operational entries about what's going on inside the + // application. + InfoLevel + // DebugLevel level. Usually only enabled when debugging. Very verbose logging. + DebugLevel + // TraceLevel level. Designates finer-grained informational events than the Debug. 
+ TraceLevel +) + +// Won't compile if StdLogger can't be realized by a log.Logger +var ( + _ StdLogger = &log.Logger{} + _ StdLogger = &Entry{} + _ StdLogger = &Logger{} +) + +// StdLogger is what your logrus-enabled library should take, that way +// it'll accept a stdlib logger and a logrus logger. There's no standard +// interface, this is the closest we get, unfortunately. +type StdLogger interface { + Print(...interface{}) + Printf(string, ...interface{}) + Println(...interface{}) + + Fatal(...interface{}) + Fatalf(string, ...interface{}) + Fatalln(...interface{}) + + Panic(...interface{}) + Panicf(string, ...interface{}) + Panicln(...interface{}) +} + +// The FieldLogger interface generalizes the Entry and Logger types +type FieldLogger interface { + WithField(key string, value interface{}) *Entry + WithFields(fields Fields) *Entry + WithError(err error) *Entry + + Debugf(format string, args ...interface{}) + Infof(format string, args ...interface{}) + Printf(format string, args ...interface{}) + Warnf(format string, args ...interface{}) + Warningf(format string, args ...interface{}) + Errorf(format string, args ...interface{}) + Fatalf(format string, args ...interface{}) + Panicf(format string, args ...interface{}) + + Debug(args ...interface{}) + Info(args ...interface{}) + Print(args ...interface{}) + Warn(args ...interface{}) + Warning(args ...interface{}) + Error(args ...interface{}) + Fatal(args ...interface{}) + Panic(args ...interface{}) + + Debugln(args ...interface{}) + Infoln(args ...interface{}) + Println(args ...interface{}) + Warnln(args ...interface{}) + Warningln(args ...interface{}) + Errorln(args ...interface{}) + Fatalln(args ...interface{}) + Panicln(args ...interface{}) + + // IsDebugEnabled() bool + // IsInfoEnabled() bool + // IsWarnEnabled() bool + // IsErrorEnabled() bool + // IsFatalEnabled() bool + // IsPanicEnabled() bool +} + +// Ext1FieldLogger (the first extension to FieldLogger) is superfluous; it is +// here for consistency.
Do not use. Use Logger or Entry instead. +type Ext1FieldLogger interface { + FieldLogger + Tracef(format string, args ...interface{}) + Trace(args ...interface{}) + Traceln(args ...interface{}) +} diff --git a/vendor/github.com/sirupsen/logrus/terminal_check_appengine.go b/vendor/github.com/sirupsen/logrus/terminal_check_appengine.go new file mode 100644 index 0000000..2403de9 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/terminal_check_appengine.go @@ -0,0 +1,11 @@ +// +build appengine + +package logrus + +import ( + "io" +) + +func checkIfTerminal(w io.Writer) bool { + return true +} diff --git a/vendor/github.com/sirupsen/logrus/terminal_check_bsd.go b/vendor/github.com/sirupsen/logrus/terminal_check_bsd.go new file mode 100644 index 0000000..3c4f43f --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/terminal_check_bsd.go @@ -0,0 +1,13 @@ +// +build darwin dragonfly freebsd netbsd openbsd + +package logrus + +import "golang.org/x/sys/unix" + +const ioctlReadTermios = unix.TIOCGETA + +func isTerminal(fd int) bool { + _, err := unix.IoctlGetTermios(fd, ioctlReadTermios) + return err == nil +} + diff --git a/vendor/github.com/sirupsen/logrus/terminal_check_js.go b/vendor/github.com/sirupsen/logrus/terminal_check_js.go new file mode 100644 index 0000000..0c20975 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/terminal_check_js.go @@ -0,0 +1,11 @@ +// +build js + +package logrus + +import ( + "io" +) + +func checkIfTerminal(w io.Writer) bool { + return false +} diff --git a/vendor/github.com/sirupsen/logrus/terminal_check_notappengine.go b/vendor/github.com/sirupsen/logrus/terminal_check_notappengine.go new file mode 100644 index 0000000..7be2d87 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/terminal_check_notappengine.go @@ -0,0 +1,17 @@ +// +build !appengine,!js,!windows + +package logrus + +import ( + "io" + "os" +) + +func checkIfTerminal(w io.Writer) bool { + switch v := w.(type) { + case *os.File: + return isTerminal(int(v.Fd())) + 
default: + return false + } +} diff --git a/vendor/github.com/sirupsen/logrus/terminal_check_unix.go b/vendor/github.com/sirupsen/logrus/terminal_check_unix.go new file mode 100644 index 0000000..355dc96 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/terminal_check_unix.go @@ -0,0 +1,13 @@ +// +build linux aix + +package logrus + +import "golang.org/x/sys/unix" + +const ioctlReadTermios = unix.TCGETS + +func isTerminal(fd int) bool { + _, err := unix.IoctlGetTermios(fd, ioctlReadTermios) + return err == nil +} + diff --git a/vendor/github.com/sirupsen/logrus/terminal_check_windows.go b/vendor/github.com/sirupsen/logrus/terminal_check_windows.go new file mode 100644 index 0000000..3b9d286 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/terminal_check_windows.go @@ -0,0 +1,20 @@ +// +build !appengine,!js,windows + +package logrus + +import ( + "io" + "os" + "syscall" +) + +func checkIfTerminal(w io.Writer) bool { + switch v := w.(type) { + case *os.File: + var mode uint32 + err := syscall.GetConsoleMode(syscall.Handle(v.Fd()), &mode) + return err == nil + default: + return false + } +} diff --git a/vendor/github.com/sirupsen/logrus/terminal_notwindows.go b/vendor/github.com/sirupsen/logrus/terminal_notwindows.go new file mode 100644 index 0000000..3dbd237 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/terminal_notwindows.go @@ -0,0 +1,8 @@ +// +build !windows + +package logrus + +import "io" + +func initTerminal(w io.Writer) { +} diff --git a/vendor/github.com/sirupsen/logrus/terminal_windows.go b/vendor/github.com/sirupsen/logrus/terminal_windows.go new file mode 100644 index 0000000..b4ef528 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/terminal_windows.go @@ -0,0 +1,18 @@ +// +build !appengine,!js,windows + +package logrus + +import ( + "io" + "os" + "syscall" + + sequences "github.com/konsorten/go-windows-terminal-sequences" +) + +func initTerminal(w io.Writer) { + switch v := w.(type) { + case *os.File: + 
sequences.EnableVirtualTerminalProcessing(syscall.Handle(v.Fd()), true) + } +} diff --git a/vendor/github.com/sirupsen/logrus/text_formatter.go b/vendor/github.com/sirupsen/logrus/text_formatter.go new file mode 100644 index 0000000..1569161 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/text_formatter.go @@ -0,0 +1,299 @@ +package logrus + +import ( + "bytes" + "fmt" + "os" + "runtime" + "sort" + "strings" + "sync" + "time" +) + +const ( + red = 31 + yellow = 33 + blue = 36 + gray = 37 +) + +var baseTimestamp time.Time + +func init() { + baseTimestamp = time.Now() +} + +// TextFormatter formats logs into text +type TextFormatter struct { + // Set to true to bypass checking for a TTY before outputting colors. + ForceColors bool + + // Force disabling colors. + DisableColors bool + + // Override coloring based on CLICOLOR and CLICOLOR_FORCE. - https://bixense.com/clicolors/ + EnvironmentOverrideColors bool + + // Disable timestamp logging. useful when output is redirected to logging + // system that already adds timestamps. + DisableTimestamp bool + + // Enable logging the full timestamp when a TTY is attached instead of just + // the time passed since beginning of execution. + FullTimestamp bool + + // TimestampFormat to use for display when a full timestamp is printed + TimestampFormat string + + // The fields are sorted by default for a consistent output. For applications + // that log extremely frequently and don't use the JSON formatter this may not + // be desired. + DisableSorting bool + + // The keys sorting function, when uninitialized it uses sort.Strings. + SortingFunc func([]string) + + // Disables the truncation of the level text to 4 characters. + DisableLevelTruncation bool + + // QuoteEmptyFields will wrap empty fields in quotes if true + QuoteEmptyFields bool + + // Whether the logger's out is to a terminal + isTerminal bool + + // FieldMap allows users to customize the names of keys for default fields. 
+ // As an example: + // formatter := &TextFormatter{ + // FieldMap: FieldMap{ + // FieldKeyTime: "@timestamp", + // FieldKeyLevel: "@level", + // FieldKeyMsg: "@message"}} + FieldMap FieldMap + + // CallerPrettyfier can be set by the user to modify the content + // of the function and file keys in the data when ReportCaller is + // activated. If any of the returned value is the empty string the + // corresponding key will be removed from fields. + CallerPrettyfier func(*runtime.Frame) (function string, file string) + + terminalInitOnce sync.Once +} + +func (f *TextFormatter) init(entry *Entry) { + if entry.Logger != nil { + f.isTerminal = checkIfTerminal(entry.Logger.Out) + + if f.isTerminal { + initTerminal(entry.Logger.Out) + } + } +} + +func (f *TextFormatter) isColored() bool { + isColored := f.ForceColors || (f.isTerminal && (runtime.GOOS != "windows")) + + if f.EnvironmentOverrideColors { + if force, ok := os.LookupEnv("CLICOLOR_FORCE"); ok && force != "0" { + isColored = true + } else if ok && force == "0" { + isColored = false + } else if os.Getenv("CLICOLOR") == "0" { + isColored = false + } + } + + return isColored && !f.DisableColors +} + +// Format renders a single log entry +func (f *TextFormatter) Format(entry *Entry) ([]byte, error) { + data := make(Fields) + for k, v := range entry.Data { + data[k] = v + } + prefixFieldClashes(data, f.FieldMap, entry.HasCaller()) + keys := make([]string, 0, len(data)) + for k := range data { + keys = append(keys, k) + } + + var funcVal, fileVal string + + fixedKeys := make([]string, 0, 4+len(data)) + if !f.DisableTimestamp { + fixedKeys = append(fixedKeys, f.FieldMap.resolve(FieldKeyTime)) + } + fixedKeys = append(fixedKeys, f.FieldMap.resolve(FieldKeyLevel)) + if entry.Message != "" { + fixedKeys = append(fixedKeys, f.FieldMap.resolve(FieldKeyMsg)) + } + if entry.err != "" { + fixedKeys = append(fixedKeys, f.FieldMap.resolve(FieldKeyLogrusError)) + } + if entry.HasCaller() { + if f.CallerPrettyfier != nil { + 
funcVal, fileVal = f.CallerPrettyfier(entry.Caller) + } else { + funcVal = entry.Caller.Function + fileVal = fmt.Sprintf("%s:%d", entry.Caller.File, entry.Caller.Line) + } + + if funcVal != "" { + fixedKeys = append(fixedKeys, f.FieldMap.resolve(FieldKeyFunc)) + } + if fileVal != "" { + fixedKeys = append(fixedKeys, f.FieldMap.resolve(FieldKeyFile)) + } + } + + if !f.DisableSorting { + if f.SortingFunc == nil { + sort.Strings(keys) + fixedKeys = append(fixedKeys, keys...) + } else { + if !f.isColored() { + fixedKeys = append(fixedKeys, keys...) + f.SortingFunc(fixedKeys) + } else { + f.SortingFunc(keys) + } + } + } else { + fixedKeys = append(fixedKeys, keys...) + } + + var b *bytes.Buffer + if entry.Buffer != nil { + b = entry.Buffer + } else { + b = &bytes.Buffer{} + } + + f.terminalInitOnce.Do(func() { f.init(entry) }) + + timestampFormat := f.TimestampFormat + if timestampFormat == "" { + timestampFormat = defaultTimestampFormat + } + if f.isColored() { + f.printColored(b, entry, keys, data, timestampFormat) + } else { + + for _, key := range fixedKeys { + var value interface{} + switch { + case key == f.FieldMap.resolve(FieldKeyTime): + value = entry.Time.Format(timestampFormat) + case key == f.FieldMap.resolve(FieldKeyLevel): + value = entry.Level.String() + case key == f.FieldMap.resolve(FieldKeyMsg): + value = entry.Message + case key == f.FieldMap.resolve(FieldKeyLogrusError): + value = entry.err + case key == f.FieldMap.resolve(FieldKeyFunc) && entry.HasCaller(): + value = funcVal + case key == f.FieldMap.resolve(FieldKeyFile) && entry.HasCaller(): + value = fileVal + default: + value = data[key] + } + f.appendKeyValue(b, key, value) + } + } + + b.WriteByte('\n') + return b.Bytes(), nil +} + +func (f *TextFormatter) printColored(b *bytes.Buffer, entry *Entry, keys []string, data Fields, timestampFormat string) { + var levelColor int + switch entry.Level { + case DebugLevel, TraceLevel: + levelColor = gray + case WarnLevel: + levelColor = yellow + case 
ErrorLevel, FatalLevel, PanicLevel: + levelColor = red + default: + levelColor = blue + } + + levelText := strings.ToUpper(entry.Level.String()) + if !f.DisableLevelTruncation { + levelText = levelText[0:4] + } + + // Remove a single newline if it already exists in the message to keep + // the behavior of logrus text_formatter the same as the stdlib log package + entry.Message = strings.TrimSuffix(entry.Message, "\n") + + caller := "" + if entry.HasCaller() { + funcVal := fmt.Sprintf("%s()", entry.Caller.Function) + fileVal := fmt.Sprintf("%s:%d", entry.Caller.File, entry.Caller.Line) + + if f.CallerPrettyfier != nil { + funcVal, fileVal = f.CallerPrettyfier(entry.Caller) + } + + if fileVal == "" { + caller = funcVal + } else if funcVal == "" { + caller = fileVal + } else { + caller = fileVal + " " + funcVal + } + } + + if f.DisableTimestamp { + fmt.Fprintf(b, "\x1b[%dm%s\x1b[0m%s %-44s ", levelColor, levelText, caller, entry.Message) + } else if !f.FullTimestamp { + fmt.Fprintf(b, "\x1b[%dm%s\x1b[0m[%04d]%s %-44s ", levelColor, levelText, int(entry.Time.Sub(baseTimestamp)/time.Second), caller, entry.Message) + } else { + fmt.Fprintf(b, "\x1b[%dm%s\x1b[0m[%s]%s %-44s ", levelColor, levelText, entry.Time.Format(timestampFormat), caller, entry.Message) + } + for _, k := range keys { + v := data[k] + fmt.Fprintf(b, " \x1b[%dm%s\x1b[0m=", levelColor, k) + f.appendValue(b, v) + } +} + +func (f *TextFormatter) needsQuoting(text string) bool { + if f.QuoteEmptyFields && len(text) == 0 { + return true + } + for _, ch := range text { + if !((ch >= 'a' && ch <= 'z') || + (ch >= 'A' && ch <= 'Z') || + (ch >= '0' && ch <= '9') || + ch == '-' || ch == '.' 
|| ch == '_' || ch == '/' || ch == '@' || ch == '^' || ch == '+') { + return true + } + } + return false +} + +func (f *TextFormatter) appendKeyValue(b *bytes.Buffer, key string, value interface{}) { + if b.Len() > 0 { + b.WriteByte(' ') + } + b.WriteString(key) + b.WriteByte('=') + f.appendValue(b, value) +} + +func (f *TextFormatter) appendValue(b *bytes.Buffer, value interface{}) { + stringVal, ok := value.(string) + if !ok { + stringVal = fmt.Sprint(value) + } + + if !f.needsQuoting(stringVal) { + b.WriteString(stringVal) + } else { + b.WriteString(fmt.Sprintf("%q", stringVal)) + } +} diff --git a/vendor/github.com/sirupsen/logrus/writer.go b/vendor/github.com/sirupsen/logrus/writer.go new file mode 100644 index 0000000..9e1f751 --- /dev/null +++ b/vendor/github.com/sirupsen/logrus/writer.go @@ -0,0 +1,64 @@ +package logrus + +import ( + "bufio" + "io" + "runtime" +) + +func (logger *Logger) Writer() *io.PipeWriter { + return logger.WriterLevel(InfoLevel) +} + +func (logger *Logger) WriterLevel(level Level) *io.PipeWriter { + return NewEntry(logger).WriterLevel(level) +} + +func (entry *Entry) Writer() *io.PipeWriter { + return entry.WriterLevel(InfoLevel) +} + +func (entry *Entry) WriterLevel(level Level) *io.PipeWriter { + reader, writer := io.Pipe() + + var printFunc func(args ...interface{}) + + switch level { + case TraceLevel: + printFunc = entry.Trace + case DebugLevel: + printFunc = entry.Debug + case InfoLevel: + printFunc = entry.Info + case WarnLevel: + printFunc = entry.Warn + case ErrorLevel: + printFunc = entry.Error + case FatalLevel: + printFunc = entry.Fatal + case PanicLevel: + printFunc = entry.Panic + default: + printFunc = entry.Print + } + + go entry.writerScanner(reader, printFunc) + runtime.SetFinalizer(writer, writerFinalizer) + + return writer +} + +func (entry *Entry) writerScanner(reader *io.PipeReader, printFunc func(args ...interface{})) { + scanner := bufio.NewScanner(reader) + for scanner.Scan() { + printFunc(scanner.Text()) + } 
+ if err := scanner.Err(); err != nil { + entry.Errorf("Error while reading from Writer: %s", err) + } + reader.Close() +} + +func writerFinalizer(writer *io.PipeWriter) { + writer.Close() +} diff --git a/vendor/github.com/syndtr/gocapability/LICENSE b/vendor/github.com/syndtr/gocapability/LICENSE new file mode 100644 index 0000000..80dd96d --- /dev/null +++ b/vendor/github.com/syndtr/gocapability/LICENSE @@ -0,0 +1,24 @@ +Copyright 2013 Suryandaru Triandana +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/syndtr/gocapability/capability/capability.go b/vendor/github.com/syndtr/gocapability/capability/capability.go new file mode 100644 index 0000000..61a9077 --- /dev/null +++ b/vendor/github.com/syndtr/gocapability/capability/capability.go @@ -0,0 +1,133 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package capability provides utilities for manipulating POSIX capabilities. +package capability + +type Capabilities interface { + // Get checks whether a capability is present in the given + // capabilities set. The 'which' value should be one of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Get(which CapType, what Cap) bool + + // Empty checks whether all capability bits of the given capabilities + // set are zero. The 'which' value should be one of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Empty(which CapType) bool + + // Full checks whether all capability bits of the given capabilities + // set are one. The 'which' value should be one of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Full(which CapType) bool + + // Set sets capabilities of the given capabilities sets. The + // 'which' value should be one or a combination (OR'ed) of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Set(which CapType, caps ...Cap) + + // Unset unsets capabilities of the given capabilities sets. The + // 'which' value should be one or a combination (OR'ed) of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Unset(which CapType, caps ...Cap) + + // Fill sets all bits of the given capabilities kind to one. The + // 'kind' value should be one or a combination (OR'ed) of CAPS, + // BOUNDS or AMBS. + Fill(kind CapType) + + // Clear sets all bits of the given capabilities kind to zero. The
+ // 'kind' value should be one or a combination (OR'ed) of CAPS, + // BOUNDS or AMBS. + Clear(kind CapType) + + // StringCap returns the current capabilities state of the given capabilities + // set as a string. The 'which' value should be one of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + StringCap(which CapType) string + + // String returns the current capabilities state as a string. + String() string + + // Load loads the actual capabilities value. This will overwrite all + // outstanding changes. + Load() error + + // Apply applies the capabilities settings, so all changes will take + // effect. + Apply(kind CapType) error +} + +// NewPid initializes a new Capabilities object for the given pid when +// it is nonzero, or for the current process if pid is 0. +// +// Deprecated: Replace with NewPid2. For example, replace: +// +// c, err := NewPid(0) +// if err != nil { +// return err +// } +// +// with: +// +// c, err := NewPid2(0) +// if err != nil { +// return err +// } +// err = c.Load() +// if err != nil { +// return err +// } +func NewPid(pid int) (Capabilities, error) { + c, err := newPid(pid) + if err != nil { + return c, err + } + err = c.Load() + return c, err +} + +// NewPid2 initializes a new Capabilities object for the given pid when +// it is nonzero, or for the current process if pid is 0. This +// does not load the process's current capabilities; to do that you +// must call Load explicitly. +func NewPid2(pid int) (Capabilities, error) { + return newPid(pid) +} + +// NewFile initializes a new Capabilities object for the given file path. +// +// Deprecated: Replace with NewFile2.
For example, replace: +// +// c, err := NewFile(path) +// if err != nil { +// return err +// } +// +// with: +// +// c, err := NewFile2(path) +// if err != nil { +// return err +// } +// err = c.Load() +// if err != nil { +// return err +// } +func NewFile(path string) (Capabilities, error) { + c, err := newFile(path) + if err != nil { + return c, err + } + err = c.Load() + return c, err +} + +// NewFile2 initializes a new Capabilities object for the given +// file path. This does not load the file's current capabilities; +// to do that you must call Load explicitly. +func NewFile2(path string) (Capabilities, error) { + return newFile(path) +} diff --git a/vendor/github.com/syndtr/gocapability/capability/capability_linux.go b/vendor/github.com/syndtr/gocapability/capability/capability_linux.go new file mode 100644 index 0000000..1567dc8 --- /dev/null +++ b/vendor/github.com/syndtr/gocapability/capability/capability_linux.go @@ -0,0 +1,647 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file.
+ +package capability + +import ( + "bufio" + "errors" + "fmt" + "io" + "os" + "strings" + "syscall" +) + +var errUnknownVers = errors.New("unknown capability version") + +const ( + linuxCapVer1 = 0x19980330 + linuxCapVer2 = 0x20071026 + linuxCapVer3 = 0x20080522 +) + +var ( + capVers uint32 + capLastCap Cap +) + +func init() { + var hdr capHeader + capget(&hdr, nil) + capVers = hdr.version + + if initLastCap() == nil { + CAP_LAST_CAP = capLastCap + if capLastCap > 31 { + capUpperMask = (uint32(1) << (uint(capLastCap) - 31)) - 1 + } else { + capUpperMask = 0 + } + } +} + +// initLastCap reads the highest capability number supported by the running +// kernel from /proc/sys/kernel/cap_last_cap into capLastCap. It does nothing +// when capLastCap is already non-zero. Open, read and parse failures are +// returned so the caller can keep its compiled-in defaults. +func initLastCap() error { + if capLastCap != 0 { + return nil + } + + f, err := os.Open("/proc/sys/kernel/cap_last_cap") + if err != nil { + return err + } + defer f.Close() + + b := make([]byte, 11) + n, err := f.Read(b) + if err != nil { + return err + } + + // Parse only the bytes actually read and report a malformed value, + // rather than returning nil with capLastCap still 0. + _, err = fmt.Sscanf(string(b[:n]), "%d", &capLastCap) + return err +} + +func mkStringCap(c Capabilities, which CapType) (ret string) { + for i, first := Cap(0), true; i <= CAP_LAST_CAP; i++ { + if !c.Get(which, i) { + continue + } + if first { + first = false + } else { + ret += ", " + } + ret += i.String() + } + return +} + +func mkString(c Capabilities, max CapType) (ret string) { + ret = "{" + for i := CapType(1); i <= max; i <<= 1 { + ret += " " + i.String() + "=\"" + if c.Empty(i) { + ret += "empty" + } else if c.Full(i) { + ret += "full" + } else { + ret += c.StringCap(i) + } + ret += "\"" + } + ret += " }" + return +} + +func newPid(pid int) (c Capabilities, err error) { + switch capVers { + case linuxCapVer1: + p := new(capsV1) + p.hdr.version = capVers + p.hdr.pid = int32(pid) + c = p + case linuxCapVer2, linuxCapVer3: + p := new(capsV3) + p.hdr.version = capVers + p.hdr.pid = int32(pid) + c = p + default: + err = errUnknownVers + return + } + return +} + +type capsV1 struct { + hdr capHeader + data capData +} + +func (c *capsV1) Get(which CapType, what Cap) bool { + if what > 32 { + return false + } + + switch which { + case
EFFECTIVE: + return (1< 32 { + continue + } + + if which&EFFECTIVE != 0 { + c.data.effective |= 1 << uint(what) + } + if which&PERMITTED != 0 { + c.data.permitted |= 1 << uint(what) + } + if which&INHERITABLE != 0 { + c.data.inheritable |= 1 << uint(what) + } + } +} + +func (c *capsV1) Unset(which CapType, caps ...Cap) { + for _, what := range caps { + if what > 32 { + continue + } + + if which&EFFECTIVE != 0 { + c.data.effective &= ^(1 << uint(what)) + } + if which&PERMITTED != 0 { + c.data.permitted &= ^(1 << uint(what)) + } + if which&INHERITABLE != 0 { + c.data.inheritable &= ^(1 << uint(what)) + } + } +} + +func (c *capsV1) Fill(kind CapType) { + if kind&CAPS == CAPS { + c.data.effective = 0x7fffffff + c.data.permitted = 0x7fffffff + c.data.inheritable = 0 + } +} + +func (c *capsV1) Clear(kind CapType) { + if kind&CAPS == CAPS { + c.data.effective = 0 + c.data.permitted = 0 + c.data.inheritable = 0 + } +} + +func (c *capsV1) StringCap(which CapType) (ret string) { + return mkStringCap(c, which) +} + +func (c *capsV1) String() (ret string) { + return mkString(c, BOUNDING) +} + +func (c *capsV1) Load() (err error) { + return capget(&c.hdr, &c.data) +} + +func (c *capsV1) Apply(kind CapType) error { + if kind&CAPS == CAPS { + return capset(&c.hdr, &c.data) + } + return nil +} + +type capsV3 struct { + hdr capHeader + data [2]capData + bounds [2]uint32 + ambient [2]uint32 +} + +func (c *capsV3) Get(which CapType, what Cap) bool { + var i uint + if what > 31 { + i = uint(what) >> 5 + what %= 32 + } + + switch which { + case EFFECTIVE: + return (1< 31 { + i = uint(what) >> 5 + what %= 32 + } + + if which&EFFECTIVE != 0 { + c.data[i].effective |= 1 << uint(what) + } + if which&PERMITTED != 0 { + c.data[i].permitted |= 1 << uint(what) + } + if which&INHERITABLE != 0 { + c.data[i].inheritable |= 1 << uint(what) + } + if which&BOUNDING != 0 { + c.bounds[i] |= 1 << uint(what) + } + if which&AMBIENT != 0 { + c.ambient[i] |= 1 << uint(what) + } + } +} + +func (c *capsV3) 
Unset(which CapType, caps ...Cap) { + for _, what := range caps { + var i uint + if what > 31 { + i = uint(what) >> 5 + what %= 32 + } + + if which&EFFECTIVE != 0 { + c.data[i].effective &= ^(1 << uint(what)) + } + if which&PERMITTED != 0 { + c.data[i].permitted &= ^(1 << uint(what)) + } + if which&INHERITABLE != 0 { + c.data[i].inheritable &= ^(1 << uint(what)) + } + if which&BOUNDING != 0 { + c.bounds[i] &= ^(1 << uint(what)) + } + if which&AMBIENT != 0 { + c.ambient[i] &= ^(1 << uint(what)) + } + } +} + +func (c *capsV3) Fill(kind CapType) { + if kind&CAPS == CAPS { + c.data[0].effective = 0xffffffff + c.data[0].permitted = 0xffffffff + c.data[0].inheritable = 0 + c.data[1].effective = 0xffffffff + c.data[1].permitted = 0xffffffff + c.data[1].inheritable = 0 + } + + if kind&BOUNDS == BOUNDS { + c.bounds[0] = 0xffffffff + c.bounds[1] = 0xffffffff + } + if kind&AMBS == AMBS { + c.ambient[0] = 0xffffffff + c.ambient[1] = 0xffffffff + } +} + +func (c *capsV3) Clear(kind CapType) { + if kind&CAPS == CAPS { + c.data[0].effective = 0 + c.data[0].permitted = 0 + c.data[0].inheritable = 0 + c.data[1].effective = 0 + c.data[1].permitted = 0 + c.data[1].inheritable = 0 + } + + if kind&BOUNDS == BOUNDS { + c.bounds[0] = 0 + c.bounds[1] = 0 + } + if kind&AMBS == AMBS { + c.ambient[0] = 0 + c.ambient[1] = 0 + } +} + +func (c *capsV3) StringCap(which CapType) (ret string) { + return mkStringCap(c, which) +} + +func (c *capsV3) String() (ret string) { + return mkString(c, BOUNDING) +} + +func (c *capsV3) Load() (err error) { + err = capget(&c.hdr, &c.data[0]) + if err != nil { + return + } + + var status_path string + + if c.hdr.pid == 0 { + status_path = fmt.Sprintf("/proc/self/status") + } else { + status_path = fmt.Sprintf("/proc/%d/status", c.hdr.pid) + } + + f, err := os.Open(status_path) + if err != nil { + return + } + b := bufio.NewReader(f) + for { + line, e := b.ReadString('\n') + if e != nil { + if e != io.EOF { + err = e + } + break + } + if strings.HasPrefix(line, 
"CapB") { + fmt.Sscanf(line[4:], "nd: %08x%08x", &c.bounds[1], &c.bounds[0]) + continue + } + if strings.HasPrefix(line, "CapA") { + fmt.Sscanf(line[4:], "mb: %08x%08x", &c.ambient[1], &c.ambient[0]) + continue + } + } + f.Close() + + return +} + +func (c *capsV3) Apply(kind CapType) (err error) { + if kind&BOUNDS == BOUNDS { + var data [2]capData + err = capget(&c.hdr, &data[0]) + if err != nil { + return + } + if (1< 31 { + if c.data.version == 1 { + return false + } + i = uint(what) >> 5 + what %= 32 + } + + switch which { + case EFFECTIVE: + return (1< 31 { + if c.data.version == 1 { + continue + } + i = uint(what) >> 5 + what %= 32 + } + + if which&EFFECTIVE != 0 { + c.data.effective[i] |= 1 << uint(what) + } + if which&PERMITTED != 0 { + c.data.data[i].permitted |= 1 << uint(what) + } + if which&INHERITABLE != 0 { + c.data.data[i].inheritable |= 1 << uint(what) + } + } +} + +func (c *capsFile) Unset(which CapType, caps ...Cap) { + for _, what := range caps { + var i uint + if what > 31 { + if c.data.version == 1 { + continue + } + i = uint(what) >> 5 + what %= 32 + } + + if which&EFFECTIVE != 0 { + c.data.effective[i] &= ^(1 << uint(what)) + } + if which&PERMITTED != 0 { + c.data.data[i].permitted &= ^(1 << uint(what)) + } + if which&INHERITABLE != 0 { + c.data.data[i].inheritable &= ^(1 << uint(what)) + } + } +} + +func (c *capsFile) Fill(kind CapType) { + if kind&CAPS == CAPS { + c.data.effective[0] = 0xffffffff + c.data.data[0].permitted = 0xffffffff + c.data.data[0].inheritable = 0 + if c.data.version == 2 { + c.data.effective[1] = 0xffffffff + c.data.data[1].permitted = 0xffffffff + c.data.data[1].inheritable = 0 + } + } +} + +func (c *capsFile) Clear(kind CapType) { + if kind&CAPS == CAPS { + c.data.effective[0] = 0 + c.data.data[0].permitted = 0 + c.data.data[0].inheritable = 0 + if c.data.version == 2 { + c.data.effective[1] = 0 + c.data.data[1].permitted = 0 + c.data.data[1].inheritable = 0 + } + } +} + +func (c *capsFile) StringCap(which CapType) 
(ret string) { + return mkStringCap(c, which) +} + +func (c *capsFile) String() (ret string) { + return mkString(c, INHERITABLE) +} + +func (c *capsFile) Load() (err error) { + return getVfsCap(c.path, &c.data) +} + +func (c *capsFile) Apply(kind CapType) (err error) { + if kind&CAPS == CAPS { + return setVfsCap(c.path, &c.data) + } + return +} diff --git a/vendor/github.com/syndtr/gocapability/capability/capability_noop.go b/vendor/github.com/syndtr/gocapability/capability/capability_noop.go new file mode 100644 index 0000000..9bb3070 --- /dev/null +++ b/vendor/github.com/syndtr/gocapability/capability/capability_noop.go @@ -0,0 +1,19 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// +build !linux + +package capability + +import "errors" + +func newPid(pid int) (Capabilities, error) { + return nil, errors.New("not supported") +} + +func newFile(path string) (Capabilities, error) { + return nil, errors.New("not supported") +} diff --git a/vendor/github.com/syndtr/gocapability/capability/enum.go b/vendor/github.com/syndtr/gocapability/capability/enum.go new file mode 100644 index 0000000..6938173 --- /dev/null +++ b/vendor/github.com/syndtr/gocapability/capability/enum.go @@ -0,0 +1,268 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package capability + +type CapType uint + +func (c CapType) String() string { + switch c { + case EFFECTIVE: + return "effective" + case PERMITTED: + return "permitted" + case INHERITABLE: + return "inheritable" + case BOUNDING: + return "bounding" + case CAPS: + return "caps" + case AMBIENT: + return "ambient" + } + return "unknown" +} + +const ( + EFFECTIVE CapType = 1 << iota + PERMITTED + INHERITABLE + BOUNDING + AMBIENT + + CAPS = EFFECTIVE | PERMITTED | INHERITABLE + BOUNDS = BOUNDING + AMBS = AMBIENT +) + +//go:generate go run enumgen/gen.go +type Cap int + +// POSIX-draft defined capabilities. +const ( + // In a system with the [_POSIX_CHOWN_RESTRICTED] option defined, this + // overrides the restriction of changing file ownership and group + // ownership. + CAP_CHOWN = Cap(0) + + // Override all DAC access, including ACL execute access if + // [_POSIX_ACL] is defined. Excluding DAC access covered by + // CAP_LINUX_IMMUTABLE. + CAP_DAC_OVERRIDE = Cap(1) + + // Overrides all DAC restrictions regarding read and search on files + // and directories, including ACL restrictions if [_POSIX_ACL] is + // defined. Excluding DAC access covered by CAP_LINUX_IMMUTABLE. + CAP_DAC_READ_SEARCH = Cap(2) + + // Overrides all restrictions about allowed operations on files, where + // file owner ID must be equal to the user ID, except where CAP_FSETID + // is applicable. It doesn't override MAC and DAC restrictions. + CAP_FOWNER = Cap(3) + + // Overrides the following restrictions that the effective user ID + // shall match the file owner ID when setting the S_ISUID and S_ISGID + // bits on that file; that the effective group ID (or one of the + // supplementary group IDs) shall match the file owner ID when setting + // the S_ISGID bit on that file; that the S_ISUID and S_ISGID bits are + // cleared on successful return from chown(2) (not implemented). 
+ CAP_FSETID = Cap(4) + + // Overrides the restriction that the real or effective user ID of a + // process sending a signal must match the real or effective user ID + // of the process receiving the signal. + CAP_KILL = Cap(5) + + // Allows setgid(2) manipulation + // Allows setgroups(2) + // Allows forged gids on socket credentials passing. + CAP_SETGID = Cap(6) + + // Allows set*uid(2) manipulation (including fsuid). + // Allows forged pids on socket credentials passing. + CAP_SETUID = Cap(7) + + // Linux-specific capabilities + + // Without VFS support for capabilities: + // Transfer any capability in your permitted set to any pid, + // remove any capability in your permitted set from any pid + // With VFS support for capabilities (neither of above, but) + // Add any capability from current's capability bounding set + // to the current process' inheritable set + // Allow taking bits out of capability bounding set + // Allow modification of the securebits for a process + CAP_SETPCAP = Cap(8) + + // Allow modification of S_IMMUTABLE and S_APPEND file attributes + CAP_LINUX_IMMUTABLE = Cap(9) + + // Allows binding to TCP/UDP sockets below 1024 + // Allows binding to ATM VCIs below 32 + CAP_NET_BIND_SERVICE = Cap(10) + + // Allow broadcasting, listen to multicast + CAP_NET_BROADCAST = Cap(11) + + // Allow interface configuration + // Allow administration of IP firewall, masquerading and accounting + // Allow setting debug option on sockets + // Allow modification of routing tables + // Allow setting arbitrary process / process group ownership on + // sockets + // Allow binding to any address for transparent proxying (also via NET_RAW) + // Allow setting TOS (type of service) + // Allow setting promiscuous mode + // Allow clearing driver statistics + // Allow multicasting + // Allow read/write of device-specific registers + // Allow activation of ATM control sockets + CAP_NET_ADMIN = Cap(12) + + // Allow use of RAW sockets + // Allow use of PACKET sockets + // Allow 
binding to any address for transparent proxying (also via NET_ADMIN) + CAP_NET_RAW = Cap(13) + + // Allow locking of shared memory segments + // Allow mlock and mlockall (which doesn't really have anything to do + // with IPC) + CAP_IPC_LOCK = Cap(14) + + // Override IPC ownership checks + CAP_IPC_OWNER = Cap(15) + + // Insert and remove kernel modules - modify kernel without limit + CAP_SYS_MODULE = Cap(16) + + // Allow ioperm/iopl access + // Allow sending USB messages to any device via /proc/bus/usb + CAP_SYS_RAWIO = Cap(17) + + // Allow use of chroot() + CAP_SYS_CHROOT = Cap(18) + + // Allow ptrace() of any process + CAP_SYS_PTRACE = Cap(19) + + // Allow configuration of process accounting + CAP_SYS_PACCT = Cap(20) + + // Allow configuration of the secure attention key + // Allow administration of the random device + // Allow examination and configuration of disk quotas + // Allow setting the domainname + // Allow setting the hostname + // Allow calling bdflush() + // Allow mount() and umount(), setting up new smb connection + // Allow some autofs root ioctls + // Allow nfsservctl + // Allow VM86_REQUEST_IRQ + // Allow to read/write pci config on alpha + // Allow irix_prctl on mips (setstacksize) + // Allow flushing all cache on m68k (sys_cacheflush) + // Allow removing semaphores + // Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores + // and shared memory + // Allow locking/unlocking of shared memory segment + // Allow turning swap on/off + // Allow forged pids on socket credentials passing + // Allow setting readahead and flushing buffers on block devices + // Allow setting geometry in floppy driver + // Allow turning DMA on/off in xd driver + // Allow administration of md devices (mostly the above, but some + // extra ioctls) + // Allow tuning the ide driver + // Allow access to the nvram device + // Allow administration of apm_bios, serial and bttv (TV) device + // Allow manufacturer commands in isdn CAPI support driver + // Allow reading 
non-standardized portions of pci configuration space + // Allow DDI debug ioctl on sbpcd driver + // Allow setting up serial ports + // Allow sending raw qic-117 commands + // Allow enabling/disabling tagged queuing on SCSI controllers and sending + // arbitrary SCSI commands + // Allow setting encryption key on loopback filesystem + // Allow setting zone reclaim policy + CAP_SYS_ADMIN = Cap(21) + + // Allow use of reboot() + CAP_SYS_BOOT = Cap(22) + + // Allow raising priority and setting priority on other (different + // UID) processes + // Allow use of FIFO and round-robin (realtime) scheduling on own + // processes and setting the scheduling algorithm used by another + // process. + // Allow setting cpu affinity on other processes + CAP_SYS_NICE = Cap(23) + + // Override resource limits. Set resource limits. + // Override quota limits. + // Override reserved space on ext2 filesystem + // Modify data journaling mode on ext3 filesystem (uses journaling + // resources) + // NOTE: ext2 honors fsuid when checking for resource overrides, so + // you can override using fsuid too + // Override size restrictions on IPC message queues + // Allow more than 64hz interrupts from the real-time clock + // Override max number of consoles on console allocation + // Override max number of keymaps + CAP_SYS_RESOURCE = Cap(24) + + // Allow manipulation of system clock + // Allow irix_stime on mips + // Allow setting the real-time clock + CAP_SYS_TIME = Cap(25) + + // Allow configuration of tty devices + // Allow vhangup() of tty + CAP_SYS_TTY_CONFIG = Cap(26) + + // Allow the privileged aspects of mknod() + CAP_MKNOD = Cap(27) + + // Allow taking of leases on files + CAP_LEASE = Cap(28) + + CAP_AUDIT_WRITE = Cap(29) + CAP_AUDIT_CONTROL = Cap(30) + CAP_SETFCAP = Cap(31) + + // Override MAC access. + // The base kernel enforces no MAC policy. 
+ // An LSM may enforce a MAC policy, and if it does and it chooses + // to implement capability based overrides of that policy, this is + // the capability it should use to do so. + CAP_MAC_OVERRIDE = Cap(32) + + // Allow MAC configuration or state changes. + // The base kernel requires no MAC configuration. + // An LSM may enforce a MAC policy, and if it does and it chooses + // to implement capability based checks on modifications to that + // policy or the data required to maintain it, this is the + // capability it should use to do so. + CAP_MAC_ADMIN = Cap(33) + + // Allow configuring the kernel's syslog (printk behaviour) + CAP_SYSLOG = Cap(34) + + // Allow triggering something that will wake the system + CAP_WAKE_ALARM = Cap(35) + + // Allow preventing system suspends + CAP_BLOCK_SUSPEND = Cap(36) + + // Allow reading audit messages from the kernel + CAP_AUDIT_READ = Cap(37) +) + +var ( + // Highest valid capability of the running kernel. + CAP_LAST_CAP = Cap(63) + + capUpperMask = ^uint32(0) +) diff --git a/vendor/github.com/syndtr/gocapability/capability/enum_gen.go b/vendor/github.com/syndtr/gocapability/capability/enum_gen.go new file mode 100644 index 0000000..b9e6d2d --- /dev/null +++ b/vendor/github.com/syndtr/gocapability/capability/enum_gen.go @@ -0,0 +1,129 @@ +// generated file; DO NOT EDIT - use go generate in directory with source + +package capability + +func (c Cap) String() string { + switch c { + case CAP_CHOWN: + return "chown" + case CAP_DAC_OVERRIDE: + return "dac_override" + case CAP_DAC_READ_SEARCH: + return "dac_read_search" + case CAP_FOWNER: + return "fowner" + case CAP_FSETID: + return "fsetid" + case CAP_KILL: + return "kill" + case CAP_SETGID: + return "setgid" + case CAP_SETUID: + return "setuid" + case CAP_SETPCAP: + return "setpcap" + case CAP_LINUX_IMMUTABLE: + return "linux_immutable" + case CAP_NET_BIND_SERVICE: + return "net_bind_service" + case CAP_NET_BROADCAST: + return "net_broadcast" + case CAP_NET_ADMIN: + return 
"net_admin" + case CAP_NET_RAW: + return "net_raw" + case CAP_IPC_LOCK: + return "ipc_lock" + case CAP_IPC_OWNER: + return "ipc_owner" + case CAP_SYS_MODULE: + return "sys_module" + case CAP_SYS_RAWIO: + return "sys_rawio" + case CAP_SYS_CHROOT: + return "sys_chroot" + case CAP_SYS_PTRACE: + return "sys_ptrace" + case CAP_SYS_PACCT: + return "sys_pacct" + case CAP_SYS_ADMIN: + return "sys_admin" + case CAP_SYS_BOOT: + return "sys_boot" + case CAP_SYS_NICE: + return "sys_nice" + case CAP_SYS_RESOURCE: + return "sys_resource" + case CAP_SYS_TIME: + return "sys_time" + case CAP_SYS_TTY_CONFIG: + return "sys_tty_config" + case CAP_MKNOD: + return "mknod" + case CAP_LEASE: + return "lease" + case CAP_AUDIT_WRITE: + return "audit_write" + case CAP_AUDIT_CONTROL: + return "audit_control" + case CAP_SETFCAP: + return "setfcap" + case CAP_MAC_OVERRIDE: + return "mac_override" + case CAP_MAC_ADMIN: + return "mac_admin" + case CAP_SYSLOG: + return "syslog" + case CAP_WAKE_ALARM: + return "wake_alarm" + case CAP_BLOCK_SUSPEND: + return "block_suspend" + case CAP_AUDIT_READ: + return "audit_read" + } + return "unknown" +} + +// List returns list of all supported capabilities +func List() []Cap { + return []Cap{ + CAP_CHOWN, + CAP_DAC_OVERRIDE, + CAP_DAC_READ_SEARCH, + CAP_FOWNER, + CAP_FSETID, + CAP_KILL, + CAP_SETGID, + CAP_SETUID, + CAP_SETPCAP, + CAP_LINUX_IMMUTABLE, + CAP_NET_BIND_SERVICE, + CAP_NET_BROADCAST, + CAP_NET_ADMIN, + CAP_NET_RAW, + CAP_IPC_LOCK, + CAP_IPC_OWNER, + CAP_SYS_MODULE, + CAP_SYS_RAWIO, + CAP_SYS_CHROOT, + CAP_SYS_PTRACE, + CAP_SYS_PACCT, + CAP_SYS_ADMIN, + CAP_SYS_BOOT, + CAP_SYS_NICE, + CAP_SYS_RESOURCE, + CAP_SYS_TIME, + CAP_SYS_TTY_CONFIG, + CAP_MKNOD, + CAP_LEASE, + CAP_AUDIT_WRITE, + CAP_AUDIT_CONTROL, + CAP_SETFCAP, + CAP_MAC_OVERRIDE, + CAP_MAC_ADMIN, + CAP_SYSLOG, + CAP_WAKE_ALARM, + CAP_BLOCK_SUSPEND, + CAP_AUDIT_READ, + } +} diff --git a/vendor/github.com/syndtr/gocapability/capability/syscall_linux.go 
b/vendor/github.com/syndtr/gocapability/capability/syscall_linux.go new file mode 100644 index 0000000..3d2bf69 --- /dev/null +++ b/vendor/github.com/syndtr/gocapability/capability/syscall_linux.go @@ -0,0 +1,161 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package capability + +import ( + "syscall" + "unsafe" +) + +type capHeader struct { + version uint32 + pid int32 +} + +type capData struct { + effective uint32 + permitted uint32 + inheritable uint32 +} + +func capget(hdr *capHeader, data *capData) (err error) { + _, _, e1 := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(hdr)), uintptr(unsafe.Pointer(data)), 0) + if e1 != 0 { + err = e1 + } + return +} + +func capset(hdr *capHeader, data *capData) (err error) { + _, _, e1 := syscall.Syscall(syscall.SYS_CAPSET, uintptr(unsafe.Pointer(hdr)), uintptr(unsafe.Pointer(data)), 0) + if e1 != 0 { + err = e1 + } + return +} + +// not yet in syscall +const ( + pr_CAP_AMBIENT = 47 + pr_CAP_AMBIENT_IS_SET = uintptr(1) + pr_CAP_AMBIENT_RAISE = uintptr(2) + pr_CAP_AMBIENT_LOWER = uintptr(3) + pr_CAP_AMBIENT_CLEAR_ALL = uintptr(4) +) + +func prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) { + _, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) + if e1 != 0 { + err = e1 + } + return +} + +const ( + vfsXattrName = "security.capability" + + vfsCapVerMask = 0xff000000 + vfsCapVer1 = 0x01000000 + vfsCapVer2 = 0x02000000 + + vfsCapFlagMask = ^vfsCapVerMask + vfsCapFlageffective = 0x000001 + + vfscapDataSizeV1 = 4 * (1 + 2*1) + vfscapDataSizeV2 = 4 * (1 + 2*2) +) + +type vfscapData struct { + magic uint32 + data [2]struct { + permitted uint32 + inheritable uint32 + } + effective [2]uint32 + version int8 +} + +var ( + _vfsXattrName *byte +) + +func init() { + _vfsXattrName, _ = syscall.BytePtrFromString(vfsXattrName) +} + +// getVfsCap reads the security.capability extended attribute of path into +// dest and derives dest.version and dest.effective from it. When the +// attribute is absent (ENODATA) only dest.version is set to 2 and nil is +// returned; any other getxattr error is returned unchanged, and an attribute +// with an unknown version or unexpected size yields EINVAL. +func
getVfsCap(path string, dest *vfscapData) (err error) { + var _p0 *byte + _p0, err = syscall.BytePtrFromString(path) + if err != nil { + return + } + r0, _, e1 := syscall.Syscall6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_vfsXattrName)), uintptr(unsafe.Pointer(dest)), vfscapDataSizeV2, 0, 0) + if e1 != 0 { + if e1 == syscall.ENODATA { + dest.version = 2 + return + } + // Return the real errno. Falling through would let the magic/size + // checks below mask it (typically with EINVAL). + return e1 + } + switch dest.magic & vfsCapVerMask { + case vfsCapVer1: + dest.version = 1 + if r0 != vfscapDataSizeV1 { + return syscall.EINVAL + } + dest.data[1].permitted = 0 + dest.data[1].inheritable = 0 + case vfsCapVer2: + dest.version = 2 + if r0 != vfscapDataSizeV2 { + return syscall.EINVAL + } + default: + return syscall.EINVAL + } + if dest.magic&vfsCapFlageffective != 0 { + dest.effective[0] = dest.data[0].permitted | dest.data[0].inheritable + dest.effective[1] = dest.data[1].permitted | dest.data[1].inheritable + } else { + dest.effective[0] = 0 + dest.effective[1] = 0 + } + return +} + +func setVfsCap(path string, data *vfscapData) (err error) { + var _p0 *byte + _p0, err = syscall.BytePtrFromString(path) + if err != nil { + return + } + var size uintptr + if data.version == 1 { + data.magic = vfsCapVer1 + size = vfscapDataSizeV1 + } else if data.version == 2 { + data.magic = vfsCapVer2 + if data.effective[0] != 0 || data.effective[1] != 0 { + data.magic |= vfsCapFlageffective + } + size = vfscapDataSizeV2 + } else { + return syscall.EINVAL + } + _, _, e1 := syscall.Syscall6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_vfsXattrName)), uintptr(unsafe.Pointer(data)), size, 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/vendor/github.com/urfave/cli/LICENSE b/vendor/github.com/urfave/cli/LICENSE new file mode 100644 index 0000000..42a597e --- /dev/null +++ b/vendor/github.com/urfave/cli/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Jeremy Saenz & Contributors + +Permission is hereby granted, free of
charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/urfave/cli/README.md b/vendor/github.com/urfave/cli/README.md new file mode 100644 index 0000000..2bbbd8e --- /dev/null +++ b/vendor/github.com/urfave/cli/README.md @@ -0,0 +1,1381 @@ +cli +=== + +[![Build Status](https://travis-ci.org/urfave/cli.svg?branch=master)](https://travis-ci.org/urfave/cli) +[![Windows Build Status](https://ci.appveyor.com/api/projects/status/rtgk5xufi932pb2v?svg=true)](https://ci.appveyor.com/project/urfave/cli) +[![GoDoc](https://godoc.org/github.com/urfave/cli?status.svg)](https://godoc.org/github.com/urfave/cli) +[![codebeat](https://codebeat.co/badges/0a8f30aa-f975-404b-b878-5fab3ae1cc5f)](https://codebeat.co/projects/github-com-urfave-cli) +[![Go Report Card](https://goreportcard.com/badge/urfave/cli)](https://goreportcard.com/report/urfave/cli) +[![top level coverage](https://gocover.io/_badge/github.com/urfave/cli?0 "top level coverage")](http://gocover.io/github.com/urfave/cli) / +[![altsrc 
coverage](https://gocover.io/_badge/github.com/urfave/cli/altsrc?0 "altsrc coverage")](http://gocover.io/github.com/urfave/cli/altsrc) + +**Notice:** This is the library formerly known as +`github.com/codegangsta/cli` -- Github will automatically redirect requests +to this repository, but we recommend updating your references for clarity. + +cli is a simple, fast, and fun package for building command line apps in Go. The +goal is to enable developers to write fast and distributable command line +applications in an expressive way. + + + +- [Overview](#overview) +- [Installation](#installation) + * [Supported platforms](#supported-platforms) + * [Using the `v2` branch](#using-the-v2-branch) + * [Pinning to the `v1` releases](#pinning-to-the-v1-releases) +- [Getting Started](#getting-started) +- [Examples](#examples) + * [Arguments](#arguments) + * [Flags](#flags) + + [Placeholder Values](#placeholder-values) + + [Alternate Names](#alternate-names) + + [Ordering](#ordering) + + [Values from the Environment](#values-from-the-environment) + + [Values from alternate input sources (YAML, TOML, and others)](#values-from-alternate-input-sources-yaml-toml-and-others) + * [Subcommands](#subcommands) + * [Subcommands categories](#subcommands-categories) + * [Exit code](#exit-code) + * [Bash Completion](#bash-completion) + + [Enabling](#enabling) + + [Distribution](#distribution) + + [Customization](#customization) + * [Generated Help Text](#generated-help-text) + + [Customization](#customization-1) + * [Version Flag](#version-flag) + + [Customization](#customization-2) + + [Full API Example](#full-api-example) +- [Contribution Guidelines](#contribution-guidelines) + + + +## Overview + +Command line apps are usually so tiny that there is absolutely no reason why +your code should *not* be self-documenting. Things like generating help text and +parsing command flags/options should not hinder productivity when writing a +command line app. 
+ +**This is where cli comes into play.** cli makes command line programming fun, +organized, and expressive! + +## Installation + +Make sure you have a working Go environment. Go version 1.2+ is supported. [See +the install instructions for Go](http://golang.org/doc/install.html). + +To install cli, simply run: +``` +$ go get github.com/urfave/cli +``` + +Make sure your `PATH` includes the `$GOPATH/bin` directory so your commands can +be easily used: +``` +export PATH=$PATH:$GOPATH/bin +``` + +### Supported platforms + +cli is tested against multiple versions of Go on Linux, and against the latest +released version of Go on OS X and Windows. For full details, see +[`./.travis.yml`](./.travis.yml) and [`./appveyor.yml`](./appveyor.yml). + +### Using the `v2` branch + +**Warning**: The `v2` branch is currently unreleased and considered unstable. + +There is currently a long-lived branch named `v2` that is intended to land as +the new `master` branch once development there has settled down. The current +`master` branch (mirrored as `v1`) is being manually merged into `v2` on +an irregular human-based schedule, but generally if one wants to "upgrade" to +`v2` *now* and accept the volatility (read: "awesomeness") that comes along with +that, please use whatever version pinning of your preference, such as via +`gopkg.in`: + +``` +$ go get gopkg.in/urfave/cli.v2 +``` + +``` go +... +import ( + "gopkg.in/urfave/cli.v2" // imports as package "cli" +) +... +``` + +### Pinning to the `v1` releases + +Similarly to the section above describing use of the `v2` branch, if one wants +to avoid any unexpected compatibility pains once `v2` becomes `master`, then +pinning to `v1` is an acceptable option, e.g.: + +``` +$ go get gopkg.in/urfave/cli.v1 +``` + +``` go +... +import ( + "gopkg.in/urfave/cli.v1" // imports as package "cli" +) +... +``` + +This will pull the latest tagged `v1` release (e.g. `v1.18.1` at the time of writing). 
+ +## Getting Started + +One of the philosophies behind cli is that an API should be playful and full of +discovery. So a cli app can be as little as one line of code in `main()`. + + +``` go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + cli.NewApp().Run(os.Args) +} +``` + +This app will run and show help text, but is not very useful. Let's give an +action to execute and some help documentation: + + +``` go +package main + +import ( + "fmt" + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + app.Name = "boom" + app.Usage = "make an explosive entrance" + app.Action = func(c *cli.Context) error { + fmt.Println("boom! I say!") + return nil + } + + app.Run(os.Args) +} +``` + +Running this already gives you a ton of functionality, plus support for things +like subcommands and flags, which are covered below. + +## Examples + +Being a programmer can be a lonely job. Thankfully by the power of automation +that is not the case! Let's create a greeter app to fend off our demons of +loneliness! + +Start by creating a directory named `greet`, and within it, add a file, +`greet.go` with the following code in it: + + +``` go +package main + +import ( + "fmt" + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + app.Name = "greet" + app.Usage = "fight the loneliness!" + app.Action = func(c *cli.Context) error { + fmt.Println("Hello friend!") + return nil + } + + app.Run(os.Args) +} +``` + +Install our command to the `$GOPATH/bin` directory: + +``` +$ go install +``` + +Finally run our new command: + +``` +$ greet +Hello friend! +``` + +cli also generates neat help text: + +``` +$ greet help +NAME: + greet - fight the loneliness! + +USAGE: + greet [global options] command [command options] [arguments...] 
+ +VERSION: + 0.0.0 + +COMMANDS: + help, h Shows a list of commands or help for one command + +GLOBAL OPTIONS + --version Shows version information +``` + +### Arguments + +You can lookup arguments by calling the `Args` function on `cli.Context`, e.g.: + + +``` go +package main + +import ( + "fmt" + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Action = func(c *cli.Context) error { + fmt.Printf("Hello %q", c.Args().Get(0)) + return nil + } + + app.Run(os.Args) +} +``` + +### Flags + +Setting and querying flags is simple. + + +``` go +package main + +import ( + "fmt" + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Flags = []cli.Flag { + cli.StringFlag{ + Name: "lang", + Value: "english", + Usage: "language for the greeting", + }, + } + + app.Action = func(c *cli.Context) error { + name := "Nefertiti" + if c.NArg() > 0 { + name = c.Args().Get(0) + } + if c.String("lang") == "spanish" { + fmt.Println("Hola", name) + } else { + fmt.Println("Hello", name) + } + return nil + } + + app.Run(os.Args) +} +``` + +You can also set a destination variable for a flag, to which the content will be +scanned. + + +``` go +package main + +import ( + "os" + "fmt" + + "github.com/urfave/cli" +) + +func main() { + var language string + + app := cli.NewApp() + + app.Flags = []cli.Flag { + cli.StringFlag{ + Name: "lang", + Value: "english", + Usage: "language for the greeting", + Destination: &language, + }, + } + + app.Action = func(c *cli.Context) error { + name := "someone" + if c.NArg() > 0 { + name = c.Args()[0] + } + if language == "spanish" { + fmt.Println("Hola", name) + } else { + fmt.Println("Hello", name) + } + return nil + } + + app.Run(os.Args) +} +``` + +See full list of flags at http://godoc.org/github.com/urfave/cli + +#### Placeholder Values + +Sometimes it's useful to specify a flag's value within the usage string itself. +Such placeholders are indicated with back quotes. 
+ +For example this: + + +```go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Flags = []cli.Flag{ + cli.StringFlag{ + Name: "config, c", + Usage: "Load configuration from `FILE`", + }, + } + + app.Run(os.Args) +} +``` + +Will result in help output like: + +``` +--config FILE, -c FILE Load configuration from FILE +``` + +Note that only the first placeholder is used. Subsequent back-quoted words will +be left as-is. + +#### Alternate Names + +You can set alternate (or short) names for flags by providing a comma-delimited +list for the `Name`. e.g. + + +``` go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Flags = []cli.Flag { + cli.StringFlag{ + Name: "lang, l", + Value: "english", + Usage: "language for the greeting", + }, + } + + app.Run(os.Args) +} +``` + +That flag can then be set with `--lang spanish` or `-l spanish`. Note that +giving two different forms of the same flag in the same command invocation is an +error. + +#### Ordering + +Flags for the application and commands are shown in the order they are defined. +However, it's possible to sort them from outside this library by using `FlagsByName` +or `CommandsByName` with `sort`. 
+ +For example this: + + +``` go +package main + +import ( + "os" + "sort" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Flags = []cli.Flag { + cli.StringFlag{ + Name: "lang, l", + Value: "english", + Usage: "Language for the greeting", + }, + cli.StringFlag{ + Name: "config, c", + Usage: "Load configuration from `FILE`", + }, + } + + app.Commands = []cli.Command{ + { + Name: "complete", + Aliases: []string{"c"}, + Usage: "complete a task on the list", + Action: func(c *cli.Context) error { + return nil + }, + }, + { + Name: "add", + Aliases: []string{"a"}, + Usage: "add a task to the list", + Action: func(c *cli.Context) error { + return nil + }, + }, + } + + sort.Sort(cli.FlagsByName(app.Flags)) + sort.Sort(cli.CommandsByName(app.Commands)) + + app.Run(os.Args) +} +``` + +Will result in help output like: + +``` +--config FILE, -c FILE Load configuration from FILE +--lang value, -l value Language for the greeting (default: "english") +``` + +#### Values from the Environment + +You can also have the default value set from the environment via `EnvVar`. e.g. + + +``` go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Flags = []cli.Flag { + cli.StringFlag{ + Name: "lang, l", + Value: "english", + Usage: "language for the greeting", + EnvVar: "APP_LANG", + }, + } + + app.Run(os.Args) +} +``` + +The `EnvVar` may also be given as a comma-delimited "cascade", where the first +environment variable that resolves is used as the default. 
+ + +``` go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Flags = []cli.Flag { + cli.StringFlag{ + Name: "lang, l", + Value: "english", + Usage: "language for the greeting", + EnvVar: "LEGACY_COMPAT_LANG,APP_LANG,LANG", + }, + } + + app.Run(os.Args) +} +``` + +#### Values from alternate input sources (YAML, TOML, and others) + +There is a separate package altsrc that adds support for getting flag values +from other file input sources. + +Currently supported input source formats: +* YAML +* TOML + +In order to get values for a flag from an alternate input source the following +code would be added to wrap an existing cli.Flag like below: + +``` go + altsrc.NewIntFlag(cli.IntFlag{Name: "test"}) +``` + +Initialization must also occur for these flags. Below is an example of +initializing them to get data from a YAML file. + +``` go + command.Before = altsrc.InitInputSourceWithContext(command.Flags, NewYamlSourceFromFlagFunc("load")) +``` + +The code above will use the "load" string as a flag name to get the file name of +a yaml file from the cli.Context. It will then use that file name to initialize +the yaml input source for any flags that are defined on that command. As a note +the "load" flag used would also have to be defined on the command flags in order +for this code snippet to work. + +Currently only the above-specified formats are supported but developers can +add support for other input sources by implementing the +altsrc.InputSourceContext for their given sources. 
+ +Here is a more complete sample of a command using YAML support: + + +``` go +package notmain + +import ( + "fmt" + "os" + + "github.com/urfave/cli" + "github.com/urfave/cli/altsrc" +) + +func main() { + app := cli.NewApp() + + flags := []cli.Flag{ + altsrc.NewIntFlag(cli.IntFlag{Name: "test"}), + cli.StringFlag{Name: "load"}, + } + + app.Action = func(c *cli.Context) error { + fmt.Println("yaml ist rad") + return nil + } + + app.Before = altsrc.InitInputSourceWithContext(flags, altsrc.NewYamlSourceFromFlagFunc("load")) + app.Flags = flags + + app.Run(os.Args) +} +``` + +### Subcommands + +Subcommands can be defined for a more git-like command line app. + + +```go +package main + +import ( + "fmt" + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Commands = []cli.Command{ + { + Name: "add", + Aliases: []string{"a"}, + Usage: "add a task to the list", + Action: func(c *cli.Context) error { + fmt.Println("added task: ", c.Args().First()) + return nil + }, + }, + { + Name: "complete", + Aliases: []string{"c"}, + Usage: "complete a task on the list", + Action: func(c *cli.Context) error { + fmt.Println("completed task: ", c.Args().First()) + return nil + }, + }, + { + Name: "template", + Aliases: []string{"t"}, + Usage: "options for task templates", + Subcommands: []cli.Command{ + { + Name: "add", + Usage: "add a new template", + Action: func(c *cli.Context) error { + fmt.Println("new task template: ", c.Args().First()) + return nil + }, + }, + { + Name: "remove", + Usage: "remove an existing template", + Action: func(c *cli.Context) error { + fmt.Println("removed task template: ", c.Args().First()) + return nil + }, + }, + }, + }, + } + + app.Run(os.Args) +} +``` + +### Subcommands categories + +For additional organization in apps that have many subcommands, you can +associate a category for each command to group them together in the help +output. + +E.g. 
+ +```go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + + app.Commands = []cli.Command{ + { + Name: "noop", + }, + { + Name: "add", + Category: "template", + }, + { + Name: "remove", + Category: "template", + }, + } + + app.Run(os.Args) +} +``` + +Will include: + +``` +COMMANDS: + noop + + Template actions: + add + remove +``` + +### Exit code + +Calling `App.Run` will not automatically call `os.Exit`, which means that by +default the exit code will "fall through" to being `0`. An explicit exit code +may be set by returning a non-nil error that fulfills `cli.ExitCoder`, *or* a +`cli.MultiError` that includes an error that fulfills `cli.ExitCoder`, e.g.: + +``` go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + app := cli.NewApp() + app.Flags = []cli.Flag{ + cli.BoolTFlag{ + Name: "ginger-crouton", + Usage: "is it in the soup?", + }, + } + app.Action = func(ctx *cli.Context) error { + if !ctx.Bool("ginger-crouton") { + return cli.NewExitError("it is not in the soup", 86) + } + return nil + } + + app.Run(os.Args) +} +``` + +### Bash Completion + +You can enable completion commands by setting the `EnableBashCompletion` +flag on the `App` object. By default, this setting will only auto-complete to +show an app's subcommands, but you can write your own completion methods for +the App or its subcommands. 
+ + +``` go +package main + +import ( + "fmt" + "os" + + "github.com/urfave/cli" +) + +func main() { + tasks := []string{"cook", "clean", "laundry", "eat", "sleep", "code"} + + app := cli.NewApp() + app.EnableBashCompletion = true + app.Commands = []cli.Command{ + { + Name: "complete", + Aliases: []string{"c"}, + Usage: "complete a task on the list", + Action: func(c *cli.Context) error { + fmt.Println("completed task: ", c.Args().First()) + return nil + }, + BashComplete: func(c *cli.Context) { + // This will complete if no args are passed + if c.NArg() > 0 { + return + } + for _, t := range tasks { + fmt.Println(t) + } + }, + }, + } + + app.Run(os.Args) +} +``` + +#### Enabling + +Source the `autocomplete/bash_autocomplete` file in your `.bashrc` file while +setting the `PROG` variable to the name of your program: + +`PROG=myprogram source /.../cli/autocomplete/bash_autocomplete` + +#### Distribution + +Copy `autocomplete/bash_autocomplete` into `/etc/bash_completion.d/` and rename +it to the name of the program you wish to add autocomplete support for (or +automatically install it there if you are distributing a package). Don't forget +to source the file to make it active in the current shell. + +``` +sudo cp autocomplete/bash_autocomplete /etc/bash_completion.d/<myprogram> +source /etc/bash_completion.d/<myprogram> +``` + +Alternatively, you can just document that users should source the generic +`autocomplete/bash_autocomplete` in their bash configuration with `$PROG` set +to the name of their program (as above). 
+ +#### Customization + +The default bash completion flag (`--generate-bash-completion`) is defined as +`cli.BashCompletionFlag`, and may be redefined if desired, e.g.: + + +``` go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + cli.BashCompletionFlag = cli.BoolFlag{ + Name: "compgen", + Hidden: true, + } + + app := cli.NewApp() + app.EnableBashCompletion = true + app.Commands = []cli.Command{ + { + Name: "wat", + }, + } + app.Run(os.Args) +} +``` + +### Generated Help Text + +The default help flag (`-h/--help`) is defined as `cli.HelpFlag` and is checked +by the cli internals in order to print generated help text for the app, command, +or subcommand, and break execution. + +#### Customization + +All of the help text generation may be customized, and at multiple levels. The +templates are exposed as variables `AppHelpTemplate`, `CommandHelpTemplate`, and +`SubcommandHelpTemplate` which may be reassigned or augmented, and full override +is possible by assigning a compatible func to the `cli.HelpPrinter` variable, +e.g.: + + +``` go +package main + +import ( + "fmt" + "io" + "os" + + "github.com/urfave/cli" +) + +func main() { + // EXAMPLE: Append to an existing template + cli.AppHelpTemplate = fmt.Sprintf(`%s + +WEBSITE: http://awesometown.example.com + +SUPPORT: support@awesometown.example.com + +`, cli.AppHelpTemplate) + + // EXAMPLE: Override a template + cli.AppHelpTemplate = `NAME: + {{.Name}} - {{.Usage}} +USAGE: + {{.HelpName}} {{if .VisibleFlags}}[global options]{{end}}{{if .Commands}} command [command options]{{end}} {{if .ArgsUsage}}{{.ArgsUsage}}{{else}}[arguments...]{{end}} + {{if len .Authors}} +AUTHOR: + {{range .Authors}}{{ . 
}}{{end}} + {{end}}{{if .Commands}} +COMMANDS: +{{range .Commands}}{{if not .HideHelp}} {{join .Names ", "}}{{ "\t"}}{{.Usage}}{{ "\n" }}{{end}}{{end}}{{end}}{{if .VisibleFlags}} +GLOBAL OPTIONS: + {{range .VisibleFlags}}{{.}} + {{end}}{{end}}{{if .Copyright }} +COPYRIGHT: + {{.Copyright}} + {{end}}{{if .Version}} +VERSION: + {{.Version}} + {{end}} +` + + // EXAMPLE: Replace the `HelpPrinter` func + cli.HelpPrinter = func(w io.Writer, templ string, data interface{}) { + fmt.Println("Ha HA. I pwnd the help!!1") + } + + cli.NewApp().Run(os.Args) +} +``` + +The default flag may be customized to something other than `-h/--help` by +setting `cli.HelpFlag`, e.g.: + + +``` go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + cli.HelpFlag = cli.BoolFlag{ + Name: "halp, haaaaalp", + Usage: "HALP", + EnvVar: "SHOW_HALP,HALPPLZ", + } + + cli.NewApp().Run(os.Args) +} +``` + +### Version Flag + +The default version flag (`-v/--version`) is defined as `cli.VersionFlag`, which +is checked by the cli internals in order to print the `App.Version` via +`cli.VersionPrinter` and break execution. 
+ +#### Customization + +The default flag may be customized to something other than `-v/--version` by +setting `cli.VersionFlag`, e.g.: + + +``` go +package main + +import ( + "os" + + "github.com/urfave/cli" +) + +func main() { + cli.VersionFlag = cli.BoolFlag{ + Name: "print-version, V", + Usage: "print only the version", + } + + app := cli.NewApp() + app.Name = "partay" + app.Version = "19.99.0" + app.Run(os.Args) +} +``` + +Alternatively, the version printer at `cli.VersionPrinter` may be overridden, e.g.: + + +``` go +package main + +import ( + "fmt" + "os" + + "github.com/urfave/cli" +) + +var ( + Revision = "fafafaf" +) + +func main() { + cli.VersionPrinter = func(c *cli.Context) { + fmt.Printf("version=%s revision=%s\n", c.App.Version, Revision) + } + + app := cli.NewApp() + app.Name = "partay" + app.Version = "19.99.0" + app.Run(os.Args) +} +``` + +#### Full API Example + +**Notice**: This is a contrived (functioning) example meant strictly for API +demonstration purposes. Use of one's imagination is encouraged. 
+ + +``` go +package main + +import ( + "errors" + "flag" + "fmt" + "io" + "io/ioutil" + "os" + "time" + + "github.com/urfave/cli" +) + +func init() { + cli.AppHelpTemplate += "\nCUSTOMIZED: you bet ur muffins\n" + cli.CommandHelpTemplate += "\nYMMV\n" + cli.SubcommandHelpTemplate += "\nor something\n" + + cli.HelpFlag = cli.BoolFlag{Name: "halp"} + cli.BashCompletionFlag = cli.BoolFlag{Name: "compgen", Hidden: true} + cli.VersionFlag = cli.BoolFlag{Name: "print-version, V"} + + cli.HelpPrinter = func(w io.Writer, templ string, data interface{}) { + fmt.Fprintf(w, "best of luck to you\n") + } + cli.VersionPrinter = func(c *cli.Context) { + fmt.Fprintf(c.App.Writer, "version=%s\n", c.App.Version) + } + cli.OsExiter = func(c int) { + fmt.Fprintf(cli.ErrWriter, "refusing to exit %d\n", c) + } + cli.ErrWriter = ioutil.Discard + cli.FlagStringer = func(fl cli.Flag) string { + return fmt.Sprintf("\t\t%s", fl.GetName()) + } +} + +type hexWriter struct{} + +func (w *hexWriter) Write(p []byte) (int, error) { + for _, b := range p { + fmt.Printf("%x", b) + } + fmt.Printf("\n") + + return len(p), nil +} + +type genericType struct{ + s string +} + +func (g *genericType) Set(value string) error { + g.s = value + return nil +} + +func (g *genericType) String() string { + return g.s +} + +func main() { + app := cli.NewApp() + app.Name = "kənˈtrīv" + app.Version = "19.99.0" + app.Compiled = time.Now() + app.Authors = []cli.Author{ + cli.Author{ + Name: "Example Human", + Email: "human@example.com", + }, + } + app.Copyright = "(c) 1999 Serious Enterprise" + app.HelpName = "contrive" + app.Usage = "demonstrate available API" + app.UsageText = "contrive - demonstrating the available API" + app.ArgsUsage = "[args and such]" + app.Commands = []cli.Command{ + cli.Command{ + Name: "doo", + Aliases: []string{"do"}, + Category: "motion", + Usage: "do the doo", + UsageText: "doo - does the dooing", + Description: "no really, there is a lot of dooing to be done", + ArgsUsage: "[arrgh]", + 
Flags: []cli.Flag{ + cli.BoolFlag{Name: "forever, forevvarr"}, + }, + Subcommands: cli.Commands{ + cli.Command{ + Name: "wop", + Action: wopAction, + }, + }, + SkipFlagParsing: false, + HideHelp: false, + Hidden: false, + HelpName: "doo!", + BashComplete: func(c *cli.Context) { + fmt.Fprintf(c.App.Writer, "--better\n") + }, + Before: func(c *cli.Context) error { + fmt.Fprintf(c.App.Writer, "brace for impact\n") + return nil + }, + After: func(c *cli.Context) error { + fmt.Fprintf(c.App.Writer, "did we lose anyone?\n") + return nil + }, + Action: func(c *cli.Context) error { + c.Command.FullName() + c.Command.HasName("wop") + c.Command.Names() + c.Command.VisibleFlags() + fmt.Fprintf(c.App.Writer, "dodododododoodododddooooododododooo\n") + if c.Bool("forever") { + c.Command.Run(c) + } + return nil + }, + OnUsageError: func(c *cli.Context, err error, isSubcommand bool) error { + fmt.Fprintf(c.App.Writer, "for shame\n") + return err + }, + }, + } + app.Flags = []cli.Flag{ + cli.BoolFlag{Name: "fancy"}, + cli.BoolTFlag{Name: "fancier"}, + cli.DurationFlag{Name: "howlong, H", Value: time.Second * 3}, + cli.Float64Flag{Name: "howmuch"}, + cli.GenericFlag{Name: "wat", Value: &genericType{}}, + cli.Int64Flag{Name: "longdistance"}, + cli.Int64SliceFlag{Name: "intervals"}, + cli.IntFlag{Name: "distance"}, + cli.IntSliceFlag{Name: "times"}, + cli.StringFlag{Name: "dance-move, d"}, + cli.StringSliceFlag{Name: "names, N"}, + cli.UintFlag{Name: "age"}, + cli.Uint64Flag{Name: "bigage"}, + } + app.EnableBashCompletion = true + app.HideHelp = false + app.HideVersion = false + app.BashComplete = func(c *cli.Context) { + fmt.Fprintf(c.App.Writer, "lipstick\nkiss\nme\nlipstick\nringo\n") + } + app.Before = func(c *cli.Context) error { + fmt.Fprintf(c.App.Writer, "HEEEERE GOES\n") + return nil + } + app.After = func(c *cli.Context) error { + fmt.Fprintf(c.App.Writer, "Phew!\n") + return nil + } + app.CommandNotFound = func(c *cli.Context, command string) { + fmt.Fprintf(c.App.Writer, 
"Thar be no %q here.\n", command) + } + app.OnUsageError = func(c *cli.Context, err error, isSubcommand bool) error { + if isSubcommand { + return err + } + + fmt.Fprintf(c.App.Writer, "WRONG: %#v\n", err) + return nil + } + app.Action = func(c *cli.Context) error { + cli.DefaultAppComplete(c) + cli.HandleExitCoder(errors.New("not an exit coder, though")) + cli.ShowAppHelp(c) + cli.ShowCommandCompletions(c, "nope") + cli.ShowCommandHelp(c, "also-nope") + cli.ShowCompletions(c) + cli.ShowSubcommandHelp(c) + cli.ShowVersion(c) + + categories := c.App.Categories() + categories.AddCommand("sounds", cli.Command{ + Name: "bloop", + }) + + for _, category := range c.App.Categories() { + fmt.Fprintf(c.App.Writer, "%s\n", category.Name) + fmt.Fprintf(c.App.Writer, "%#v\n", category.Commands) + fmt.Fprintf(c.App.Writer, "%#v\n", category.VisibleCommands()) + } + + fmt.Printf("%#v\n", c.App.Command("doo")) + if c.Bool("infinite") { + c.App.Run([]string{"app", "doo", "wop"}) + } + + if c.Bool("forevar") { + c.App.RunAsSubcommand(c) + } + c.App.Setup() + fmt.Printf("%#v\n", c.App.VisibleCategories()) + fmt.Printf("%#v\n", c.App.VisibleCommands()) + fmt.Printf("%#v\n", c.App.VisibleFlags()) + + fmt.Printf("%#v\n", c.Args().First()) + if len(c.Args()) > 0 { + fmt.Printf("%#v\n", c.Args()[1]) + } + fmt.Printf("%#v\n", c.Args().Present()) + fmt.Printf("%#v\n", c.Args().Tail()) + + set := flag.NewFlagSet("contrive", 0) + nc := cli.NewContext(c.App, set, c) + + fmt.Printf("%#v\n", nc.Args()) + fmt.Printf("%#v\n", nc.Bool("nope")) + fmt.Printf("%#v\n", nc.BoolT("nerp")) + fmt.Printf("%#v\n", nc.Duration("howlong")) + fmt.Printf("%#v\n", nc.Float64("hay")) + fmt.Printf("%#v\n", nc.Generic("bloop")) + fmt.Printf("%#v\n", nc.Int64("bonk")) + fmt.Printf("%#v\n", nc.Int64Slice("burnks")) + fmt.Printf("%#v\n", nc.Int("bips")) + fmt.Printf("%#v\n", nc.IntSlice("blups")) + fmt.Printf("%#v\n", nc.String("snurt")) + fmt.Printf("%#v\n", nc.StringSlice("snurkles")) + fmt.Printf("%#v\n", 
nc.Uint("flub")) + fmt.Printf("%#v\n", nc.Uint64("florb")) + fmt.Printf("%#v\n", nc.GlobalBool("global-nope")) + fmt.Printf("%#v\n", nc.GlobalBoolT("global-nerp")) + fmt.Printf("%#v\n", nc.GlobalDuration("global-howlong")) + fmt.Printf("%#v\n", nc.GlobalFloat64("global-hay")) + fmt.Printf("%#v\n", nc.GlobalGeneric("global-bloop")) + fmt.Printf("%#v\n", nc.GlobalInt("global-bips")) + fmt.Printf("%#v\n", nc.GlobalIntSlice("global-blups")) + fmt.Printf("%#v\n", nc.GlobalString("global-snurt")) + fmt.Printf("%#v\n", nc.GlobalStringSlice("global-snurkles")) + + fmt.Printf("%#v\n", nc.FlagNames()) + fmt.Printf("%#v\n", nc.GlobalFlagNames()) + fmt.Printf("%#v\n", nc.GlobalIsSet("wat")) + fmt.Printf("%#v\n", nc.GlobalSet("wat", "nope")) + fmt.Printf("%#v\n", nc.NArg()) + fmt.Printf("%#v\n", nc.NumFlags()) + fmt.Printf("%#v\n", nc.Parent()) + + nc.Set("wat", "also-nope") + + ec := cli.NewExitError("ohwell", 86) + fmt.Fprintf(c.App.Writer, "%d", ec.ExitCode()) + fmt.Printf("made it!\n") + return ec + } + + if os.Getenv("HEXY") != "" { + app.Writer = &hexWriter{} + app.ErrWriter = &hexWriter{} + } + + app.Metadata = map[string]interface{}{ + "layers": "many", + "explicable": false, + "whatever-values": 19.99, + } + + app.Run(os.Args) +} + +func wopAction(c *cli.Context) error { + fmt.Fprintf(c.App.Writer, ":wave: over here, eh\n") + return nil +} +``` + +## Contribution Guidelines + +Feel free to put up a pull request to fix a bug or maybe add a feature. I will +give it a code review and make sure that it does not break backwards +compatibility. If I or any other collaborators agree that it is in line with +the vision of the project, we will work with you to get the code into +a mergeable state and merge it into the master branch. + +If you have contributed something significant to the project, we will most +likely add you as a collaborator. As a collaborator you are given the ability +to merge others pull requests. 
It is very important that new code does not +break existing code, so be careful about what code you do choose to merge. + +If you feel like you have contributed to the project but have not yet been +added as a collaborator, we probably forgot to add you, please open an issue. diff --git a/vendor/github.com/urfave/cli/app.go b/vendor/github.com/urfave/cli/app.go new file mode 100644 index 0000000..51fc45d --- /dev/null +++ b/vendor/github.com/urfave/cli/app.go @@ -0,0 +1,497 @@ +package cli + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sort" + "time" +) + +var ( + changeLogURL = "https://github.com/urfave/cli/blob/master/CHANGELOG.md" + appActionDeprecationURL = fmt.Sprintf("%s#deprecated-cli-app-action-signature", changeLogURL) + runAndExitOnErrorDeprecationURL = fmt.Sprintf("%s#deprecated-cli-app-runandexitonerror", changeLogURL) + + contactSysadmin = "This is an error in the application. Please contact the distributor of this application if this is not you." + + errInvalidActionType = NewExitError("ERROR invalid Action type. "+ + fmt.Sprintf("Must be `func(*Context`)` or `func(*Context) error). %s", contactSysadmin)+ + fmt.Sprintf("See %s", appActionDeprecationURL), 2) +) + +// App is the main structure of a cli application. It is recommended that +// an app be created with the cli.NewApp() function +type App struct { + // The name of the program. Defaults to path.Base(os.Args[0]) + Name string + // Full name of command for help, defaults to Name + HelpName string + // Description of the program. + Usage string + // Text to override the USAGE section of help + UsageText string + // Description of the program argument format. 
+ ArgsUsage string + // Version of the program + Version string + // Description of the program + Description string + // List of commands to execute + Commands []Command + // List of flags to parse + Flags []Flag + // Boolean to enable bash completion commands + EnableBashCompletion bool + // Boolean to hide built-in help command + HideHelp bool + // Boolean to hide built-in version flag and the VERSION section of help + HideVersion bool + // Populate on app startup, only gettable through method Categories() + categories CommandCategories + // An action to execute when the bash-completion flag is set + BashComplete BashCompleteFunc + // An action to execute before any subcommands are run, but after the context is ready + // If a non-nil error is returned, no subcommands are run + Before BeforeFunc + // An action to execute after any subcommands are run, but after the subcommand has finished + // It is run even if Action() panics + After AfterFunc + + // The action to execute when no subcommands are specified + // Expects a `cli.ActionFunc` but will accept the *deprecated* signature of `func(*cli.Context) {}` + // *Note*: support for the deprecated `Action` signature will be removed in a future version + Action interface{} + + // Execute this function if the proper command cannot be found + CommandNotFound CommandNotFoundFunc + // Execute this function if an usage error occurs + OnUsageError OnUsageErrorFunc + // Compilation date + Compiled time.Time + // List of all authors who contributed + Authors []Author + // Copyright of the binary if any + Copyright string + // Name of Author (Note: Use App.Authors, this is deprecated) + Author string + // Email of Author (Note: Use App.Authors, this is deprecated) + Email string + // Writer writer to write output to + Writer io.Writer + // ErrWriter writes error output + ErrWriter io.Writer + // Other custom info + Metadata map[string]interface{} + // Carries a function which returns app specific info. 
+ ExtraInfo func() map[string]string + // CustomAppHelpTemplate the text template for app help topic. + // cli.go uses text/template to render templates. You can + // render custom help text by setting this variable. + CustomAppHelpTemplate string + + didSetup bool +} + +// Tries to find out when this binary was compiled. +// Returns the current time if it fails to find it. +func compileTime() time.Time { + info, err := os.Stat(os.Args[0]) + if err != nil { + return time.Now() + } + return info.ModTime() +} + +// NewApp creates a new cli Application with some reasonable defaults for Name, +// Usage, Version and Action. +func NewApp() *App { + return &App{ + Name: filepath.Base(os.Args[0]), + HelpName: filepath.Base(os.Args[0]), + Usage: "A new cli application", + UsageText: "", + Version: "0.0.0", + BashComplete: DefaultAppComplete, + Action: helpCommand.Action, + Compiled: compileTime(), + Writer: os.Stdout, + } +} + +// Setup runs initialization code to ensure all data structures are ready for +// `Run` or inspection prior to `Run`. It is internally called by `Run`, but +// will return early if setup has already happened. 
+func (a *App) Setup() { + if a.didSetup { + return + } + + a.didSetup = true + + if a.Author != "" || a.Email != "" { + a.Authors = append(a.Authors, Author{Name: a.Author, Email: a.Email}) + } + + newCmds := []Command{} + for _, c := range a.Commands { + if c.HelpName == "" { + c.HelpName = fmt.Sprintf("%s %s", a.HelpName, c.Name) + } + newCmds = append(newCmds, c) + } + a.Commands = newCmds + + if a.Command(helpCommand.Name) == nil && !a.HideHelp { + a.Commands = append(a.Commands, helpCommand) + if (HelpFlag != BoolFlag{}) { + a.appendFlag(HelpFlag) + } + } + + if !a.HideVersion { + a.appendFlag(VersionFlag) + } + + a.categories = CommandCategories{} + for _, command := range a.Commands { + a.categories = a.categories.AddCommand(command.Category, command) + } + sort.Sort(a.categories) + + if a.Metadata == nil { + a.Metadata = make(map[string]interface{}) + } + + if a.Writer == nil { + a.Writer = os.Stdout + } +} + +// Run is the entry point to the cli app. Parses the arguments slice and routes +// to the proper flag/args combination +func (a *App) Run(arguments []string) (err error) { + a.Setup() + + // handle the completion flag separately from the flagset since + // completion could be attempted after a flag, but before its value was put + // on the command line. 
this causes the flagset to interpret the completion + // flag name as the value of the flag before it which is undesirable + // note that we can only do this because the shell autocomplete function + // always appends the completion flag at the end of the command + shellComplete, arguments := checkShellCompleteFlag(a, arguments) + + // parse flags + set, err := flagSet(a.Name, a.Flags) + if err != nil { + return err + } + + set.SetOutput(ioutil.Discard) + err = set.Parse(arguments[1:]) + nerr := normalizeFlags(a.Flags, set) + context := NewContext(a, set, nil) + if nerr != nil { + fmt.Fprintln(a.Writer, nerr) + ShowAppHelp(context) + return nerr + } + context.shellComplete = shellComplete + + if checkCompletions(context) { + return nil + } + + if err != nil { + if a.OnUsageError != nil { + err := a.OnUsageError(context, err, false) + HandleExitCoder(err) + return err + } + fmt.Fprintf(a.Writer, "%s %s\n\n", "Incorrect Usage.", err.Error()) + ShowAppHelp(context) + return err + } + + if !a.HideHelp && checkHelp(context) { + ShowAppHelp(context) + return nil + } + + if !a.HideVersion && checkVersion(context) { + ShowVersion(context) + return nil + } + + if a.After != nil { + defer func() { + if afterErr := a.After(context); afterErr != nil { + if err != nil { + err = NewMultiError(err, afterErr) + } else { + err = afterErr + } + } + }() + } + + if a.Before != nil { + beforeErr := a.Before(context) + if beforeErr != nil { + ShowAppHelp(context) + HandleExitCoder(beforeErr) + err = beforeErr + return err + } + } + + args := context.Args() + if args.Present() { + name := args.First() + c := a.Command(name) + if c != nil { + return c.Run(context) + } + } + + if a.Action == nil { + a.Action = helpCommand.Action + } + + // Run default Action + err = HandleAction(a.Action, context) + + HandleExitCoder(err) + return err +} + +// RunAndExitOnError calls .Run() and exits non-zero if an error was returned +// +// Deprecated: instead you should return an error that fulfills 
cli.ExitCoder +// to cli.App.Run. This will cause the application to exit with the given eror +// code in the cli.ExitCoder +func (a *App) RunAndExitOnError() { + if err := a.Run(os.Args); err != nil { + fmt.Fprintln(a.errWriter(), err) + OsExiter(1) + } +} + +// RunAsSubcommand invokes the subcommand given the context, parses ctx.Args() to +// generate command-specific flags +func (a *App) RunAsSubcommand(ctx *Context) (err error) { + // append help to commands + if len(a.Commands) > 0 { + if a.Command(helpCommand.Name) == nil && !a.HideHelp { + a.Commands = append(a.Commands, helpCommand) + if (HelpFlag != BoolFlag{}) { + a.appendFlag(HelpFlag) + } + } + } + + newCmds := []Command{} + for _, c := range a.Commands { + if c.HelpName == "" { + c.HelpName = fmt.Sprintf("%s %s", a.HelpName, c.Name) + } + newCmds = append(newCmds, c) + } + a.Commands = newCmds + + // parse flags + set, err := flagSet(a.Name, a.Flags) + if err != nil { + return err + } + + set.SetOutput(ioutil.Discard) + err = set.Parse(ctx.Args().Tail()) + nerr := normalizeFlags(a.Flags, set) + context := NewContext(a, set, ctx) + + if nerr != nil { + fmt.Fprintln(a.Writer, nerr) + fmt.Fprintln(a.Writer) + if len(a.Commands) > 0 { + ShowSubcommandHelp(context) + } else { + ShowCommandHelp(ctx, context.Args().First()) + } + return nerr + } + + if checkCompletions(context) { + return nil + } + + if err != nil { + if a.OnUsageError != nil { + err = a.OnUsageError(context, err, true) + HandleExitCoder(err) + return err + } + fmt.Fprintf(a.Writer, "%s %s\n\n", "Incorrect Usage.", err.Error()) + ShowSubcommandHelp(context) + return err + } + + if len(a.Commands) > 0 { + if checkSubcommandHelp(context) { + return nil + } + } else { + if checkCommandHelp(ctx, context.Args().First()) { + return nil + } + } + + if a.After != nil { + defer func() { + afterErr := a.After(context) + if afterErr != nil { + HandleExitCoder(err) + if err != nil { + err = NewMultiError(err, afterErr) + } else { + err = afterErr + } + } 
+ }() + } + + if a.Before != nil { + beforeErr := a.Before(context) + if beforeErr != nil { + HandleExitCoder(beforeErr) + err = beforeErr + return err + } + } + + args := context.Args() + if args.Present() { + name := args.First() + c := a.Command(name) + if c != nil { + return c.Run(context) + } + } + + // Run default Action + err = HandleAction(a.Action, context) + + HandleExitCoder(err) + return err +} + +// Command returns the named command on App. Returns nil if the command does not exist +func (a *App) Command(name string) *Command { + for _, c := range a.Commands { + if c.HasName(name) { + return &c + } + } + + return nil +} + +// Categories returns a slice containing all the categories with the commands they contain +func (a *App) Categories() CommandCategories { + return a.categories +} + +// VisibleCategories returns a slice of categories and commands that are +// Hidden=false +func (a *App) VisibleCategories() []*CommandCategory { + ret := []*CommandCategory{} + for _, category := range a.categories { + if visible := func() *CommandCategory { + for _, command := range category.Commands { + if !command.Hidden { + return category + } + } + return nil + }(); visible != nil { + ret = append(ret, visible) + } + } + return ret +} + +// VisibleCommands returns a slice of the Commands with Hidden=false +func (a *App) VisibleCommands() []Command { + ret := []Command{} + for _, command := range a.Commands { + if !command.Hidden { + ret = append(ret, command) + } + } + return ret +} + +// VisibleFlags returns a slice of the Flags with Hidden=false +func (a *App) VisibleFlags() []Flag { + return visibleFlags(a.Flags) +} + +func (a *App) hasFlag(flag Flag) bool { + for _, f := range a.Flags { + if flag == f { + return true + } + } + + return false +} + +func (a *App) errWriter() io.Writer { + + // When the app ErrWriter is nil use the package level one. 
+ if a.ErrWriter == nil { + return ErrWriter + } + + return a.ErrWriter +} + +func (a *App) appendFlag(flag Flag) { + if !a.hasFlag(flag) { + a.Flags = append(a.Flags, flag) + } +} + +// Author represents someone who has contributed to a cli project. +type Author struct { + Name string // The Authors name + Email string // The Authors email +} + +// String makes Author comply to the Stringer interface, to allow an easy print in the templating process +func (a Author) String() string { + e := "" + if a.Email != "" { + e = " <" + a.Email + ">" + } + + return fmt.Sprintf("%v%v", a.Name, e) +} + +// HandleAction attempts to figure out which Action signature was used. If +// it's an ActionFunc or a func with the legacy signature for Action, the func +// is run! +func HandleAction(action interface{}, context *Context) (err error) { + if a, ok := action.(ActionFunc); ok { + return a(context) + } else if a, ok := action.(func(*Context) error); ok { + return a(context) + } else if a, ok := action.(func(*Context)); ok { // deprecated function signature + a(context) + return nil + } else { + return errInvalidActionType + } +} diff --git a/vendor/github.com/urfave/cli/category.go b/vendor/github.com/urfave/cli/category.go new file mode 100644 index 0000000..1a60550 --- /dev/null +++ b/vendor/github.com/urfave/cli/category.go @@ -0,0 +1,44 @@ +package cli + +// CommandCategories is a slice of *CommandCategory. +type CommandCategories []*CommandCategory + +// CommandCategory is a category containing commands. +type CommandCategory struct { + Name string + Commands Commands +} + +func (c CommandCategories) Less(i, j int) bool { + return c[i].Name < c[j].Name +} + +func (c CommandCategories) Len() int { + return len(c) +} + +func (c CommandCategories) Swap(i, j int) { + c[i], c[j] = c[j], c[i] +} + +// AddCommand adds a command to a category. 
+func (c CommandCategories) AddCommand(category string, command Command) CommandCategories { + for _, commandCategory := range c { + if commandCategory.Name == category { + commandCategory.Commands = append(commandCategory.Commands, command) + return c + } + } + return append(c, &CommandCategory{Name: category, Commands: []Command{command}}) +} + +// VisibleCommands returns a slice of the Commands with Hidden=false +func (c *CommandCategory) VisibleCommands() []Command { + ret := []Command{} + for _, command := range c.Commands { + if !command.Hidden { + ret = append(ret, command) + } + } + return ret +} diff --git a/vendor/github.com/urfave/cli/cli.go b/vendor/github.com/urfave/cli/cli.go new file mode 100644 index 0000000..90c07eb --- /dev/null +++ b/vendor/github.com/urfave/cli/cli.go @@ -0,0 +1,22 @@ +// Package cli provides a minimal framework for creating and organizing command line +// Go applications. cli is designed to be easy to understand and write, the most simple +// cli application can be written as follows: +// func main() { +// cli.NewApp().Run(os.Args) +// } +// +// Of course this application does not do much, so let's make this an actual application: +// func main() { +// app := cli.NewApp() +// app.Name = "greet" +// app.Usage = "say a greeting" +// app.Action = func(c *cli.Context) error { +// println("Greetings") +// return nil +// } +// +// app.Run(os.Args) +// } +package cli + +//go:generate python ./generate-flag-types cli -i flag-types.json -o flag_generated.go diff --git a/vendor/github.com/urfave/cli/command.go b/vendor/github.com/urfave/cli/command.go new file mode 100644 index 0000000..23de294 --- /dev/null +++ b/vendor/github.com/urfave/cli/command.go @@ -0,0 +1,304 @@ +package cli + +import ( + "fmt" + "io/ioutil" + "sort" + "strings" +) + +// Command is a subcommand for a cli.App. +type Command struct { + // The name of the command + Name string + // short name of the command. 
Typically one character (deprecated, use `Aliases`) + ShortName string + // A list of aliases for the command + Aliases []string + // A short description of the usage of this command + Usage string + // Custom text to show on USAGE section of help + UsageText string + // A longer explanation of how the command works + Description string + // A short description of the arguments of this command + ArgsUsage string + // The category the command is part of + Category string + // The function to call when checking for bash command completions + BashComplete BashCompleteFunc + // An action to execute before any sub-subcommands are run, but after the context is ready + // If a non-nil error is returned, no sub-subcommands are run + Before BeforeFunc + // An action to execute after any subcommands are run, but after the subcommand has finished + // It is run even if Action() panics + After AfterFunc + // The function to call when this command is invoked + Action interface{} + // TODO: replace `Action: interface{}` with `Action: ActionFunc` once some kind + // of deprecation period has passed, maybe? + + // Execute this function if a usage error occurs. + OnUsageError OnUsageErrorFunc + // List of child commands + Subcommands Commands + // List of flags to parse + Flags []Flag + // Treat all flags as normal arguments if true + SkipFlagParsing bool + // Skip argument reordering which attempts to move flags before arguments, + // but only works if all flags appear after all arguments. This behavior was + // removed n version 2 since it only works under specific conditions so we + // backport here by exposing it as an option for compatibility. + SkipArgReorder bool + // Boolean to hide built-in help command + HideHelp bool + // Boolean to hide this command from help or completion + Hidden bool + + // Full name of command for help, defaults to full command name, including parent commands. 
+ HelpName string + commandNamePath []string + + // CustomHelpTemplate the text template for the command help topic. + // cli.go uses text/template to render templates. You can + // render custom help text by setting this variable. + CustomHelpTemplate string +} + +type CommandsByName []Command + +func (c CommandsByName) Len() int { + return len(c) +} + +func (c CommandsByName) Less(i, j int) bool { + return c[i].Name < c[j].Name +} + +func (c CommandsByName) Swap(i, j int) { + c[i], c[j] = c[j], c[i] +} + +// FullName returns the full name of the command. +// For subcommands this ensures that parent commands are part of the command path +func (c Command) FullName() string { + if c.commandNamePath == nil { + return c.Name + } + return strings.Join(c.commandNamePath, " ") +} + +// Commands is a slice of Command +type Commands []Command + +// Run invokes the command given the context, parses ctx.Args() to generate command-specific flags +func (c Command) Run(ctx *Context) (err error) { + if len(c.Subcommands) > 0 { + return c.startApp(ctx) + } + + if !c.HideHelp && (HelpFlag != BoolFlag{}) { + // append help to flags + c.Flags = append( + c.Flags, + HelpFlag, + ) + } + + set, err := flagSet(c.Name, c.Flags) + if err != nil { + return err + } + set.SetOutput(ioutil.Discard) + + if c.SkipFlagParsing { + err = set.Parse(append([]string{"--"}, ctx.Args().Tail()...)) + } else if !c.SkipArgReorder { + firstFlagIndex := -1 + terminatorIndex := -1 + for index, arg := range ctx.Args() { + if arg == "--" { + terminatorIndex = index + break + } else if arg == "-" { + // Do nothing. A dash alone is not really a flag. 
+ continue + } else if strings.HasPrefix(arg, "-") && firstFlagIndex == -1 { + firstFlagIndex = index + } + } + + if firstFlagIndex > -1 { + args := ctx.Args() + regularArgs := make([]string, len(args[1:firstFlagIndex])) + copy(regularArgs, args[1:firstFlagIndex]) + + var flagArgs []string + if terminatorIndex > -1 { + flagArgs = args[firstFlagIndex:terminatorIndex] + regularArgs = append(regularArgs, args[terminatorIndex:]...) + } else { + flagArgs = args[firstFlagIndex:] + } + + err = set.Parse(append(flagArgs, regularArgs...)) + } else { + err = set.Parse(ctx.Args().Tail()) + } + } else { + err = set.Parse(ctx.Args().Tail()) + } + + nerr := normalizeFlags(c.Flags, set) + if nerr != nil { + fmt.Fprintln(ctx.App.Writer, nerr) + fmt.Fprintln(ctx.App.Writer) + ShowCommandHelp(ctx, c.Name) + return nerr + } + + context := NewContext(ctx.App, set, ctx) + context.Command = c + if checkCommandCompletions(context, c.Name) { + return nil + } + + if err != nil { + if c.OnUsageError != nil { + err := c.OnUsageError(context, err, false) + HandleExitCoder(err) + return err + } + fmt.Fprintln(context.App.Writer, "Incorrect Usage:", err.Error()) + fmt.Fprintln(context.App.Writer) + ShowCommandHelp(context, c.Name) + return err + } + + if checkCommandHelp(context, c.Name) { + return nil + } + + if c.After != nil { + defer func() { + afterErr := c.After(context) + if afterErr != nil { + HandleExitCoder(err) + if err != nil { + err = NewMultiError(err, afterErr) + } else { + err = afterErr + } + } + }() + } + + if c.Before != nil { + err = c.Before(context) + if err != nil { + ShowCommandHelp(context, c.Name) + HandleExitCoder(err) + return err + } + } + + if c.Action == nil { + c.Action = helpSubcommand.Action + } + + err = HandleAction(c.Action, context) + + if err != nil { + HandleExitCoder(err) + } + return err +} + +// Names returns the names including short names and aliases. 
+func (c Command) Names() []string { + names := []string{c.Name} + + if c.ShortName != "" { + names = append(names, c.ShortName) + } + + return append(names, c.Aliases...) +} + +// HasName returns true if Command.Name or Command.ShortName matches given name +func (c Command) HasName(name string) bool { + for _, n := range c.Names() { + if n == name { + return true + } + } + return false +} + +func (c Command) startApp(ctx *Context) error { + app := NewApp() + app.Metadata = ctx.App.Metadata + // set the name and usage + app.Name = fmt.Sprintf("%s %s", ctx.App.Name, c.Name) + if c.HelpName == "" { + app.HelpName = c.HelpName + } else { + app.HelpName = app.Name + } + + app.Usage = c.Usage + app.Description = c.Description + app.ArgsUsage = c.ArgsUsage + + // set CommandNotFound + app.CommandNotFound = ctx.App.CommandNotFound + app.CustomAppHelpTemplate = c.CustomHelpTemplate + + // set the flags and commands + app.Commands = c.Subcommands + app.Flags = c.Flags + app.HideHelp = c.HideHelp + + app.Version = ctx.App.Version + app.HideVersion = ctx.App.HideVersion + app.Compiled = ctx.App.Compiled + app.Author = ctx.App.Author + app.Email = ctx.App.Email + app.Writer = ctx.App.Writer + app.ErrWriter = ctx.App.ErrWriter + + app.categories = CommandCategories{} + for _, command := range c.Subcommands { + app.categories = app.categories.AddCommand(command.Category, command) + } + + sort.Sort(app.categories) + + // bash completion + app.EnableBashCompletion = ctx.App.EnableBashCompletion + if c.BashComplete != nil { + app.BashComplete = c.BashComplete + } + + // set the actions + app.Before = c.Before + app.After = c.After + if c.Action != nil { + app.Action = c.Action + } else { + app.Action = helpSubcommand.Action + } + app.OnUsageError = c.OnUsageError + + for index, cc := range app.Commands { + app.Commands[index].commandNamePath = []string{c.Name, cc.Name} + } + + return app.RunAsSubcommand(ctx) +} + +// VisibleFlags returns a slice of the Flags with Hidden=false +func 
(c Command) VisibleFlags() []Flag { + return visibleFlags(c.Flags) +} diff --git a/vendor/github.com/urfave/cli/context.go b/vendor/github.com/urfave/cli/context.go new file mode 100644 index 0000000..db94191 --- /dev/null +++ b/vendor/github.com/urfave/cli/context.go @@ -0,0 +1,278 @@ +package cli + +import ( + "errors" + "flag" + "reflect" + "strings" + "syscall" +) + +// Context is a type that is passed through to +// each Handler action in a cli application. Context +// can be used to retrieve context-specific Args and +// parsed command-line options. +type Context struct { + App *App + Command Command + shellComplete bool + flagSet *flag.FlagSet + setFlags map[string]bool + parentContext *Context +} + +// NewContext creates a new context. For use in when invoking an App or Command action. +func NewContext(app *App, set *flag.FlagSet, parentCtx *Context) *Context { + c := &Context{App: app, flagSet: set, parentContext: parentCtx} + + if parentCtx != nil { + c.shellComplete = parentCtx.shellComplete + } + + return c +} + +// NumFlags returns the number of flags set +func (c *Context) NumFlags() int { + return c.flagSet.NFlag() +} + +// Set sets a context flag to a value. 
+func (c *Context) Set(name, value string) error { + c.setFlags = nil + return c.flagSet.Set(name, value) +} + +// GlobalSet sets a context flag to a value on the global flagset +func (c *Context) GlobalSet(name, value string) error { + globalContext(c).setFlags = nil + return globalContext(c).flagSet.Set(name, value) +} + +// IsSet determines if the flag was actually set +func (c *Context) IsSet(name string) bool { + if c.setFlags == nil { + c.setFlags = make(map[string]bool) + + c.flagSet.Visit(func(f *flag.Flag) { + c.setFlags[f.Name] = true + }) + + c.flagSet.VisitAll(func(f *flag.Flag) { + if _, ok := c.setFlags[f.Name]; ok { + return + } + c.setFlags[f.Name] = false + }) + + // XXX hack to support IsSet for flags with EnvVar + // + // There isn't an easy way to do this with the current implementation since + // whether a flag was set via an environment variable is very difficult to + // determine here. Instead, we intend to introduce a backwards incompatible + // change in version 2 to add `IsSet` to the Flag interface to push the + // responsibility closer to where the information required to determine + // whether a flag is set by non-standard means such as environment + // variables is avaliable. 
+ // + // See https://github.com/urfave/cli/issues/294 for additional discussion + flags := c.Command.Flags + if c.Command.Name == "" { // cannot == Command{} since it contains slice types + if c.App != nil { + flags = c.App.Flags + } + } + for _, f := range flags { + eachName(f.GetName(), func(name string) { + if isSet, ok := c.setFlags[name]; isSet || !ok { + return + } + + val := reflect.ValueOf(f) + if val.Kind() == reflect.Ptr { + val = val.Elem() + } + + envVarValue := val.FieldByName("EnvVar") + if !envVarValue.IsValid() { + return + } + + eachName(envVarValue.String(), func(envVar string) { + envVar = strings.TrimSpace(envVar) + if _, ok := syscall.Getenv(envVar); ok { + c.setFlags[name] = true + return + } + }) + }) + } + } + + return c.setFlags[name] +} + +// GlobalIsSet determines if the global flag was actually set +func (c *Context) GlobalIsSet(name string) bool { + ctx := c + if ctx.parentContext != nil { + ctx = ctx.parentContext + } + + for ; ctx != nil; ctx = ctx.parentContext { + if ctx.IsSet(name) { + return true + } + } + return false +} + +// FlagNames returns a slice of flag names used in this context. +func (c *Context) FlagNames() (names []string) { + for _, flag := range c.Command.Flags { + name := strings.Split(flag.GetName(), ",")[0] + if name == "help" { + continue + } + names = append(names, name) + } + return +} + +// GlobalFlagNames returns a slice of global flag names used by the app. 
+func (c *Context) GlobalFlagNames() (names []string) { + for _, flag := range c.App.Flags { + name := strings.Split(flag.GetName(), ",")[0] + if name == "help" || name == "version" { + continue + } + names = append(names, name) + } + return +} + +// Parent returns the parent context, if any +func (c *Context) Parent() *Context { + return c.parentContext +} + +// value returns the value of the flag coressponding to `name` +func (c *Context) value(name string) interface{} { + return c.flagSet.Lookup(name).Value.(flag.Getter).Get() +} + +// Args contains apps console arguments +type Args []string + +// Args returns the command line arguments associated with the context. +func (c *Context) Args() Args { + args := Args(c.flagSet.Args()) + return args +} + +// NArg returns the number of the command line arguments. +func (c *Context) NArg() int { + return len(c.Args()) +} + +// Get returns the nth argument, or else a blank string +func (a Args) Get(n int) string { + if len(a) > n { + return a[n] + } + return "" +} + +// First returns the first argument, or else a blank string +func (a Args) First() string { + return a.Get(0) +} + +// Tail returns the rest of the arguments (not the first one) +// or else an empty string slice +func (a Args) Tail() []string { + if len(a) >= 2 { + return []string(a)[1:] + } + return []string{} +} + +// Present checks if there are any arguments present +func (a Args) Present() bool { + return len(a) != 0 +} + +// Swap swaps arguments at the given indexes +func (a Args) Swap(from, to int) error { + if from >= len(a) || to >= len(a) { + return errors.New("index out of range") + } + a[from], a[to] = a[to], a[from] + return nil +} + +func globalContext(ctx *Context) *Context { + if ctx == nil { + return nil + } + + for { + if ctx.parentContext == nil { + return ctx + } + ctx = ctx.parentContext + } +} + +func lookupGlobalFlagSet(name string, ctx *Context) *flag.FlagSet { + if ctx.parentContext != nil { + ctx = ctx.parentContext + } + for ; ctx 
!= nil; ctx = ctx.parentContext { + if f := ctx.flagSet.Lookup(name); f != nil { + return ctx.flagSet + } + } + return nil +} + +func copyFlag(name string, ff *flag.Flag, set *flag.FlagSet) { + switch ff.Value.(type) { + case *StringSlice: + default: + set.Set(name, ff.Value.String()) + } +} + +func normalizeFlags(flags []Flag, set *flag.FlagSet) error { + visited := make(map[string]bool) + set.Visit(func(f *flag.Flag) { + visited[f.Name] = true + }) + for _, f := range flags { + parts := strings.Split(f.GetName(), ",") + if len(parts) == 1 { + continue + } + var ff *flag.Flag + for _, name := range parts { + name = strings.Trim(name, " ") + if visited[name] { + if ff != nil { + return errors.New("Cannot use two forms of the same flag: " + name + " " + ff.Name) + } + ff = set.Lookup(name) + } + } + if ff == nil { + continue + } + for _, name := range parts { + name = strings.Trim(name, " ") + if !visited[name] { + copyFlag(name, ff, set) + } + } + } + return nil +} diff --git a/vendor/github.com/urfave/cli/errors.go b/vendor/github.com/urfave/cli/errors.go new file mode 100644 index 0000000..562b295 --- /dev/null +++ b/vendor/github.com/urfave/cli/errors.go @@ -0,0 +1,115 @@ +package cli + +import ( + "fmt" + "io" + "os" + "strings" +) + +// OsExiter is the function used when the app exits. If not set defaults to os.Exit. +var OsExiter = os.Exit + +// ErrWriter is used to write errors to the user. This can be anything +// implementing the io.Writer interface and defaults to os.Stderr. +var ErrWriter io.Writer = os.Stderr + +// MultiError is an error that wraps multiple errors. +type MultiError struct { + Errors []error +} + +// NewMultiError creates a new MultiError. Pass in one or more errors. +func NewMultiError(err ...error) MultiError { + return MultiError{Errors: err} +} + +// Error implements the error interface. 
+func (m MultiError) Error() string { + errs := make([]string, len(m.Errors)) + for i, err := range m.Errors { + errs[i] = err.Error() + } + + return strings.Join(errs, "\n") +} + +type ErrorFormatter interface { + Format(s fmt.State, verb rune) +} + +// ExitCoder is the interface checked by `App` and `Command` for a custom exit +// code +type ExitCoder interface { + error + ExitCode() int +} + +// ExitError fulfills both the builtin `error` interface and `ExitCoder` +type ExitError struct { + exitCode int + message interface{} +} + +// NewExitError makes a new *ExitError +func NewExitError(message interface{}, exitCode int) *ExitError { + return &ExitError{ + exitCode: exitCode, + message: message, + } +} + +// Error returns the string message, fulfilling the interface required by +// `error` +func (ee *ExitError) Error() string { + return fmt.Sprintf("%v", ee.message) +} + +// ExitCode returns the exit code, fulfilling the interface required by +// `ExitCoder` +func (ee *ExitError) ExitCode() int { + return ee.exitCode +} + +// HandleExitCoder checks if the error fulfills the ExitCoder interface, and if +// so prints the error to stderr (if it is non-empty) and calls OsExiter with the +// given exit code. If the given error is a MultiError, then this func is +// called on all members of the Errors slice and calls OsExiter with the last exit code. 
+func HandleExitCoder(err error) { + if err == nil { + return + } + + if exitErr, ok := err.(ExitCoder); ok { + if err.Error() != "" { + if _, ok := exitErr.(ErrorFormatter); ok { + fmt.Fprintf(ErrWriter, "%+v\n", err) + } else { + fmt.Fprintln(ErrWriter, err) + } + } + OsExiter(exitErr.ExitCode()) + return + } + + if multiErr, ok := err.(MultiError); ok { + code := handleMultiError(multiErr) + OsExiter(code) + return + } +} + +func handleMultiError(multiErr MultiError) int { + code := 1 + for _, merr := range multiErr.Errors { + if multiErr2, ok := merr.(MultiError); ok { + code = handleMultiError(multiErr2) + } else { + fmt.Fprintln(ErrWriter, merr) + if exitErr, ok := merr.(ExitCoder); ok { + code = exitErr.ExitCode() + } + } + } + return code +} diff --git a/vendor/github.com/urfave/cli/flag.go b/vendor/github.com/urfave/cli/flag.go new file mode 100644 index 0000000..877ff35 --- /dev/null +++ b/vendor/github.com/urfave/cli/flag.go @@ -0,0 +1,799 @@ +package cli + +import ( + "flag" + "fmt" + "reflect" + "runtime" + "strconv" + "strings" + "syscall" + "time" +) + +const defaultPlaceholder = "value" + +// BashCompletionFlag enables bash-completion for all commands and subcommands +var BashCompletionFlag Flag = BoolFlag{ + Name: "generate-bash-completion", + Hidden: true, +} + +// VersionFlag prints the version for the application +var VersionFlag Flag = BoolFlag{ + Name: "version, v", + Usage: "print the version", +} + +// HelpFlag prints the help for all commands and subcommands +// Set to the zero value (BoolFlag{}) to disable flag -- keeps subcommand +// unless HideHelp is set to true) +var HelpFlag Flag = BoolFlag{ + Name: "help, h", + Usage: "show help", +} + +// FlagStringer converts a flag definition to a string. This is used by help +// to display a flag. +var FlagStringer FlagStringFunc = stringifyFlag + +// FlagsByName is a slice of Flag. 
+type FlagsByName []Flag + +func (f FlagsByName) Len() int { + return len(f) +} + +func (f FlagsByName) Less(i, j int) bool { + return f[i].GetName() < f[j].GetName() +} + +func (f FlagsByName) Swap(i, j int) { + f[i], f[j] = f[j], f[i] +} + +// Flag is a common interface related to parsing flags in cli. +// For more advanced flag parsing techniques, it is recommended that +// this interface be implemented. +type Flag interface { + fmt.Stringer + // Apply Flag settings to the given flag set + Apply(*flag.FlagSet) + GetName() string +} + +// errorableFlag is an interface that allows us to return errors during apply +// it allows flags defined in this library to return errors in a fashion backwards compatible +// TODO remove in v2 and modify the existing Flag interface to return errors +type errorableFlag interface { + Flag + + ApplyWithError(*flag.FlagSet) error +} + +func flagSet(name string, flags []Flag) (*flag.FlagSet, error) { + set := flag.NewFlagSet(name, flag.ContinueOnError) + + for _, f := range flags { + //TODO remove in v2 when errorableFlag is removed + if ef, ok := f.(errorableFlag); ok { + if err := ef.ApplyWithError(set); err != nil { + return nil, err + } + } else { + f.Apply(set) + } + } + return set, nil +} + +func eachName(longName string, fn func(string)) { + parts := strings.Split(longName, ",") + for _, name := range parts { + name = strings.Trim(name, " ") + fn(name) + } +} + +// Generic is a generic parseable type identified by a specific flag +type Generic interface { + Set(value string) error + String() string +} + +// Apply takes the flagset and calls Set on the generic flag with the value +// provided by the user for parsing by the flag +// Ignores parsing errors +func (f GenericFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError takes the flagset and calls Set on the generic flag with the value +// provided by the user for parsing by the flag +func (f GenericFlag) ApplyWithError(set *flag.FlagSet) error { + 
val := f.Value + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + if err := val.Set(envVal); err != nil { + return fmt.Errorf("could not parse %s as value for flag %s: %s", envVal, f.Name, err) + } + break + } + } + } + + eachName(f.Name, func(name string) { + set.Var(f.Value, name, f.Usage) + }) + + return nil +} + +// StringSlice is an opaque type for []string to satisfy flag.Value and flag.Getter +type StringSlice []string + +// Set appends the string value to the list of values +func (f *StringSlice) Set(value string) error { + *f = append(*f, value) + return nil +} + +// String returns a readable representation of this value (for usage defaults) +func (f *StringSlice) String() string { + return fmt.Sprintf("%s", *f) +} + +// Value returns the slice of strings set by this flag +func (f *StringSlice) Value() []string { + return *f +} + +// Get returns the slice of strings set by this flag +func (f *StringSlice) Get() interface{} { + return *f +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f StringSliceFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f StringSliceFlag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + newVal := &StringSlice{} + for _, s := range strings.Split(envVal, ",") { + s = strings.TrimSpace(s) + if err := newVal.Set(s); err != nil { + return fmt.Errorf("could not parse %s as string value for flag %s: %s", envVal, f.Name, err) + } + } + f.Value = newVal + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Value == nil { + f.Value = &StringSlice{} + } + set.Var(f.Value, name, f.Usage) + }) + + return nil +} + +// IntSlice 
is an opaque type for []int to satisfy flag.Value and flag.Getter +type IntSlice []int + +// Set parses the value into an integer and appends it to the list of values +func (f *IntSlice) Set(value string) error { + tmp, err := strconv.Atoi(value) + if err != nil { + return err + } + *f = append(*f, tmp) + return nil +} + +// String returns a readable representation of this value (for usage defaults) +func (f *IntSlice) String() string { + return fmt.Sprintf("%#v", *f) +} + +// Value returns the slice of ints set by this flag +func (f *IntSlice) Value() []int { + return *f +} + +// Get returns the slice of ints set by this flag +func (f *IntSlice) Get() interface{} { + return *f +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f IntSliceFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f IntSliceFlag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + newVal := &IntSlice{} + for _, s := range strings.Split(envVal, ",") { + s = strings.TrimSpace(s) + if err := newVal.Set(s); err != nil { + return fmt.Errorf("could not parse %s as int slice value for flag %s: %s", envVal, f.Name, err) + } + } + f.Value = newVal + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Value == nil { + f.Value = &IntSlice{} + } + set.Var(f.Value, name, f.Usage) + }) + + return nil +} + +// Int64Slice is an opaque type for []int to satisfy flag.Value and flag.Getter +type Int64Slice []int64 + +// Set parses the value into an integer and appends it to the list of values +func (f *Int64Slice) Set(value string) error { + tmp, err := strconv.ParseInt(value, 10, 64) + if err != nil { + return err + } + *f = append(*f, tmp) + return nil +} + +// String returns a readable representation of this 
value (for usage defaults) +func (f *Int64Slice) String() string { + return fmt.Sprintf("%#v", *f) +} + +// Value returns the slice of ints set by this flag +func (f *Int64Slice) Value() []int64 { + return *f +} + +// Get returns the slice of ints set by this flag +func (f *Int64Slice) Get() interface{} { + return *f +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f Int64SliceFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f Int64SliceFlag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + newVal := &Int64Slice{} + for _, s := range strings.Split(envVal, ",") { + s = strings.TrimSpace(s) + if err := newVal.Set(s); err != nil { + return fmt.Errorf("could not parse %s as int64 slice value for flag %s: %s", envVal, f.Name, err) + } + } + f.Value = newVal + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Value == nil { + f.Value = &Int64Slice{} + } + set.Var(f.Value, name, f.Usage) + }) + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f BoolFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f BoolFlag) ApplyWithError(set *flag.FlagSet) error { + val := false + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + if envVal == "" { + val = false + break + } + + envValBool, err := strconv.ParseBool(envVal) + if err != nil { + return fmt.Errorf("could not parse %s as bool value for flag %s: %s", envVal, f.Name, err) + } + + val = envValBool + break + } + } + } + + eachName(f.Name, func(name string) { + if 
f.Destination != nil { + set.BoolVar(f.Destination, name, val, f.Usage) + return + } + set.Bool(name, val, f.Usage) + }) + + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f BoolTFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f BoolTFlag) ApplyWithError(set *flag.FlagSet) error { + val := true + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + if envVal == "" { + val = false + break + } + + envValBool, err := strconv.ParseBool(envVal) + if err != nil { + return fmt.Errorf("could not parse %s as bool value for flag %s: %s", envVal, f.Name, err) + } + + val = envValBool + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Destination != nil { + set.BoolVar(f.Destination, name, val, f.Usage) + return + } + set.Bool(name, val, f.Usage) + }) + + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f StringFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f StringFlag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + f.Value = envVal + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Destination != nil { + set.StringVar(f.Destination, name, f.Value, f.Usage) + return + } + set.String(name, f.Value, f.Usage) + }) + + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f IntFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f IntFlag) 
ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + envValInt, err := strconv.ParseInt(envVal, 0, 64) + if err != nil { + return fmt.Errorf("could not parse %s as int value for flag %s: %s", envVal, f.Name, err) + } + f.Value = int(envValInt) + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Destination != nil { + set.IntVar(f.Destination, name, f.Value, f.Usage) + return + } + set.Int(name, f.Value, f.Usage) + }) + + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f Int64Flag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f Int64Flag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + envValInt, err := strconv.ParseInt(envVal, 0, 64) + if err != nil { + return fmt.Errorf("could not parse %s as int value for flag %s: %s", envVal, f.Name, err) + } + + f.Value = envValInt + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Destination != nil { + set.Int64Var(f.Destination, name, f.Value, f.Usage) + return + } + set.Int64(name, f.Value, f.Usage) + }) + + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f UintFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f UintFlag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + envValInt, err := strconv.ParseUint(envVal, 0, 64) + if err != nil 
{ + return fmt.Errorf("could not parse %s as uint value for flag %s: %s", envVal, f.Name, err) + } + + f.Value = uint(envValInt) + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Destination != nil { + set.UintVar(f.Destination, name, f.Value, f.Usage) + return + } + set.Uint(name, f.Value, f.Usage) + }) + + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f Uint64Flag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f Uint64Flag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + envValInt, err := strconv.ParseUint(envVal, 0, 64) + if err != nil { + return fmt.Errorf("could not parse %s as uint64 value for flag %s: %s", envVal, f.Name, err) + } + + f.Value = uint64(envValInt) + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Destination != nil { + set.Uint64Var(f.Destination, name, f.Value, f.Usage) + return + } + set.Uint64(name, f.Value, f.Usage) + }) + + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f DurationFlag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f DurationFlag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + envValDuration, err := time.ParseDuration(envVal) + if err != nil { + return fmt.Errorf("could not parse %s as duration for flag %s: %s", envVal, f.Name, err) + } + + f.Value = envValDuration + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Destination != nil { + 
set.DurationVar(f.Destination, name, f.Value, f.Usage) + return + } + set.Duration(name, f.Value, f.Usage) + }) + + return nil +} + +// Apply populates the flag given the flag set and environment +// Ignores errors +func (f Float64Flag) Apply(set *flag.FlagSet) { + f.ApplyWithError(set) +} + +// ApplyWithError populates the flag given the flag set and environment +func (f Float64Flag) ApplyWithError(set *flag.FlagSet) error { + if f.EnvVar != "" { + for _, envVar := range strings.Split(f.EnvVar, ",") { + envVar = strings.TrimSpace(envVar) + if envVal, ok := syscall.Getenv(envVar); ok { + envValFloat, err := strconv.ParseFloat(envVal, 10) + if err != nil { + return fmt.Errorf("could not parse %s as float64 value for flag %s: %s", envVal, f.Name, err) + } + + f.Value = float64(envValFloat) + break + } + } + } + + eachName(f.Name, func(name string) { + if f.Destination != nil { + set.Float64Var(f.Destination, name, f.Value, f.Usage) + return + } + set.Float64(name, f.Value, f.Usage) + }) + + return nil +} + +func visibleFlags(fl []Flag) []Flag { + visible := []Flag{} + for _, flag := range fl { + field := flagValue(flag).FieldByName("Hidden") + if !field.IsValid() || !field.Bool() { + visible = append(visible, flag) + } + } + return visible +} + +func prefixFor(name string) (prefix string) { + if len(name) == 1 { + prefix = "-" + } else { + prefix = "--" + } + + return +} + +// Returns the placeholder, if any, and the unquoted usage string. 
+func unquoteUsage(usage string) (string, string) { + for i := 0; i < len(usage); i++ { + if usage[i] == '`' { + for j := i + 1; j < len(usage); j++ { + if usage[j] == '`' { + name := usage[i+1 : j] + usage = usage[:i] + name + usage[j+1:] + return name, usage + } + } + break + } + } + return "", usage +} + +func prefixedNames(fullName, placeholder string) string { + var prefixed string + parts := strings.Split(fullName, ",") + for i, name := range parts { + name = strings.Trim(name, " ") + prefixed += prefixFor(name) + name + if placeholder != "" { + prefixed += " " + placeholder + } + if i < len(parts)-1 { + prefixed += ", " + } + } + return prefixed +} + +func withEnvHint(envVar, str string) string { + envText := "" + if envVar != "" { + prefix := "$" + suffix := "" + sep := ", $" + if runtime.GOOS == "windows" { + prefix = "%" + suffix = "%" + sep = "%, %" + } + envText = fmt.Sprintf(" [%s%s%s]", prefix, strings.Join(strings.Split(envVar, ","), sep), suffix) + } + return str + envText +} + +func flagValue(f Flag) reflect.Value { + fv := reflect.ValueOf(f) + for fv.Kind() == reflect.Ptr { + fv = reflect.Indirect(fv) + } + return fv +} + +func stringifyFlag(f Flag) string { + fv := flagValue(f) + + switch f.(type) { + case IntSliceFlag: + return withEnvHint(fv.FieldByName("EnvVar").String(), + stringifyIntSliceFlag(f.(IntSliceFlag))) + case Int64SliceFlag: + return withEnvHint(fv.FieldByName("EnvVar").String(), + stringifyInt64SliceFlag(f.(Int64SliceFlag))) + case StringSliceFlag: + return withEnvHint(fv.FieldByName("EnvVar").String(), + stringifyStringSliceFlag(f.(StringSliceFlag))) + } + + placeholder, usage := unquoteUsage(fv.FieldByName("Usage").String()) + + needsPlaceholder := false + defaultValueString := "" + + if val := fv.FieldByName("Value"); val.IsValid() { + needsPlaceholder = true + defaultValueString = fmt.Sprintf(" (default: %v)", val.Interface()) + + if val.Kind() == reflect.String && val.String() != "" { + defaultValueString = fmt.Sprintf(" 
(default: %q)", val.String()) + } + } + + if defaultValueString == " (default: )" { + defaultValueString = "" + } + + if needsPlaceholder && placeholder == "" { + placeholder = defaultPlaceholder + } + + usageWithDefault := strings.TrimSpace(fmt.Sprintf("%s%s", usage, defaultValueString)) + + return withEnvHint(fv.FieldByName("EnvVar").String(), + fmt.Sprintf("%s\t%s", prefixedNames(fv.FieldByName("Name").String(), placeholder), usageWithDefault)) +} + +func stringifyIntSliceFlag(f IntSliceFlag) string { + defaultVals := []string{} + if f.Value != nil && len(f.Value.Value()) > 0 { + for _, i := range f.Value.Value() { + defaultVals = append(defaultVals, fmt.Sprintf("%d", i)) + } + } + + return stringifySliceFlag(f.Usage, f.Name, defaultVals) +} + +func stringifyInt64SliceFlag(f Int64SliceFlag) string { + defaultVals := []string{} + if f.Value != nil && len(f.Value.Value()) > 0 { + for _, i := range f.Value.Value() { + defaultVals = append(defaultVals, fmt.Sprintf("%d", i)) + } + } + + return stringifySliceFlag(f.Usage, f.Name, defaultVals) +} + +func stringifyStringSliceFlag(f StringSliceFlag) string { + defaultVals := []string{} + if f.Value != nil && len(f.Value.Value()) > 0 { + for _, s := range f.Value.Value() { + if len(s) > 0 { + defaultVals = append(defaultVals, fmt.Sprintf("%q", s)) + } + } + } + + return stringifySliceFlag(f.Usage, f.Name, defaultVals) +} + +func stringifySliceFlag(usage, name string, defaultVals []string) string { + placeholder, usage := unquoteUsage(usage) + if placeholder == "" { + placeholder = defaultPlaceholder + } + + defaultVal := "" + if len(defaultVals) > 0 { + defaultVal = fmt.Sprintf(" (default: %s)", strings.Join(defaultVals, ", ")) + } + + usageWithDefault := strings.TrimSpace(fmt.Sprintf("%s%s", usage, defaultVal)) + return fmt.Sprintf("%s\t%s", prefixedNames(name, placeholder), usageWithDefault) +} diff --git a/vendor/github.com/urfave/cli/flag_generated.go b/vendor/github.com/urfave/cli/flag_generated.go new file mode 
100644 index 0000000..491b619 --- /dev/null +++ b/vendor/github.com/urfave/cli/flag_generated.go @@ -0,0 +1,627 @@ +package cli + +import ( + "flag" + "strconv" + "time" +) + +// WARNING: This file is generated! + +// BoolFlag is a flag with type bool +type BoolFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Destination *bool +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f BoolFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f BoolFlag) GetName() string { + return f.Name +} + +// Bool looks up the value of a local BoolFlag, returns +// false if not found +func (c *Context) Bool(name string) bool { + return lookupBool(name, c.flagSet) +} + +// GlobalBool looks up the value of a global BoolFlag, returns +// false if not found +func (c *Context) GlobalBool(name string) bool { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupBool(name, fs) + } + return false +} + +func lookupBool(name string, set *flag.FlagSet) bool { + f := set.Lookup(name) + if f != nil { + parsed, err := strconv.ParseBool(f.Value.String()) + if err != nil { + return false + } + return parsed + } + return false +} + +// BoolTFlag is a flag with type bool that is true by default +type BoolTFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Destination *bool +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f BoolTFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f BoolTFlag) GetName() string { + return f.Name +} + +// BoolT looks up the value of a local BoolTFlag, returns +// false if not found +func (c *Context) BoolT(name string) bool { + return lookupBoolT(name, c.flagSet) +} + +// GlobalBoolT looks up the value of a global BoolTFlag, returns +// false if not found +func (c *Context) GlobalBoolT(name string) bool { + if fs := 
lookupGlobalFlagSet(name, c); fs != nil { + return lookupBoolT(name, fs) + } + return false +} + +func lookupBoolT(name string, set *flag.FlagSet) bool { + f := set.Lookup(name) + if f != nil { + parsed, err := strconv.ParseBool(f.Value.String()) + if err != nil { + return false + } + return parsed + } + return false +} + +// DurationFlag is a flag with type time.Duration (see https://golang.org/pkg/time/#ParseDuration) +type DurationFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value time.Duration + Destination *time.Duration +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f DurationFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f DurationFlag) GetName() string { + return f.Name +} + +// Duration looks up the value of a local DurationFlag, returns +// 0 if not found +func (c *Context) Duration(name string) time.Duration { + return lookupDuration(name, c.flagSet) +} + +// GlobalDuration looks up the value of a global DurationFlag, returns +// 0 if not found +func (c *Context) GlobalDuration(name string) time.Duration { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupDuration(name, fs) + } + return 0 +} + +func lookupDuration(name string, set *flag.FlagSet) time.Duration { + f := set.Lookup(name) + if f != nil { + parsed, err := time.ParseDuration(f.Value.String()) + if err != nil { + return 0 + } + return parsed + } + return 0 +} + +// Float64Flag is a flag with type float64 +type Float64Flag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value float64 + Destination *float64 +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f Float64Flag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f Float64Flag) GetName() string { + return f.Name +} + +// Float64 looks up the value of a local Float64Flag, returns 
+// 0 if not found +func (c *Context) Float64(name string) float64 { + return lookupFloat64(name, c.flagSet) +} + +// GlobalFloat64 looks up the value of a global Float64Flag, returns +// 0 if not found +func (c *Context) GlobalFloat64(name string) float64 { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupFloat64(name, fs) + } + return 0 +} + +func lookupFloat64(name string, set *flag.FlagSet) float64 { + f := set.Lookup(name) + if f != nil { + parsed, err := strconv.ParseFloat(f.Value.String(), 64) + if err != nil { + return 0 + } + return parsed + } + return 0 +} + +// GenericFlag is a flag with type Generic +type GenericFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value Generic +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f GenericFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f GenericFlag) GetName() string { + return f.Name +} + +// Generic looks up the value of a local GenericFlag, returns +// nil if not found +func (c *Context) Generic(name string) interface{} { + return lookupGeneric(name, c.flagSet) +} + +// GlobalGeneric looks up the value of a global GenericFlag, returns +// nil if not found +func (c *Context) GlobalGeneric(name string) interface{} { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupGeneric(name, fs) + } + return nil +} + +func lookupGeneric(name string, set *flag.FlagSet) interface{} { + f := set.Lookup(name) + if f != nil { + parsed, err := f.Value, error(nil) + if err != nil { + return nil + } + return parsed + } + return nil +} + +// Int64Flag is a flag with type int64 +type Int64Flag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value int64 + Destination *int64 +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f Int64Flag) String() string { + return FlagStringer(f) +} + +// GetName returns the 
name of the flag +func (f Int64Flag) GetName() string { + return f.Name +} + +// Int64 looks up the value of a local Int64Flag, returns +// 0 if not found +func (c *Context) Int64(name string) int64 { + return lookupInt64(name, c.flagSet) +} + +// GlobalInt64 looks up the value of a global Int64Flag, returns +// 0 if not found +func (c *Context) GlobalInt64(name string) int64 { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupInt64(name, fs) + } + return 0 +} + +func lookupInt64(name string, set *flag.FlagSet) int64 { + f := set.Lookup(name) + if f != nil { + parsed, err := strconv.ParseInt(f.Value.String(), 0, 64) + if err != nil { + return 0 + } + return parsed + } + return 0 +} + +// IntFlag is a flag with type int +type IntFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value int + Destination *int +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f IntFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f IntFlag) GetName() string { + return f.Name +} + +// Int looks up the value of a local IntFlag, returns +// 0 if not found +func (c *Context) Int(name string) int { + return lookupInt(name, c.flagSet) +} + +// GlobalInt looks up the value of a global IntFlag, returns +// 0 if not found +func (c *Context) GlobalInt(name string) int { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupInt(name, fs) + } + return 0 +} + +func lookupInt(name string, set *flag.FlagSet) int { + f := set.Lookup(name) + if f != nil { + parsed, err := strconv.ParseInt(f.Value.String(), 0, 64) + if err != nil { + return 0 + } + return int(parsed) + } + return 0 +} + +// IntSliceFlag is a flag with type *IntSlice +type IntSliceFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value *IntSlice +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f IntSliceFlag) 
String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f IntSliceFlag) GetName() string { + return f.Name +} + +// IntSlice looks up the value of a local IntSliceFlag, returns +// nil if not found +func (c *Context) IntSlice(name string) []int { + return lookupIntSlice(name, c.flagSet) +} + +// GlobalIntSlice looks up the value of a global IntSliceFlag, returns +// nil if not found +func (c *Context) GlobalIntSlice(name string) []int { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupIntSlice(name, fs) + } + return nil +} + +func lookupIntSlice(name string, set *flag.FlagSet) []int { + f := set.Lookup(name) + if f != nil { + parsed, err := (f.Value.(*IntSlice)).Value(), error(nil) + if err != nil { + return nil + } + return parsed + } + return nil +} + +// Int64SliceFlag is a flag with type *Int64Slice +type Int64SliceFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value *Int64Slice +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f Int64SliceFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f Int64SliceFlag) GetName() string { + return f.Name +} + +// Int64Slice looks up the value of a local Int64SliceFlag, returns +// nil if not found +func (c *Context) Int64Slice(name string) []int64 { + return lookupInt64Slice(name, c.flagSet) +} + +// GlobalInt64Slice looks up the value of a global Int64SliceFlag, returns +// nil if not found +func (c *Context) GlobalInt64Slice(name string) []int64 { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupInt64Slice(name, fs) + } + return nil +} + +func lookupInt64Slice(name string, set *flag.FlagSet) []int64 { + f := set.Lookup(name) + if f != nil { + parsed, err := (f.Value.(*Int64Slice)).Value(), error(nil) + if err != nil { + return nil + } + return parsed + } + return nil +} + +// StringFlag is a flag with type string +type 
StringFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value string + Destination *string +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f StringFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f StringFlag) GetName() string { + return f.Name +} + +// String looks up the value of a local StringFlag, returns +// "" if not found +func (c *Context) String(name string) string { + return lookupString(name, c.flagSet) +} + +// GlobalString looks up the value of a global StringFlag, returns +// "" if not found +func (c *Context) GlobalString(name string) string { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupString(name, fs) + } + return "" +} + +func lookupString(name string, set *flag.FlagSet) string { + f := set.Lookup(name) + if f != nil { + parsed, err := f.Value.String(), error(nil) + if err != nil { + return "" + } + return parsed + } + return "" +} + +// StringSliceFlag is a flag with type *StringSlice +type StringSliceFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value *StringSlice +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f StringSliceFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f StringSliceFlag) GetName() string { + return f.Name +} + +// StringSlice looks up the value of a local StringSliceFlag, returns +// nil if not found +func (c *Context) StringSlice(name string) []string { + return lookupStringSlice(name, c.flagSet) +} + +// GlobalStringSlice looks up the value of a global StringSliceFlag, returns +// nil if not found +func (c *Context) GlobalStringSlice(name string) []string { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupStringSlice(name, fs) + } + return nil +} + +func lookupStringSlice(name string, set *flag.FlagSet) []string { + f := 
set.Lookup(name) + if f != nil { + parsed, err := (f.Value.(*StringSlice)).Value(), error(nil) + if err != nil { + return nil + } + return parsed + } + return nil +} + +// Uint64Flag is a flag with type uint64 +type Uint64Flag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value uint64 + Destination *uint64 +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f Uint64Flag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f Uint64Flag) GetName() string { + return f.Name +} + +// Uint64 looks up the value of a local Uint64Flag, returns +// 0 if not found +func (c *Context) Uint64(name string) uint64 { + return lookupUint64(name, c.flagSet) +} + +// GlobalUint64 looks up the value of a global Uint64Flag, returns +// 0 if not found +func (c *Context) GlobalUint64(name string) uint64 { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + return lookupUint64(name, fs) + } + return 0 +} + +func lookupUint64(name string, set *flag.FlagSet) uint64 { + f := set.Lookup(name) + if f != nil { + parsed, err := strconv.ParseUint(f.Value.String(), 0, 64) + if err != nil { + return 0 + } + return parsed + } + return 0 +} + +// UintFlag is a flag with type uint +type UintFlag struct { + Name string + Usage string + EnvVar string + Hidden bool + Value uint + Destination *uint +} + +// String returns a readable representation of this value +// (for usage defaults) +func (f UintFlag) String() string { + return FlagStringer(f) +} + +// GetName returns the name of the flag +func (f UintFlag) GetName() string { + return f.Name +} + +// Uint looks up the value of a local UintFlag, returns +// 0 if not found +func (c *Context) Uint(name string) uint { + return lookupUint(name, c.flagSet) +} + +// GlobalUint looks up the value of a global UintFlag, returns +// 0 if not found +func (c *Context) GlobalUint(name string) uint { + if fs := lookupGlobalFlagSet(name, c); fs != nil { + 
return lookupUint(name, fs) + } + return 0 +} + +func lookupUint(name string, set *flag.FlagSet) uint { + f := set.Lookup(name) + if f != nil { + parsed, err := strconv.ParseUint(f.Value.String(), 0, 64) + if err != nil { + return 0 + } + return uint(parsed) + } + return 0 +} diff --git a/vendor/github.com/urfave/cli/funcs.go b/vendor/github.com/urfave/cli/funcs.go new file mode 100644 index 0000000..cba5e6c --- /dev/null +++ b/vendor/github.com/urfave/cli/funcs.go @@ -0,0 +1,28 @@ +package cli + +// BashCompleteFunc is an action to execute when the bash-completion flag is set +type BashCompleteFunc func(*Context) + +// BeforeFunc is an action to execute before any subcommands are run, but after +// the context is ready if a non-nil error is returned, no subcommands are run +type BeforeFunc func(*Context) error + +// AfterFunc is an action to execute after any subcommands are run, but after the +// subcommand has finished it is run even if Action() panics +type AfterFunc func(*Context) error + +// ActionFunc is the action to execute when no subcommands are specified +type ActionFunc func(*Context) error + +// CommandNotFoundFunc is executed if the proper command cannot be found +type CommandNotFoundFunc func(*Context, string) + +// OnUsageErrorFunc is executed if an usage error occurs. This is useful for displaying +// customized usage error messages. This function is able to replace the +// original error messages. If this function is not set, the "Incorrect usage" +// is displayed and the execution is interrupted. +type OnUsageErrorFunc func(context *Context, err error, isSubcommand bool) error + +// FlagStringFunc is used by the help generation to display a flag, which is +// expected to be a single line. 
+type FlagStringFunc func(Flag) string diff --git a/vendor/github.com/urfave/cli/help.go b/vendor/github.com/urfave/cli/help.go new file mode 100644 index 0000000..57ec98d --- /dev/null +++ b/vendor/github.com/urfave/cli/help.go @@ -0,0 +1,338 @@ +package cli + +import ( + "fmt" + "io" + "os" + "strings" + "text/tabwriter" + "text/template" +) + +// AppHelpTemplate is the text template for the Default help topic. +// cli.go uses text/template to render templates. You can +// render custom help text by setting this variable. +var AppHelpTemplate = `NAME: + {{.Name}}{{if .Usage}} - {{.Usage}}{{end}} + +USAGE: + {{if .UsageText}}{{.UsageText}}{{else}}{{.HelpName}} {{if .VisibleFlags}}[global options]{{end}}{{if .Commands}} command [command options]{{end}} {{if .ArgsUsage}}{{.ArgsUsage}}{{else}}[arguments...]{{end}}{{end}}{{if .Version}}{{if not .HideVersion}} + +VERSION: + {{.Version}}{{end}}{{end}}{{if .Description}} + +DESCRIPTION: + {{.Description}}{{end}}{{if len .Authors}} + +AUTHOR{{with $length := len .Authors}}{{if ne 1 $length}}S{{end}}{{end}}: + {{range $index, $author := .Authors}}{{if $index}} + {{end}}{{$author}}{{end}}{{end}}{{if .VisibleCommands}} + +COMMANDS:{{range .VisibleCategories}}{{if .Name}} + {{.Name}}:{{end}}{{range .VisibleCommands}} + {{join .Names ", "}}{{"\t"}}{{.Usage}}{{end}}{{end}}{{end}}{{if .VisibleFlags}} + +GLOBAL OPTIONS: + {{range $index, $option := .VisibleFlags}}{{if $index}} + {{end}}{{$option}}{{end}}{{end}}{{if .Copyright}} + +COPYRIGHT: + {{.Copyright}}{{end}} +` + +// CommandHelpTemplate is the text template for the command help topic. +// cli.go uses text/template to render templates. You can +// render custom help text by setting this variable. 
+var CommandHelpTemplate = `NAME: + {{.HelpName}} - {{.Usage}} + +USAGE: + {{if .UsageText}}{{.UsageText}}{{else}}{{.HelpName}}{{if .VisibleFlags}} [command options]{{end}} {{if .ArgsUsage}}{{.ArgsUsage}}{{else}}[arguments...]{{end}}{{end}}{{if .Category}} + +CATEGORY: + {{.Category}}{{end}}{{if .Description}} + +DESCRIPTION: + {{.Description}}{{end}}{{if .VisibleFlags}} + +OPTIONS: + {{range .VisibleFlags}}{{.}} + {{end}}{{end}} +` + +// SubcommandHelpTemplate is the text template for the subcommand help topic. +// cli.go uses text/template to render templates. You can +// render custom help text by setting this variable. +var SubcommandHelpTemplate = `NAME: + {{.HelpName}} - {{if .Description}}{{.Description}}{{else}}{{.Usage}}{{end}} + +USAGE: + {{if .UsageText}}{{.UsageText}}{{else}}{{.HelpName}} command{{if .VisibleFlags}} [command options]{{end}} {{if .ArgsUsage}}{{.ArgsUsage}}{{else}}[arguments...]{{end}}{{end}} + +COMMANDS:{{range .VisibleCategories}}{{if .Name}} + {{.Name}}:{{end}}{{range .VisibleCommands}} + {{join .Names ", "}}{{"\t"}}{{.Usage}}{{end}} +{{end}}{{if .VisibleFlags}} +OPTIONS: + {{range .VisibleFlags}}{{.}} + {{end}}{{end}} +` + +var helpCommand = Command{ + Name: "help", + Aliases: []string{"h"}, + Usage: "Shows a list of commands or help for one command", + ArgsUsage: "[command]", + Action: func(c *Context) error { + args := c.Args() + if args.Present() { + return ShowCommandHelp(c, args.First()) + } + + ShowAppHelp(c) + return nil + }, +} + +var helpSubcommand = Command{ + Name: "help", + Aliases: []string{"h"}, + Usage: "Shows a list of commands or help for one command", + ArgsUsage: "[command]", + Action: func(c *Context) error { + args := c.Args() + if args.Present() { + return ShowCommandHelp(c, args.First()) + } + + return ShowSubcommandHelp(c) + }, +} + +// Prints help for the App or Command +type helpPrinter func(w io.Writer, templ string, data interface{}) + +// Prints help for the App or Command with custom template function. 
+type helpPrinterCustom func(w io.Writer, templ string, data interface{}, customFunc map[string]interface{}) + +// HelpPrinter is a function that writes the help output. If not set a default +// is used. The function signature is: +// func(w io.Writer, templ string, data interface{}) +var HelpPrinter helpPrinter = printHelp + +// HelpPrinterCustom is same as HelpPrinter but +// takes a custom function for template function map. +var HelpPrinterCustom helpPrinterCustom = printHelpCustom + +// VersionPrinter prints the version for the App +var VersionPrinter = printVersion + +// ShowAppHelpAndExit - Prints the list of subcommands for the app and exits with exit code. +func ShowAppHelpAndExit(c *Context, exitCode int) { + ShowAppHelp(c) + os.Exit(exitCode) +} + +// ShowAppHelp is an action that displays the help. +func ShowAppHelp(c *Context) (err error) { + if c.App.CustomAppHelpTemplate == "" { + HelpPrinter(c.App.Writer, AppHelpTemplate, c.App) + return + } + customAppData := func() map[string]interface{} { + if c.App.ExtraInfo == nil { + return nil + } + return map[string]interface{}{ + "ExtraInfo": c.App.ExtraInfo, + } + } + HelpPrinterCustom(c.App.Writer, c.App.CustomAppHelpTemplate, c.App, customAppData()) + return nil +} + +// DefaultAppComplete prints the list of subcommands as the default app completion method +func DefaultAppComplete(c *Context) { + for _, command := range c.App.Commands { + if command.Hidden { + continue + } + for _, name := range command.Names() { + fmt.Fprintln(c.App.Writer, name) + } + } +} + +// ShowCommandHelpAndExit - exits with code after showing help +func ShowCommandHelpAndExit(c *Context, command string, code int) { + ShowCommandHelp(c, command) + os.Exit(code) +} + +// ShowCommandHelp prints help for the given command +func ShowCommandHelp(ctx *Context, command string) error { + // show the subcommand help for a command with subcommands + if command == "" { + HelpPrinter(ctx.App.Writer, SubcommandHelpTemplate, ctx.App) + return 
nil + } + + for _, c := range ctx.App.Commands { + if c.HasName(command) { + if c.CustomHelpTemplate != "" { + HelpPrinterCustom(ctx.App.Writer, c.CustomHelpTemplate, c, nil) + } else { + HelpPrinter(ctx.App.Writer, CommandHelpTemplate, c) + } + return nil + } + } + + if ctx.App.CommandNotFound == nil { + return NewExitError(fmt.Sprintf("No help topic for '%v'", command), 3) + } + + ctx.App.CommandNotFound(ctx, command) + return nil +} + +// ShowSubcommandHelp prints help for the given subcommand +func ShowSubcommandHelp(c *Context) error { + return ShowCommandHelp(c, c.Command.Name) +} + +// ShowVersion prints the version number of the App +func ShowVersion(c *Context) { + VersionPrinter(c) +} + +func printVersion(c *Context) { + fmt.Fprintf(c.App.Writer, "%v version %v\n", c.App.Name, c.App.Version) +} + +// ShowCompletions prints the lists of commands within a given context +func ShowCompletions(c *Context) { + a := c.App + if a != nil && a.BashComplete != nil { + a.BashComplete(c) + } +} + +// ShowCommandCompletions prints the custom completions for a given command +func ShowCommandCompletions(ctx *Context, command string) { + c := ctx.App.Command(command) + if c != nil && c.BashComplete != nil { + c.BashComplete(ctx) + } +} + +func printHelpCustom(out io.Writer, templ string, data interface{}, customFunc map[string]interface{}) { + funcMap := template.FuncMap{ + "join": strings.Join, + } + if customFunc != nil { + for key, value := range customFunc { + funcMap[key] = value + } + } + + w := tabwriter.NewWriter(out, 1, 8, 2, ' ', 0) + t := template.Must(template.New("help").Funcs(funcMap).Parse(templ)) + err := t.Execute(w, data) + if err != nil { + // If the writer is closed, t.Execute will fail, and there's nothing + // we can do to recover. 
+ if os.Getenv("CLI_TEMPLATE_ERROR_DEBUG") != "" { + fmt.Fprintf(ErrWriter, "CLI TEMPLATE ERROR: %#v\n", err) + } + return + } + w.Flush() +} + +func printHelp(out io.Writer, templ string, data interface{}) { + printHelpCustom(out, templ, data, nil) +} + +func checkVersion(c *Context) bool { + found := false + if VersionFlag.GetName() != "" { + eachName(VersionFlag.GetName(), func(name string) { + if c.GlobalBool(name) || c.Bool(name) { + found = true + } + }) + } + return found +} + +func checkHelp(c *Context) bool { + found := false + if HelpFlag.GetName() != "" { + eachName(HelpFlag.GetName(), func(name string) { + if c.GlobalBool(name) || c.Bool(name) { + found = true + } + }) + } + return found +} + +func checkCommandHelp(c *Context, name string) bool { + if c.Bool("h") || c.Bool("help") { + ShowCommandHelp(c, name) + return true + } + + return false +} + +func checkSubcommandHelp(c *Context) bool { + if c.Bool("h") || c.Bool("help") { + ShowSubcommandHelp(c) + return true + } + + return false +} + +func checkShellCompleteFlag(a *App, arguments []string) (bool, []string) { + if !a.EnableBashCompletion { + return false, arguments + } + + pos := len(arguments) - 1 + lastArg := arguments[pos] + + if lastArg != "--"+BashCompletionFlag.GetName() { + return false, arguments + } + + return true, arguments[:pos] +} + +func checkCompletions(c *Context) bool { + if !c.shellComplete { + return false + } + + if args := c.Args(); args.Present() { + name := args.First() + if cmd := c.App.Command(name); cmd != nil { + // let the command handle the completion + return false + } + } + + ShowCompletions(c) + return true +} + +func checkCommandCompletions(c *Context, name string) bool { + if !c.shellComplete { + return false + } + + ShowCommandCompletions(c, name) + return true +} diff --git a/vendor/github.com/vishvananda/netlink/LICENSE b/vendor/github.com/vishvananda/netlink/LICENSE new file mode 100644 index 0000000..9f64db8 --- /dev/null +++ 
b/vendor/github.com/vishvananda/netlink/LICENSE @@ -0,0 +1,192 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2014 Vishvananda Ishaya. + Copyright 2014 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/vishvananda/netlink/README.md b/vendor/github.com/vishvananda/netlink/README.md new file mode 100644 index 0000000..8cd50a9 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/README.md @@ -0,0 +1,89 @@ +# netlink - netlink library for go # + +[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) + +The netlink package provides a simple netlink library for go. Netlink +is the interface a user-space program in linux uses to communicate with +the kernel. It can be used to add and remove interfaces, set ip addresses +and routes, and configure ipsec. Netlink communication requires elevated +privileges, so in most cases this code needs to be run as root. Since +low-level netlink messages are inscrutable at best, the library attempts +to provide an api that is loosely modeled on the CLI provided by iproute2. +Actions like `ip link add` will be accomplished via a similarly named +function like LinkAdd().
This library began its life as a fork of the +netlink functionality in +[docker/libcontainer](https://github.com/docker/libcontainer) but was +heavily rewritten to improve testability, performance, and to add new +functionality like ipsec xfrm handling. + +## Local Build and Test ## + +You can use the go get command: + + go get github.com/vishvananda/netlink + +Testing dependencies: + + go get github.com/vishvananda/netns + +Testing (requires root): + + sudo -E go test github.com/vishvananda/netlink + +## Examples ## + +Add a new bridge and add eth1 into it: + +```go +package main + +import ( + "net" + "github.com/vishvananda/netlink" +) + +func main() { + la := netlink.NewLinkAttrs() + la.Name = "foo" + mybridge := &netlink.Bridge{la} + _ = netlink.LinkAdd(mybridge) + eth1, _ := netlink.LinkByName("eth1") + netlink.LinkSetMaster(eth1, mybridge) +} + +``` +Note the `NewLinkAttrs` constructor: it sets default values in the structure. For now +it sets only `TxQLen` to `-1`, so the kernel will set the default by itself. If you're +using simple initialization (`LinkAttrs{Name: "foo"}`), `TxQLen` will be set to +`0` unless you specify it like `LinkAttrs{Name: "foo", TxQLen: 1000}`. + +Add a new ip address to loopback: + +```go +package main + +import ( + "net" + "github.com/vishvananda/netlink" +) + +func main() { + lo, _ := netlink.LinkByName("lo") + addr, _ := netlink.ParseAddr("169.254.169.254/32") + netlink.AddrAdd(lo, addr) +} + +``` + +## Future Work ## + +Many pieces of netlink are not yet fully supported in the high-level +interface. Aspects of virtually all of the high-level objects don't exist. +Many of the underlying primitives are there, so it's a matter of putting +the right fields into the high-level objects and making sure that they +are serialized and deserialized correctly in the Add and List methods. + +There are also a few pieces of low level netlink functionality that still +need to be implemented. Routing rules are not in place and some of the +more advanced link types.
Hopefully there is decent structure and testing +in place to make these fairly straightforward to add. diff --git a/vendor/github.com/vishvananda/netlink/addr.go b/vendor/github.com/vishvananda/netlink/addr.go new file mode 100644 index 0000000..9bbaf50 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/addr.go @@ -0,0 +1,43 @@ +package netlink + +import ( + "fmt" + "net" + "strings" +) + +// Addr represents an IP address from netlink. Netlink ip addresses +// include a mask, so it stores the address as a net.IPNet. +type Addr struct { + *net.IPNet + Label string +} + +// String returns $ip/$netmask $label +func (a Addr) String() string { + return fmt.Sprintf("%s %s", a.IPNet, a.Label) +} + +// ParseAddr parses the string representation of an address in the +// form $ip/$netmask $label. The label portion is optional +func ParseAddr(s string) (*Addr, error) { + label := "" + parts := strings.Split(s, " ") + if len(parts) > 1 { + s = parts[0] + label = parts[1] + } + m, err := ParseIPNet(s) + if err != nil { + return nil, err + } + return &Addr{IPNet: m, Label: label}, nil +} + +// Equal returns true if both Addrs have the same net.IPNet value. +func (a Addr) Equal(x Addr) bool { + sizea, _ := a.Mask.Size() + sizeb, _ := x.Mask.Size() + // ignore label for comparison + return a.IP.Equal(x.IP) && sizea == sizeb +} diff --git a/vendor/github.com/vishvananda/netlink/addr_linux.go b/vendor/github.com/vishvananda/netlink/addr_linux.go new file mode 100644 index 0000000..19aac0f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -0,0 +1,128 @@ +package netlink + +import ( + "fmt" + "net" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// AddrAdd will add an IP address to a link device. 
+// Equivalent to: `ip addr add $addr dev $link` +func AddrAdd(link Link, addr *Addr) error { + + req := nl.NewNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + return addrHandle(link, addr, req) +} + +// AddrDel will delete an IP address from a link device. +// Equivalent to: `ip addr del $addr dev $link` +func AddrDel(link Link, addr *Addr) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELADDR, syscall.NLM_F_ACK) + return addrHandle(link, addr, req) +} + +func addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error { + base := link.Attrs() + if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) { + return fmt.Errorf("label must begin with interface name") + } + ensureIndex(base) + + family := nl.GetIPFamily(addr.IP) + + msg := nl.NewIfAddrmsg(family) + msg.Index = uint32(base.Index) + prefixlen, _ := addr.Mask.Size() + msg.Prefixlen = uint8(prefixlen) + req.AddData(msg) + + var addrData []byte + if family == FAMILY_V4 { + addrData = addr.IP.To4() + } else { + addrData = addr.IP.To16() + } + + localData := nl.NewRtAttr(syscall.IFA_LOCAL, addrData) + req.AddData(localData) + + addressData := nl.NewRtAttr(syscall.IFA_ADDRESS, addrData) + req.AddData(addressData) + + if addr.Label != "" { + labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label)) + req.AddData(labelData) + } + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// AddrList gets a list of IP addresses in the system. +// Equivalent to: `ip addr show`. +// The list can be filtered by link and ip family. 
+func AddrList(link Link, family int) ([]Addr, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETADDR, syscall.NLM_F_DUMP) + msg := nl.NewIfInfomsg(family) + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWADDR) + if err != nil { + return nil, err + } + + index := 0 + if link != nil { + base := link.Attrs() + ensureIndex(base) + index = base.Index + } + + var res []Addr + for _, m := range msgs { + msg := nl.DeserializeIfAddrmsg(m) + + if link != nil && msg.Index != uint32(index) { + // Ignore messages from other interfaces + continue + } + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + var local, dst *net.IPNet + var addr Addr + for _, attr := range attrs { + switch attr.Attr.Type { + case syscall.IFA_ADDRESS: + dst = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)), + } + case syscall.IFA_LOCAL: + local = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)), + } + case syscall.IFA_LABEL: + addr.Label = string(attr.Value[:len(attr.Value)-1]) + } + } + + // IFA_LOCAL should be there but if not, fall back to IFA_ADDRESS + if local != nil { + addr.IPNet = local + } else { + addr.IPNet = dst + } + + res = append(res, addr) + } + + return res, nil +} diff --git a/vendor/github.com/vishvananda/netlink/filter.go b/vendor/github.com/vishvananda/netlink/filter.go new file mode 100644 index 0000000..83ad700 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/filter.go @@ -0,0 +1,55 @@ +package netlink + +import ( + "fmt" +) + +type Filter interface { + Attrs() *FilterAttrs + Type() string +} + +// Filter represents a netlink filter. A filter is associated with a link, +// has a handle and a parent. The root filter of a device should have a +// parent == HANDLE_ROOT. 
+type FilterAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Priority uint16 // lower is higher priority + Protocol uint16 // syscall.ETH_P_* +} + +func (q FilterAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol) +} + +// U32 filters on many packet related properties +type U32 struct { + FilterAttrs + // Currently only supports redirecting to another interface + RedirIndex int +} + +func (filter *U32) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *U32) Type() string { + return "u32" +} + +// GenericFilter filters represent types that are not currently understood +// by this netlink library. +type GenericFilter struct { + FilterAttrs + FilterType string +} + +func (filter *GenericFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *GenericFilter) Type() string { + return filter.FilterType +} diff --git a/vendor/github.com/vishvananda/netlink/filter_linux.go b/vendor/github.com/vishvananda/netlink/filter_linux.go new file mode 100644 index 0000000..1ec6987 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -0,0 +1,191 @@ +package netlink + +import ( + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// FilterDel will delete a filter from the system. +// Equivalent to: `tc filter del $filter` +func FilterDel(filter Filter) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// FilterAdd will add a filter to the system. 
+// Equivalent to: `tc filter add $filter` +func FilterAdd(filter Filter) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if u32, ok := filter.(*U32); ok { + // match all + sel := nl.TcU32Sel{ + Nkeys: 1, + Flags: nl.TC_U32_TERMINAL, + } + sel.Keys = append(sel.Keys, nl.TcU32Key{}) + nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize()) + actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil) + table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil) + nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred")) + // redirect to other interface + mir := nl.TcMirred{ + Action: nl.TC_ACT_STOLEN, + Eaction: nl.TCA_EGRESS_REDIR, + Ifindex: uint32(u32.RedirIndex), + } + aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil) + nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize()) + } + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// FilterList gets a list of filters in the system. +// Equivalent to: `tc filter show`. +// Generally returns nothing if link and parent are not specified.
+func FilterList(link Link, parent uint32) ([]Filter, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP) + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Parent: parent, + } + if link != nil { + base := link.Attrs() + ensureIndex(base) + msg.Ifindex = int32(base.Index) + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER) + if err != nil { + return nil, err + } + + var res []Filter + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := FilterAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + } + base.Priority, base.Protocol = MajorMinor(msg.Info) + base.Protocol = nl.Swap16(base.Protocol) + + var filter Filter + filterType := "" + detailed := false + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + filterType = string(attr.Value[:len(attr.Value)-1]) + switch filterType { + case "u32": + filter = &U32{} + default: + filter = &GenericFilter{FilterType: filterType} + } + case nl.TCA_OPTIONS: + switch filterType { + case "u32": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + detailed, err = parseU32Data(filter, data) + if err != nil { + return nil, err + } + } + } + } + // only return the detailed version of the filter + if detailed { + *filter.Attrs() = base + res = append(res, filter) + } + } + + return res, nil +} + +func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + native = nl.NativeEndian() + u32 := filter.(*U32) + detailed := false + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_U32_SEL: + detailed = true + sel := nl.DeserializeTcU32Sel(datum.Value) + // only parse if we have a very basic redirect + if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 { + return detailed, nil + } + case nl.TCA_U32_ACT: + table, err := 
nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB { + return detailed, fmt.Errorf("Action table not formed properly") + } + aattrs, err := nl.ParseRouteAttr(table[0].Value) + for _, aattr := range aattrs { + switch aattr.Attr.Type { + case nl.TCA_KIND: + actionType := string(aattr.Value[:len(aattr.Value)-1]) + // only parse if the action is mirred + if actionType != "mirred" { + return detailed, nil + } + case nl.TCA_OPTIONS: + adata, err := nl.ParseRouteAttr(aattr.Value) + if err != nil { + return detailed, err + } + for _, adatum := range adata { + switch adatum.Attr.Type { + case nl.TCA_MIRRED_PARMS: + mir := nl.DeserializeTcMirred(adatum.Value) + u32.RedirIndex = int(mir.Ifindex) + } + } + } + } + } + } + return detailed, nil +} diff --git a/vendor/github.com/vishvananda/netlink/link.go b/vendor/github.com/vishvananda/netlink/link.go new file mode 100644 index 0000000..18fd175 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/link.go @@ -0,0 +1,223 @@ +package netlink + +import "net" + +// Link represents a link device from netlink. Shared link attributes +// like name may be retrieved using the Attrs() method. Unique data +// can be retrieved by casting the object to the proper type. +type Link interface { + Attrs() *LinkAttrs + Type() string +} + +type ( + NsPid int + NsFd int +) + +// LinkAttrs represents data shared by most link types +type LinkAttrs struct { + Index int + MTU int + TxQLen int // Transmit Queue Length + Name string + HardwareAddr net.HardwareAddr + Flags net.Flags + ParentIndex int // index of the parent link device + MasterIndex int // must be the index of a bridge + Namespace interface{} // nil | NsPid | NsFd +} + +// NewLinkAttrs returns LinkAttrs structure filled with default values +func NewLinkAttrs() LinkAttrs { + return LinkAttrs{ + TxQLen: -1, + } +} + +// Device links cannot be created via netlink. 
These links +// are links created by udev like 'lo' and 'eth0' +type Device struct { + LinkAttrs +} + +func (device *Device) Attrs() *LinkAttrs { + return &device.LinkAttrs +} + +func (device *Device) Type() string { + return "device" +} + +// Dummy links are dummy ethernet devices +type Dummy struct { + LinkAttrs +} + +func (dummy *Dummy) Attrs() *LinkAttrs { + return &dummy.LinkAttrs +} + +func (dummy *Dummy) Type() string { + return "dummy" +} + +// Ifb links are advanced dummy devices for packet filtering +type Ifb struct { + LinkAttrs +} + +func (ifb *Ifb) Attrs() *LinkAttrs { + return &ifb.LinkAttrs +} + +func (ifb *Ifb) Type() string { + return "ifb" +} + +// Bridge links are simple linux bridges +type Bridge struct { + LinkAttrs +} + +func (bridge *Bridge) Attrs() *LinkAttrs { + return &bridge.LinkAttrs +} + +func (bridge *Bridge) Type() string { + return "bridge" +} + +// Vlan links have ParentIndex set in their Attrs() +type Vlan struct { + LinkAttrs + VlanId int +} + +func (vlan *Vlan) Attrs() *LinkAttrs { + return &vlan.LinkAttrs +} + +func (vlan *Vlan) Type() string { + return "vlan" +} + +type MacvlanMode uint16 + +const ( + MACVLAN_MODE_DEFAULT MacvlanMode = iota + MACVLAN_MODE_PRIVATE + MACVLAN_MODE_VEPA + MACVLAN_MODE_BRIDGE + MACVLAN_MODE_PASSTHRU + MACVLAN_MODE_SOURCE +) + +// Macvlan links have ParentIndex set in their Attrs() +type Macvlan struct { + LinkAttrs + Mode MacvlanMode +} + +func (macvlan *Macvlan) Attrs() *LinkAttrs { + return &macvlan.LinkAttrs +} + +func (macvlan *Macvlan) Type() string { + return "macvlan" +} + +// Macvtap - macvtap is a virtual interface based on macvlan +type Macvtap struct { + Macvlan +} + +func (macvtap Macvtap) Type() string { + return "macvtap" +} + +// Veth devices must specify PeerName on create +type Veth struct { + LinkAttrs + PeerName string // veth on create only +} + +func (veth *Veth) Attrs() *LinkAttrs { + return &veth.LinkAttrs +} + +func (veth *Veth) Type() string { + return "veth" +} + +//
GenericLink links represent types that are not currently understood +// by this netlink library. +type GenericLink struct { + LinkAttrs + LinkType string +} + +func (generic *GenericLink) Attrs() *LinkAttrs { + return &generic.LinkAttrs +} + +func (generic *GenericLink) Type() string { + return generic.LinkType +} + +type Vxlan struct { + LinkAttrs + VxlanId int + VtepDevIndex int + SrcAddr net.IP + Group net.IP + TTL int + TOS int + Learning bool + Proxy bool + RSC bool + L2miss bool + L3miss bool + NoAge bool + GBP bool + Age int + Limit int + Port int + PortLow int + PortHigh int +} + +func (vxlan *Vxlan) Attrs() *LinkAttrs { + return &vxlan.LinkAttrs +} + +func (vxlan *Vxlan) Type() string { + return "vxlan" +} + +type IPVlanMode uint16 + +const ( + IPVLAN_MODE_L2 IPVlanMode = iota + IPVLAN_MODE_L3 + IPVLAN_MODE_MAX +) + +type IPVlan struct { + LinkAttrs + Mode IPVlanMode +} + +func (ipvlan *IPVlan) Attrs() *LinkAttrs { + return &ipvlan.LinkAttrs +} + +func (ipvlan *IPVlan) Type() string { + return "ipvlan" +} + +// iproute2 supported devices; +// vlan | veth | vcan | dummy | ifb | macvlan | macvtap | +// bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan | +// gre | gretap | ip6gre | ip6gretap | vti | nlmon | +// bond_slave | ipvlan diff --git a/vendor/github.com/vishvananda/netlink/link_linux.go b/vendor/github.com/vishvananda/netlink/link_linux.go new file mode 100644 index 0000000..6851150 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/link_linux.go @@ -0,0 +1,750 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "net" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +var native = nl.NativeEndian() +var lookupByDump = false + +var macvlanModes = [...]uint32{ + 0, + nl.MACVLAN_MODE_PRIVATE, + nl.MACVLAN_MODE_VEPA, + nl.MACVLAN_MODE_BRIDGE, + nl.MACVLAN_MODE_PASSTHRU, + nl.MACVLAN_MODE_SOURCE, +} + +func ensureIndex(link *LinkAttrs) { + if link != nil && link.Index == 0 { + newlink, _ := LinkByName(link.Name) + if 
newlink != nil { + link.Index = newlink.Attrs().Index + } + } +} + +// LinkSetUp enables the link device. +// Equivalent to: `ip link set $link up` +func LinkSetUp(link Link) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Change = syscall.IFF_UP + msg.Flags = syscall.IFF_UP + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// LinkSetDown disables link device. +// Equivalent to: `ip link set $link down` +func LinkSetDown(link Link) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Change = syscall.IFF_UP + msg.Flags = 0 & ^syscall.IFF_UP + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// LinkSetMTU sets the mtu of the link device. +// Equivalent to: `ip link set $link mtu $mtu` +func LinkSetMTU(link Link, mtu int) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(mtu)) + + data := nl.NewRtAttr(syscall.IFLA_MTU, b) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// LinkSetName sets the name of the link device. 
+// Equivalent to: `ip link set $link name $name` +func LinkSetName(link Link, name string) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(syscall.IFLA_IFNAME, []byte(name)) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// LinkSetHardwareAddr sets the hardware address of the link device. +// Equivalent to: `ip link set $link address $hwaddr` +func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(syscall.IFLA_ADDRESS, []byte(hwaddr)) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// LinkSetMaster sets the master of the link device. +// Equivalent to: `ip link set $link master $master` +func LinkSetMaster(link Link, master *Bridge) error { + index := 0 + if master != nil { + masterBase := master.Attrs() + ensureIndex(masterBase) + index = masterBase.Index + } + return LinkSetMasterByIndex(link, index) +} + +// LinkSetMasterByIndex sets the master of the link device. 
+// Equivalent to: `ip link set $link master $master` +func LinkSetMasterByIndex(link Link, masterIndex int) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(masterIndex)) + + data := nl.NewRtAttr(syscall.IFLA_MASTER, b) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// LinkSetNsPid puts the device into a new network namespace. The +// pid must be a pid of a running process. +// Equivalent to: `ip link set $link netns $pid` +func LinkSetNsPid(link Link, nspid int) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(nspid)) + + data := nl.NewRtAttr(syscall.IFLA_NET_NS_PID, b) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// LinkSetNsFd puts the device into a new network namespace. The +// fd must be an open file descriptor to a network namespace. 
+// Similar to: `ip link set $link netns $ns` +func LinkSetNsFd(link Link, fd int) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(fd)) + + data := nl.NewRtAttr(nl.IFLA_NET_NS_FD, b) + req.AddData(data) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +func boolAttr(val bool) []byte { + var v uint8 + if val { + v = 1 + } + return nl.Uint8Attr(v) +} + +type vxlanPortRange struct { + Lo, Hi uint16 +} + +func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) { + data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_ID, nl.Uint32Attr(uint32(vxlan.VxlanId))) + if vxlan.VtepDevIndex != 0 { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LINK, nl.Uint32Attr(uint32(vxlan.VtepDevIndex))) + } + if vxlan.SrcAddr != nil { + ip := vxlan.SrcAddr.To4() + if ip != nil { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LOCAL, []byte(ip)) + } else { + ip = vxlan.SrcAddr.To16() + if ip != nil { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LOCAL6, []byte(ip)) + } + } + } + if vxlan.Group != nil { + group := vxlan.Group.To4() + if group != nil { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GROUP, []byte(group)) + } else { + group = vxlan.Group.To16() + if group != nil { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GROUP6, []byte(group)) + } + } + } + + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_TTL, nl.Uint8Attr(uint8(vxlan.TTL))) + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_TOS, nl.Uint8Attr(uint8(vxlan.TOS))) + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LEARNING, boolAttr(vxlan.Learning)) + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PROXY, boolAttr(vxlan.Proxy)) + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_RSC, boolAttr(vxlan.RSC)) + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss)) + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L3MISS, 
boolAttr(vxlan.L3miss)) + + if vxlan.GBP { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, boolAttr(vxlan.GBP)) + } + + if vxlan.NoAge { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0)) + } else if vxlan.Age > 0 { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(uint32(vxlan.Age))) + } + if vxlan.Limit > 0 { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LIMIT, nl.Uint32Attr(uint32(vxlan.Limit))) + } + if vxlan.Port > 0 { + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PORT, nl.Uint16Attr(uint16(vxlan.Port))) + } + if vxlan.PortLow > 0 || vxlan.PortHigh > 0 { + pr := vxlanPortRange{uint16(vxlan.PortLow), uint16(vxlan.PortHigh)} + + buf := new(bytes.Buffer) + binary.Write(buf, binary.BigEndian, &pr) + + nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PORT_RANGE, buf.Bytes()) + } +} + +// LinkAdd adds a new link device. The type and features of the device +// are taken fromt the parameters in the link object. +// Equivalent to: `ip link add $link` +func LinkAdd(link Link) error { + // TODO: set mtu and hardware address + // TODO: support extra data for macvlan + base := link.Attrs() + + if base.Name == "" { + return fmt.Errorf("LinkAttrs.Name cannot be empty!") + } + + req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + req.AddData(msg) + + if base.ParentIndex != 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(base.ParentIndex)) + data := nl.NewRtAttr(syscall.IFLA_LINK, b) + req.AddData(data) + } else if link.Type() == "ipvlan" { + return fmt.Errorf("Can't create ipvlan link without ParentIndex") + } + + nameData := nl.NewRtAttr(syscall.IFLA_IFNAME, nl.ZeroTerminated(base.Name)) + req.AddData(nameData) + + if base.MTU > 0 { + mtu := nl.NewRtAttr(syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) + req.AddData(mtu) + } + + if base.TxQLen >= 0 { + qlen := nl.NewRtAttr(syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + req.AddData(qlen) + } + + if 
base.Namespace != nil { + var attr *nl.RtAttr + switch base.Namespace.(type) { + case NsPid: + val := nl.Uint32Attr(uint32(base.Namespace.(NsPid))) + attr = nl.NewRtAttr(syscall.IFLA_NET_NS_PID, val) + case NsFd: + val := nl.Uint32Attr(uint32(base.Namespace.(NsFd))) + attr = nl.NewRtAttr(nl.IFLA_NET_NS_FD, val) + } + + req.AddData(attr) + } + + linkInfo := nl.NewRtAttr(syscall.IFLA_LINKINFO, nil) + nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + + if vlan, ok := link.(*Vlan); ok { + b := make([]byte, 2) + native.PutUint16(b, uint16(vlan.VlanId)) + data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + nl.NewRtAttrChild(data, nl.IFLA_VLAN_ID, b) + } else if veth, ok := link.(*Veth); ok { + data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + peer := nl.NewRtAttrChild(data, nl.VETH_INFO_PEER, nil) + nl.NewIfInfomsgChild(peer, syscall.AF_UNSPEC) + nl.NewRtAttrChild(peer, syscall.IFLA_IFNAME, nl.ZeroTerminated(veth.PeerName)) + if base.TxQLen >= 0 { + nl.NewRtAttrChild(peer, syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + } + if base.MTU > 0 { + nl.NewRtAttrChild(peer, syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) + } + + } else if vxlan, ok := link.(*Vxlan); ok { + addVxlanAttrs(vxlan, linkInfo) + } else if ipv, ok := link.(*IPVlan); ok { + data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + nl.NewRtAttrChild(data, nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(ipv.Mode))) + } else if macv, ok := link.(*Macvlan); ok { + if macv.Mode != MACVLAN_MODE_DEFAULT { + data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode])) + } + } + + req.AddData(linkInfo) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + if err != nil { + return err + } + + ensureIndex(base) + + // can't set master during create, so set it afterwards + if base.MasterIndex != 0 { + // TODO: verify MasterIndex is actually a bridge? 
+ return LinkSetMasterByIndex(link, base.MasterIndex) + } + return nil +} + +// LinkDel deletes link device. Either Index or Name must be set in +// the link object for it to be deleted. The other values are ignored. +// Equivalent to: `ip link del $link` +func LinkDel(link Link) error { + base := link.Attrs() + + ensureIndex(base) + + req := nl.NewNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +func linkByNameDump(name string) (Link, error) { + links, err := LinkList() + if err != nil { + return nil, err + } + + for _, link := range links { + if link.Attrs().Name == name { + return link, nil + } + } + return nil, fmt.Errorf("Link %s not found", name) +} + +// LinkByName finds a link by name and returns a pointer to the object. +func LinkByName(name string) (Link, error) { + if lookupByDump { + return linkByNameDump(name) + } + + req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + req.AddData(msg) + + nameData := nl.NewRtAttr(syscall.IFLA_IFNAME, nl.ZeroTerminated(name)) + req.AddData(nameData) + + link, err := execGetLink(req) + if err == syscall.EINVAL { + // older kernels don't support looking up via IFLA_IFNAME + // so fall back to dumping all links + lookupByDump = true + return linkByNameDump(name) + } + + return link, err +} + +// LinkByIndex finds a link by index and returns a pointer to the object. 
+func LinkByIndex(index int) (Link, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg.Index = int32(index) + req.AddData(msg) + + return execGetLink(req) +} + +func execGetLink(req *nl.NetlinkRequest) (Link, error) { + msgs, err := req.Execute(syscall.NETLINK_ROUTE, 0) + if err != nil { + if errno, ok := err.(syscall.Errno); ok { + if errno == syscall.ENODEV { + return nil, fmt.Errorf("Link not found") + } + } + return nil, err + } + + switch { + case len(msgs) == 0: + return nil, fmt.Errorf("Link not found") + + case len(msgs) == 1: + return linkDeserialize(msgs[0]) + + default: + return nil, fmt.Errorf("More than one link found") + } +} + +// linkDeserialize deserializes a raw message received from netlink into +// a link object. +func linkDeserialize(m []byte) (Link, error) { + msg := nl.DeserializeIfInfomsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := LinkAttrs{Index: int(msg.Index), Flags: linkFlags(msg.Flags)} + var link Link + linkType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case syscall.IFLA_LINKINFO: + infos, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + for _, info := range infos { + switch info.Attr.Type { + case nl.IFLA_INFO_KIND: + linkType = string(info.Value[:len(info.Value)-1]) + switch linkType { + case "dummy": + link = &Dummy{} + case "ifb": + link = &Ifb{} + case "bridge": + link = &Bridge{} + case "vlan": + link = &Vlan{} + case "veth": + link = &Veth{} + case "vxlan": + link = &Vxlan{} + case "ipvlan": + link = &IPVlan{} + case "macvlan": + link = &Macvlan{} + case "macvtap": + link = &Macvtap{} + default: + link = &GenericLink{LinkType: linkType} + } + case nl.IFLA_INFO_DATA: + data, err := nl.ParseRouteAttr(info.Value) + if err != nil { + return nil, err + } + switch linkType { + case "vlan": + parseVlanData(link, data) + case "vxlan": + 
parseVxlanData(link, data) + case "ipvlan": + parseIPVlanData(link, data) + case "macvlan": + parseMacvlanData(link, data) + case "macvtap": + parseMacvtapData(link, data) + } + } + } + case syscall.IFLA_ADDRESS: + var nonzero bool + for _, b := range attr.Value { + if b != 0 { + nonzero = true + } + } + if nonzero { + base.HardwareAddr = attr.Value[:] + } + case syscall.IFLA_IFNAME: + base.Name = string(attr.Value[:len(attr.Value)-1]) + case syscall.IFLA_MTU: + base.MTU = int(native.Uint32(attr.Value[0:4])) + case syscall.IFLA_LINK: + base.ParentIndex = int(native.Uint32(attr.Value[0:4])) + case syscall.IFLA_MASTER: + base.MasterIndex = int(native.Uint32(attr.Value[0:4])) + case syscall.IFLA_TXQLEN: + base.TxQLen = int(native.Uint32(attr.Value[0:4])) + } + } + // Links that don't have IFLA_INFO_KIND are hardware devices + if link == nil { + link = &Device{} + } + *link.Attrs() = base + + return link, nil +} + +// LinkList gets a list of link devices. +// Equivalent to: `ip link show` +func LinkList() ([]Link, error) { + // NOTE(vish): This duplicates functionality in net/iface_linux.go, but we need + // to get the message ourselves to parse link type. 
+ req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP) + + msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWLINK) + if err != nil { + return nil, err + } + + var res []Link + for _, m := range msgs { + link, err := linkDeserialize(m) + if err != nil { + return nil, err + } + res = append(res, link) + } + + return res, nil +} + +func LinkSetHairpin(link Link, mode bool) error { + return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE) +} + +func LinkSetGuard(link Link, mode bool) error { + return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_GUARD) +} + +func LinkSetFastLeave(link Link, mode bool) error { + return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_FAST_LEAVE) +} + +func LinkSetLearning(link Link, mode bool) error { + return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_LEARNING) +} + +func LinkSetRootBlock(link Link, mode bool) error { + return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROTECT) +} + +func LinkSetFlood(link Link, mode bool) error { + return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD) +} + +func setProtinfoAttr(link Link, mode bool, attr int) error { + base := link.Attrs() + ensureIndex(base) + req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) + msg.Index = int32(base.Index) + req.AddData(msg) + + br := nl.NewRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil) + nl.NewRtAttrChild(br, attr, boolToByte(mode)) + req.AddData(br) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + if err != nil { + return err + } + return nil +} + +func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { + vlan := link.(*Vlan) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_VLAN_ID: + vlan.VlanId = int(native.Uint16(datum.Value[0:2])) + } + } +} + +func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { + vxlan := link.(*Vxlan) + for _, datum := range 
data { + switch datum.Attr.Type { + case nl.IFLA_VXLAN_ID: + vxlan.VxlanId = int(native.Uint32(datum.Value[0:4])) + case nl.IFLA_VXLAN_LINK: + vxlan.VtepDevIndex = int(native.Uint32(datum.Value[0:4])) + case nl.IFLA_VXLAN_LOCAL: + vxlan.SrcAddr = net.IP(datum.Value[0:4]) + case nl.IFLA_VXLAN_LOCAL6: + vxlan.SrcAddr = net.IP(datum.Value[0:16]) + case nl.IFLA_VXLAN_GROUP: + vxlan.Group = net.IP(datum.Value[0:4]) + case nl.IFLA_VXLAN_GROUP6: + vxlan.Group = net.IP(datum.Value[0:16]) + case nl.IFLA_VXLAN_TTL: + vxlan.TTL = int(datum.Value[0]) + case nl.IFLA_VXLAN_TOS: + vxlan.TOS = int(datum.Value[0]) + case nl.IFLA_VXLAN_LEARNING: + vxlan.Learning = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_PROXY: + vxlan.Proxy = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_RSC: + vxlan.RSC = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_L2MISS: + vxlan.L2miss = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_L3MISS: + vxlan.L3miss = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_GBP: + vxlan.GBP = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_AGEING: + vxlan.Age = int(native.Uint32(datum.Value[0:4])) + vxlan.NoAge = vxlan.Age == 0 + case nl.IFLA_VXLAN_LIMIT: + vxlan.Limit = int(native.Uint32(datum.Value[0:4])) + case nl.IFLA_VXLAN_PORT: + vxlan.Port = int(native.Uint16(datum.Value[0:2])) + case nl.IFLA_VXLAN_PORT_RANGE: + buf := bytes.NewBuffer(datum.Value[0:4]) + var pr vxlanPortRange + if binary.Read(buf, binary.BigEndian, &pr) != nil { + vxlan.PortLow = int(pr.Lo) + vxlan.PortHigh = int(pr.Hi) + } + } + } +} + +func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { + ipv := link.(*IPVlan) + for _, datum := range data { + if datum.Attr.Type == nl.IFLA_IPVLAN_MODE { + ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4])) + return + } + } +} + +func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) { + macv := link.(*Macvtap) + parseMacvlanData(&macv.Macvlan, data) +} + +func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { + macv := 
link.(*Macvlan) + for _, datum := range data { + if datum.Attr.Type == nl.IFLA_MACVLAN_MODE { + switch native.Uint32(datum.Value[0:4]) { + case nl.MACVLAN_MODE_PRIVATE: + macv.Mode = MACVLAN_MODE_PRIVATE + case nl.MACVLAN_MODE_VEPA: + macv.Mode = MACVLAN_MODE_VEPA + case nl.MACVLAN_MODE_BRIDGE: + macv.Mode = MACVLAN_MODE_BRIDGE + case nl.MACVLAN_MODE_PASSTHRU: + macv.Mode = MACVLAN_MODE_PASSTHRU + case nl.MACVLAN_MODE_SOURCE: + macv.Mode = MACVLAN_MODE_SOURCE + } + return + } + } +} + +// copied from pkg/net_linux.go +func linkFlags(rawFlags uint32) net.Flags { + var f net.Flags + if rawFlags&syscall.IFF_UP != 0 { + f |= net.FlagUp + } + if rawFlags&syscall.IFF_BROADCAST != 0 { + f |= net.FlagBroadcast + } + if rawFlags&syscall.IFF_LOOPBACK != 0 { + f |= net.FlagLoopback + } + if rawFlags&syscall.IFF_POINTOPOINT != 0 { + f |= net.FlagPointToPoint + } + if rawFlags&syscall.IFF_MULTICAST != 0 { + f |= net.FlagMulticast + } + return f +} diff --git a/vendor/github.com/vishvananda/netlink/neigh.go b/vendor/github.com/vishvananda/netlink/neigh.go new file mode 100644 index 0000000..0e5eb90 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/neigh.go @@ -0,0 +1,22 @@ +package netlink + +import ( + "fmt" + "net" +) + +// Neigh represents a link layer neighbor from netlink. 
+type Neigh struct { + LinkIndex int + Family int + State int + Type int + Flags int + IP net.IP + HardwareAddr net.HardwareAddr +} + +// String returns $ip/$hwaddr $label +func (neigh *Neigh) String() string { + return fmt.Sprintf("%s %s", neigh.IP, neigh.HardwareAddr) +} diff --git a/vendor/github.com/vishvananda/netlink/neigh_linux.go b/vendor/github.com/vishvananda/netlink/neigh_linux.go new file mode 100644 index 0000000..620a0ee --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/neigh_linux.go @@ -0,0 +1,189 @@ +package netlink + +import ( + "net" + "syscall" + "unsafe" + + "github.com/vishvananda/netlink/nl" +) + +const ( + NDA_UNSPEC = iota + NDA_DST + NDA_LLADDR + NDA_CACHEINFO + NDA_PROBES + NDA_VLAN + NDA_PORT + NDA_VNI + NDA_IFINDEX + NDA_MAX = NDA_IFINDEX +) + +// Neighbor Cache Entry States. +const ( + NUD_NONE = 0x00 + NUD_INCOMPLETE = 0x01 + NUD_REACHABLE = 0x02 + NUD_STALE = 0x04 + NUD_DELAY = 0x08 + NUD_PROBE = 0x10 + NUD_FAILED = 0x20 + NUD_NOARP = 0x40 + NUD_PERMANENT = 0x80 +) + +// Neighbor Flags +const ( + NTF_USE = 0x01 + NTF_SELF = 0x02 + NTF_MASTER = 0x04 + NTF_PROXY = 0x08 + NTF_ROUTER = 0x80 +) + +type Ndmsg struct { + Family uint8 + Index uint32 + State uint16 + Flags uint8 + Type uint8 +} + +func deserializeNdmsg(b []byte) *Ndmsg { + var dummy Ndmsg + return (*Ndmsg)(unsafe.Pointer(&b[0:unsafe.Sizeof(dummy)][0])) +} + +func (msg *Ndmsg) Serialize() []byte { + return (*(*[unsafe.Sizeof(*msg)]byte)(unsafe.Pointer(msg)))[:] +} + +func (msg *Ndmsg) Len() int { + return int(unsafe.Sizeof(*msg)) +} + +// NeighAdd will add an IP to MAC mapping to the ARP table +// Equivalent to: `ip neigh add ....` +func NeighAdd(neigh *Neigh) error { + return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL) +} + +// NeighAdd will add or replace an IP to MAC mapping to the ARP table +// Equivalent to: `ip neigh replace....` +func NeighSet(neigh *Neigh) error { + return neighAdd(neigh, syscall.NLM_F_CREATE) +} + +// NeighAppend will append an 
entry to FDB +// Equivalent to: `bridge fdb append...` +func NeighAppend(neigh *Neigh) error { + return neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_APPEND) +} + +func neighAdd(neigh *Neigh, mode int) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWNEIGH, mode|syscall.NLM_F_ACK) + return neighHandle(neigh, req) +} + +// NeighDel will delete an IP address from a link device. +// Equivalent to: `ip addr del $addr dev $link` +func NeighDel(neigh *Neigh) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELNEIGH, syscall.NLM_F_ACK) + return neighHandle(neigh, req) +} + +func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { + var family int + if neigh.Family > 0 { + family = neigh.Family + } else { + family = nl.GetIPFamily(neigh.IP) + } + + msg := Ndmsg{ + Family: uint8(family), + Index: uint32(neigh.LinkIndex), + State: uint16(neigh.State), + Type: uint8(neigh.Type), + Flags: uint8(neigh.Flags), + } + req.AddData(&msg) + + ipData := neigh.IP.To4() + if ipData == nil { + ipData = neigh.IP.To16() + } + + dstData := nl.NewRtAttr(NDA_DST, ipData) + req.AddData(dstData) + + hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr)) + req.AddData(hwData) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// NeighList gets a list of IP-MAC mappings in the system (ARP table). +// Equivalent to: `ip neighbor show`. +// The list can be filtered by link and ip family. 
+func NeighList(linkIndex, family int) ([]Neigh, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETNEIGH, syscall.NLM_F_DUMP) + msg := Ndmsg{ + Family: uint8(family), + } + req.AddData(&msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWNEIGH) + if err != nil { + return nil, err + } + + var res []Neigh + for _, m := range msgs { + ndm := deserializeNdmsg(m) + if linkIndex != 0 && int(ndm.Index) != linkIndex { + // Ignore messages from other interfaces + continue + } + + neigh, err := NeighDeserialize(m) + if err != nil { + continue + } + + res = append(res, *neigh) + } + + return res, nil +} + +func NeighDeserialize(m []byte) (*Neigh, error) { + msg := deserializeNdmsg(m) + + neigh := Neigh{ + LinkIndex: int(msg.Index), + Family: int(msg.Family), + State: int(msg.State), + Type: int(msg.Type), + Flags: int(msg.Flags), + } + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case NDA_DST: + neigh.IP = net.IP(attr.Value) + case NDA_LLADDR: + neigh.HardwareAddr = net.HardwareAddr(attr.Value) + } + } + + return &neigh, nil +} diff --git a/vendor/github.com/vishvananda/netlink/netlink.go b/vendor/github.com/vishvananda/netlink/netlink.go new file mode 100644 index 0000000..41ebdb1 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netlink.go @@ -0,0 +1,39 @@ +// Package netlink provides a simple library for netlink. Netlink is +// the interface a user-space program in linux uses to communicate with +// the kernel. It can be used to add and remove interfaces, set up ip +// addresses and routes, and confiugre ipsec. Netlink communication +// requires elevated privileges, so in most cases this code needs to +// be run as root. The low level primitives for netlink are contained +// in the nl subpackage. This package attempts to provide a high-level +// interface that is loosly modeled on the iproute2 cli. 
+package netlink + +import ( + "net" + + "github.com/vishvananda/netlink/nl" +) + +const ( + // Family type definitions + FAMILY_ALL = nl.FAMILY_ALL + FAMILY_V4 = nl.FAMILY_V4 + FAMILY_V6 = nl.FAMILY_V6 +) + +// ParseIPNet parses a string in ip/net format and returns a net.IPNet. +// This is valuable because addresses in netlink are often IPNets and +// ParseCIDR returns an IPNet with the IP part set to the base IP of the +// range. +func ParseIPNet(s string) (*net.IPNet, error) { + ip, ipNet, err := net.ParseCIDR(s) + if err != nil { + return nil, err + } + return &net.IPNet{IP: ip, Mask: ipNet.Mask}, nil +} + +// NewIPNet generates an IPNet from an ip address using a netmask of 32. +func NewIPNet(ip net.IP) *net.IPNet { + return &net.IPNet{IP: ip, Mask: net.CIDRMask(32, 32)} +} diff --git a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go new file mode 100644 index 0000000..10c49c1 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go @@ -0,0 +1,143 @@ +// +build !linux + +package netlink + +import ( + "errors" +) + +var ( + ErrNotImplemented = errors.New("not implemented") +) + +func LinkSetUp(link *Link) error { + return ErrNotImplemented +} + +func LinkSetDown(link *Link) error { + return ErrNotImplemented +} + +func LinkSetMTU(link *Link, mtu int) error { + return ErrNotImplemented +} + +func LinkSetMaster(link *Link, master *Link) error { + return ErrNotImplemented +} + +func LinkSetNsPid(link *Link, nspid int) error { + return ErrNotImplemented +} + +func LinkSetNsFd(link *Link, fd int) error { + return ErrNotImplemented +} + +func LinkAdd(link *Link) error { + return ErrNotImplemented +} + +func LinkDel(link *Link) error { + return ErrNotImplemented +} + +func SetHairpin(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetGuard(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetFastLeave(link Link, mode bool) error { 
+ return ErrNotImplemented +} + +func SetLearning(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetRootBlock(link Link, mode bool) error { + return ErrNotImplemented +} + +func SetFlood(link Link, mode bool) error { + return ErrNotImplemented +} + +func LinkList() ([]Link, error) { + return nil, ErrNotImplemented +} + +func AddrAdd(link *Link, addr *Addr) error { + return ErrNotImplemented +} + +func AddrDel(link *Link, addr *Addr) error { + return ErrNotImplemented +} + +func AddrList(link *Link, family int) ([]Addr, error) { + return nil, ErrNotImplemented +} + +func RouteAdd(route *Route) error { + return ErrNotImplemented +} + +func RouteDel(route *Route) error { + return ErrNotImplemented +} + +func RouteList(link *Link, family int) ([]Route, error) { + return nil, ErrNotImplemented +} + +func XfrmPolicyAdd(policy *XfrmPolicy) error { + return ErrNotImplemented +} + +func XfrmPolicyDel(policy *XfrmPolicy) error { + return ErrNotImplemented +} + +func XfrmPolicyList(family int) ([]XfrmPolicy, error) { + return nil, ErrNotImplemented +} + +func XfrmStateAdd(policy *XfrmState) error { + return ErrNotImplemented +} + +func XfrmStateDel(policy *XfrmState) error { + return ErrNotImplemented +} + +func XfrmStateList(family int) ([]XfrmState, error) { + return nil, ErrNotImplemented +} + +func NeighAdd(neigh *Neigh) error { + return ErrNotImplemented +} + +func NeighSet(neigh *Neigh) error { + return ErrNotImplemented +} + +func NeighAppend(neigh *Neigh) error { + return ErrNotImplemented +} + +func NeighDel(neigh *Neigh) error { + return ErrNotImplemented +} + +func NeighList(linkIndex, family int) ([]Neigh, error) { + return nil, ErrNotImplemented +} + +func NeighDeserialize(m []byte) (*Ndmsg, *Neigh, error) { + return nil, nil, ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go new file mode 100644 index 0000000..17088fa --- /dev/null +++ 
b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go @@ -0,0 +1,47 @@ +package nl + +import ( + "syscall" + "unsafe" +) + +type IfAddrmsg struct { + syscall.IfAddrmsg +} + +func NewIfAddrmsg(family int) *IfAddrmsg { + return &IfAddrmsg{ + IfAddrmsg: syscall.IfAddrmsg{ + Family: uint8(family), + }, + } +} + +// struct ifaddrmsg { +// __u8 ifa_family; +// __u8 ifa_prefixlen; /* The prefix length */ +// __u8 ifa_flags; /* Flags */ +// __u8 ifa_scope; /* Address scope */ +// __u32 ifa_index; /* Link index */ +// }; + +// type IfAddrmsg struct { +// Family uint8 +// Prefixlen uint8 +// Flags uint8 +// Scope uint8 +// Index uint32 +// } +// SizeofIfAddrmsg = 0x8 + +func DeserializeIfAddrmsg(b []byte) *IfAddrmsg { + return (*IfAddrmsg)(unsafe.Pointer(&b[0:syscall.SizeofIfAddrmsg][0])) +} + +func (msg *IfAddrmsg) Serialize() []byte { + return (*(*[syscall.SizeofIfAddrmsg]byte)(unsafe.Pointer(msg)))[:] +} + +func (msg *IfAddrmsg) Len() int { + return syscall.SizeofIfAddrmsg +} diff --git a/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/vendor/github.com/vishvananda/netlink/nl/link_linux.go new file mode 100644 index 0000000..1f9ab08 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -0,0 +1,104 @@ +package nl + +const ( + DEFAULT_CHANGE = 0xFFFFFFFF +) + +const ( + IFLA_INFO_UNSPEC = iota + IFLA_INFO_KIND + IFLA_INFO_DATA + IFLA_INFO_XSTATS + IFLA_INFO_MAX = IFLA_INFO_XSTATS +) + +const ( + IFLA_VLAN_UNSPEC = iota + IFLA_VLAN_ID + IFLA_VLAN_FLAGS + IFLA_VLAN_EGRESS_QOS + IFLA_VLAN_INGRESS_QOS + IFLA_VLAN_PROTOCOL + IFLA_VLAN_MAX = IFLA_VLAN_PROTOCOL +) + +const ( + VETH_INFO_UNSPEC = iota + VETH_INFO_PEER + VETH_INFO_MAX = VETH_INFO_PEER +) + +const ( + IFLA_VXLAN_UNSPEC = iota + IFLA_VXLAN_ID + IFLA_VXLAN_GROUP + IFLA_VXLAN_LINK + IFLA_VXLAN_LOCAL + IFLA_VXLAN_TTL + IFLA_VXLAN_TOS + IFLA_VXLAN_LEARNING + IFLA_VXLAN_AGEING + IFLA_VXLAN_LIMIT + IFLA_VXLAN_PORT_RANGE + IFLA_VXLAN_PROXY + IFLA_VXLAN_RSC + IFLA_VXLAN_L2MISS + 
IFLA_VXLAN_L3MISS + IFLA_VXLAN_PORT + IFLA_VXLAN_GROUP6 + IFLA_VXLAN_LOCAL6 + IFLA_VXLAN_UDP_CSUM + IFLA_VXLAN_UDP_ZERO_CSUM6_TX + IFLA_VXLAN_UDP_ZERO_CSUM6_RX + IFLA_VXLAN_REMCSUM_TX + IFLA_VXLAN_REMCSUM_RX + IFLA_VXLAN_GBP + IFLA_VXLAN_REMCSUM_NOPARTIAL + IFLA_VXLAN_FLOWBASED + IFLA_VXLAN_MAX = IFLA_VXLAN_FLOWBASED +) + +const ( + BRIDGE_MODE_UNSPEC = iota + BRIDGE_MODE_HAIRPIN +) + +const ( + IFLA_BRPORT_UNSPEC = iota + IFLA_BRPORT_STATE + IFLA_BRPORT_PRIORITY + IFLA_BRPORT_COST + IFLA_BRPORT_MODE + IFLA_BRPORT_GUARD + IFLA_BRPORT_PROTECT + IFLA_BRPORT_FAST_LEAVE + IFLA_BRPORT_LEARNING + IFLA_BRPORT_UNICAST_FLOOD + IFLA_BRPORT_MAX = IFLA_BRPORT_UNICAST_FLOOD +) + +const ( + IFLA_IPVLAN_UNSPEC = iota + IFLA_IPVLAN_MODE + IFLA_IPVLAN_MAX = IFLA_IPVLAN_MODE +) + +const ( + // not defined in syscall + IFLA_NET_NS_FD = 28 +) + +const ( + IFLA_MACVLAN_UNSPEC = iota + IFLA_MACVLAN_MODE + IFLA_MACVLAN_FLAGS + IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS +) + +const ( + MACVLAN_MODE_PRIVATE = 1 + MACVLAN_MODE_VEPA = 2 + MACVLAN_MODE_BRIDGE = 4 + MACVLAN_MODE_PASSTHRU = 8 + MACVLAN_MODE_SOURCE = 16 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go new file mode 100644 index 0000000..8dbd92b --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -0,0 +1,418 @@ +// Package nl has low level primitives for making Netlink calls. +package nl + +import ( + "bytes" + "encoding/binary" + "fmt" + "net" + "sync/atomic" + "syscall" + "unsafe" +) + +const ( + // Family type definitions + FAMILY_ALL = syscall.AF_UNSPEC + FAMILY_V4 = syscall.AF_INET + FAMILY_V6 = syscall.AF_INET6 +) + +var nextSeqNr uint32 + +// GetIPFamily returns the family type of a net.IP. 
+func GetIPFamily(ip net.IP) int { + if len(ip) <= net.IPv4len { + return FAMILY_V4 + } + if ip.To4() != nil { + return FAMILY_V4 + } + return FAMILY_V6 +} + +var nativeEndian binary.ByteOrder + +// Get native endianness for the system +func NativeEndian() binary.ByteOrder { + if nativeEndian == nil { + var x uint32 = 0x01020304 + if *(*byte)(unsafe.Pointer(&x)) == 0x01 { + nativeEndian = binary.BigEndian + } else { + nativeEndian = binary.LittleEndian + } + } + return nativeEndian +} + +// Byte swap a 16 bit value if we aren't big endian +func Swap16(i uint16) uint16 { + if NativeEndian() == binary.BigEndian { + return i + } + return (i&0xff00)>>8 | (i&0xff)<<8 +} + +// Byte swap a 32 bit value if aren't big endian +func Swap32(i uint32) uint32 { + if NativeEndian() == binary.BigEndian { + return i + } + return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24 +} + +type NetlinkRequestData interface { + Len() int + Serialize() []byte +} + +// IfInfomsg is related to links, but it is used for list requests as well +type IfInfomsg struct { + syscall.IfInfomsg +} + +// Create an IfInfomsg with family specified +func NewIfInfomsg(family int) *IfInfomsg { + return &IfInfomsg{ + IfInfomsg: syscall.IfInfomsg{ + Family: uint8(family), + }, + } +} + +func DeserializeIfInfomsg(b []byte) *IfInfomsg { + return (*IfInfomsg)(unsafe.Pointer(&b[0:syscall.SizeofIfInfomsg][0])) +} + +func (msg *IfInfomsg) Serialize() []byte { + return (*(*[syscall.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:] +} + +func (msg *IfInfomsg) Len() int { + return syscall.SizeofIfInfomsg +} + +func rtaAlignOf(attrlen int) int { + return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1) +} + +func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { + msg := NewIfInfomsg(family) + parent.children = append(parent.children, msg) + return msg +} + +// Extend RtAttr to handle data and children +type RtAttr struct { + syscall.RtAttr + Data []byte + children 
[]NetlinkRequestData +} + +// Create a new Extended RtAttr object +func NewRtAttr(attrType int, data []byte) *RtAttr { + return &RtAttr{ + RtAttr: syscall.RtAttr{ + Type: uint16(attrType), + }, + children: []NetlinkRequestData{}, + Data: data, + } +} + +// Create a new RtAttr obj anc add it as a child of an existing object +func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr { + attr := NewRtAttr(attrType, data) + parent.children = append(parent.children, attr) + return attr +} + +func (a *RtAttr) Len() int { + if len(a.children) == 0 { + return (syscall.SizeofRtAttr + len(a.Data)) + } + + l := 0 + for _, child := range a.children { + l += rtaAlignOf(child.Len()) + } + l += syscall.SizeofRtAttr + return rtaAlignOf(l + len(a.Data)) +} + +// Serialize the RtAttr into a byte array +// This can't just unsafe.cast because it must iterate through children. +func (a *RtAttr) Serialize() []byte { + native := NativeEndian() + + length := a.Len() + buf := make([]byte, rtaAlignOf(length)) + + if a.Data != nil { + copy(buf[4:], a.Data) + } else { + next := 4 + for _, child := range a.children { + childBuf := child.Serialize() + copy(buf[next:], childBuf) + next += rtaAlignOf(len(childBuf)) + } + } + + if l := uint16(length); l != 0 { + native.PutUint16(buf[0:2], l) + } + native.PutUint16(buf[2:4], a.Type) + return buf +} + +type NetlinkRequest struct { + syscall.NlMsghdr + Data []NetlinkRequestData +} + +// Serialize the Netlink Request into a byte array +func (req *NetlinkRequest) Serialize() []byte { + length := syscall.SizeofNlMsghdr + dataBytes := make([][]byte, len(req.Data)) + for i, data := range req.Data { + dataBytes[i] = data.Serialize() + length = length + len(dataBytes[i]) + } + req.Len = uint32(length) + b := make([]byte, length) + hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:] + next := syscall.SizeofNlMsghdr + copy(b[0:next], hdr) + for _, data := range dataBytes { + for _, dataByte := range data { + b[next] = dataByte + 
next = next + 1 + } + } + return b +} + +func (req *NetlinkRequest) AddData(data NetlinkRequestData) { + if data != nil { + req.Data = append(req.Data, data) + } +} + +// Execute the request against a the given sockType. +// Returns a list of netlink messages in seriaized format, optionally filtered +// by resType. +func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) { + s, err := getNetlinkSocket(sockType) + if err != nil { + return nil, err + } + defer s.Close() + + if err := s.Send(req); err != nil { + return nil, err + } + + pid, err := s.GetPid() + if err != nil { + return nil, err + } + + var res [][]byte + +done: + for { + msgs, err := s.Receive() + if err != nil { + return nil, err + } + for _, m := range msgs { + if m.Header.Seq != req.Seq { + return nil, fmt.Errorf("Wrong Seq nr %d, expected 1", m.Header.Seq) + } + if m.Header.Pid != pid { + return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid) + } + if m.Header.Type == syscall.NLMSG_DONE { + break done + } + if m.Header.Type == syscall.NLMSG_ERROR { + native := NativeEndian() + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + break done + } + return nil, syscall.Errno(-error) + } + if resType != 0 && m.Header.Type != resType { + continue + } + res = append(res, m.Data) + if m.Header.Flags&syscall.NLM_F_MULTI == 0 { + break done + } + } + } + return res, nil +} + +// Create a new netlink request from proto and flags +// Note the Len value will be inaccurate once data is added until +// the message is serialized +func NewNetlinkRequest(proto, flags int) *NetlinkRequest { + return &NetlinkRequest{ + NlMsghdr: syscall.NlMsghdr{ + Len: uint32(syscall.SizeofNlMsghdr), + Type: uint16(proto), + Flags: syscall.NLM_F_REQUEST | uint16(flags), + Seq: atomic.AddUint32(&nextSeqNr, 1), + }, + } +} + +type NetlinkSocket struct { + fd int + lsa syscall.SockaddrNetlink +} + +func getNetlinkSocket(protocol int) (*NetlinkSocket, error) { + fd, err := 
syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, protocol) + if err != nil { + return nil, err + } + s := &NetlinkSocket{ + fd: fd, + } + s.lsa.Family = syscall.AF_NETLINK + if err := syscall.Bind(fd, &s.lsa); err != nil { + syscall.Close(fd) + return nil, err + } + + return s, nil +} + +// Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE) +// and subscribe it to multicast groups passed in variable argument list. +// Returns the netlink socket on which Receive() method can be called +// to retrieve the messages from the kernel. +func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) { + fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, protocol) + if err != nil { + return nil, err + } + s := &NetlinkSocket{ + fd: fd, + } + s.lsa.Family = syscall.AF_NETLINK + + for _, g := range groups { + s.lsa.Groups |= (1 << (g - 1)) + } + + if err := syscall.Bind(fd, &s.lsa); err != nil { + syscall.Close(fd) + return nil, err + } + + return s, nil +} + +func (s *NetlinkSocket) Close() { + syscall.Close(s.fd) +} + +func (s *NetlinkSocket) Send(request *NetlinkRequest) error { + if err := syscall.Sendto(s.fd, request.Serialize(), 0, &s.lsa); err != nil { + return err + } + return nil +} + +func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) { + rb := make([]byte, syscall.Getpagesize()) + nr, _, err := syscall.Recvfrom(s.fd, rb, 0) + if err != nil { + return nil, err + } + if nr < syscall.NLMSG_HDRLEN { + return nil, fmt.Errorf("Got short response from netlink") + } + rb = rb[:nr] + return syscall.ParseNetlinkMessage(rb) +} + +func (s *NetlinkSocket) GetPid() (uint32, error) { + lsa, err := syscall.Getsockname(s.fd) + if err != nil { + return 0, err + } + switch v := lsa.(type) { + case *syscall.SockaddrNetlink: + return v.Pid, nil + } + return 0, fmt.Errorf("Wrong socket type") +} + +func ZeroTerminated(s string) []byte { + bytes := make([]byte, len(s)+1) + for i := 0; i < len(s); i++ { + bytes[i] = s[i] + } + 
bytes[len(s)] = 0 + return bytes +} + +func NonZeroTerminated(s string) []byte { + bytes := make([]byte, len(s)) + for i := 0; i < len(s); i++ { + bytes[i] = s[i] + } + return bytes +} + +func BytesToString(b []byte) string { + n := bytes.Index(b, []byte{0}) + return string(b[:n]) +} + +func Uint8Attr(v uint8) []byte { + return []byte{byte(v)} +} + +func Uint16Attr(v uint16) []byte { + native := NativeEndian() + bytes := make([]byte, 2) + native.PutUint16(bytes, v) + return bytes +} + +func Uint32Attr(v uint32) []byte { + native := NativeEndian() + bytes := make([]byte, 4) + native.PutUint32(bytes, v) + return bytes +} + +func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { + var attrs []syscall.NetlinkRouteAttr + for len(b) >= syscall.SizeofRtAttr { + a, vbuf, alen, err := netlinkRouteAttrAndValue(b) + if err != nil { + return nil, err + } + ra := syscall.NetlinkRouteAttr{Attr: *a, Value: vbuf[:int(a.Len)-syscall.SizeofRtAttr]} + attrs = append(attrs, ra) + b = b[alen:] + } + return attrs, nil +} + +func netlinkRouteAttrAndValue(b []byte) (*syscall.RtAttr, []byte, int, error) { + a := (*syscall.RtAttr)(unsafe.Pointer(&b[0])) + if int(a.Len) < syscall.SizeofRtAttr || int(a.Len) > len(b) { + return nil, nil, 0, syscall.EINVAL + } + return a, b[syscall.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil +} diff --git a/vendor/github.com/vishvananda/netlink/nl/route_linux.go b/vendor/github.com/vishvananda/netlink/nl/route_linux.go new file mode 100644 index 0000000..447e83e --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/route_linux.go @@ -0,0 +1,42 @@ +package nl + +import ( + "syscall" + "unsafe" +) + +type RtMsg struct { + syscall.RtMsg +} + +func NewRtMsg() *RtMsg { + return &RtMsg{ + RtMsg: syscall.RtMsg{ + Table: syscall.RT_TABLE_MAIN, + Scope: syscall.RT_SCOPE_UNIVERSE, + Protocol: syscall.RTPROT_BOOT, + Type: syscall.RTN_UNICAST, + }, + } +} + +func NewRtDelMsg() *RtMsg { + return &RtMsg{ + RtMsg: syscall.RtMsg{ + Table: 
syscall.RT_TABLE_MAIN, + Scope: syscall.RT_SCOPE_NOWHERE, + }, + } +} + +func (msg *RtMsg) Len() int { + return syscall.SizeofRtMsg +} + +func DeserializeRtMsg(b []byte) *RtMsg { + return (*RtMsg)(unsafe.Pointer(&b[0:syscall.SizeofRtMsg][0])) +} + +func (msg *RtMsg) Serialize() []byte { + return (*(*[syscall.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go new file mode 100644 index 0000000..c9bfe8d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go @@ -0,0 +1,359 @@ +package nl + +import ( + "unsafe" +) + +// Message types +const ( + TCA_UNSPEC = iota + TCA_KIND + TCA_OPTIONS + TCA_STATS + TCA_XSTATS + TCA_RATE + TCA_FCNT + TCA_STATS2 + TCA_STAB + TCA_MAX = TCA_STAB +) + +const ( + TCA_ACT_TAB = 1 + TCAA_MAX = 1 +) + +const ( + TCA_PRIO_UNSPEC = iota + TCA_PRIO_MQ + TCA_PRIO_MAX = TCA_PRIO_MQ +) + +const ( + SizeofTcMsg = 0x14 + SizeofTcActionMsg = 0x04 + SizeofTcPrioMap = 0x14 + SizeofTcRateSpec = 0x0c + SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c + SizeofTcU32Key = 0x10 + SizeofTcU32Sel = 0x10 // without keys + SizeofTcMirred = 0x1c +) + +// struct tcmsg { +// unsigned char tcm_family; +// unsigned char tcm__pad1; +// unsigned short tcm__pad2; +// int tcm_ifindex; +// __u32 tcm_handle; +// __u32 tcm_parent; +// __u32 tcm_info; +// }; + +type TcMsg struct { + Family uint8 + Pad [3]byte + Ifindex int32 + Handle uint32 + Parent uint32 + Info uint32 +} + +func (msg *TcMsg) Len() int { + return SizeofTcMsg +} + +func DeserializeTcMsg(b []byte) *TcMsg { + return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0])) +} + +func (x *TcMsg) Serialize() []byte { + return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:] +} + +// struct tcamsg { +// unsigned char tca_family; +// unsigned char tca__pad1; +// unsigned short tca__pad2; +// }; + +type TcActionMsg struct { + Family uint8 + Pad [3]byte +} + +func (msg *TcActionMsg) Len() int { + return 
SizeofTcActionMsg +} + +func DeserializeTcActionMsg(b []byte) *TcActionMsg { + return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0])) +} + +func (x *TcActionMsg) Serialize() []byte { + return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TC_PRIO_MAX = 15 +) + +// struct tc_prio_qopt { +// int bands; /* Number of bands */ +// __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +// }; + +type TcPrioMap struct { + Bands int32 + Priomap [TC_PRIO_MAX + 1]uint8 +} + +func (msg *TcPrioMap) Len() int { + return SizeofTcPrioMap +} + +func DeserializeTcPrioMap(b []byte) *TcPrioMap { + return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0])) +} + +func (x *TcPrioMap) Serialize() []byte { + return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_TBF_UNSPEC = iota + TCA_TBF_PARMS + TCA_TBF_RTAB + TCA_TBF_PTAB + TCA_TBF_RATE64 + TCA_TBF_PRATE64 + TCA_TBF_BURST + TCA_TBF_PBURST + TCA_TBF_MAX = TCA_TBF_PBURST +) + +// struct tc_ratespec { +// unsigned char cell_log; +// __u8 linklayer; /* lower 4 bits */ +// unsigned short overhead; +// short cell_align; +// unsigned short mpu; +// __u32 rate; +// }; + +type TcRateSpec struct { + CellLog uint8 + Linklayer uint8 + Overhead uint16 + CellAlign int16 + Mpu uint16 + Rate uint32 +} + +func (msg *TcRateSpec) Len() int { + return SizeofTcRateSpec +} + +func DeserializeTcRateSpec(b []byte) *TcRateSpec { + return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0])) +} + +func (x *TcRateSpec) Serialize() []byte { + return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_tbf_qopt { +// struct tc_ratespec rate; +// struct tc_ratespec peakrate; +// __u32 limit; +// __u32 buffer; +// __u32 mtu; +// }; + +type TcTbfQopt struct { + Rate TcRateSpec + Peakrate TcRateSpec + Limit uint32 + Buffer uint32 + Mtu uint32 +} + +func (msg *TcTbfQopt) Len() int { + return SizeofTcTbfQopt +} + +func DeserializeTcTbfQopt(b []byte) *TcTbfQopt { + return 
(*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0])) +} + +func (x *TcTbfQopt) Serialize() []byte { + return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_U32_UNSPEC = iota + TCA_U32_CLASSID + TCA_U32_HASH + TCA_U32_LINK + TCA_U32_DIVISOR + TCA_U32_SEL + TCA_U32_POLICE + TCA_U32_ACT + TCA_U32_INDEV + TCA_U32_PCNT + TCA_U32_MARK + TCA_U32_MAX = TCA_U32_MARK +) + +// struct tc_u32_key { +// __be32 mask; +// __be32 val; +// int off; +// int offmask; +// }; + +type TcU32Key struct { + Mask uint32 // big endian + Val uint32 // big endian + Off int32 + OffMask int32 +} + +func (msg *TcU32Key) Len() int { + return SizeofTcU32Key +} + +func DeserializeTcU32Key(b []byte) *TcU32Key { + return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0])) +} + +func (x *TcU32Key) Serialize() []byte { + return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_u32_sel { +// unsigned char flags; +// unsigned char offshift; +// unsigned char nkeys; +// +// __be16 offmask; +// __u16 off; +// short offoff; +// +// short hoff; +// __be32 hmask; +// struct tc_u32_key keys[0]; +// }; + +const ( + TC_U32_TERMINAL = 1 << iota + TC_U32_OFFSET = 1 << iota + TC_U32_VAROFFSET = 1 << iota + TC_U32_EAT = 1 << iota +) + +type TcU32Sel struct { + Flags uint8 + Offshift uint8 + Nkeys uint8 + Pad uint8 + Offmask uint16 // big endian + Off uint16 + Offoff int16 + Hoff int16 + Hmask uint32 // big endian + Keys []TcU32Key +} + +func (msg *TcU32Sel) Len() int { + return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key +} + +func DeserializeTcU32Sel(b []byte) *TcU32Sel { + x := &TcU32Sel{} + copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b) + next := SizeofTcU32Sel + var i uint8 + for i = 0; i < x.Nkeys; i++ { + x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:])) + next += SizeofTcU32Key + } + return x +} + +func (x *TcU32Sel) Serialize() []byte { + // This can't just unsafe.cast because it must iterate through keys. 
+ buf := make([]byte, x.Len()) + copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:]) + next := SizeofTcU32Sel + for _, key := range x.Keys { + keyBuf := key.Serialize() + copy(buf[next:], keyBuf) + next += SizeofTcU32Key + } + return buf +} + +const ( + TCA_ACT_MIRRED = 8 +) + +const ( + TCA_MIRRED_UNSPEC = iota + TCA_MIRRED_TM + TCA_MIRRED_PARMS + TCA_MIRRED_MAX = TCA_MIRRED_PARMS +) + +const ( + TCA_EGRESS_REDIR = 1 /* packet redirect to EGRESS*/ + TCA_EGRESS_MIRROR = 2 /* mirror packet to EGRESS */ + TCA_INGRESS_REDIR = 3 /* packet redirect to INGRESS*/ + TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */ +) + +const ( + TC_ACT_UNSPEC = int32(-1) + TC_ACT_OK = 0 + TC_ACT_RECLASSIFY = 1 + TC_ACT_SHOT = 2 + TC_ACT_PIPE = 3 + TC_ACT_STOLEN = 4 + TC_ACT_QUEUED = 5 + TC_ACT_REPEAT = 6 + TC_ACT_JUMP = 0x10000000 +) + +// #define tc_gen \ +// __u32 index; \ +// __u32 capab; \ +// int action; \ +// int refcnt; \ +// int bindcnt +// struct tc_mirred { +// tc_gen; +// int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ +// __u32 ifindex; /* ifindex of egress port */ +// }; + +type TcMirred struct { + Index uint32 + Capab uint32 + Action int32 + Refcnt int32 + Bindcnt int32 + Eaction int32 + Ifindex uint32 +} + +func (msg *TcMirred) Len() int { + return SizeofTcMirred +} + +func DeserializeTcMirred(b []byte) *TcMirred { + return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0])) +} + +func (x *TcMirred) Serialize() []byte { + return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go new file mode 100644 index 0000000..d24637d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go @@ -0,0 +1,258 @@ +package nl + +import ( + "bytes" + "net" + "unsafe" +) + +// Infinity for packet and byte counts +const ( + XFRM_INF = ^uint64(0) +) + +// Message Types +const ( + XFRM_MSG_BASE = 0x10 + XFRM_MSG_NEWSA = 0x10 + XFRM_MSG_DELSA 
= 0x11 + XFRM_MSG_GETSA = 0x12 + XFRM_MSG_NEWPOLICY = 0x13 + XFRM_MSG_DELPOLICY = 0x14 + XFRM_MSG_GETPOLICY = 0x15 + XFRM_MSG_ALLOCSPI = 0x16 + XFRM_MSG_ACQUIRE = 0x17 + XFRM_MSG_EXPIRE = 0x18 + XFRM_MSG_UPDPOLICY = 0x19 + XFRM_MSG_UPDSA = 0x1a + XFRM_MSG_POLEXPIRE = 0x1b + XFRM_MSG_FLUSHSA = 0x1c + XFRM_MSG_FLUSHPOLICY = 0x1d + XFRM_MSG_NEWAE = 0x1e + XFRM_MSG_GETAE = 0x1f + XFRM_MSG_REPORT = 0x20 + XFRM_MSG_MIGRATE = 0x21 + XFRM_MSG_NEWSADINFO = 0x22 + XFRM_MSG_GETSADINFO = 0x23 + XFRM_MSG_NEWSPDINFO = 0x24 + XFRM_MSG_GETSPDINFO = 0x25 + XFRM_MSG_MAPPING = 0x26 + XFRM_MSG_MAX = 0x26 + XFRM_NR_MSGTYPES = 0x17 +) + +// Attribute types +const ( + /* Netlink message attributes. */ + XFRMA_UNSPEC = 0x00 + XFRMA_ALG_AUTH = 0x01 /* struct xfrm_algo */ + XFRMA_ALG_CRYPT = 0x02 /* struct xfrm_algo */ + XFRMA_ALG_COMP = 0x03 /* struct xfrm_algo */ + XFRMA_ENCAP = 0x04 /* struct xfrm_algo + struct xfrm_encap_tmpl */ + XFRMA_TMPL = 0x05 /* 1 or more struct xfrm_user_tmpl */ + XFRMA_SA = 0x06 /* struct xfrm_usersa_info */ + XFRMA_POLICY = 0x07 /* struct xfrm_userpolicy_info */ + XFRMA_SEC_CTX = 0x08 /* struct xfrm_sec_ctx */ + XFRMA_LTIME_VAL = 0x09 + XFRMA_REPLAY_VAL = 0x0a + XFRMA_REPLAY_THRESH = 0x0b + XFRMA_ETIMER_THRESH = 0x0c + XFRMA_SRCADDR = 0x0d /* xfrm_address_t */ + XFRMA_COADDR = 0x0e /* xfrm_address_t */ + XFRMA_LASTUSED = 0x0f /* unsigned long */ + XFRMA_POLICY_TYPE = 0x10 /* struct xfrm_userpolicy_type */ + XFRMA_MIGRATE = 0x11 + XFRMA_ALG_AEAD = 0x12 /* struct xfrm_algo_aead */ + XFRMA_KMADDRESS = 0x13 /* struct xfrm_user_kmaddress */ + XFRMA_ALG_AUTH_TRUNC = 0x14 /* struct xfrm_algo_auth */ + XFRMA_MARK = 0x15 /* struct xfrm_mark */ + XFRMA_TFCPAD = 0x16 /* __u32 */ + XFRMA_REPLAY_ESN_VAL = 0x17 /* struct xfrm_replay_esn */ + XFRMA_SA_EXTRA_FLAGS = 0x18 /* __u32 */ + XFRMA_MAX = 0x18 +) + +const ( + SizeofXfrmAddress = 0x10 + SizeofXfrmSelector = 0x38 + SizeofXfrmLifetimeCfg = 0x40 + SizeofXfrmLifetimeCur = 0x20 + SizeofXfrmId = 0x18 +) + +// typedef union { 
+// __be32 a4; +// __be32 a6[4]; +// } xfrm_address_t; + +type XfrmAddress [SizeofXfrmAddress]byte + +func (x *XfrmAddress) ToIP() net.IP { + var empty = [12]byte{} + ip := make(net.IP, net.IPv6len) + if bytes.Equal(x[4:16], empty[:]) { + ip[10] = 0xff + ip[11] = 0xff + copy(ip[12:16], x[0:4]) + } else { + copy(ip[:], x[:]) + } + return ip +} + +func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet { + ip := x.ToIP() + if GetIPFamily(ip) == FAMILY_V4 { + return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)} + } + return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 128)} +} + +func (x *XfrmAddress) FromIP(ip net.IP) { + var empty = [16]byte{} + if len(ip) < net.IPv4len { + copy(x[4:16], empty[:]) + } else if GetIPFamily(ip) == FAMILY_V4 { + copy(x[0:4], ip.To4()[0:4]) + copy(x[4:16], empty[:12]) + } else { + copy(x[0:16], ip.To16()[0:16]) + } +} + +func DeserializeXfrmAddress(b []byte) *XfrmAddress { + return (*XfrmAddress)(unsafe.Pointer(&b[0:SizeofXfrmAddress][0])) +} + +func (x *XfrmAddress) Serialize() []byte { + return (*(*[SizeofXfrmAddress]byte)(unsafe.Pointer(x)))[:] +} + +// struct xfrm_selector { +// xfrm_address_t daddr; +// xfrm_address_t saddr; +// __be16 dport; +// __be16 dport_mask; +// __be16 sport; +// __be16 sport_mask; +// __u16 family; +// __u8 prefixlen_d; +// __u8 prefixlen_s; +// __u8 proto; +// int ifindex; +// __kernel_uid32_t user; +// }; + +type XfrmSelector struct { + Daddr XfrmAddress + Saddr XfrmAddress + Dport uint16 // big endian + DportMask uint16 // big endian + Sport uint16 // big endian + SportMask uint16 // big endian + Family uint16 + PrefixlenD uint8 + PrefixlenS uint8 + Proto uint8 + Pad [3]byte + Ifindex int32 + User uint32 +} + +func (msg *XfrmSelector) Len() int { + return SizeofXfrmSelector +} + +func DeserializeXfrmSelector(b []byte) *XfrmSelector { + return (*XfrmSelector)(unsafe.Pointer(&b[0:SizeofXfrmSelector][0])) +} + +func (msg *XfrmSelector) Serialize() []byte { + return 
(*(*[SizeofXfrmSelector]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_lifetime_cfg { +// __u64 soft_byte_limit; +// __u64 hard_byte_limit; +// __u64 soft_packet_limit; +// __u64 hard_packet_limit; +// __u64 soft_add_expires_seconds; +// __u64 hard_add_expires_seconds; +// __u64 soft_use_expires_seconds; +// __u64 hard_use_expires_seconds; +// }; +// + +type XfrmLifetimeCfg struct { + SoftByteLimit uint64 + HardByteLimit uint64 + SoftPacketLimit uint64 + HardPacketLimit uint64 + SoftAddExpiresSeconds uint64 + HardAddExpiresSeconds uint64 + SoftUseExpiresSeconds uint64 + HardUseExpiresSeconds uint64 +} + +func (msg *XfrmLifetimeCfg) Len() int { + return SizeofXfrmLifetimeCfg +} + +func DeserializeXfrmLifetimeCfg(b []byte) *XfrmLifetimeCfg { + return (*XfrmLifetimeCfg)(unsafe.Pointer(&b[0:SizeofXfrmLifetimeCfg][0])) +} + +func (msg *XfrmLifetimeCfg) Serialize() []byte { + return (*(*[SizeofXfrmLifetimeCfg]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_lifetime_cur { +// __u64 bytes; +// __u64 packets; +// __u64 add_time; +// __u64 use_time; +// }; + +type XfrmLifetimeCur struct { + Bytes uint64 + Packets uint64 + AddTime uint64 + UseTime uint64 +} + +func (msg *XfrmLifetimeCur) Len() int { + return SizeofXfrmLifetimeCur +} + +func DeserializeXfrmLifetimeCur(b []byte) *XfrmLifetimeCur { + return (*XfrmLifetimeCur)(unsafe.Pointer(&b[0:SizeofXfrmLifetimeCur][0])) +} + +func (msg *XfrmLifetimeCur) Serialize() []byte { + return (*(*[SizeofXfrmLifetimeCur]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_id { +// xfrm_address_t daddr; +// __be32 spi; +// __u8 proto; +// }; + +type XfrmId struct { + Daddr XfrmAddress + Spi uint32 // big endian + Proto uint8 + Pad [3]byte +} + +func (msg *XfrmId) Len() int { + return SizeofXfrmId +} + +func DeserializeXfrmId(b []byte) *XfrmId { + return (*XfrmId)(unsafe.Pointer(&b[0:SizeofXfrmId][0])) +} + +func (msg *XfrmId) Serialize() []byte { + return (*(*[SizeofXfrmId]byte)(unsafe.Pointer(msg)))[:] +} diff --git 
a/vendor/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go new file mode 100644 index 0000000..66f7e03 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_policy_linux.go @@ -0,0 +1,119 @@ +package nl + +import ( + "unsafe" +) + +const ( + SizeofXfrmUserpolicyId = 0x40 + SizeofXfrmUserpolicyInfo = 0xa8 + SizeofXfrmUserTmpl = 0x40 +) + +// struct xfrm_userpolicy_id { +// struct xfrm_selector sel; +// __u32 index; +// __u8 dir; +// }; +// + +type XfrmUserpolicyId struct { + Sel XfrmSelector + Index uint32 + Dir uint8 + Pad [3]byte +} + +func (msg *XfrmUserpolicyId) Len() int { + return SizeofXfrmUserpolicyId +} + +func DeserializeXfrmUserpolicyId(b []byte) *XfrmUserpolicyId { + return (*XfrmUserpolicyId)(unsafe.Pointer(&b[0:SizeofXfrmUserpolicyId][0])) +} + +func (msg *XfrmUserpolicyId) Serialize() []byte { + return (*(*[SizeofXfrmUserpolicyId]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_userpolicy_info { +// struct xfrm_selector sel; +// struct xfrm_lifetime_cfg lft; +// struct xfrm_lifetime_cur curlft; +// __u32 priority; +// __u32 index; +// __u8 dir; +// __u8 action; +// #define XFRM_POLICY_ALLOW 0 +// #define XFRM_POLICY_BLOCK 1 +// __u8 flags; +// #define XFRM_POLICY_LOCALOK 1 /* Allow user to override global policy */ +// /* Automatically expand selector to include matching ICMP payloads. 
*/ +// #define XFRM_POLICY_ICMP 2 +// __u8 share; +// }; + +type XfrmUserpolicyInfo struct { + Sel XfrmSelector + Lft XfrmLifetimeCfg + Curlft XfrmLifetimeCur + Priority uint32 + Index uint32 + Dir uint8 + Action uint8 + Flags uint8 + Share uint8 + Pad [4]byte +} + +func (msg *XfrmUserpolicyInfo) Len() int { + return SizeofXfrmUserpolicyInfo +} + +func DeserializeXfrmUserpolicyInfo(b []byte) *XfrmUserpolicyInfo { + return (*XfrmUserpolicyInfo)(unsafe.Pointer(&b[0:SizeofXfrmUserpolicyInfo][0])) +} + +func (msg *XfrmUserpolicyInfo) Serialize() []byte { + return (*(*[SizeofXfrmUserpolicyInfo]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_user_tmpl { +// struct xfrm_id id; +// __u16 family; +// xfrm_address_t saddr; +// __u32 reqid; +// __u8 mode; +// __u8 share; +// __u8 optional; +// __u32 aalgos; +// __u32 ealgos; +// __u32 calgos; +// } + +type XfrmUserTmpl struct { + XfrmId XfrmId + Family uint16 + Pad1 [2]byte + Saddr XfrmAddress + Reqid uint32 + Mode uint8 + Share uint8 + Optional uint8 + Pad2 byte + Aalgos uint32 + Ealgos uint32 + Calgos uint32 +} + +func (msg *XfrmUserTmpl) Len() int { + return SizeofXfrmUserTmpl +} + +func DeserializeXfrmUserTmpl(b []byte) *XfrmUserTmpl { + return (*XfrmUserTmpl)(unsafe.Pointer(&b[0:SizeofXfrmUserTmpl][0])) +} + +func (msg *XfrmUserTmpl) Serialize() []byte { + return (*(*[SizeofXfrmUserTmpl]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go new file mode 100644 index 0000000..4876ce4 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go @@ -0,0 +1,221 @@ +package nl + +import ( + "unsafe" +) + +const ( + SizeofXfrmUsersaId = 0x18 + SizeofXfrmStats = 0x0c + SizeofXfrmUsersaInfo = 0xe0 + SizeofXfrmAlgo = 0x44 + SizeofXfrmAlgoAuth = 0x48 + SizeofXfrmEncapTmpl = 0x18 +) + +// struct xfrm_usersa_id { +// xfrm_address_t daddr; +// __be32 spi; +// __u16 family; +// __u8 proto; +// }; 
+ +type XfrmUsersaId struct { + Daddr XfrmAddress + Spi uint32 // big endian + Family uint16 + Proto uint8 + Pad byte +} + +func (msg *XfrmUsersaId) Len() int { + return SizeofXfrmUsersaId +} + +func DeserializeXfrmUsersaId(b []byte) *XfrmUsersaId { + return (*XfrmUsersaId)(unsafe.Pointer(&b[0:SizeofXfrmUsersaId][0])) +} + +func (msg *XfrmUsersaId) Serialize() []byte { + return (*(*[SizeofXfrmUsersaId]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_stats { +// __u32 replay_window; +// __u32 replay; +// __u32 integrity_failed; +// }; + +type XfrmStats struct { + ReplayWindow uint32 + Replay uint32 + IntegrityFailed uint32 +} + +func (msg *XfrmStats) Len() int { + return SizeofXfrmStats +} + +func DeserializeXfrmStats(b []byte) *XfrmStats { + return (*XfrmStats)(unsafe.Pointer(&b[0:SizeofXfrmStats][0])) +} + +func (msg *XfrmStats) Serialize() []byte { + return (*(*[SizeofXfrmStats]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_usersa_info { +// struct xfrm_selector sel; +// struct xfrm_id id; +// xfrm_address_t saddr; +// struct xfrm_lifetime_cfg lft; +// struct xfrm_lifetime_cur curlft; +// struct xfrm_stats stats; +// __u32 seq; +// __u32 reqid; +// __u16 family; +// __u8 mode; /* XFRM_MODE_xxx */ +// __u8 replay_window; +// __u8 flags; +// #define XFRM_STATE_NOECN 1 +// #define XFRM_STATE_DECAP_DSCP 2 +// #define XFRM_STATE_NOPMTUDISC 4 +// #define XFRM_STATE_WILDRECV 8 +// #define XFRM_STATE_ICMP 16 +// #define XFRM_STATE_AF_UNSPEC 32 +// #define XFRM_STATE_ALIGN4 64 +// #define XFRM_STATE_ESN 128 +// }; +// +// #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +// + +type XfrmUsersaInfo struct { + Sel XfrmSelector + Id XfrmId + Saddr XfrmAddress + Lft XfrmLifetimeCfg + Curlft XfrmLifetimeCur + Stats XfrmStats + Seq uint32 + Reqid uint32 + Family uint16 + Mode uint8 + ReplayWindow uint8 + Flags uint8 + Pad [7]byte +} + +func (msg *XfrmUsersaInfo) Len() int { + return SizeofXfrmUsersaInfo +} + +func DeserializeXfrmUsersaInfo(b []byte) *XfrmUsersaInfo { + return 
(*XfrmUsersaInfo)(unsafe.Pointer(&b[0:SizeofXfrmUsersaInfo][0])) +} + +func (msg *XfrmUsersaInfo) Serialize() []byte { + return (*(*[SizeofXfrmUsersaInfo]byte)(unsafe.Pointer(msg)))[:] +} + +// struct xfrm_algo { +// char alg_name[64]; +// unsigned int alg_key_len; /* in bits */ +// char alg_key[0]; +// }; + +type XfrmAlgo struct { + AlgName [64]byte + AlgKeyLen uint32 + AlgKey []byte +} + +func (msg *XfrmAlgo) Len() int { + return SizeofXfrmAlgo + int(msg.AlgKeyLen/8) +} + +func DeserializeXfrmAlgo(b []byte) *XfrmAlgo { + ret := XfrmAlgo{} + copy(ret.AlgName[:], b[0:64]) + ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64])) + ret.AlgKey = b[68:ret.Len()] + return &ret +} + +func (msg *XfrmAlgo) Serialize() []byte { + b := make([]byte, msg.Len()) + copy(b[0:64], msg.AlgName[:]) + copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:]) + copy(b[68:msg.Len()], msg.AlgKey[:]) + return b +} + +// struct xfrm_algo_auth { +// char alg_name[64]; +// unsigned int alg_key_len; /* in bits */ +// unsigned int alg_trunc_len; /* in bits */ +// char alg_key[0]; +// }; + +type XfrmAlgoAuth struct { + AlgName [64]byte + AlgKeyLen uint32 + AlgTruncLen uint32 + AlgKey []byte +} + +func (msg *XfrmAlgoAuth) Len() int { + return SizeofXfrmAlgoAuth + int(msg.AlgKeyLen/8) +} + +func DeserializeXfrmAlgoAuth(b []byte) *XfrmAlgoAuth { + ret := XfrmAlgoAuth{} + copy(ret.AlgName[:], b[0:64]) + ret.AlgKeyLen = *(*uint32)(unsafe.Pointer(&b[64])) + ret.AlgTruncLen = *(*uint32)(unsafe.Pointer(&b[68])) + ret.AlgKey = b[72:ret.Len()] + return &ret +} + +func (msg *XfrmAlgoAuth) Serialize() []byte { + b := make([]byte, msg.Len()) + copy(b[0:64], msg.AlgName[:]) + copy(b[64:68], (*(*[4]byte)(unsafe.Pointer(&msg.AlgKeyLen)))[:]) + copy(b[68:72], (*(*[4]byte)(unsafe.Pointer(&msg.AlgTruncLen)))[:]) + copy(b[72:msg.Len()], msg.AlgKey[:]) + return b +} + +// struct xfrm_algo_aead { +// char alg_name[64]; +// unsigned int alg_key_len; /* in bits */ +// unsigned int alg_icv_len; /* in bits */ +// 
char alg_key[0]; +// } + +// struct xfrm_encap_tmpl { +// __u16 encap_type; +// __be16 encap_sport; +// __be16 encap_dport; +// xfrm_address_t encap_oa; +// }; + +type XfrmEncapTmpl struct { + EncapType uint16 + EncapSport uint16 // big endian + EncapDport uint16 // big endian + Pad [2]byte + EncapOa XfrmAddress +} + +func (msg *XfrmEncapTmpl) Len() int { + return SizeofXfrmEncapTmpl +} + +func DeserializeXfrmEncapTmpl(b []byte) *XfrmEncapTmpl { + return (*XfrmEncapTmpl)(unsafe.Pointer(&b[0:SizeofXfrmEncapTmpl][0])) +} + +func (msg *XfrmEncapTmpl) Serialize() []byte { + return (*(*[SizeofXfrmEncapTmpl]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/protinfo.go b/vendor/github.com/vishvananda/netlink/protinfo.go new file mode 100644 index 0000000..f39ab8f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/protinfo.go @@ -0,0 +1,53 @@ +package netlink + +import ( + "strings" +) + +// Protinfo represents bridge flags from netlink. +type Protinfo struct { + Hairpin bool + Guard bool + FastLeave bool + RootBlock bool + Learning bool + Flood bool +} + +// String returns a list of enabled flags +func (prot *Protinfo) String() string { + var boolStrings []string + if prot.Hairpin { + boolStrings = append(boolStrings, "Hairpin") + } + if prot.Guard { + boolStrings = append(boolStrings, "Guard") + } + if prot.FastLeave { + boolStrings = append(boolStrings, "FastLeave") + } + if prot.RootBlock { + boolStrings = append(boolStrings, "RootBlock") + } + if prot.Learning { + boolStrings = append(boolStrings, "Learning") + } + if prot.Flood { + boolStrings = append(boolStrings, "Flood") + } + return strings.Join(boolStrings, " ") +} + +func boolToByte(x bool) []byte { + if x { + return []byte{1} + } + return []byte{0} +} + +func byteToBool(x byte) bool { + if uint8(x) != 0 { + return true + } + return false +} diff --git a/vendor/github.com/vishvananda/netlink/protinfo_linux.go b/vendor/github.com/vishvananda/netlink/protinfo_linux.go new 
file mode 100644 index 0000000..7181eba --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/protinfo_linux.go @@ -0,0 +1,60 @@ +package netlink + +import ( + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +func LinkGetProtinfo(link Link) (Protinfo, error) { + base := link.Attrs() + ensureIndex(base) + var pi Protinfo + req := nl.NewNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP) + msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) + req.AddData(msg) + msgs, err := req.Execute(syscall.NETLINK_ROUTE, 0) + if err != nil { + return pi, err + } + + for _, m := range msgs { + ans := nl.DeserializeIfInfomsg(m) + if int(ans.Index) != base.Index { + continue + } + attrs, err := nl.ParseRouteAttr(m[ans.Len():]) + if err != nil { + return pi, err + } + for _, attr := range attrs { + if attr.Attr.Type != syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED { + continue + } + infos, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return pi, err + } + var pi Protinfo + for _, info := range infos { + switch info.Attr.Type { + case nl.IFLA_BRPORT_MODE: + pi.Hairpin = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_GUARD: + pi.Guard = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_FAST_LEAVE: + pi.FastLeave = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_PROTECT: + pi.RootBlock = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_LEARNING: + pi.Learning = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_UNICAST_FLOOD: + pi.Flood = byteToBool(info.Value[0]) + } + } + return pi, nil + } + } + return pi, fmt.Errorf("Device with index %d not found", base.Index) +} diff --git a/vendor/github.com/vishvananda/netlink/qdisc.go b/vendor/github.com/vishvananda/netlink/qdisc.go new file mode 100644 index 0000000..8e3d020 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/qdisc.go @@ -0,0 +1,138 @@ +package netlink + +import ( + "fmt" +) + +const ( + HANDLE_NONE = 0 + HANDLE_INGRESS = 0xFFFFFFF1 + HANDLE_ROOT = 0xFFFFFFFF + PRIORITY_MAP_LEN = 16 +) + +type Qdisc 
interface { + Attrs() *QdiscAttrs + Type() string +} + +// QdiscAttrs represents a netlink qdisc. A qdisc is associated with a link, +// has a handle, a parent and a refcnt. The root qdisc of a device should +// have parent == HANDLE_ROOT. +type QdiscAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Refcnt uint32 // read only +} + +func (q QdiscAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt) +} + +func MakeHandle(major, minor uint16) uint32 { + return (uint32(major) << 16) | uint32(minor) +} + +func MajorMinor(handle uint32) (uint16, uint16) { + return uint16((handle & 0xFFFF0000) >> 16), uint16(handle & 0x0000FFFF) +} + +func HandleStr(handle uint32) string { + switch handle { + case HANDLE_NONE: + return "none" + case HANDLE_INGRESS: + return "ingress" + case HANDLE_ROOT: + return "root" + default: + major, minor := MajorMinor(handle) + return fmt.Sprintf("%x:%x", major, minor) + } +} + +// PfifoFast is the default qdisc created by the kernel if one has not +// been defined for the interface +type PfifoFast struct { + QdiscAttrs + Bands uint8 + PriorityMap [PRIORITY_MAP_LEN]uint8 +} + +func (qdisc *PfifoFast) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *PfifoFast) Type() string { + return "pfifo_fast" +} + +// Prio is a basic qdisc that works just like PfifoFast +type Prio struct { + QdiscAttrs + Bands uint8 + PriorityMap [PRIORITY_MAP_LEN]uint8 +} + +func NewPrio(attrs QdiscAttrs) *Prio { + return &Prio{ + QdiscAttrs: attrs, + Bands: 3, + PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + } +} + +func (qdisc *Prio) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Prio) Type() string { + return "prio" +} + +// Tbf is a classful qdisc that rate limits based on tokens +type Tbf struct { + QdiscAttrs + // TODO: handle 64bit rate properly + Rate uint64 + Limit
uint32 + Buffer uint32 + // TODO: handle other settings +} + +func (qdisc *Tbf) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Tbf) Type() string { + return "tbf" +} + +// Ingress is a qdisc for adding ingress filters +type Ingress struct { + QdiscAttrs +} + +func (qdisc *Ingress) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Ingress) Type() string { + return "ingress" +} + +// GenericQdisc qdiscs represent types that are not currently understood +// by this netlink library. +type GenericQdisc struct { + QdiscAttrs + QdiscType string +} + +func (qdisc *GenericQdisc) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *GenericQdisc) Type() string { + return qdisc.QdiscType +} diff --git a/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/github.com/vishvananda/netlink/qdisc_linux.go new file mode 100644 index 0000000..2531c9d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -0,0 +1,263 @@ +package netlink + +import ( + "fmt" + "io/ioutil" + "strconv" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// QdiscDel will delete a qdisc from the system. +// Equivalent to: `tc qdisc del $qdisc` +func QdiscDel(qdisc Qdisc) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELQDISC, syscall.NLM_F_ACK) + base := qdisc.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// QdiscAdd will add a qdisc to the system. 
+// Equivalent to: `tc qdisc add $qdisc` +func QdiscAdd(qdisc Qdisc) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWQDISC, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := qdisc.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if prio, ok := qdisc.(*Prio); ok { + tcmap := nl.TcPrioMap{ + Bands: int32(prio.Bands), + Priomap: prio.PriorityMap, + } + options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize()) + } else if tbf, ok := qdisc.(*Tbf); ok { + opt := nl.TcTbfQopt{} + // TODO: handle rate > uint32 + opt.Rate.Rate = uint32(tbf.Rate) + opt.Limit = tbf.Limit + opt.Buffer = tbf.Buffer + nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize()) + } else if _, ok := qdisc.(*Ingress); ok { + // ingress filters must use the proper handle + if msg.Parent != HANDLE_INGRESS { + return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS") + } + } + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// QdiscList gets a list of qdiscs in the system. +// Equivalent to: `tc qdisc show`. +// The list can be filtered by link. 
+func QdiscList(link Link) ([]Qdisc, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC) + if err != nil { + return nil, err + } + + var res []Qdisc + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip qdiscs from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + base := QdiscAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + Refcnt: msg.Info, + } + var qdisc Qdisc + qdiscType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + qdiscType = string(attr.Value[:len(attr.Value)-1]) + switch qdiscType { + case "pfifo_fast": + qdisc = &PfifoFast{} + case "prio": + qdisc = &Prio{} + case "tbf": + qdisc = &Tbf{} + case "ingress": + qdisc = &Ingress{} + default: + qdisc = &GenericQdisc{QdiscType: qdiscType} + } + case nl.TCA_OPTIONS: + switch qdiscType { + case "pfifo_fast": + // pfifo returns TcPrioMap directly without wrapping it in rtattr + if err := parsePfifoFastData(qdisc, attr.Value); err != nil { + return nil, err + } + case "prio": + // prio returns TcPrioMap directly without wrapping it in rtattr + if err := parsePrioData(qdisc, attr.Value); err != nil { + return nil, err + } + case "tbf": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseTbfData(qdisc, data); err != nil { + return nil, err + } + // no options for ingress + } + } + } + *qdisc.Attrs() = base + res = append(res, qdisc) + } + + return res, nil +} + +func parsePfifoFastData(qdisc Qdisc, value []byte) error { + pfifo := qdisc.(*PfifoFast) + tcmap := 
nl.DeserializeTcPrioMap(value) + pfifo.PriorityMap = tcmap.Priomap + pfifo.Bands = uint8(tcmap.Bands) + return nil +} + +func parsePrioData(qdisc Qdisc, value []byte) error { + prio := qdisc.(*Prio) + tcmap := nl.DeserializeTcPrioMap(value) + prio.PriorityMap = tcmap.Priomap + prio.Bands = uint8(tcmap.Bands) + return nil +} + +func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + native = nl.NativeEndian() + tbf := qdisc.(*Tbf) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_TBF_PARMS: + opt := nl.DeserializeTcTbfQopt(datum.Value) + tbf.Rate = uint64(opt.Rate.Rate) + tbf.Limit = opt.Limit + tbf.Buffer = opt.Buffer + case nl.TCA_TBF_RATE64: + tbf.Rate = native.Uint64(datum.Value[0:8]) + } + } + return nil +} + +const ( + TIME_UNITS_PER_SEC = 1000000 +) + +var ( + tickInUsec float64 = 0.0 + clockFactor float64 = 0.0 +) + +func initClock() { + data, err := ioutil.ReadFile("/proc/net/psched") + if err != nil { + return + } + parts := strings.Split(strings.TrimSpace(string(data)), " ") + if len(parts) < 3 { + return + } + var vals [3]uint64 + for i := range vals { + val, err := strconv.ParseUint(parts[i], 16, 32) + if err != nil { + return + } + vals[i] = val + } + // compatibility + if vals[2] == 1000000000 { + vals[0] = vals[1] + } + clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC + tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor +} + +func TickInUsec() float64 { + if tickInUsec == 0.0 { + initClock() + } + return tickInUsec +} + +func ClockFactor() float64 { + if clockFactor == 0.0 { + initClock() + } + return clockFactor +} + +func time2Tick(time uint32) uint32 { + return uint32(float64(time) * TickInUsec()) +} + +func tick2Time(tick uint32) uint32 { + return uint32(float64(tick) / TickInUsec()) +} + +func time2Ktime(time uint32) uint32 { + return uint32(float64(time) * ClockFactor()) +} + +func ktime2Time(ktime uint32) uint32 { + return uint32(float64(ktime) / ClockFactor()) +} + +func burst(rate
uint64, buffer uint32) uint32 { + return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC) +} + +func latency(rate uint64, limit, buffer uint32) float64 { + return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer)) +} diff --git a/vendor/github.com/vishvananda/netlink/route.go b/vendor/github.com/vishvananda/netlink/route.go new file mode 100644 index 0000000..6218546 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/route.go @@ -0,0 +1,35 @@ +package netlink + +import ( + "fmt" + "net" + "syscall" +) + +// Scope is an enum representing a route scope. +type Scope uint8 + +const ( + SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE + SCOPE_SITE Scope = syscall.RT_SCOPE_SITE + SCOPE_LINK Scope = syscall.RT_SCOPE_LINK + SCOPE_HOST Scope = syscall.RT_SCOPE_HOST + SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE +) + +// Route represents a netlink route. A route is associated with a link, +// has a destination network, an optional source ip, and optional +// gateway. Advanced route parameters and non-main routing tables are +// currently not supported. +type Route struct { + LinkIndex int + Scope Scope + Dst *net.IPNet + Src net.IP + Gw net.IP +} + +func (r Route) String() string { + return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst, + r.Src, r.Gw) +} diff --git a/vendor/github.com/vishvananda/netlink/route_linux.go b/vendor/github.com/vishvananda/netlink/route_linux.go new file mode 100644 index 0000000..9e76d44 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/route_linux.go @@ -0,0 +1,225 @@ +package netlink + +import ( + "fmt" + "net" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// RtAttr is shared so it is in netlink_linux.go + +// RouteAdd will add a route to the system. 
+// Equivalent to: `ip route add $route` +func RouteAdd(route *Route) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + return routeHandle(route, req, nl.NewRtMsg()) +} + +// RouteDel will delete a route from the system. +// Equivalent to: `ip route del $route` +func RouteDel(route *Route) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK) + return routeHandle(route, req, nl.NewRtDelMsg()) +} + +func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { + if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil { + return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil") + } + + msg.Scope = uint8(route.Scope) + family := -1 + var rtAttrs []*nl.RtAttr + + if route.Dst != nil && route.Dst.IP != nil { + dstLen, _ := route.Dst.Mask.Size() + msg.Dst_len = uint8(dstLen) + dstFamily := nl.GetIPFamily(route.Dst.IP) + family = dstFamily + var dstData []byte + if dstFamily == FAMILY_V4 { + dstData = route.Dst.IP.To4() + } else { + dstData = route.Dst.IP.To16() + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, dstData)) + } + + if route.Src != nil { + srcFamily := nl.GetIPFamily(route.Src) + if family != -1 && family != srcFamily { + return fmt.Errorf("source and destination ip are not the same IP family") + } + family = srcFamily + var srcData []byte + if srcFamily == FAMILY_V4 { + srcData = route.Src.To4() + } else { + srcData = route.Src.To16() + } + // The commonly used src ip for routes is actually PREFSRC + rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_PREFSRC, srcData)) + } + + if route.Gw != nil { + gwFamily := nl.GetIPFamily(route.Gw) + if family != -1 && family != gwFamily { + return fmt.Errorf("gateway, source, and destination ip are not the same IP family") + } + family = gwFamily + var gwData []byte + if gwFamily == FAMILY_V4 { + gwData = route.Gw.To4() + } else { + gwData = route.Gw.To16() + } +
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_GATEWAY, gwData)) + } + + msg.Family = uint8(family) + + req.AddData(msg) + for _, attr := range rtAttrs { + req.AddData(attr) + } + + var ( + b = make([]byte, 4) + native = nl.NativeEndian() + ) + native.PutUint32(b, uint32(route.LinkIndex)) + + req.AddData(nl.NewRtAttr(syscall.RTA_OIF, b)) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// RouteList gets a list of routes in the system. +// Equivalent to: `ip route show`. +// The list can be filtered by link and ip family. +func RouteList(link Link, family int) ([]Route, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP) + msg := nl.NewIfInfomsg(family) + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE) + if err != nil { + return nil, err + } + + index := 0 + if link != nil { + base := link.Attrs() + ensureIndex(base) + index = base.Index + } + + native := nl.NativeEndian() + var res []Route +MsgLoop: + for _, m := range msgs { + msg := nl.DeserializeRtMsg(m) + + if msg.Flags&syscall.RTM_F_CLONED != 0 { + // Ignore cloned routes + continue + } + + if msg.Table != syscall.RT_TABLE_MAIN { + // Ignore non-main tables + continue + } + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + route := Route{Scope: Scope(msg.Scope)} + for _, attr := range attrs { + switch attr.Attr.Type { + case syscall.RTA_GATEWAY: + route.Gw = net.IP(attr.Value) + case syscall.RTA_PREFSRC: + route.Src = net.IP(attr.Value) + case syscall.RTA_DST: + route.Dst = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), + } + case syscall.RTA_OIF: + routeIndex := int(native.Uint32(attr.Value[0:4])) + if link != nil && routeIndex != index { + // Ignore routes from other interfaces + continue MsgLoop + } + route.LinkIndex = routeIndex + } + } + res = append(res, route) + } + + return res, nil +} + +// RouteGet gets a route to a 
specific destination from the host system. +// Equivalent to: 'ip route get'. +func RouteGet(destination net.IP) ([]Route, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_REQUEST) + family := nl.GetIPFamily(destination) + var destinationData []byte + var bitlen uint8 + if family == FAMILY_V4 { + destinationData = destination.To4() + bitlen = 32 + } else { + destinationData = destination.To16() + bitlen = 128 + } + msg := &nl.RtMsg{} + msg.Family = uint8(family) + msg.Dst_len = bitlen + req.AddData(msg) + + rtaDst := nl.NewRtAttr(syscall.RTA_DST, destinationData) + req.AddData(rtaDst) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE) + if err != nil { + return nil, err + } + + native := nl.NativeEndian() + var res []Route + for _, m := range msgs { + msg := nl.DeserializeRtMsg(m) + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + route := Route{} + for _, attr := range attrs { + switch attr.Attr.Type { + case syscall.RTA_GATEWAY: + route.Gw = net.IP(attr.Value) + case syscall.RTA_PREFSRC: + route.Src = net.IP(attr.Value) + case syscall.RTA_DST: + route.Dst = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), + } + case syscall.RTA_OIF: + routeIndex := int(native.Uint32(attr.Value[0:4])) + route.LinkIndex = routeIndex + } + } + res = append(res, route) + } + return res, nil + +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm.go b/vendor/github.com/vishvananda/netlink/xfrm.go new file mode 100644 index 0000000..621ffb6 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm.go @@ -0,0 +1,64 @@ +package netlink + +import ( + "fmt" + "syscall" +) + +// Proto is an enum representing an ipsec protocol. 
+type Proto uint8 + +const ( + XFRM_PROTO_ROUTE2 Proto = syscall.IPPROTO_ROUTING + XFRM_PROTO_ESP Proto = syscall.IPPROTO_ESP + XFRM_PROTO_AH Proto = syscall.IPPROTO_AH + XFRM_PROTO_HAO Proto = syscall.IPPROTO_DSTOPTS + XFRM_PROTO_COMP Proto = syscall.IPPROTO_COMP + XFRM_PROTO_IPSEC_ANY Proto = syscall.IPPROTO_RAW +) + +func (p Proto) String() string { + switch p { + case XFRM_PROTO_ROUTE2: + return "route2" + case XFRM_PROTO_ESP: + return "esp" + case XFRM_PROTO_AH: + return "ah" + case XFRM_PROTO_HAO: + return "hao" + case XFRM_PROTO_COMP: + return "comp" + case XFRM_PROTO_IPSEC_ANY: + return "ipsec-any" + } + return fmt.Sprintf("%d", p) +} + +// Mode is an enum representing an ipsec transport. +type Mode uint8 + +const ( + XFRM_MODE_TRANSPORT Mode = iota + XFRM_MODE_TUNNEL + XFRM_MODE_ROUTEOPTIMIZATION + XFRM_MODE_IN_TRIGGER + XFRM_MODE_BEET + XFRM_MODE_MAX +) + +func (m Mode) String() string { + switch m { + case XFRM_MODE_TRANSPORT: + return "transport" + case XFRM_MODE_TUNNEL: + return "tunnel" + case XFRM_MODE_ROUTEOPTIMIZATION: + return "ro" + case XFRM_MODE_IN_TRIGGER: + return "in_trigger" + case XFRM_MODE_BEET: + return "beet" + } + return fmt.Sprintf("%d", m) +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy.go b/vendor/github.com/vishvananda/netlink/xfrm_policy.go new file mode 100644 index 0000000..d85c65d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy.go @@ -0,0 +1,59 @@ +package netlink + +import ( + "fmt" + "net" +) + +// Dir is an enum representing an ipsec template direction. 
+type Dir uint8 + +const ( + XFRM_DIR_IN Dir = iota + XFRM_DIR_OUT + XFRM_DIR_FWD + XFRM_SOCKET_IN + XFRM_SOCKET_OUT + XFRM_SOCKET_FWD +) + +func (d Dir) String() string { + switch d { + case XFRM_DIR_IN: + return "dir in" + case XFRM_DIR_OUT: + return "dir out" + case XFRM_DIR_FWD: + return "dir fwd" + case XFRM_SOCKET_IN: + return "socket in" + case XFRM_SOCKET_OUT: + return "socket out" + case XFRM_SOCKET_FWD: + return "socket fwd" + } + return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) +} + +// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec +// policy. These rules are matched with XfrmState to determine encryption +// and authentication algorithms. +type XfrmPolicyTmpl struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Reqid int +} + +// XfrmPolicy represents an ipsec policy. It represents the overlay network +// and has a list of XfrmPolicyTmpls representing the base addresses of +// the policy. +type XfrmPolicy struct { + Dst *net.IPNet + Src *net.IPNet + Dir Dir + Priority int + Index int + Tmpls []XfrmPolicyTmpl +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go new file mode 100644 index 0000000..2daf6dc --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go @@ -0,0 +1,127 @@ +package netlink + +import ( + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) { + sel.Family = uint16(nl.GetIPFamily(policy.Dst.IP)) + sel.Daddr.FromIP(policy.Dst.IP) + sel.Saddr.FromIP(policy.Src.IP) + prefixlenD, _ := policy.Dst.Mask.Size() + sel.PrefixlenD = uint8(prefixlenD) + prefixlenS, _ := policy.Src.Mask.Size() + sel.PrefixlenS = uint8(prefixlenS) +} + +// XfrmPolicyAdd will add an xfrm policy to the system. 
+// Equivalent to: `ip xfrm policy add $policy` +func XfrmPolicyAdd(policy *XfrmPolicy) error { + req := nl.NewNetlinkRequest(nl.XFRM_MSG_NEWPOLICY, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + + msg := &nl.XfrmUserpolicyInfo{} + selFromPolicy(&msg.Sel, policy) + msg.Priority = uint32(policy.Priority) + msg.Index = uint32(policy.Index) + msg.Dir = uint8(policy.Dir) + msg.Lft.SoftByteLimit = nl.XFRM_INF + msg.Lft.HardByteLimit = nl.XFRM_INF + msg.Lft.SoftPacketLimit = nl.XFRM_INF + msg.Lft.HardPacketLimit = nl.XFRM_INF + req.AddData(msg) + + tmplData := make([]byte, nl.SizeofXfrmUserTmpl*len(policy.Tmpls)) + for i, tmpl := range policy.Tmpls { + start := i * nl.SizeofXfrmUserTmpl + userTmpl := nl.DeserializeXfrmUserTmpl(tmplData[start : start+nl.SizeofXfrmUserTmpl]) + userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst) + userTmpl.Saddr.FromIP(tmpl.Src) + userTmpl.XfrmId.Proto = uint8(tmpl.Proto) + userTmpl.Mode = uint8(tmpl.Mode) + userTmpl.Reqid = uint32(tmpl.Reqid) + userTmpl.Aalgos = ^uint32(0) + userTmpl.Ealgos = ^uint32(0) + userTmpl.Calgos = ^uint32(0) + } + if len(tmplData) > 0 { + tmpls := nl.NewRtAttr(nl.XFRMA_TMPL, tmplData) + req.AddData(tmpls) + } + + _, err := req.Execute(syscall.NETLINK_XFRM, 0) + return err +} + +// XfrmPolicyDel will delete an xfrm policy from the system. Note that +// the Tmpls are ignored when matching the policy to delete. +// Equivalent to: `ip xfrm policy del $policy` +func XfrmPolicyDel(policy *XfrmPolicy) error { + req := nl.NewNetlinkRequest(nl.XFRM_MSG_DELPOLICY, syscall.NLM_F_ACK) + + msg := &nl.XfrmUserpolicyId{} + selFromPolicy(&msg.Sel, policy) + msg.Index = uint32(policy.Index) + msg.Dir = uint8(policy.Dir) + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_XFRM, 0) + return err +} + +// XfrmPolicyList gets a list of xfrm policies in the system. +// Equivalent to: `ip xfrm policy show`. +// The list can be filtered by ip family. 
+func XfrmPolicyList(family int) ([]XfrmPolicy, error) { + req := nl.NewNetlinkRequest(nl.XFRM_MSG_GETPOLICY, syscall.NLM_F_DUMP) + + msg := nl.NewIfInfomsg(family) + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY) + if err != nil { + return nil, err + } + + var res []XfrmPolicy + for _, m := range msgs { + msg := nl.DeserializeXfrmUserpolicyInfo(m) + + if family != FAMILY_ALL && family != int(msg.Sel.Family) { + continue + } + + var policy XfrmPolicy + + policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD) + policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS) + policy.Priority = int(msg.Priority) + policy.Index = int(msg.Index) + policy.Dir = Dir(msg.Dir) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.XFRMA_TMPL: + max := len(attr.Value) + for i := 0; i < max; i += nl.SizeofXfrmUserTmpl { + var resTmpl XfrmPolicyTmpl + tmpl := nl.DeserializeXfrmUserTmpl(attr.Value[i : i+nl.SizeofXfrmUserTmpl]) + resTmpl.Dst = tmpl.XfrmId.Daddr.ToIP() + resTmpl.Src = tmpl.Saddr.ToIP() + resTmpl.Proto = Proto(tmpl.XfrmId.Proto) + resTmpl.Mode = Mode(tmpl.Mode) + resTmpl.Reqid = int(tmpl.Reqid) + policy.Tmpls = append(policy.Tmpls, resTmpl) + } + } + } + res = append(res, policy) + } + return res, nil +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state.go b/vendor/github.com/vishvananda/netlink/xfrm_state.go new file mode 100644 index 0000000..5b8f2df --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_state.go @@ -0,0 +1,53 @@ +package netlink + +import ( + "net" +) + +// XfrmStateAlgo represents the algorithm to use for the ipsec encryption. +type XfrmStateAlgo struct { + Name string + Key []byte + TruncateLen int // Auth only +} + +// EncapType is an enum representing an ipsec template direction. 
+type EncapType uint8 + +const ( + XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1 + XFRM_ENCAP_ESPINUDP +) + +func (e EncapType) String() string { + switch e { + case XFRM_ENCAP_ESPINUDP_NONIKE: + return "espinudp-nonike" + case XFRM_ENCAP_ESPINUDP: + return "espinudp" + } + return "unknown" +} + +// XfrmStateEncap represents the encapsulation to use for the ipsec encryption. +type XfrmStateEncap struct { + Type EncapType + SrcPort int + DstPort int + OriginalAddress net.IP +} + +// XfrmState represents the state of an ipsec policy. It optionally +// contains an XfrmStateAlgo for encryption and one for authentication. +type XfrmState struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + ReplayWindow int + Auth *XfrmStateAlgo + Crypt *XfrmStateAlgo + Encap *XfrmStateEncap +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go new file mode 100644 index 0000000..5f44ec8 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go @@ -0,0 +1,181 @@ +package netlink + +import ( + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +func writeStateAlgo(a *XfrmStateAlgo) []byte { + algo := nl.XfrmAlgo{ + AlgKeyLen: uint32(len(a.Key) * 8), + AlgKey: a.Key, + } + end := len(a.Name) + if end > 64 { + end = 64 + } + copy(algo.AlgName[:end], a.Name) + return algo.Serialize() +} + +func writeStateAlgoAuth(a *XfrmStateAlgo) []byte { + algo := nl.XfrmAlgoAuth{ + AlgKeyLen: uint32(len(a.Key) * 8), + AlgTruncLen: uint32(a.TruncateLen), + AlgKey: a.Key, + } + end := len(a.Name) + if end > 64 { + end = 64 + } + copy(algo.AlgName[:end], a.Name) + return algo.Serialize() +} + +// XfrmStateAdd will add an xfrm state to the system.
+// Equivalent to: `ip xfrm state add $state` +func XfrmStateAdd(state *XfrmState) error { + // A state with spi 0 can't be deleted so don't allow it to be set + if state.Spi == 0 { + return fmt.Errorf("Spi must be set when adding xfrm state.") + } + req := nl.NewNetlinkRequest(nl.XFRM_MSG_NEWSA, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + + msg := &nl.XfrmUsersaInfo{} + msg.Family = uint16(nl.GetIPFamily(state.Dst)) + msg.Id.Daddr.FromIP(state.Dst) + msg.Saddr.FromIP(state.Src) + msg.Id.Proto = uint8(state.Proto) + msg.Mode = uint8(state.Mode) + msg.Id.Spi = nl.Swap32(uint32(state.Spi)) + msg.Reqid = uint32(state.Reqid) + msg.ReplayWindow = uint8(state.ReplayWindow) + msg.Lft.SoftByteLimit = nl.XFRM_INF + msg.Lft.HardByteLimit = nl.XFRM_INF + msg.Lft.SoftPacketLimit = nl.XFRM_INF + msg.Lft.HardPacketLimit = nl.XFRM_INF + req.AddData(msg) + + if state.Auth != nil { + out := nl.NewRtAttr(nl.XFRMA_ALG_AUTH_TRUNC, writeStateAlgoAuth(state.Auth)) + req.AddData(out) + } + if state.Crypt != nil { + out := nl.NewRtAttr(nl.XFRMA_ALG_CRYPT, writeStateAlgo(state.Crypt)) + req.AddData(out) + } + if state.Encap != nil { + encapData := make([]byte, nl.SizeofXfrmEncapTmpl) + encap := nl.DeserializeXfrmEncapTmpl(encapData) + encap.EncapType = uint16(state.Encap.Type) + encap.EncapSport = nl.Swap16(uint16(state.Encap.SrcPort)) + encap.EncapDport = nl.Swap16(uint16(state.Encap.DstPort)) + encap.EncapOa.FromIP(state.Encap.OriginalAddress) + out := nl.NewRtAttr(nl.XFRMA_ENCAP, encapData) + req.AddData(out) + } + + _, err := req.Execute(syscall.NETLINK_XFRM, 0) + return err +} + +// XfrmStateDel will delete an xfrm state from the system. Note that +// the Algos are ignored when matching the state to delete. 
+// Equivalent to: `ip xfrm state del $state` +func XfrmStateDel(state *XfrmState) error { + req := nl.NewNetlinkRequest(nl.XFRM_MSG_DELSA, syscall.NLM_F_ACK) + + msg := &nl.XfrmUsersaId{} + msg.Daddr.FromIP(state.Dst) + msg.Family = uint16(nl.GetIPFamily(state.Dst)) + msg.Proto = uint8(state.Proto) + msg.Spi = nl.Swap32(uint32(state.Spi)) + req.AddData(msg) + + saddr := nl.XfrmAddress{} + saddr.FromIP(state.Src) + srcdata := nl.NewRtAttr(nl.XFRMA_SRCADDR, saddr.Serialize()) + + req.AddData(srcdata) + + _, err := req.Execute(syscall.NETLINK_XFRM, 0) + return err +} + +// XfrmStateList gets a list of xfrm states in the system. +// Equivalent to: `ip xfrm state show`. +// The list can be filtered by ip family. +func XfrmStateList(family int) ([]XfrmState, error) { + req := nl.NewNetlinkRequest(nl.XFRM_MSG_GETSA, syscall.NLM_F_DUMP) + + msg := nl.NewIfInfomsg(family) + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWSA) + if err != nil { + return nil, err + } + + var res []XfrmState + for _, m := range msgs { + msg := nl.DeserializeXfrmUsersaInfo(m) + + if family != FAMILY_ALL && family != int(msg.Family) { + continue + } + + var state XfrmState + + state.Dst = msg.Id.Daddr.ToIP() + state.Src = msg.Saddr.ToIP() + state.Proto = Proto(msg.Id.Proto) + state.Mode = Mode(msg.Mode) + state.Spi = int(nl.Swap32(msg.Id.Spi)) + state.Reqid = int(msg.Reqid) + state.ReplayWindow = int(msg.ReplayWindow) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.XFRMA_ALG_AUTH, nl.XFRMA_ALG_CRYPT: + var resAlgo *XfrmStateAlgo + if attr.Attr.Type == nl.XFRMA_ALG_AUTH { + if state.Auth == nil { + state.Auth = new(XfrmStateAlgo) + } + resAlgo = state.Auth + } else { + state.Crypt = new(XfrmStateAlgo) + resAlgo = state.Crypt + } + algo := nl.DeserializeXfrmAlgo(attr.Value[:]) + (*resAlgo).Name = nl.BytesToString(algo.AlgName[:]) + (*resAlgo).Key = 
algo.AlgKey + case nl.XFRMA_ALG_AUTH_TRUNC: + if state.Auth == nil { + state.Auth = new(XfrmStateAlgo) + } + algo := nl.DeserializeXfrmAlgoAuth(attr.Value[:]) + state.Auth.Name = nl.BytesToString(algo.AlgName[:]) + state.Auth.Key = algo.AlgKey + state.Auth.TruncateLen = int(algo.AlgTruncLen) + case nl.XFRMA_ENCAP: + encap := nl.DeserializeXfrmEncapTmpl(attr.Value[:]) + state.Encap = new(XfrmStateEncap) + state.Encap.Type = EncapType(encap.EncapType) + state.Encap.SrcPort = int(nl.Swap16(encap.EncapSport)) + state.Encap.DstPort = int(nl.Swap16(encap.EncapDport)) + state.Encap.OriginalAddress = encap.EncapOa.ToIP() + } + + } + res = append(res, state) + } + return res, nil +} diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE new file mode 100644 index 0000000..6a66aea --- /dev/null +++ b/vendor/golang.org/x/sys/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/sys/PATENTS b/vendor/golang.org/x/sys/PATENTS new file mode 100644 index 0000000..7330990 --- /dev/null +++ b/vendor/golang.org/x/sys/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. 
If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/sys/README.md b/vendor/golang.org/x/sys/README.md new file mode 100644 index 0000000..ef6c9e5 --- /dev/null +++ b/vendor/golang.org/x/sys/README.md @@ -0,0 +1,18 @@ +# sys + +This repository holds supplemental Go packages for low-level interactions with +the operating system. + +## Download/Install + +The easiest way to install is to run `go get -u golang.org/x/sys`. You can +also manually git clone the repository to `$GOPATH/src/golang.org/x/sys`. + +## Report Issues / Send Patches + +This repository uses Gerrit for code changes. To learn how to submit changes to +this repository, see https://golang.org/doc/contribute.html. + +The main issue tracker for the sys repository is located at +https://github.com/golang/go/issues. Prefix your issue with "x/sys:" in the +subject line, so it is easy to find. diff --git a/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s new file mode 100644 index 0000000..06f84b8 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s @@ -0,0 +1,17 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !gccgo + +#include "textflag.h" + +// +// System calls for ppc64, AIX are implemented in runtime/syscall_aix.go +// + +TEXT ·syscall6(SB),NOSPLIT,$0-88 + JMP syscall·syscall6(SB) + +TEXT ·rawSyscall6(SB),NOSPLIT,$0-88 + JMP syscall·rawSyscall6(SB) diff --git a/vendor/golang.org/x/sys/cpu/byteorder.go b/vendor/golang.org/x/sys/cpu/byteorder.go new file mode 100644 index 0000000..da6b9e4 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/byteorder.go @@ -0,0 +1,30 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "encoding/binary" + "runtime" +) + +// hostByteOrder returns binary.LittleEndian on little-endian machines and +// binary.BigEndian on big-endian machines. +func hostByteOrder() binary.ByteOrder { + switch runtime.GOARCH { + case "386", "amd64", "amd64p32", + "arm", "arm64", + "mipsle", "mips64le", "mips64p32le", + "ppc64le", + "riscv", "riscv64": + return binary.LittleEndian + case "armbe", "arm64be", + "mips", "mips64", "mips64p32", + "ppc", "ppc64", + "s390", "s390x", + "sparc", "sparc64": + return binary.BigEndian + } + panic("unknown architecture") +} diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go new file mode 100644 index 0000000..679e78c --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -0,0 +1,126 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cpu implements processor feature detection for +// various CPU architectures. +package cpu + +// Initialized reports whether the CPU features were initialized. +// +// For some GOOS/GOARCH combinations initialization of the CPU features depends +// on reading an operating specific file, e.g. /proc/self/auxv on linux/arm +// Initialized will report false if reading the file fails. 
+var Initialized bool + +// CacheLinePad is used to pad structs to avoid false sharing. +type CacheLinePad struct{ _ [cacheLineSize]byte } + +// X86 contains the supported CPU features of the +// current X86/AMD64 platform. If the current platform +// is not X86/AMD64 then all feature flags are false. +// +// X86 is padded to avoid false sharing. Further the HasAVX +// and HasAVX2 are only set if the OS supports XMM and YMM +// registers in addition to the CPUID feature bit being set. +var X86 struct { + _ CacheLinePad + HasAES bool // AES hardware implementation (AES NI) + HasADX bool // Multi-precision add-carry instruction extensions + HasAVX bool // Advanced vector extension + HasAVX2 bool // Advanced vector extension 2 + HasBMI1 bool // Bit manipulation instruction set 1 + HasBMI2 bool // Bit manipulation instruction set 2 + HasERMS bool // Enhanced REP for MOVSB and STOSB + HasFMA bool // Fused-multiply-add instructions + HasOSXSAVE bool // OS supports XSAVE/XRESTOR for saving/restoring XMM registers. + HasPCLMULQDQ bool // PCLMULQDQ instruction - most often used for AES-GCM + HasPOPCNT bool // Hamming weight instruction POPCNT. + HasRDRAND bool // RDRAND instruction (on-chip random number generator) + HasRDSEED bool // RDSEED instruction (on-chip random number generator) + HasSSE2 bool // Streaming SIMD extension 2 (always available on amd64) + HasSSE3 bool // Streaming SIMD extension 3 + HasSSSE3 bool // Supplemental streaming SIMD extension 3 + HasSSE41 bool // Streaming SIMD extension 4 and 4.1 + HasSSE42 bool // Streaming SIMD extension 4 and 4.2 + _ CacheLinePad +} + +// ARM64 contains the supported CPU features of the +// current ARMv8(aarch64) platform. If the current platform +// is not arm64 then all feature flags are false. 
+var ARM64 struct { + _ CacheLinePad + HasFP bool // Floating-point instruction set (always available) + HasASIMD bool // Advanced SIMD (always available) + HasEVTSTRM bool // Event stream support + HasAES bool // AES hardware implementation + HasPMULL bool // Polynomial multiplication instruction set + HasSHA1 bool // SHA1 hardware implementation + HasSHA2 bool // SHA2 hardware implementation + HasCRC32 bool // CRC32 hardware implementation + HasATOMICS bool // Atomic memory operation instruction set + HasFPHP bool // Half precision floating-point instruction set + HasASIMDHP bool // Advanced SIMD half precision instruction set + HasCPUID bool // CPUID identification scheme registers + HasASIMDRDM bool // Rounding double multiply add/subtract instruction set + HasJSCVT bool // Javascript conversion from floating-point to integer + HasFCMA bool // Floating-point multiplication and addition of complex numbers + HasLRCPC bool // Release Consistent processor consistent support + HasDCPOP bool // Persistent memory support + HasSHA3 bool // SHA3 hardware implementation + HasSM3 bool // SM3 hardware implementation + HasSM4 bool // SM4 hardware implementation + HasASIMDDP bool // Advanced SIMD double precision instruction set + HasSHA512 bool // SHA512 hardware implementation + HasSVE bool // Scalable Vector Extensions + HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32 + _ CacheLinePad +} + +// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms. +// If the current platform is not ppc64/ppc64le then all feature flags are false. +// +// For ppc64/ppc64le, it is safe to check only for ISA level starting on ISA v3.00, +// since there are no optional categories. There are some exceptions that also +// require kernel support to work (DARN, SCV), so there are feature bits for +// those as well. The minimum processor requirement is POWER8 (ISA 2.07). +// The struct is padded to avoid false sharing. 
+var PPC64 struct { + _ CacheLinePad + HasDARN bool // Hardware random number generator (requires kernel enablement) + HasSCV bool // Syscall vectored (requires kernel enablement) + IsPOWER8 bool // ISA v2.07 (POWER8) + IsPOWER9 bool // ISA v3.00 (POWER9) + _ CacheLinePad +} + +// S390X contains the supported CPU features of the current IBM Z +// (s390x) platform. If the current platform is not IBM Z then all +// feature flags are false. +// +// S390X is padded to avoid false sharing. Further HasVX is only set +// if the OS supports vector registers in addition to the STFLE +// feature bit being set. +var S390X struct { + _ CacheLinePad + HasZARCH bool // z/Architecture mode is active [mandatory] + HasSTFLE bool // store facility list extended + HasLDISP bool // long (20-bit) displacements + HasEIMM bool // 32-bit immediates + HasDFP bool // decimal floating point + HasETF3EH bool // ETF-3 enhanced + HasMSA bool // message security assist (CPACF) + HasAES bool // KM-AES{128,192,256} functions + HasAESCBC bool // KMC-AES{128,192,256} functions + HasAESCTR bool // KMCTR-AES{128,192,256} functions + HasAESGCM bool // KMA-GCM-AES{128,192,256} functions + HasGHASH bool // KIMD-GHASH function + HasSHA1 bool // K{I,L}MD-SHA-1 functions + HasSHA256 bool // K{I,L}MD-SHA-256 functions + HasSHA512 bool // K{I,L}MD-SHA-512 functions + HasSHA3 bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions + HasVX bool // vector facility + HasVXE bool // vector-enhancements facility 1 + _ CacheLinePad +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_aix_ppc64.go b/vendor/golang.org/x/sys/cpu/cpu_aix_ppc64.go new file mode 100644 index 0000000..be60272 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_aix_ppc64.go @@ -0,0 +1,34 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build aix,ppc64 + +package cpu + +const cacheLineSize = 128 + +const ( + // getsystemcfg constants + _SC_IMPL = 2 + _IMPL_POWER8 = 0x10000 + _IMPL_POWER9 = 0x20000 +) + +func init() { + impl := getsystemcfg(_SC_IMPL) + if impl&_IMPL_POWER8 != 0 { + PPC64.IsPOWER8 = true + } + if impl&_IMPL_POWER9 != 0 { + PPC64.IsPOWER9 = true + } + + Initialized = true +} + +func getsystemcfg(label int) (n uint64) { + r0, _ := callgetsystemcfg(label) + n = uint64(r0) + return +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm.go b/vendor/golang.org/x/sys/cpu/cpu_arm.go new file mode 100644 index 0000000..7f2348b --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_arm.go @@ -0,0 +1,9 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +const cacheLineSize = 32 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go new file mode 100644 index 0000000..568bcd0 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go @@ -0,0 +1,21 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !gccgo + +package cpu + +// haveAsmFunctions reports whether the other functions in this file can +// be safely called. +func haveAsmFunctions() bool { return true } + +// The following feature detection functions are defined in cpu_s390x.s. +// They are likely to be expensive to call so the results should be cached. 
+func stfle() facilityList +func kmQuery() queryResult +func kmcQuery() queryResult +func kmctrQuery() queryResult +func kmaQuery() queryResult +func kimdQuery() queryResult +func klmdQuery() queryResult diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go new file mode 100644 index 0000000..f7cb469 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go @@ -0,0 +1,16 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 +// +build !gccgo + +package cpu + +// cpuid is implemented in cpu_x86.s for gc compiler +// and in cpu_gccgo.c for gccgo. +func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) + +// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler +// and in cpu_gccgo.c for gccgo. +func xgetbv() (eax, edx uint32) diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo.c b/vendor/golang.org/x/sys/cpu/cpu_gccgo.c new file mode 100644 index 0000000..e363c7d --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo.c @@ -0,0 +1,43 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 +// +build gccgo + +#include +#include + +// Need to wrap __get_cpuid_count because it's declared as static. +int +gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + return __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx); +} + +// xgetbv reads the contents of an XCR (Extended Control Register) +// specified in the ECX register into registers EDX:EAX. +// Currently, the only supported value for XCR is 0. 
+// +// TODO: Replace with a better alternative: +// +// #include +// +// #pragma GCC target("xsave") +// +// void gccgoXgetbv(uint32_t *eax, uint32_t *edx) { +// unsigned long long x = _xgetbv(0); +// *eax = x & 0xffffffff; +// *edx = (x >> 32) & 0xffffffff; +// } +// +// Note that _xgetbv is defined starting with GCC 8. +void +gccgoXgetbv(uint32_t *eax, uint32_t *edx) +{ + __asm(" xorl %%ecx, %%ecx\n" + " xgetbv" + : "=a"(*eax), "=d"(*edx)); +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo.go new file mode 100644 index 0000000..ba49b91 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo.go @@ -0,0 +1,26 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 +// +build gccgo + +package cpu + +//extern gccgoGetCpuidCount +func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32) + +func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) { + var a, b, c, d uint32 + gccgoGetCpuidCount(eaxArg, ecxArg, &a, &b, &c, &d) + return a, b, c, d +} + +//extern gccgoXgetbv +func gccgoXgetbv(eax, edx *uint32) + +func xgetbv() (eax, edx uint32) { + var a, d uint32 + gccgoXgetbv(&a, &d) + return a, d +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go new file mode 100644 index 0000000..aa986f7 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go @@ -0,0 +1,22 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build gccgo + +package cpu + +// haveAsmFunctions reports whether the other functions in this file can +// be safely called. +func haveAsmFunctions() bool { return false } + +// TODO(mundaym): the following feature detection functions are currently +// stubs. 
See https://golang.org/cl/162887 for how to fix this. +// They are likely to be expensive to call so the results should be cached. +func stfle() facilityList { panic("not implemented for gccgo") } +func kmQuery() queryResult { panic("not implemented for gccgo") } +func kmcQuery() queryResult { panic("not implemented for gccgo") } +func kmctrQuery() queryResult { panic("not implemented for gccgo") } +func kmaQuery() queryResult { panic("not implemented for gccgo") } +func kimdQuery() queryResult { panic("not implemented for gccgo") } +func klmdQuery() queryResult { panic("not implemented for gccgo") } diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux.go b/vendor/golang.org/x/sys/cpu/cpu_linux.go new file mode 100644 index 0000000..76b5f50 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux.go @@ -0,0 +1,59 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//+build !amd64,!amd64p32,!386 + +package cpu + +import ( + "io/ioutil" +) + +const ( + _AT_HWCAP = 16 + _AT_HWCAP2 = 26 + + procAuxv = "/proc/self/auxv" + + uintSize = int(32 << (^uint(0) >> 63)) +) + +// For those platforms don't have a 'cpuid' equivalent we use HWCAP/HWCAP2 +// These are initialized in cpu_$GOARCH.go +// and should not be changed after they are initialized. +var hwCap uint +var hwCap2 uint + +func init() { + buf, err := ioutil.ReadFile(procAuxv) + if err != nil { + // e.g. 
on android /proc/self/auxv is not accessible, so silently + // ignore the error and leave Initialized = false + return + } + + bo := hostByteOrder() + for len(buf) >= 2*(uintSize/8) { + var tag, val uint + switch uintSize { + case 32: + tag = uint(bo.Uint32(buf[0:])) + val = uint(bo.Uint32(buf[4:])) + buf = buf[8:] + case 64: + tag = uint(bo.Uint64(buf[0:])) + val = uint(bo.Uint64(buf[8:])) + buf = buf[16:] + } + switch tag { + case _AT_HWCAP: + hwCap = val + case _AT_HWCAP2: + hwCap2 = val + } + } + doinit() + + Initialized = true +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go new file mode 100644 index 0000000..fa7fb1b --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go @@ -0,0 +1,67 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +const cacheLineSize = 64 + +// HWCAP/HWCAP2 bits. These are exposed by Linux. 
+const ( + hwcap_FP = 1 << 0 + hwcap_ASIMD = 1 << 1 + hwcap_EVTSTRM = 1 << 2 + hwcap_AES = 1 << 3 + hwcap_PMULL = 1 << 4 + hwcap_SHA1 = 1 << 5 + hwcap_SHA2 = 1 << 6 + hwcap_CRC32 = 1 << 7 + hwcap_ATOMICS = 1 << 8 + hwcap_FPHP = 1 << 9 + hwcap_ASIMDHP = 1 << 10 + hwcap_CPUID = 1 << 11 + hwcap_ASIMDRDM = 1 << 12 + hwcap_JSCVT = 1 << 13 + hwcap_FCMA = 1 << 14 + hwcap_LRCPC = 1 << 15 + hwcap_DCPOP = 1 << 16 + hwcap_SHA3 = 1 << 17 + hwcap_SM3 = 1 << 18 + hwcap_SM4 = 1 << 19 + hwcap_ASIMDDP = 1 << 20 + hwcap_SHA512 = 1 << 21 + hwcap_SVE = 1 << 22 + hwcap_ASIMDFHM = 1 << 23 +) + +func doinit() { + // HWCAP feature bits + ARM64.HasFP = isSet(hwCap, hwcap_FP) + ARM64.HasASIMD = isSet(hwCap, hwcap_ASIMD) + ARM64.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM) + ARM64.HasAES = isSet(hwCap, hwcap_AES) + ARM64.HasPMULL = isSet(hwCap, hwcap_PMULL) + ARM64.HasSHA1 = isSet(hwCap, hwcap_SHA1) + ARM64.HasSHA2 = isSet(hwCap, hwcap_SHA2) + ARM64.HasCRC32 = isSet(hwCap, hwcap_CRC32) + ARM64.HasATOMICS = isSet(hwCap, hwcap_ATOMICS) + ARM64.HasFPHP = isSet(hwCap, hwcap_FPHP) + ARM64.HasASIMDHP = isSet(hwCap, hwcap_ASIMDHP) + ARM64.HasCPUID = isSet(hwCap, hwcap_CPUID) + ARM64.HasASIMDRDM = isSet(hwCap, hwcap_ASIMDRDM) + ARM64.HasJSCVT = isSet(hwCap, hwcap_JSCVT) + ARM64.HasFCMA = isSet(hwCap, hwcap_FCMA) + ARM64.HasLRCPC = isSet(hwCap, hwcap_LRCPC) + ARM64.HasDCPOP = isSet(hwCap, hwcap_DCPOP) + ARM64.HasSHA3 = isSet(hwCap, hwcap_SHA3) + ARM64.HasSM3 = isSet(hwCap, hwcap_SM3) + ARM64.HasSM4 = isSet(hwCap, hwcap_SM4) + ARM64.HasASIMDDP = isSet(hwCap, hwcap_ASIMDDP) + ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) + ARM64.HasSVE = isSet(hwCap, hwcap_SVE) + ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go new file mode 100644 index 0000000..6c8d975 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go @@ 
-0,0 +1,33 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build ppc64 ppc64le + +package cpu + +const cacheLineSize = 128 + +// HWCAP/HWCAP2 bits. These are exposed by the kernel. +const ( + // ISA Level + _PPC_FEATURE2_ARCH_2_07 = 0x80000000 + _PPC_FEATURE2_ARCH_3_00 = 0x00800000 + + // CPU features + _PPC_FEATURE2_DARN = 0x00200000 + _PPC_FEATURE2_SCV = 0x00100000 +) + +func doinit() { + // HWCAP2 feature bits + PPC64.IsPOWER8 = isSet(hwCap2, _PPC_FEATURE2_ARCH_2_07) + PPC64.IsPOWER9 = isSet(hwCap2, _PPC_FEATURE2_ARCH_3_00) + PPC64.HasDARN = isSet(hwCap2, _PPC_FEATURE2_DARN) + PPC64.HasSCV = isSet(hwCap2, _PPC_FEATURE2_SCV) +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go new file mode 100644 index 0000000..d579eae --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go @@ -0,0 +1,161 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +const cacheLineSize = 256 + +const ( + // bit mask values from /usr/include/bits/hwcap.h + hwcap_ZARCH = 2 + hwcap_STFLE = 4 + hwcap_MSA = 8 + hwcap_LDISP = 16 + hwcap_EIMM = 32 + hwcap_DFP = 64 + hwcap_ETF3EH = 256 + hwcap_VX = 2048 + hwcap_VXE = 8192 +) + +// bitIsSet reports whether the bit at index is set. The bit index +// is in big endian order, so bit index 0 is the leftmost bit. +func bitIsSet(bits []uint64, index uint) bool { + return bits[index/64]&((1<<63)>>(index%64)) != 0 +} + +// function is the code for the named cryptographic function. 
+type function uint8 + +const ( + // KM{,A,C,CTR} function codes + aes128 function = 18 // AES-128 + aes192 function = 19 // AES-192 + aes256 function = 20 // AES-256 + + // K{I,L}MD function codes + sha1 function = 1 // SHA-1 + sha256 function = 2 // SHA-256 + sha512 function = 3 // SHA-512 + sha3_224 function = 32 // SHA3-224 + sha3_256 function = 33 // SHA3-256 + sha3_384 function = 34 // SHA3-384 + sha3_512 function = 35 // SHA3-512 + shake128 function = 36 // SHAKE-128 + shake256 function = 37 // SHAKE-256 + + // KLMD function codes + ghash function = 65 // GHASH +) + +// queryResult contains the result of a Query function +// call. Bits are numbered in big endian order so the +// leftmost bit (the MSB) is at index 0. +type queryResult struct { + bits [2]uint64 +} + +// Has reports whether the given functions are present. +func (q *queryResult) Has(fns ...function) bool { + if len(fns) == 0 { + panic("no function codes provided") + } + for _, f := range fns { + if !bitIsSet(q.bits[:], uint(f)) { + return false + } + } + return true +} + +// facility is a bit index for the named facility. +type facility uint8 + +const ( + // cryptography facilities + msa4 facility = 77 // message-security-assist extension 4 + msa8 facility = 146 // message-security-assist extension 8 +) + +// facilityList contains the result of an STFLE call. +// Bits are numbered in big endian order so the +// leftmost bit (the MSB) is at index 0. +type facilityList struct { + bits [4]uint64 +} + +// Has reports whether the given facilities are present. 
+func (s *facilityList) Has(fs ...facility) bool { + if len(fs) == 0 { + panic("no facility bits provided") + } + for _, f := range fs { + if !bitIsSet(s.bits[:], uint(f)) { + return false + } + } + return true +} + +func doinit() { + // test HWCAP bit vector + has := func(featureMask uint) bool { + return hwCap&featureMask == featureMask + } + + // mandatory + S390X.HasZARCH = has(hwcap_ZARCH) + + // optional + S390X.HasSTFLE = has(hwcap_STFLE) + S390X.HasLDISP = has(hwcap_LDISP) + S390X.HasEIMM = has(hwcap_EIMM) + S390X.HasETF3EH = has(hwcap_ETF3EH) + S390X.HasDFP = has(hwcap_DFP) + S390X.HasMSA = has(hwcap_MSA) + S390X.HasVX = has(hwcap_VX) + if S390X.HasVX { + S390X.HasVXE = has(hwcap_VXE) + } + + // We need implementations of stfle, km and so on + // to detect cryptographic features. + if !haveAsmFunctions() { + return + } + + // optional cryptographic functions + if S390X.HasMSA { + aes := []function{aes128, aes192, aes256} + + // cipher message + km, kmc := kmQuery(), kmcQuery() + S390X.HasAES = km.Has(aes...) + S390X.HasAESCBC = kmc.Has(aes...) + if S390X.HasSTFLE { + facilities := stfle() + if facilities.Has(msa4) { + kmctr := kmctrQuery() + S390X.HasAESCTR = kmctr.Has(aes...) + } + if facilities.Has(msa8) { + kma := kmaQuery() + S390X.HasAESGCM = kma.Has(aes...) + } + } + + // compute message digest + kimd := kimdQuery() // intermediate (no padding) + klmd := klmdQuery() // last (padding) + S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1) + S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256) + S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512) + S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist + sha3 := []function{ + sha3_224, sha3_256, sha3_384, sha3_512, + shake128, shake256, + } + S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...) 
+ } +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go new file mode 100644 index 0000000..f55e0c8 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go @@ -0,0 +1,11 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build mips64 mips64le + +package cpu + +const cacheLineSize = 32 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go new file mode 100644 index 0000000..cda87b1 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go @@ -0,0 +1,11 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build mips mipsle + +package cpu + +const cacheLineSize = 32 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go new file mode 100644 index 0000000..dd1e76d --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go @@ -0,0 +1,11 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !linux,arm64 + +package cpu + +const cacheLineSize = 64 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_s390x.s b/vendor/golang.org/x/sys/cpu/cpu_s390x.s new file mode 100644 index 0000000..e5037d9 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_s390x.s @@ -0,0 +1,57 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !gccgo + +#include "textflag.h" + +// func stfle() facilityList +TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32 + MOVD $ret+0(FP), R1 + MOVD $3, R0 // last doubleword index to store + XC $32, (R1), (R1) // clear 4 doublewords (32 bytes) + WORD $0xb2b01000 // store facility list extended (STFLE) + RET + +// func kmQuery() queryResult +TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KM-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB92E0024 // cipher message (KM) + RET + +// func kmcQuery() queryResult +TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KMC-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB92F0024 // cipher message with chaining (KMC) + RET + +// func kmctrQuery() queryResult +TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KMCTR-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB92D4024 // cipher message with counter (KMCTR) + RET + +// func kmaQuery() queryResult +TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KMA-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xb9296024 // cipher message with authentication (KMA) + RET + +// func kimdQuery() queryResult +TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KIMD-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB93E0024 // compute intermediate message digest (KIMD) + RET + +// func klmdQuery() queryResult +TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16 + MOVD $0, R0 // set function code to 0 (KLMD-Query) + MOVD $ret+0(FP), R1 // address of 16-byte return value + WORD $0xB93F0024 // compute last message digest (KLMD) + RET diff --git a/vendor/golang.org/x/sys/cpu/cpu_wasm.go b/vendor/golang.org/x/sys/cpu/cpu_wasm.go new file mode 100644 index 0000000..bd9bbda --- /dev/null +++ 
b/vendor/golang.org/x/sys/cpu/cpu_wasm.go @@ -0,0 +1,15 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build wasm + +package cpu + +// We're compiling the cpu package for an unknown (software-abstracted) CPU. +// Make CacheLinePad an empty struct and hope that the usual struct alignment +// rules are good enough. + +const cacheLineSize = 0 + +func doinit() {} diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go new file mode 100644 index 0000000..d70d317 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go @@ -0,0 +1,59 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 + +package cpu + +const cacheLineSize = 64 + +func init() { + Initialized = true + + maxID, _, _, _ := cpuid(0, 0) + + if maxID < 1 { + return + } + + _, _, ecx1, edx1 := cpuid(1, 0) + X86.HasSSE2 = isSet(26, edx1) + + X86.HasSSE3 = isSet(0, ecx1) + X86.HasPCLMULQDQ = isSet(1, ecx1) + X86.HasSSSE3 = isSet(9, ecx1) + X86.HasFMA = isSet(12, ecx1) + X86.HasSSE41 = isSet(19, ecx1) + X86.HasSSE42 = isSet(20, ecx1) + X86.HasPOPCNT = isSet(23, ecx1) + X86.HasAES = isSet(25, ecx1) + X86.HasOSXSAVE = isSet(27, ecx1) + X86.HasRDRAND = isSet(30, ecx1) + + osSupportsAVX := false + // For XGETBV, OSXSAVE bit is required and sufficient. + if X86.HasOSXSAVE { + eax, _ := xgetbv() + // Check if XMM and YMM registers have OS support. 
+ osSupportsAVX = isSet(1, eax) && isSet(2, eax) + } + + X86.HasAVX = isSet(28, ecx1) && osSupportsAVX + + if maxID < 7 { + return + } + + _, ebx7, _, _ := cpuid(7, 0) + X86.HasBMI1 = isSet(3, ebx7) + X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX + X86.HasBMI2 = isSet(8, ebx7) + X86.HasERMS = isSet(9, ebx7) + X86.HasRDSEED = isSet(18, ebx7) + X86.HasADX = isSet(19, ebx7) +} + +func isSet(bitpos uint, value uint32) bool { + return value&(1<